git.hungrycats.org Git - xscreensaver/blob - hacks/glx/flurry-smoke.c

   1 /*
   2
   3 Copyright (c) 2002, Calum Robinson
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9 * Redistributions of source code must retain the above copyright notice, this
  10   list of conditions and the following disclaimer.
  11
  12 * Redistributions in binary form must reproduce the above copyright notice,
  13   this list of conditions and the following disclaimer in the documentation
  14   and/or other materials provided with the distribution.
  15
  16 * Neither the name of the author nor the names of its contributors may be used
  17   to endorse or promote products derived from this software without specific
  18   prior written permission.
  19
  20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 /* Smoke.cpp: implementation of the Smoke class. */
  34
  35 #ifdef HAVE_CONFIG_H
  36 # include "config.h"
  37 #endif
  38
  39 #include "flurry.h"
  40
  41 #define MAXANGLES 16384
  42 #define NOT_QUITE_DEAD 3
  43
  44 #define intensity 75000.0f;
  45
  46 void InitSmoke(SmokeV *s)
  47 {
  48     int i;
  49     s->nextParticle = 0;
  50     s->nextSubParticle = 0;
  51     s->lastParticleTime = 0.25f;
  52     s->firstTime = 1;
  53     s->frame = 0;
  54     for (i=0;i<3;i++) {
  55         s->old[i] = RandFlt(-100.0, 100.0);
  56     }
  57 }
  58
  59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
  60 {
  61     int i,j,k;
  62     float sx = flurry->star->position[0];
  63     float sy = flurry->star->position[1];
  64     float sz = flurry->star->position[2];
  65     double frameRate;
  66     double frameRateModifier;
  67
  68
  69     s->frame++;
  70
  71     if(!s->firstTime) {
  72         /* release 12 puffs every frame */
  73         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
  74             float dx,dy,dz,deltax,deltay,deltaz;
  75             float f;
  76             float rsquared;
  77             float mag;
  78
  79             dx = s->old[0] - sx;
  80             dy = s->old[1] - sy;
  81             dz = s->old[2] - sz;
  82             mag = 5.0f;
  83             deltax = (dx * mag);
  84             deltay = (dy * mag);
  85             deltaz = (dz * mag);
  86             for(i=0;i<flurry->numStreams;i++) {
  87                 float streamSpeedCoherenceFactor;
  88
  89                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
  90                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
  91                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
  92                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
  93                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
  94                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
  95                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
  96                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
  97                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
  98                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
  99                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 100                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 101                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 102                 rsquared = (dx*dx+dy*dy+dz*dz);
 103                 f = streamSpeed * streamSpeedCoherenceFactor;
 104
 105                 mag = f / (float) sqrt(rsquared);
 106
 107                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 108                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 109                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 110                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 111                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 112                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 113                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 114                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 115                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 116                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 117                 s->nextSubParticle++;
 118                 if (s->nextSubParticle==4) {
 119                     s->nextParticle++;
 120                     s->nextSubParticle=0;
 121                 }
 122                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 123                     s->nextParticle = 0;
 124                     s->nextSubParticle = 0;
 125                 }
 126             }
 127
 128             s->lastParticleTime = flurry->fTime;
 129         }
 130     } else {
 131         s->lastParticleTime = flurry->fTime;
 132         s->firstTime = 0;
 133     }
 134
 135     for(i=0;i<3;i++) {
 136         s->old[i] = flurry->star->position[i];
 137     }
 138
 139     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 140     frameRateModifier = 42.5f / frameRate;
 141
 142     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 143         for(k=0; k<4; k++) {
 144             float dx,dy,dz;
 145             float f;
 146             float rsquared;
 147             float mag;
 148             float deltax;
 149             float deltay;
 150             float deltaz;
 151
 152             if (s->p[i].dead.i[k]) {
 153                 continue;
 154             }
 155
 156             deltax = s->p[i].delta[0].f[k];
 157             deltay = s->p[i].delta[1].f[k];
 158             deltaz = s->p[i].delta[2].f[k];
 159
 160             for(j=0;j<flurry->numStreams;j++) {
 161                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 162                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 163                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 164                 rsquared = (dx*dx+dy*dy+dz*dz);
 165
 166                 f = (gravity/rsquared) * frameRateModifier;
 167
 168                 if ((((i*4)+k) % flurry->numStreams) == j) {
 169                     f *= 1.0f + streamBias;
 170                 }
 171
 172                 mag = f / (float) sqrt(rsquared);
 173
 174                 deltax -= (dx * mag);
 175                 deltay -= (dy * mag);
 176                 deltaz -= (dz * mag);
 177             }
 178
 179             /* slow this particle down by flurry->drag */
 180             deltax *= flurry->drag;
 181             deltay *= flurry->drag;
 182             deltaz *= flurry->drag;
 183
 184             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 185                 s->p[i].dead.i[k] = 1;
 186                 continue;
 187             }
 188
 189             /* update the position */
 190             s->p[i].delta[0].f[k] = deltax;
 191             s->p[i].delta[1].f[k] = deltay;
 192             s->p[i].delta[2].f[k] = deltaz;
 193             for(j=0;j<3;j++) {
 194                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 195                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 196             }
 197         }
 198     }
 199 }
 200
 201 #if 0
 202 #ifdef __ppc__
 203
 204 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 205 {
 206     int i,j,k;
 207     float sx = flurry->star->position[0];
 208     float sy = flurry->star->position[1];
 209     float sz = flurry->star->position[2];
 210     double frameRate;
 211     double frameRateModifier;
 212
 213
 214     s->frame++;
 215
 216     if(!s->firstTime) {
 217         /* release 12 puffs every frame */
 218         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 219             float dx,dy,dz,deltax,deltay,deltaz;
 220             float f;
 221             float rsquared;
 222             float mag;
 223
 224             dx = s->old[0] - sx;
 225             dy = s->old[1] - sy;
 226             dz = s->old[2] - sz;
 227             mag = 5.0f;
 228             deltax = (dx * mag);
 229             deltay = (dy * mag);
 230             deltaz = (dz * mag);
 231             for(i=0;i<flurry->numStreams;i++) {
 232                 float streamSpeedCoherenceFactor;
 233
 234                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 235                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 236                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 237                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 238                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 239                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 240                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 241                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 242                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 243                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 244                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 245                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 246                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 247                 rsquared = (dx*dx+dy*dy+dz*dz);
 248                 f = streamSpeed * streamSpeedCoherenceFactor;
 249
 250                 mag = f / (float) sqrt(rsquared);
 251                 /*
 252                     reciprocal square-root estimate replaced above divide and call to system sqrt()
 253
 254                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 255                     mag *= f;
 256                 */
 257
 258                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 259                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 260                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 261                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 262                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 263                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 264                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 265                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 266                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 267                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 268                 s->nextSubParticle++;
 269                 if (s->nextSubParticle==4) {
 270                     s->nextParticle++;
 271                     s->nextSubParticle=0;
 272                 }
 273                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 274                     s->nextParticle = 0;
 275                     s->nextSubParticle = 0;
 276                 }
 277             }
 278
 279             s->lastParticleTime = flurry->fTime;
 280         }
 281     } else {
 282         s->lastParticleTime = flurry->fTime;
 283         s->firstTime = 0;
 284     }
 285
 286     for(i=0;i<3;i++) {
 287         s->old[i] = flurry->star->position[i];
 288     }
 289
 290     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 291     frameRateModifier = 42.5f / frameRate;
 292
 293     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 294         for(k=0; k<4; k++) {
 295             float dx,dy,dz;
 296             float f;
 297             float rsquared;
 298             float mag;
 299             float deltax;
 300             float deltay;
 301             float deltaz;
 302
 303             if (s->p[i].dead.i[k]) {
 304                 continue;
 305             }
 306
 307             deltax = s->p[i].delta[0].f[k];
 308             deltay = s->p[i].delta[1].f[k];
 309             deltaz = s->p[i].delta[2].f[k];
 310
 311             for(j=0;j<flurry->numStreams;j++) {
 312                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 313                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 314                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 315                 rsquared = (dx*dx+dy*dy+dz*dz);
 316
 317                 /*
 318                     asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
 319                     f *= gravity*frameRateModifier;
 320                 */
 321                 f = ( gravity  * frameRateModifier ) / rsquared;
 322
 323                 if((((i*4)+k) % flurry->numStreams) == j) {
 324                     f *= 1.0f + streamBias;
 325                 }
 326
 327                 mag = f / (float) sqrt(rsquared);
 328
 329                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 330
 331                 deltax -= (dx * mag);
 332                 deltay -= (dy * mag);
 333                 deltaz -= (dz * mag);
 334             }
 335
 336             /* slow this particle down by flurry->drag */
 337             deltax *= flurry->drag;
 338             deltay *= flurry->drag;
 339             deltaz *= flurry->drag;
 340
 341             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 342                 s->p[i].dead.i[k] = 1;
 343                 continue;
 344             }
 345
 346             /* update the position */
 347             s->p[i].delta[0].f[k] = deltax;
 348             s->p[i].delta[1].f[k] = deltay;
 349             s->p[i].delta[2].f[k] = deltaz;
 350             for(j=0;j<3;j++) {
 351                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 352                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 353             }
 354         }
 355     }
 356 }
 357
 358 #endif
 359
 360 #ifdef __VEC__
 361
 362 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 363 {
 364     unsigned int i,j;
 365     float sx = flurry->star->position[0];
 366     float sy = flurry->star->position[1];
 367     float sz = flurry->star->position[2];
 368     double frameRate;
 369     floatToVector frameRateModifier;
 370     floatToVector gravityV;
 371     floatToVector dragV;
 372     floatToVector deltaTimeV;
 373     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 374     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 375     const vector float biasConst = (vector float)(streamBias);
 376
 377     gravityV.f[0] = gravity;
 378     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 379
 380     dragV.f[0] = flurry->drag;
 381     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 382
 383     deltaTimeV.f[0] = flurry->fDeltaTime;
 384     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 385
 386     s->frame++;
 387
 388     if(!s->firstTime) {
 389         /* release 12 puffs every frame */
 390         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 391             float dx,dy,dz,deltax,deltay,deltaz;
 392             float f;
 393             float rsquared;
 394             float mag;
 395
 396             dx = s->old[0] - sx;
 397             dy = s->old[1] - sy;
 398             dz = s->old[2] - sz;
 399             mag = 5.0f;
 400             deltax = (dx * mag);
 401             deltay = (dy * mag);
 402             deltaz = (dz * mag);
 403             for(i=0;i<flurry->numStreams;i++) {
 404                 float streamSpeedCoherenceFactor;
 405
 406                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 407                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 408                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 409                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 410                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 411                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 412                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 413                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 414                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 415                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 416                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 417                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 418                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 419                 rsquared = (dx*dx+dy*dy+dz*dz);
 420                 f = streamSpeed * streamSpeedCoherenceFactor;
 421
 422                 mag = f / (float) sqrt(rsquared);
 423                 /*
 424                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 425                     mag *= f;
 426                 */
 427
 428                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 429                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 430                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 431                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 432                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 433                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 434                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 435                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 436                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 437                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 438                 s->nextSubParticle++;
 439                 if (s->nextSubParticle==4) {
 440                     s->nextParticle++;
 441                     s->nextSubParticle=0;
 442                 }
 443                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 444                     s->nextParticle = 0;
 445                     s->nextSubParticle = 0;
 446                 }
 447             }
 448
 449             s->lastParticleTime = flurry->fTime;
 450         }
 451     } else {
 452         s->lastParticleTime = flurry->fTime;
 453         s->firstTime = 0;
 454     }
 455
 456     for(i=0;i<3;i++) {
 457         s->old[i] = flurry->star->position[i];
 458     }
 459
 460     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 461     frameRateModifier.f[0] = 42.5f / frameRate;
 462     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 463
 464     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 465
 466     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 467         /* floatToVector f; */
 468         vector float deltax, deltay, deltaz;
 469         vector float distTemp;
 470         vector unsigned int deadTemp;
 471         /* floatToVector infopos0, infopos1, infopos2; */
 472         intToVector mod;
 473         vector unsigned int jVec;
 474
 475
 476         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 477
 478         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 479             continue;
 480         }
 481
 482         deltax = s->p[i].delta[0].v;
 483         deltay = s->p[i].delta[1].v;
 484         deltaz = s->p[i].delta[2].v;
 485
 486         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 487         if(mod.i[0]+1 == flurry->numStreams) {
 488             mod.i[1] = 0;
 489         } else {
 490             mod.i[1] = mod.i[0]+1;
 491         }
 492         if(mod.i[1]+1 == flurry->numStreams) {
 493             mod.i[2] = 0;
 494         } else {
 495             mod.i[2] = mod.i[1]+1;
 496         }
 497         if(mod.i[2]+1 == flurry->numStreams) {
 498             mod.i[3] = 0;
 499         } else {
 500             mod.i[3] = mod.i[2]+1;
 501         }
 502
 503         jVec = vec_xor(jVec, jVec);
 504
 505         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 506         for(j=0; j<flurry->numStreams;j++) {
 507             vector float ip0, ip1 = (vector float)(0.0), ip2;
 508             vector float dx, dy, dz;
 509             vector float rsquared, f;
 510             vector float one_over_rsquared;
 511             vector float biasTemp;
 512             vector float mag;
 513             vector bool int biasOr;
 514
 515             ip0 = vec_ld(0, flurry->spark[j]->position);
 516             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 517                 ip1 = vec_ld(16, flurry->spark[j]->position);
 518             }
 519
 520             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 521             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 522             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 523             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 524
 525             dx = vec_sub(s->p[i].position[0].v, ip0);
 526             dy = vec_sub(s->p[i].position[1].v, ip1);
 527             dz = vec_sub(s->p[i].position[2].v, ip2);
 528
 529             rsquared = vec_madd(dx, dx, zero);
 530             rsquared = vec_madd(dy, dy, rsquared);
 531             rsquared = vec_madd(dz, dz, rsquared);
 532
 533             biasOr = vec_cmpeq(jVec, mod.v);
 534             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 535
 536             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 537             one_over_rsquared = vec_re(rsquared);
 538             f = vec_madd(f, one_over_rsquared, zero);
 539
 540             mag = vec_rsqrte(rsquared);
 541             mag = vec_madd(mag, f, zero);
 542
 543             deltax = vec_nmsub(dx, mag, deltax);
 544             deltay = vec_nmsub(dy, mag, deltay);
 545             deltaz = vec_nmsub(dz, mag, deltaz);
 546
 547             jVec = vec_add(jVec, (vector unsigned int)(1));
 548         }
 549
 550         /* slow this particle down by flurry->drag */
 551         deltax = vec_madd(deltax, dragV.v, zero);
 552         deltay = vec_madd(deltay, dragV.v, zero);
 553         deltaz = vec_madd(deltaz, dragV.v, zero);
 554
 555         distTemp = vec_madd(deltax, deltax, zero);
 556         distTemp = vec_madd(deltay, deltay, distTemp);
 557         distTemp = vec_madd(deltaz, deltaz, distTemp);
 558
 559         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 560         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 561         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 562         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 563             continue;
 564         }
 565
 566         /* update the position */
 567         s->p[i].delta[0].v = deltax;
 568         s->p[i].delta[1].v = deltay;
 569         s->p[i].delta[2].v = deltaz;
 570         for(j=0;j<3;j++) {
 571             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 572             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 573         }
 574     }
 575 }
 576
 577 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
 578 {
 579     unsigned int i,j;
 580     float sx = flurry->star->position[0];
 581     float sy = flurry->star->position[1];
 582     float sz = flurry->star->position[2];
 583     double frameRate;
 584     floatToVector frameRateModifier;
 585     floatToVector gravityV;
 586     floatToVector dragV;
 587     floatToVector deltaTimeV;
 588     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 589     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 590     const vector float biasConst = (vector float)(streamBias);
 591
 592     gravityV.f[0] = gravity;
 593     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 594
 595     dragV.f[0] = flurry->drag;
 596     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 597
 598     deltaTimeV.f[0] = flurry->fDeltaTime;
 599     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 600
 601     s->frame++;
 602
 603     if(!s->firstTime) {
 604         /* release 12 puffs every frame */
 605         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 606             float dx,dy,dz,deltax,deltay,deltaz;
 607             float f;
 608             float rsquared;
 609             float mag;
 610
 611             dx = s->old[0] - sx;
 612             dy = s->old[1] - sy;
 613             dz = s->old[2] - sz;
 614             mag = 5.0f;
 615             deltax = (dx * mag);
 616             deltay = (dy * mag);
 617             deltaz = (dz * mag);
 618             for(i=0;i<flurry->numStreams;i++) {
 619                 float streamSpeedCoherenceFactor;
 620
 621                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 622                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 623                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 624                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 625                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 626                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 627                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 628                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 629                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 630                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 631                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 632                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 633                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 634                 rsquared = (dx*dx+dy*dy+dz*dz);
 635                 f = streamSpeed * streamSpeedCoherenceFactor;
 636
 637                 mag = f / (float) sqrt(rsquared);
 638                 /*
 639                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 640                     mag *= f;
 641                 */
 642
 643                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 644                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 645                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 646                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 647                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 648                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 649                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 650                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 651                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 652                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 653                 s->nextSubParticle++;
 654                 if (s->nextSubParticle==4) {
 655                     s->nextParticle++;
 656                     s->nextSubParticle=0;
 657                 }
 658                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 659                     s->nextParticle = 0;
 660                     s->nextSubParticle = 0;
 661                 }
 662             }
 663
 664             s->lastParticleTime = flurry->fTime;
 665         }
 666     } else {
 667         s->lastParticleTime = flurry->fTime;
 668         s->firstTime = 0;
 669     }
 670
 671     for(i=0;i<3;i++) {
 672         s->old[i] = flurry->star->position[i];
 673     }
 674
 675     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 676     frameRateModifier.f[0] = 42.5f / frameRate;
 677     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 678
 679     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 680
 681     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 682         /* floatToVector f; */
 683         vector float deltax, deltay, deltaz;
 684         vector float distTemp;
 685         vector unsigned int deadTemp;
 686         /* floatToVector infopos0, infopos1, infopos2; */
 687         intToVector mod;
 688         vector unsigned int jVec;
 689         vector unsigned int intOne = vec_splat_u32(1);
 690         vector float floatOne = vec_ctf(intOne, 0);
 691
 692
 693         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 694
 695         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 696             continue;
 697         }
 698
 699         deltax = s->p[i].delta[0].v;
 700         deltay = s->p[i].delta[1].v;
 701         deltaz = s->p[i].delta[2].v;
 702
 703         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 704         if(mod.i[0]+1 == flurry->numStreams) {
 705             mod.i[1] = 0;
 706         } else {
 707             mod.i[1] = mod.i[0]+1;
 708         }
 709         if(mod.i[1]+1 == flurry->numStreams) {
 710             mod.i[2] = 0;
 711         } else {
 712             mod.i[2] = mod.i[1]+1;
 713         }
 714         if(mod.i[2]+1 == flurry->numStreams) {
 715             mod.i[3] = 0;
 716         } else {
 717             mod.i[3] = mod.i[2]+1;
 718         }
 719
 720         jVec = vec_xor(jVec, jVec);
 721
 722         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 723         for(j=0; j + 3 < flurry->numStreams;j+=4)
 724         {
 725             vector float dxa, dya, dza;
 726             vector float dxb, dyb, dzb;
 727             vector float dxc, dyc, dzc;
 728             vector float dxd, dyd, dzd;
 729             vector float ip0a, ip1a;
 730             vector float ip0b, ip1b;
 731             vector float ip0c, ip1c;
 732             vector float ip0d, ip1d;
 733             vector float rsquaredA;
 734             vector float rsquaredB;
 735             vector float rsquaredC;
 736             vector float rsquaredD;
 737             vector float fA, fB, fC, fD;
 738             vector float biasTempA;
 739             vector float biasTempB;
 740             vector float biasTempC;
 741             vector float biasTempD;
 742             vector float magA;
 743             vector float magB;
 744             vector float magC;
 745             vector float magD;
 746
 747             vector float one_over_rsquaredA;
 748             vector float one_over_rsquaredB;
 749             vector float one_over_rsquaredC;
 750             vector float one_over_rsquaredD;
 751             vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
 752
 753             /* load vectors */
 754             ip0a = vec_ld(0, flurry->spark[j]->position);
 755             ip0b = vec_ld(0, flurry->spark[j+1]->position);
 756             ip0c = vec_ld(0, flurry->spark[j+2]->position);
 757             ip0d = vec_ld(0, flurry->spark[j+3]->position);
 758             ip1a = vec_ld( 12, flurry->spark[j]->position );
 759             ip1b = vec_ld( 12, flurry->spark[j+1]->position );
 760             ip1c = vec_ld( 12, flurry->spark[j+2]->position );
 761             ip1d = vec_ld( 12, flurry->spark[j+3]->position );
 762
 763             /* align them */
 764             ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
 765             ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
 766             ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
 767             ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
 768
 769             dxa = vec_splat( ip0a, 0  );
 770             dxb = vec_splat( ip0b, 0  );
 771             dxc = vec_splat( ip0c, 0  );
 772             dxd = vec_splat( ip0d, 0  );
 773             dxa = vec_sub( s->p[i].position[0].v, dxa );
 774             dxb = vec_sub( s->p[i].position[0].v, dxb );
 775             dxc = vec_sub( s->p[i].position[0].v, dxc );
 776             dxd = vec_sub( s->p[i].position[0].v, dxd );
 777
 778             dya = vec_splat( ip0a, 1  );
 779             dyb = vec_splat( ip0b, 1  );
 780             dyc = vec_splat( ip0c, 1  );
 781             dyd = vec_splat( ip0d, 1  );
 782             dya = vec_sub( s->p[i].position[1].v, dya );
 783             dyb = vec_sub( s->p[i].position[1].v, dyb );
 784             dyc = vec_sub( s->p[i].position[1].v, dyc );
 785             dyd = vec_sub( s->p[i].position[1].v, dyd );
 786
 787             dza = vec_splat( ip0a, 2  );
 788             dzb = vec_splat( ip0b, 2  );
 789             dzc = vec_splat( ip0c, 2  );
 790             dzd = vec_splat( ip0d, 2  );
 791             dza = vec_sub( s->p[i].position[2].v, dza );
 792             dzb = vec_sub( s->p[i].position[2].v, dzb );
 793             dzc = vec_sub( s->p[i].position[2].v, dzc );
 794             dzd = vec_sub( s->p[i].position[2].v, dzd );
 795
 796             rsquaredA = vec_madd( dxa, dxa, zero );
 797             rsquaredB = vec_madd( dxb, dxb, zero );
 798             rsquaredC = vec_madd( dxc, dxc, zero );
 799             rsquaredD = vec_madd( dxd, dxd, zero );
 800
 801             rsquaredA = vec_madd( dya, dya, rsquaredA );
 802             rsquaredB = vec_madd( dyb, dyb, rsquaredB );
 803             rsquaredC = vec_madd( dyc, dyc, rsquaredC );
 804             rsquaredD = vec_madd( dyd, dyd, rsquaredD );
 805
 806             rsquaredA = vec_madd( dza, dza, rsquaredA );
 807             rsquaredB = vec_madd( dzb, dzb, rsquaredB );
 808             rsquaredC = vec_madd( dzc, dzc, rsquaredC );
 809             rsquaredD = vec_madd( dzd, dzd, rsquaredD );
 810
 811             biasOrA = vec_cmpeq( jVec, mod.v );
 812             jVec = vec_add(jVec, intOne);
 813             biasOrB = vec_cmpeq( jVec, mod.v );
 814             jVec = vec_add(jVec, intOne);
 815             biasOrC = vec_cmpeq( jVec, mod.v );
 816             jVec = vec_add(jVec, intOne);
 817             biasOrD = vec_cmpeq( jVec, mod.v );
 818             jVec = vec_add(jVec, intOne);
 819
 820             biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
 821             biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
 822             biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
 823             biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
 824
 825             fA = vec_madd( biasTempA, frameRateModifier.v, zero);
 826             fB = vec_madd( biasTempB, frameRateModifier.v, zero);
 827             fC = vec_madd( biasTempC, frameRateModifier.v, zero);
 828             fD = vec_madd( biasTempD, frameRateModifier.v, zero);
 829             one_over_rsquaredA = vec_re( rsquaredA );
 830             one_over_rsquaredB = vec_re( rsquaredB );
 831             one_over_rsquaredC = vec_re( rsquaredC );
 832             one_over_rsquaredD = vec_re( rsquaredD );
 833             fA = vec_madd( fA, one_over_rsquaredA, zero);
 834             fB = vec_madd( fB, one_over_rsquaredB, zero);
 835             fC = vec_madd( fC, one_over_rsquaredC, zero);
 836             fD = vec_madd( fD, one_over_rsquaredD, zero);
 837             magA = vec_rsqrte( rsquaredA );
 838             magB = vec_rsqrte( rsquaredB );
 839             magC = vec_rsqrte( rsquaredC );
 840             magD = vec_rsqrte( rsquaredD );
 841             magA = vec_madd( magA, fA, zero );
 842             magB = vec_madd( magB, fB, zero );
 843             magC = vec_madd( magC, fC, zero );
 844             magD = vec_madd( magD, fD, zero );
 845             deltax = vec_nmsub( dxa, magA, deltax );
 846             deltay = vec_nmsub( dya, magA, deltay );
 847             deltaz = vec_nmsub( dza, magA, deltaz );
 848
 849             deltax = vec_nmsub( dxb, magB, deltax );
 850             deltay = vec_nmsub( dyb, magB, deltay );
 851             deltaz = vec_nmsub( dzb, magB, deltaz );
 852
 853             deltax = vec_nmsub( dxc, magC, deltax );
 854             deltay = vec_nmsub( dyc, magC, deltay );
 855             deltaz = vec_nmsub( dzc, magC, deltaz );
 856
 857             deltax = vec_nmsub( dxd, magD, deltax );
 858             deltay = vec_nmsub( dyd, magD, deltay );
 859             deltaz = vec_nmsub( dzd, magD, deltaz );
 860         }
 861
 862
 863         for(;j<flurry->numStreams;j++) {
 864             vector float ip0, ip1 = (vector float)(0.0), ip2;
 865             vector float dx, dy, dz;
 866             vector float rsquared, f;
 867             vector float one_over_rsquared;
 868             vector float biasTemp;
 869             vector float mag;
 870             vector bool int biasOr;
 871
 872             ip0 = vec_ld(0, flurry->spark[j]->position);
 873             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 874                 ip1 = vec_ld(16, flurry->spark[j]->position);
 875             }
 876
 877             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 878             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 879             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 880             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 881
 882             dx = vec_sub(s->p[i].position[0].v, ip0);
 883             dy = vec_sub(s->p[i].position[1].v, ip1);
 884             dz = vec_sub(s->p[i].position[2].v, ip2);
 885
 886             rsquared = vec_madd(dx, dx, zero);
 887             rsquared = vec_madd(dy, dy, rsquared);
 888             rsquared = vec_madd(dz, dz, rsquared);
 889
 890             biasOr = vec_cmpeq(jVec, mod.v);
 891             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 892
 893             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 894             one_over_rsquared = vec_re(rsquared);
 895             f = vec_madd(f, one_over_rsquared, zero);
 896
 897             mag = vec_rsqrte(rsquared);
 898             mag = vec_madd(mag, f, zero);
 899
 900             deltax = vec_nmsub(dx, mag, deltax);
 901             deltay = vec_nmsub(dy, mag, deltay);
 902             deltaz = vec_nmsub(dz, mag, deltaz);
 903
 904             jVec = vec_add(jVec, (vector unsigned int)(1));
 905         }
 906
 907         /* slow this particle down by flurry->drag */
 908         deltax = vec_madd(deltax, dragV.v, zero);
 909         deltay = vec_madd(deltay, dragV.v, zero);
 910         deltaz = vec_madd(deltaz, dragV.v, zero);
 911
 912         distTemp = vec_madd(deltax, deltax, zero);
 913         distTemp = vec_madd(deltay, deltay, distTemp);
 914         distTemp = vec_madd(deltaz, deltaz, distTemp);
 915
 916         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 917         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 918         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 919         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 920             continue;
 921         }
 922
 923         /* update the position */
 924         s->p[i].delta[0].v = deltax;
 925         s->p[i].delta[1].v = deltay;
 926         s->p[i].delta[2].v = deltaz;
 927         for(j=0;j<3;j++) {
 928             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 929             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 930         }
 931     }
 932 }
 933
 934 #endif
 935 #endif /* 0 */
 936
 937 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
 938 {
 939         int svi = 0;
 940         int sci = 0;
 941         int sti = 0;
 942         int si = 0;
 943         float width;
 944         float sx,sy;
 945         float u0,v0,u1,v1;
 946         float w,z;
 947         float screenRatio = global->sys_glWidth / 1024.0f;
 948         float hslash2 = global->sys_glHeight * 0.5f;
 949         float wslash2 = global->sys_glWidth * 0.5f;
 950         int i,k;
 951
 952         width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
 953
 954         for (i=0;i<NUMSMOKEPARTICLES/4;i++)
 955         {
 956             for (k=0; k<4; k++) {
 957                 float thisWidth;
 958                 float oldz;
 959
 960                 if (s->p[i].dead.i[k]) {
 961                     continue;
 962                 }
 963                 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
 964                 if (thisWidth >= width)
 965                 {
 966                         s->p[i].dead.i[k] = 1;
 967                         continue;
 968                 }
 969                 z = s->p[i].position[2].f[k];
 970                 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
 971                 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
 972                 oldz = s->p[i].oldposition[2].f[k];
 973                 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
 974                 {
 975                         continue;
 976                 }
 977
 978                 w = MAX_(1.0f,thisWidth/z);
 979                 {
 980                         float oldx = s->p[i].oldposition[0].f[k];
 981                         float oldy = s->p[i].oldposition[1].f[k];
 982                         float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
 983                         float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
 984                         float dx = (sx-oldscreenx);
 985                         float dy = (sy-oldscreeny);
 986
 987                         float d = FastDistance2D(dx, dy);
 988
 989                         float sm, os, ow;
 990                         if (d)
 991                         {
 992                                 sm = w/d;
 993                         }
 994                         else
 995                         {
 996                                 sm = 0.0f;
 997                         }
 998                         ow = MAX_(1.0f,thisWidth/oldz);
 999                         if (d)
1000                         {
1001                                 os = ow/d;
1002                         }
1003                         else
1004                         {
1005                                 os = 0.0f;
1006                         }
1007
1008                         {
1009                                 floatToVector cmv;
1010                                 float cm;
1011                                 float m = 1.0f + sm;
1012
1013                                 float dxs = dx*sm;
1014                                 float dys = dy*sm;
1015                                 float dxos = dx*os;
1016                                 float dyos = dy*os;
1017                                 float dxm = dx*m;
1018                                 float dym = dy*m;
1019
1020                                 s->p[i].animFrame.i[k]++;
1021                                 if (s->p[i].animFrame.i[k] >= 64)
1022                                 {
1023                                         s->p[i].animFrame.i[k] = 0;
1024                                 }
1025
1026                                 u0 = (s->p[i].animFrame.i[k]& 7) * 0.125f;
1027                                 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
1028                                 u1 = u0 + 0.125f;
1029                                 v1 = v0 + 0.125f;
1030                                 cm = (1.375f - thisWidth/width);
1031                                 if (s->p[i].dead.i[k] == 3)
1032                                 {
1033                                         cm *= 0.125f;
1034                                         s->p[i].dead.i[k] = 1;
1035                                 }
1036                                 si++;
1037                                 cm *= brightness;
1038                                 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1039                                 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1040                                 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1041                                 cmv.f[3] = s->p[i].color[3].f[k]*cm;
1042
1043 #if 0
1044                                 /* MDT we can't use vectors in the Scalar routine */
1045                                 s->seraphimColors[sci++].v = cmv.v;
1046                                 s->seraphimColors[sci++].v = cmv.v;
1047                                 s->seraphimColors[sci++].v = cmv.v;
1048                                 s->seraphimColors[sci++].v = cmv.v;
1049 #else
1050                                 {
1051                                     int ii, jj;
1052                                     for (jj = 0; jj < 4; jj++) {
1053                                         for (ii = 0; ii < 4; ii++) {
1054                                             s->seraphimColors[sci].f[ii] = cmv.f[ii];
1055                                         }
1056                                         sci += 1;
1057                                     }
1058                                 }
1059 #endif
1060
1061                                 s->seraphimTextures[sti++] = u0;
1062                                 s->seraphimTextures[sti++] = v0;
1063                                 s->seraphimTextures[sti++] = u0;
1064                                 s->seraphimTextures[sti++] = v1;
1065
1066                                 s->seraphimTextures[sti++] = u1;
1067                                 s->seraphimTextures[sti++] = v1;
1068                                 s->seraphimTextures[sti++] = u1;
1069                                 s->seraphimTextures[sti++] = v0;
1070
1071                                 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1072                                 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1073                                 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1074                                 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1075                                 svi++;
1076
1077                                 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1078                                 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1079                                 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1080                                 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
1081                                 svi++;
1082                         }
1083                 }
1084             }
1085         }
1086         glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1087         glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1088         glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1089         glDrawArrays(GL_QUADS,0,si*4);
1090 }
1091
1092 #if 0
1093 #ifdef __VEC__
1094
1095 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1096 {
1097     const vector float zero = (vector float)(0.0);
1098     int svi = 0;
1099     int sci = 0;
1100     int sti = 0;
1101     int si = 0;
1102     floatToVector width;
1103     vector float sx,sy;
1104     floatToVector u0,v0,u1,v1;
1105     vector float one_over_z;
1106     vector float w;
1107     floatToVector z;
1108     float screenRatio = global->sys_glWidth / 1024.0f;
1109     float hslash2 = global->sys_glHeight * 0.5f;
1110     float wslash2 = global->sys_glWidth * 0.5f;
1111     int i,kk;
1112     floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1113     floatToVector glWidthV;
1114     floatToVector cm;
1115     vector float cmv[4];
1116     vector float svec[4], ovec[4];
1117     vector float oldscreenx, oldscreeny;
1118     vector float sm;
1119     vector float frameAnd7;
1120     vector float frameShift3;
1121     vector float one_over_width;
1122     vector float dx, dy;
1123     vector float os;
1124     vector unsigned int vSi = vec_splat_u32(0);
1125     const vector float eighth = (vector float)(0.125);
1126     float glWidth50 = global->sys_glWidth + 50.0f;
1127     float glHeight50 = global->sys_glHeight + 50.0f;
1128     vector float vGLWidth50, vGLHeight50;
1129     unsigned int blitBool;
1130
1131     vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
1132
1133     {
1134         vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1135         vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1136         permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1137         permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1138         vGLWidth50 = vec_lde( 0, &glWidth50 );
1139         vGLHeight50 = vec_lde( 0, &glHeight50 );
1140         vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1141         vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
1142     }
1143
1144     width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1145     width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1146
1147     briteV.f[0] = brightness;
1148     briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1149
1150     fTimeV.f[0] = (float) flurry->fTime;
1151     fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1152
1153     expansionV.f[0] = flurry->streamExpansion;
1154     expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1155
1156     screenRatioV.f[0] = screenRatio;
1157     screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1158
1159     hslash2V.f[0] = hslash2;
1160     hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1161
1162     wslash2V.f[0] = wslash2;
1163     wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1164
1165     streamSizeV.f[0] = streamSize;
1166     streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1167
1168     glWidthV.f[0] = global->sys_glWidth;
1169     glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
1170
1171     for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1172         vector float thisWidth;
1173         vector float oldz;
1174         vector float oldx, oldy, one_over_oldz;
1175         vector float xabs, yabs, mn;
1176         vector float d;
1177         vector float one_over_d;
1178         vector bool int dnz;
1179         vector float ow;
1180
1181         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
1182
1183         if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1184
1185         blitBool = 0; /* keep track of particles that actually need to be drawn */
1186
1187         thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1188         thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1189         thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
1190
1191         z.v = s->p[i].position[2].v;
1192         one_over_z = vec_re(z.v);
1193
1194         sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1195         sx = vec_madd(sx, one_over_z, wslash2V.v);
1196         sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1197         sy = vec_madd(sy, one_over_z, hslash2V.v);
1198
1199         oldz = s->p[i].oldposition[2].v;
1200
1201         w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
1202
1203         oldx = s->p[i].oldposition[0].v;
1204         oldy = s->p[i].oldposition[1].v;
1205         one_over_oldz = vec_re(oldz);
1206         oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1207         oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1208         oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1209         oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1210         dx = vec_sub(sx,oldscreenx);
1211         dy = vec_sub(sy,oldscreeny);
1212
1213         xabs = vec_abs(dx);
1214         yabs = vec_abs(dy);
1215         mn = vec_min(xabs,yabs);
1216         d = vec_add(xabs,yabs);
1217         d = vec_madd(mn, (vector float)(-0.6875), d);
1218
1219         ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1220         one_over_d = vec_re(d);
1221         dnz = vec_cmpgt(d, zero);
1222         sm = vec_madd(w, one_over_d, zero);
1223         sm = vec_and(sm, dnz);
1224         os = vec_madd(ow, one_over_d, zero);
1225         os = vec_and(os, dnz);
1226
1227         {
1228             intToVector tempMask;
1229             vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1230             vector bool int  gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1231             vector bool int  glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1232             vector bool int  glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1233             vector bool int  test50x    = vec_cmplt( sx, (vector float) (-50.0) );
1234             vector bool int  test50y    = vec_cmplt( sy, (vector float) (-50.0) );
1235             vector bool int  testz      = vec_cmplt( z.v, (vector float) (25.0) );
1236             vector bool int  testoldz   = vec_cmplt( oldz, (vector float) (25.0) );
1237             mask = vec_or( mask, gtMask );
1238             s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1239             mask = vec_or( mask, glWidth50Test );
1240             mask = vec_or( mask, glHeight50Test );
1241             mask = vec_or( mask, test50x );
1242             mask = vec_or( mask, test50y );
1243             mask = vec_or( mask, testz );
1244             mask = vec_or( mask, testoldz );
1245             tempMask.v = (vector unsigned int)mask;
1246
1247             s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1248             s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
1249
1250             frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1251             u0.v = vec_madd(frameAnd7, eighth, zero);
1252
1253             frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1254             v0.v = vec_madd(frameAnd7, eighth, zero);
1255
1256             u1.v = vec_add(u0.v, eighth);
1257             v1.v = vec_add(v0.v, eighth);
1258
1259             one_over_width = vec_re(width.v);
1260             cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1261             cm.v = vec_madd(cm.v, briteV.v, zero);
1262
1263             vSi = vec_sub( vSi, vec_nor( mask, mask ) );
1264             {
1265                 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1266                 vector unsigned int temp = (vector unsigned int)mask;
1267                 temp = vec_andc( blitMask, temp  );
1268                 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1269                 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1270                 vec_ste( temp, 0, &blitBool );
1271
1272             }
1273
1274             {
1275                 vector float temp1, temp2, temp3, temp4;
1276                 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1277
1278                 temp1 = vec_mergeh( u0.v, u0.v );
1279                 temp2 = vec_mergel( u0.v, u0.v );
1280                 temp3 = vec_mergeh( v0.v, v1.v );
1281                 temp4 = vec_mergel( v0.v, v1.v );
1282
1283                 result1a = vec_mergeh( temp1, temp3 );
1284                 result1b = vec_mergel( temp1, temp3 );
1285                 result2a = vec_mergeh( temp2, temp4 );
1286                 result2b = vec_mergel( temp2, temp4 );
1287
1288                 temp1 = vec_mergeh( u1.v, u1.v );
1289                 temp2 = vec_mergel( u1.v, u1.v );
1290                 temp3 = vec_mergeh( v1.v, v0.v );
1291                 temp4 = vec_mergel( v1.v, v0.v );
1292
1293                 result3a = vec_mergeh( temp1, temp3 );
1294                 result3b = vec_mergel( temp1, temp3 );
1295                 result4a = vec_mergeh( temp2, temp4 );
1296                 result4b = vec_mergel( temp2, temp4 );
1297
1298                 if( blitBool & 1 )
1299                 {
1300                     vec_st( result1a, 0, &s->seraphimTextures[sti] );
1301                     vec_st( result3a, 16, &s->seraphimTextures[sti]);
1302                     sti+= 8;
1303                 }
1304                 if( blitBool & 2 )
1305                 {
1306                     vec_st( result1b, 0, &s->seraphimTextures[sti]);
1307                     vec_st( result3b, 16, &s->seraphimTextures[sti]);
1308                     sti+= 8;
1309                 }
1310                 if( blitBool & 4 )
1311                 {
1312                     vec_st( result2a, 0, &s->seraphimTextures[sti]);
1313                     vec_st( result4a, 16, &s->seraphimTextures[sti]);
1314                     sti+= 8;
1315                 }
1316                 if( blitBool & 8 )
1317                 {
1318                     vec_st( result2b, 0, &s->seraphimTextures[sti]);
1319                     vec_st( result4b, 16, &s->seraphimTextures[sti]);
1320                     sti+= 8;
1321                 }
1322             }
1323         }
1324
1325         cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1326         cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1327         cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1328         cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1329         {
1330             vector float vI0, vI1, vI2, vI3;
1331
1332             vI0 = vec_mergeh ( cmv[0], cmv[2] );
1333             vI1 = vec_mergeh ( cmv[1], cmv[3] );
1334             vI2 = vec_mergel ( cmv[0], cmv[2] );
1335             vI3 = vec_mergel ( cmv[1], cmv[3] );
1336
1337             cmv[0] = vec_mergeh ( vI0, vI1 );
1338             cmv[1] = vec_mergel ( vI0, vI1 );
1339             cmv[2] = vec_mergeh ( vI2, vI3 );
1340             cmv[3] = vec_mergel ( vI2, vI3 );
1341         }
1342
1343         vec_dst( cmv, 0x0D0100D0, 1 );
1344
1345         {
1346             vector float sxd, syd;
1347             vector float sxdm, sxdp, sydm, sydp;
1348             vector float oxd, oyd;
1349             vector float oxdm, oxdp, oydm, oydp;
1350             vector float vI0, vI1, vI2, vI3;
1351             vector float dxs, dys;
1352             vector float dxos, dyos;
1353             vector float dxm, dym;
1354             vector float m;
1355
1356             m = vec_add((vector float)(1.0), sm);
1357
1358             dxs = vec_madd(dx, sm, zero);
1359             dys = vec_madd(dy, sm, zero);
1360             dxos = vec_madd(dx, os, zero);
1361             dyos = vec_madd(dy, os, zero);
1362             dxm = vec_madd(dx, m, zero);
1363             dym = vec_madd(dy, m, zero);
1364
1365             sxd = vec_add(sx, dxm);
1366             sxdm = vec_sub(sxd, dys);
1367             sxdp = vec_add(sxd, dys);
1368
1369             syd = vec_add(sy, dym);
1370             sydm = vec_sub(syd, dxs);
1371             sydp = vec_add(syd, dxs);
1372
1373             oxd = vec_sub(oldscreenx, dxm);
1374             oxdm = vec_sub(oxd, dyos);
1375             oxdp = vec_add(oxd, dyos);
1376
1377             oyd = vec_sub(oldscreeny, dym);
1378             oydm = vec_sub(oyd, dxos);
1379             oydp = vec_add(oyd, dxos);
1380
1381             vI0 = vec_mergeh ( sxdm, sxdp );
1382             vI1 = vec_mergeh ( sydp, sydm );
1383             vI2 = vec_mergel ( sxdm, sxdp );
1384             vI3 = vec_mergel ( sydp, sydm );
1385
1386             svec[0] = vec_mergeh ( vI0, vI1 );
1387             svec[1] = vec_mergel ( vI0, vI1 );
1388             svec[2] = vec_mergeh ( vI2, vI3 );
1389             svec[3] = vec_mergel ( vI2, vI3 );
1390
1391             vI0 = vec_mergeh ( oxdp, oxdm );
1392             vI1 = vec_mergeh ( oydm, oydp );
1393             vI2 = vec_mergel ( oxdp, oxdm );
1394             vI3 = vec_mergel ( oydm, oydp );
1395
1396             ovec[0] = vec_mergeh ( vI0, vI1 );
1397             ovec[1] = vec_mergel ( vI0, vI1 );
1398             ovec[2] = vec_mergeh ( vI2, vI3 );
1399             ovec[3] = vec_mergel ( vI2, vI3 );
1400         }
1401
1402         {
1403             int offset0 = (sci + 0) * sizeof( vector float );
1404             int offset1 = (sci + 1) * sizeof( vector float );
1405             int offset2 = (sci + 2) * sizeof( vector float );
1406             int offset3 = (sci + 3) * sizeof( vector float );
1407             int offset4 = (svi + 0) * sizeof( vector float );
1408             int offset5 = (svi + 1) * sizeof( vector float );
1409             vector float *colors = (vector float *)s->seraphimColors;
1410             vector float *vertices = (vector float *)s->seraphimVertices;
1411             for (kk=0; kk<4; kk++) {
1412                 if (blitBool>>kk & 1) {
1413                     vector float vcmv = cmv[kk];
1414                     vector float vsvec = svec[kk];
1415                     vector float vovec = ovec[kk];
1416
1417                     vec_st( vcmv, offset0, colors );
1418                     vec_st( vcmv, offset1, colors );
1419                     vec_st( vcmv, offset2, colors );
1420                     vec_st( vcmv, offset3, colors );
1421                     vec_st( vsvec, offset4, vertices );
1422                     vec_st( vovec, offset5, vertices );
1423                     colors += 4;
1424                     vertices += 2;
1425                     sci += 4;
1426                     svi += 2;
1427                 }
1428             }
1429         }
1430     }
1431     vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1432     vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1433     vec_ste( (vector signed int) vSi, 0, &si );
1434
1435     glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1436     glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1437     glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1438     glDrawArrays(GL_QUADS,0,si*4);
1439 }
1440
1441 #endif
1442 #endif /* 0 */