git.hungrycats.org Git - xscreensaver/blob - hacks/glx/flurry-smoke.c

   1 /*
   2
   3 Copyright (c) 2002, Calum Robinson
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9 * Redistributions of source code must retain the above copyright notice, this
  10   list of conditions and the following disclaimer.
  11
  12 * Redistributions in binary form must reproduce the above copyright notice,
  13   this list of conditions and the following disclaimer in the documentation
  14   and/or other materials provided with the distribution.
  15
  16 * Neither the name of the author nor the names of its contributors may be used
  17   to endorse or promote products derived from this software without specific
  18   prior written permission.
  19
  20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 /* Smoke.cpp: implementation of the Smoke class. */
  34
  35 #ifdef HAVE_CONFIG_H
  36 # include "config.h"
  37 #endif
  38
  39 #include "flurry.h"
  40
  41 #define MAXANGLES 16384
  42 #define NOT_QUITE_DEAD 3
  43
  44 #define intensity 75000.0f;
  45
  46 void InitSmoke(SmokeV *s)
  47 {
  48     int i;
  49     s->nextParticle = 0;
  50     s->nextSubParticle = 0;
  51     s->lastParticleTime = 0.25f;
  52     s->firstTime = 1;
  53     s->frame = 0;
  54     for (i=0;i<3;i++) {
  55         s->old[i] = RandFlt(-100.0, 100.0);
  56     }
  57 }
  58
  59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
  60 {
  61     int i,j,k;
  62     float sx = flurry->star->position[0];
  63     float sy = flurry->star->position[1];
  64     float sz = flurry->star->position[2];
  65     double frameRate;
  66     double frameRateModifier;
  67
  68
  69     s->frame++;
  70
  71     if(!s->firstTime) {
  72         /* release 12 puffs every frame */
  73         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
  74             float dx,dy,dz,deltax,deltay,deltaz;
  75             float f;
  76             float rsquared;
  77             float mag;
  78
  79             dx = s->old[0] - sx;
  80             dy = s->old[1] - sy;
  81             dz = s->old[2] - sz;
  82             mag = 5.0f;
  83             deltax = (dx * mag);
  84             deltay = (dy * mag);
  85             deltaz = (dz * mag);
  86             for(i=0;i<flurry->numStreams;i++) {
  87                 float streamSpeedCoherenceFactor;
  88
  89                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
  90                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
  91                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
  92                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
  93                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
  94                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
  95                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
  96                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
  97                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
  98                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
  99                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 100                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 101                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 102                 rsquared = (dx*dx+dy*dy+dz*dz);
 103                 f = streamSpeed * streamSpeedCoherenceFactor;
 104
 105                 mag = f / (float) sqrt(rsquared);
 106
 107                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 108                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 109                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 110                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 111                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 112                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 113                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 114                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 115                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 116                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 117                 s->nextSubParticle++;
 118                 if (s->nextSubParticle==4) {
 119                     s->nextParticle++;
 120                     s->nextSubParticle=0;
 121                 }
 122                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 123                     s->nextParticle = 0;
 124                     s->nextSubParticle = 0;
 125                 }
 126             }
 127
 128             s->lastParticleTime = flurry->fTime;
 129         }
 130     } else {
 131         s->lastParticleTime = flurry->fTime;
 132         s->firstTime = 0;
 133     }
 134
 135     for(i=0;i<3;i++) {
 136         s->old[i] = flurry->star->position[i];
 137     }
 138
 139     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 140     frameRateModifier = 42.5f / frameRate;
 141
 142     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 143         for(k=0; k<4; k++) {
 144             float dx,dy,dz;
 145             float f;
 146             float rsquared;
 147             float mag;
 148             float deltax;
 149             float deltay;
 150             float deltaz;
 151
 152             if (s->p[i].dead.i[k]) {
 153                 continue;
 154             }
 155
 156             deltax = s->p[i].delta[0].f[k];
 157             deltay = s->p[i].delta[1].f[k];
 158             deltaz = s->p[i].delta[2].f[k];
 159
 160             for(j=0;j<flurry->numStreams;j++) {
 161                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 162                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 163                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 164                 rsquared = (dx*dx+dy*dy+dz*dz);
 165
 166                 f = (gravity/rsquared) * frameRateModifier;
 167
 168                 if ((((i*4)+k) % flurry->numStreams) == j) {
 169                     f *= 1.0f + streamBias;
 170                 }
 171                 mag = f / (float) sqrt(rsquared);
 172
 173                 deltax -= (dx * mag);
 174                 deltay -= (dy * mag);
 175                 deltaz -= (dz * mag);
 176             }
 177
 178             /* slow this particle down by flurry->drag */
 179             deltax *= flurry->drag;
 180             deltay *= flurry->drag;
 181             deltaz *= flurry->drag;
 182
 183             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 184                 s->p[i].dead.i[k] = 1;
 185                 continue;
 186             }
 187
 188             /* update the position */
 189             s->p[i].delta[0].f[k] = deltax;
 190             s->p[i].delta[1].f[k] = deltay;
 191             s->p[i].delta[2].f[k] = deltaz;
 192             for(j=0;j<3;j++) {
 193                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 194                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 195             }
 196         }
 197     }
 198 }
 199
 200 #if 0
 201 #ifdef __ppc__
 202
 203 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 204 {
 205     int i,j,k;
 206     float sx = flurry->star->position[0];
 207     float sy = flurry->star->position[1];
 208     float sz = flurry->star->position[2];
 209     double frameRate;
 210     double frameRateModifier;
 211
 212
 213     s->frame++;
 214
 215     if(!s->firstTime) {
 216         /* release 12 puffs every frame */
 217         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 218             float dx,dy,dz,deltax,deltay,deltaz;
 219             float f;
 220             float rsquared;
 221             float mag;
 222
 223             dx = s->old[0] - sx;
 224             dy = s->old[1] - sy;
 225             dz = s->old[2] - sz;
 226             mag = 5.0f;
 227             deltax = (dx * mag);
 228             deltay = (dy * mag);
 229             deltaz = (dz * mag);
 230             for(i=0;i<flurry->numStreams;i++) {
 231                 float streamSpeedCoherenceFactor;
 232
 233                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 234                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 235                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 236                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 237                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 238                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 239                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 240                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 241                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 242                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 243                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 244                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 245                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 246                 rsquared = (dx*dx+dy*dy+dz*dz);
 247                 f = streamSpeed * streamSpeedCoherenceFactor;
 248
 249                 mag = f / (float) sqrt(rsquared);
 250                 /*
 251                     reciprocal square-root estimate replaced above divide and call to system sqrt()
 252
 253                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 254                     mag *= f;
 255                 */
 256
 257                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 258                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 259                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 260                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 261                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 262                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 263                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 264                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 265                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 266                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 267                 s->nextSubParticle++;
 268                 if (s->nextSubParticle==4) {
 269                     s->nextParticle++;
 270                     s->nextSubParticle=0;
 271                 }
 272                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 273                     s->nextParticle = 0;
 274                     s->nextSubParticle = 0;
 275                 }
 276             }
 277
 278             s->lastParticleTime = flurry->fTime;
 279         }
 280     } else {
 281         s->lastParticleTime = flurry->fTime;
 282         s->firstTime = 0;
 283     }
 284
 285     for(i=0;i<3;i++) {
 286         s->old[i] = flurry->star->position[i];
 287     }
 288
 289     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 290     frameRateModifier = 42.5f / frameRate;
 291
 292     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 293         for(k=0; k<4; k++) {
 294             float dx,dy,dz;
 295             float f;
 296             float rsquared;
 297             float mag;
 298             float deltax;
 299             float deltay;
 300             float deltaz;
 301
 302             if (s->p[i].dead.i[k]) {
 303                 continue;
 304             }
 305
 306             deltax = s->p[i].delta[0].f[k];
 307             deltay = s->p[i].delta[1].f[k];
 308             deltaz = s->p[i].delta[2].f[k];
 309
 310             for(j=0;j<flurry->numStreams;j++) {
 311                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 312                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 313                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 314                 rsquared = (dx*dx+dy*dy+dz*dz);
 315
 316                 /*
 317                     asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
 318                     f *= gravity*frameRateModifier;
 319                 */
 320                 f = ( gravity  * frameRateModifier ) / rsquared;
 321
 322                 if((((i*4)+k) % flurry->numStreams) == j) {
 323                     f *= 1.0f + streamBias;
 324                 }
 325
 326                 mag = f / (float) sqrt(rsquared);
 327
 328                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 329
 330                 deltax -= (dx * mag);
 331                 deltay -= (dy * mag);
 332                 deltaz -= (dz * mag);
 333             }
 334
 335             /* slow this particle down by flurry->drag */
 336             deltax *= flurry->drag;
 337             deltay *= flurry->drag;
 338             deltaz *= flurry->drag;
 339
 340             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 341                 s->p[i].dead.i[k] = 1;
 342                 continue;
 343             }
 344
 345             /* update the position */
 346             s->p[i].delta[0].f[k] = deltax;
 347             s->p[i].delta[1].f[k] = deltay;
 348             s->p[i].delta[2].f[k] = deltaz;
 349             for(j=0;j<3;j++) {
 350                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 351                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 352             }
 353         }
 354     }
 355 }
 356
 357 #endif
 358
 359 #ifdef __VEC__
 360
 361 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 362 {
 363     unsigned int i,j;
 364     float sx = flurry->star->position[0];
 365     float sy = flurry->star->position[1];
 366     float sz = flurry->star->position[2];
 367     double frameRate;
 368     floatToVector frameRateModifier;
 369     floatToVector gravityV;
 370     floatToVector dragV;
 371     floatToVector deltaTimeV;
 372     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 373     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 374     const vector float biasConst = (vector float)(streamBias);
 375
 376     gravityV.f[0] = gravity;
 377     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 378
 379     dragV.f[0] = flurry->drag;
 380     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 381
 382     deltaTimeV.f[0] = flurry->fDeltaTime;
 383     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 384
 385     s->frame++;
 386
 387     if(!s->firstTime) {
 388         /* release 12 puffs every frame */
 389         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 390             float dx,dy,dz,deltax,deltay,deltaz;
 391             float f;
 392             float rsquared;
 393             float mag;
 394
 395             dx = s->old[0] - sx;
 396             dy = s->old[1] - sy;
 397             dz = s->old[2] - sz;
 398             mag = 5.0f;
 399             deltax = (dx * mag);
 400             deltay = (dy * mag);
 401             deltaz = (dz * mag);
 402             for(i=0;i<flurry->numStreams;i++) {
 403                 float streamSpeedCoherenceFactor;
 404
 405                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 406                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 407                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 408                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 409                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 410                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 411                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 412                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 413                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 414                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 415                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 416                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 417                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 418                 rsquared = (dx*dx+dy*dy+dz*dz);
 419                 f = streamSpeed * streamSpeedCoherenceFactor;
 420
 421                 mag = f / (float) sqrt(rsquared);
 422                 /*
 423                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 424                     mag *= f;
 425                 */
 426
 427                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 428                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 429                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 430                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 431                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 432                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 433                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 434                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 435                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 436                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 437                 s->nextSubParticle++;
 438                 if (s->nextSubParticle==4) {
 439                     s->nextParticle++;
 440                     s->nextSubParticle=0;
 441                 }
 442                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 443                     s->nextParticle = 0;
 444                     s->nextSubParticle = 0;
 445                 }
 446             }
 447
 448             s->lastParticleTime = flurry->fTime;
 449         }
 450     } else {
 451         s->lastParticleTime = flurry->fTime;
 452         s->firstTime = 0;
 453     }
 454
 455     for(i=0;i<3;i++) {
 456         s->old[i] = flurry->star->position[i];
 457     }
 458
 459     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 460     frameRateModifier.f[0] = 42.5f / frameRate;
 461     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 462
 463     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 464
 465     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 466         /* floatToVector f; */
 467         vector float deltax, deltay, deltaz;
 468         vector float distTemp;
 469         vector unsigned int deadTemp;
 470         /* floatToVector infopos0, infopos1, infopos2; */
 471         intToVector mod;
 472         vector unsigned int jVec;
 473
 474
 475         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 476
 477         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 478             continue;
 479         }
 480
 481         deltax = s->p[i].delta[0].v;
 482         deltay = s->p[i].delta[1].v;
 483         deltaz = s->p[i].delta[2].v;
 484
 485         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 486         if(mod.i[0]+1 == flurry->numStreams) {
 487             mod.i[1] = 0;
 488         } else {
 489             mod.i[1] = mod.i[0]+1;
 490         }
 491         if(mod.i[1]+1 == flurry->numStreams) {
 492             mod.i[2] = 0;
 493         } else {
 494             mod.i[2] = mod.i[1]+1;
 495         }
 496         if(mod.i[2]+1 == flurry->numStreams) {
 497             mod.i[3] = 0;
 498         } else {
 499             mod.i[3] = mod.i[2]+1;
 500         }
 501
 502         jVec = vec_xor(jVec, jVec);
 503
 504         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 505         for(j=0; j<flurry->numStreams;j++) {
 506             vector float ip0, ip1 = (vector float)(0.0), ip2;
 507             vector float dx, dy, dz;
 508             vector float rsquared, f;
 509             vector float one_over_rsquared;
 510             vector float biasTemp;
 511             vector float mag;
 512             vector bool int biasOr;
 513
 514             ip0 = vec_ld(0, flurry->spark[j]->position);
 515             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 516                 ip1 = vec_ld(16, flurry->spark[j]->position);
 517             }
 518
 519             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 520             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 521             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 522             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 523
 524             dx = vec_sub(s->p[i].position[0].v, ip0);
 525             dy = vec_sub(s->p[i].position[1].v, ip1);
 526             dz = vec_sub(s->p[i].position[2].v, ip2);
 527
 528             rsquared = vec_madd(dx, dx, zero);
 529             rsquared = vec_madd(dy, dy, rsquared);
 530             rsquared = vec_madd(dz, dz, rsquared);
 531
 532             biasOr = vec_cmpeq(jVec, mod.v);
 533             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 534
 535             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 536             one_over_rsquared = vec_re(rsquared);
 537             f = vec_madd(f, one_over_rsquared, zero);
 538
 539             mag = vec_rsqrte(rsquared);
 540             mag = vec_madd(mag, f, zero);
 541
 542             deltax = vec_nmsub(dx, mag, deltax);
 543             deltay = vec_nmsub(dy, mag, deltay);
 544             deltaz = vec_nmsub(dz, mag, deltaz);
 545
 546             jVec = vec_add(jVec, (vector unsigned int)(1));
 547         }
 548
 549         /* slow this particle down by flurry->drag */
 550         deltax = vec_madd(deltax, dragV.v, zero);
 551         deltay = vec_madd(deltay, dragV.v, zero);
 552         deltaz = vec_madd(deltaz, dragV.v, zero);
 553
 554         distTemp = vec_madd(deltax, deltax, zero);
 555         distTemp = vec_madd(deltay, deltay, distTemp);
 556         distTemp = vec_madd(deltaz, deltaz, distTemp);
 557
 558         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 559         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 560         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 561         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 562             continue;
 563         }
 564
 565         /* update the position */
 566         s->p[i].delta[0].v = deltax;
 567         s->p[i].delta[1].v = deltay;
 568         s->p[i].delta[2].v = deltaz;
 569         for(j=0;j<3;j++) {
 570             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 571             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 572         }
 573     }
 574 }
 575
 576 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
 577 {
 578     unsigned int i,j;
 579     float sx = flurry->star->position[0];
 580     float sy = flurry->star->position[1];
 581     float sz = flurry->star->position[2];
 582     double frameRate;
 583     floatToVector frameRateModifier;
 584     floatToVector gravityV;
 585     floatToVector dragV;
 586     floatToVector deltaTimeV;
 587     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 588     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 589     const vector float biasConst = (vector float)(streamBias);
 590
 591     gravityV.f[0] = gravity;
 592     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 593
 594     dragV.f[0] = flurry->drag;
 595     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 596
 597     deltaTimeV.f[0] = flurry->fDeltaTime;
 598     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 599
 600     s->frame++;
 601
 602     if(!s->firstTime) {
 603         /* release 12 puffs every frame */
 604         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 605             float dx,dy,dz,deltax,deltay,deltaz;
 606             float f;
 607             float rsquared;
 608             float mag;
 609
 610             dx = s->old[0] - sx;
 611             dy = s->old[1] - sy;
 612             dz = s->old[2] - sz;
 613             mag = 5.0f;
 614             deltax = (dx * mag);
 615             deltay = (dy * mag);
 616             deltaz = (dz * mag);
 617             for(i=0;i<flurry->numStreams;i++) {
 618                 float streamSpeedCoherenceFactor;
 619
 620                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 621                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 622                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 623                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 624                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 625                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 626                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 627                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 628                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 629                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 630                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 631                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 632                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 633                 rsquared = (dx*dx+dy*dy+dz*dz);
 634                 f = streamSpeed * streamSpeedCoherenceFactor;
 635
 636                 mag = f / (float) sqrt(rsquared);
 637                 /*
 638                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 639                     mag *= f;
 640                 */
 641
 642                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 643                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 644                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 645                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 646                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 647                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 648                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 649                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 650                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 651                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 652                 s->nextSubParticle++;
 653                 if (s->nextSubParticle==4) {
 654                     s->nextParticle++;
 655                     s->nextSubParticle=0;
 656                 }
 657                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 658                     s->nextParticle = 0;
 659                     s->nextSubParticle = 0;
 660                 }
 661             }
 662
 663             s->lastParticleTime = flurry->fTime;
 664         }
 665     } else {
 666         s->lastParticleTime = flurry->fTime;
 667         s->firstTime = 0;
 668     }
 669
 670     for(i=0;i<3;i++) {
 671         s->old[i] = flurry->star->position[i];
 672     }
 673
 674     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 675     frameRateModifier.f[0] = 42.5f / frameRate;
 676     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 677
 678     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 679
 680     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 681         /* floatToVector f; */
 682         vector float deltax, deltay, deltaz;
 683         vector float distTemp;
 684         vector unsigned int deadTemp;
 685         /* floatToVector infopos0, infopos1, infopos2; */
 686         intToVector mod;
 687         vector unsigned int jVec;
 688         vector unsigned int intOne = vec_splat_u32(1);
 689         vector float floatOne = vec_ctf(intOne, 0);
 690
 691
 692         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 693
 694         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 695             continue;
 696         }
 697
 698         deltax = s->p[i].delta[0].v;
 699         deltay = s->p[i].delta[1].v;
 700         deltaz = s->p[i].delta[2].v;
 701
 702         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 703         if(mod.i[0]+1 == flurry->numStreams) {
 704             mod.i[1] = 0;
 705         } else {
 706             mod.i[1] = mod.i[0]+1;
 707         }
 708         if(mod.i[1]+1 == flurry->numStreams) {
 709             mod.i[2] = 0;
 710         } else {
 711             mod.i[2] = mod.i[1]+1;
 712         }
 713         if(mod.i[2]+1 == flurry->numStreams) {
 714             mod.i[3] = 0;
 715         } else {
 716             mod.i[3] = mod.i[2]+1;
 717         }
 718
 719         jVec = vec_xor(jVec, jVec);
 720
 721         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 722         for(j=0; j + 3 < flurry->numStreams;j+=4)
 723         {
 724             vector float dxa, dya, dza;
 725             vector float dxb, dyb, dzb;
 726             vector float dxc, dyc, dzc;
 727             vector float dxd, dyd, dzd;
 728             vector float ip0a, ip1a;
 729             vector float ip0b, ip1b;
 730             vector float ip0c, ip1c;
 731             vector float ip0d, ip1d;
 732             vector float rsquaredA;
 733             vector float rsquaredB;
 734             vector float rsquaredC;
 735             vector float rsquaredD;
 736             vector float fA, fB, fC, fD;
 737             vector float biasTempA;
 738             vector float biasTempB;
 739             vector float biasTempC;
 740             vector float biasTempD;
 741             vector float magA;
 742             vector float magB;
 743             vector float magC;
 744             vector float magD;
 745
 746             vector float one_over_rsquaredA;
 747             vector float one_over_rsquaredB;
 748             vector float one_over_rsquaredC;
 749             vector float one_over_rsquaredD;
 750             vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
 751
 752             /* load vectors */
 753             ip0a = vec_ld(0, flurry->spark[j]->position);
 754             ip0b = vec_ld(0, flurry->spark[j+1]->position);
 755             ip0c = vec_ld(0, flurry->spark[j+2]->position);
 756             ip0d = vec_ld(0, flurry->spark[j+3]->position);
 757             ip1a = vec_ld( 12, flurry->spark[j]->position );
 758             ip1b = vec_ld( 12, flurry->spark[j+1]->position );
 759             ip1c = vec_ld( 12, flurry->spark[j+2]->position );
 760             ip1d = vec_ld( 12, flurry->spark[j+3]->position );
 761
 762             /* align them */
 763             ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
 764             ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
 765             ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
 766             ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
 767
 768             dxa = vec_splat( ip0a, 0  );
 769             dxb = vec_splat( ip0b, 0  );
 770             dxc = vec_splat( ip0c, 0  );
 771             dxd = vec_splat( ip0d, 0  );
 772             dxa = vec_sub( s->p[i].position[0].v, dxa );
 773             dxb = vec_sub( s->p[i].position[0].v, dxb );
 774             dxc = vec_sub( s->p[i].position[0].v, dxc );
 775             dxd = vec_sub( s->p[i].position[0].v, dxd );
 776
 777             dya = vec_splat( ip0a, 1  );
 778             dyb = vec_splat( ip0b, 1  );
 779             dyc = vec_splat( ip0c, 1  );
 780             dyd = vec_splat( ip0d, 1  );
 781             dya = vec_sub( s->p[i].position[1].v, dya );
 782             dyb = vec_sub( s->p[i].position[1].v, dyb );
 783             dyc = vec_sub( s->p[i].position[1].v, dyc );
 784             dyd = vec_sub( s->p[i].position[1].v, dyd );
 785
 786             dza = vec_splat( ip0a, 2  );
 787             dzb = vec_splat( ip0b, 2  );
 788             dzc = vec_splat( ip0c, 2  );
 789             dzd = vec_splat( ip0d, 2  );
 790             dza = vec_sub( s->p[i].position[2].v, dza );
 791             dzb = vec_sub( s->p[i].position[2].v, dzb );
 792             dzc = vec_sub( s->p[i].position[2].v, dzc );
 793             dzd = vec_sub( s->p[i].position[2].v, dzd );
 794
 795             rsquaredA = vec_madd( dxa, dxa, zero );
 796             rsquaredB = vec_madd( dxb, dxb, zero );
 797             rsquaredC = vec_madd( dxc, dxc, zero );
 798             rsquaredD = vec_madd( dxd, dxd, zero );
 799
 800             rsquaredA = vec_madd( dya, dya, rsquaredA );
 801             rsquaredB = vec_madd( dyb, dyb, rsquaredB );
 802             rsquaredC = vec_madd( dyc, dyc, rsquaredC );
 803             rsquaredD = vec_madd( dyd, dyd, rsquaredD );
 804
 805             rsquaredA = vec_madd( dza, dza, rsquaredA );
 806             rsquaredB = vec_madd( dzb, dzb, rsquaredB );
 807             rsquaredC = vec_madd( dzc, dzc, rsquaredC );
 808             rsquaredD = vec_madd( dzd, dzd, rsquaredD );
 809
 810             biasOrA = vec_cmpeq( jVec, mod.v );
 811             jVec = vec_add(jVec, intOne);
 812             biasOrB = vec_cmpeq( jVec, mod.v );
 813             jVec = vec_add(jVec, intOne);
 814             biasOrC = vec_cmpeq( jVec, mod.v );
 815             jVec = vec_add(jVec, intOne);
 816             biasOrD = vec_cmpeq( jVec, mod.v );
 817             jVec = vec_add(jVec, intOne);
 818
 819             biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
 820             biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
 821             biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
 822             biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
 823
 824             fA = vec_madd( biasTempA, frameRateModifier.v, zero);
 825             fB = vec_madd( biasTempB, frameRateModifier.v, zero);
 826             fC = vec_madd( biasTempC, frameRateModifier.v, zero);
 827             fD = vec_madd( biasTempD, frameRateModifier.v, zero);
 828             one_over_rsquaredA = vec_re( rsquaredA );
 829             one_over_rsquaredB = vec_re( rsquaredB );
 830             one_over_rsquaredC = vec_re( rsquaredC );
 831             one_over_rsquaredD = vec_re( rsquaredD );
 832             fA = vec_madd( fA, one_over_rsquaredA, zero);
 833             fB = vec_madd( fB, one_over_rsquaredB, zero);
 834             fC = vec_madd( fC, one_over_rsquaredC, zero);
 835             fD = vec_madd( fD, one_over_rsquaredD, zero);
 836             magA = vec_rsqrte( rsquaredA );
 837             magB = vec_rsqrte( rsquaredB );
 838             magC = vec_rsqrte( rsquaredC );
 839             magD = vec_rsqrte( rsquaredD );
 840             magA = vec_madd( magA, fA, zero );
 841             magB = vec_madd( magB, fB, zero );
 842             magC = vec_madd( magC, fC, zero );
 843             magD = vec_madd( magD, fD, zero );
 844             deltax = vec_nmsub( dxa, magA, deltax );
 845             deltay = vec_nmsub( dya, magA, deltay );
 846             deltaz = vec_nmsub( dza, magA, deltaz );
 847
 848             deltax = vec_nmsub( dxb, magB, deltax );
 849             deltay = vec_nmsub( dyb, magB, deltay );
 850             deltaz = vec_nmsub( dzb, magB, deltaz );
 851
 852             deltax = vec_nmsub( dxc, magC, deltax );
 853             deltay = vec_nmsub( dyc, magC, deltay );
 854             deltaz = vec_nmsub( dzc, magC, deltaz );
 855
 856             deltax = vec_nmsub( dxd, magD, deltax );
 857             deltay = vec_nmsub( dyd, magD, deltay );
 858             deltaz = vec_nmsub( dzd, magD, deltaz );
 859         }
 860
 861
 862         for(;j<flurry->numStreams;j++) {
 863             vector float ip0, ip1 = (vector float)(0.0), ip2;
 864             vector float dx, dy, dz;
 865             vector float rsquared, f;
 866             vector float one_over_rsquared;
 867             vector float biasTemp;
 868             vector float mag;
 869             vector bool int biasOr;
 870
 871             ip0 = vec_ld(0, flurry->spark[j]->position);
 872             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 873                 ip1 = vec_ld(16, flurry->spark[j]->position);
 874             }
 875
 876             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 877             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 878             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 879             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 880
 881             dx = vec_sub(s->p[i].position[0].v, ip0);
 882             dy = vec_sub(s->p[i].position[1].v, ip1);
 883             dz = vec_sub(s->p[i].position[2].v, ip2);
 884
 885             rsquared = vec_madd(dx, dx, zero);
 886             rsquared = vec_madd(dy, dy, rsquared);
 887             rsquared = vec_madd(dz, dz, rsquared);
 888
 889             biasOr = vec_cmpeq(jVec, mod.v);
 890             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 891
 892             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 893             one_over_rsquared = vec_re(rsquared);
 894             f = vec_madd(f, one_over_rsquared, zero);
 895
 896             mag = vec_rsqrte(rsquared);
 897             mag = vec_madd(mag, f, zero);
 898
 899             deltax = vec_nmsub(dx, mag, deltax);
 900             deltay = vec_nmsub(dy, mag, deltay);
 901             deltaz = vec_nmsub(dz, mag, deltaz);
 902
 903             jVec = vec_add(jVec, (vector unsigned int)(1));
 904         }
 905
 906         /* slow this particle down by flurry->drag */
 907         deltax = vec_madd(deltax, dragV.v, zero);
 908         deltay = vec_madd(deltay, dragV.v, zero);
 909         deltaz = vec_madd(deltaz, dragV.v, zero);
 910
 911         distTemp = vec_madd(deltax, deltax, zero);
 912         distTemp = vec_madd(deltay, deltay, distTemp);
 913         distTemp = vec_madd(deltaz, deltaz, distTemp);
 914
 915         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 916         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 917         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 918         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 919             continue;
 920         }
 921
 922         /* update the position */
 923         s->p[i].delta[0].v = deltax;
 924         s->p[i].delta[1].v = deltay;
 925         s->p[i].delta[2].v = deltaz;
 926         for(j=0;j<3;j++) {
 927             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 928             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 929         }
 930     }
 931 }
 932
 933 #endif
 934 #endif /* 0 */
 935
 936 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
 937 {
 938         int svi = 0;
 939         int sci = 0;
 940         int sti = 0;
 941         int si = 0;
 942         float width;
 943         float sx,sy;
 944         float u0,v0,u1,v1;
 945         float w,z;
 946         float screenRatio = global->sys_glWidth / 1024.0f;
 947         float hslash2 = global->sys_glHeight * 0.5f;
 948         float wslash2 = global->sys_glWidth * 0.5f;
 949         int i,k;
 950
 951         width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
 952
 953         for (i=0;i<NUMSMOKEPARTICLES/4;i++)
 954         {
 955             for (k=0; k<4; k++) {
 956                 float thisWidth;
 957                 float oldz;
 958
 959                 if (s->p[i].dead.i[k]) {
 960                     continue;
 961                 }
 962                 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
 963                 if (thisWidth >= width)
 964                 {
 965                         s->p[i].dead.i[k] = 1;
 966                         continue;
 967                 }
 968                 z = s->p[i].position[2].f[k];
 969                 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
 970                 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
 971                 oldz = s->p[i].oldposition[2].f[k];
 972                 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
 973                 {
 974                         continue;
 975                 }
 976
 977                 w = MAX_(1.0f,thisWidth/z);
 978                 {
 979                         float oldx = s->p[i].oldposition[0].f[k];
 980                         float oldy = s->p[i].oldposition[1].f[k];
 981                         float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
 982                         float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
 983                         float dx = (sx-oldscreenx);
 984                         float dy = (sy-oldscreeny);
 985
 986                         float d = FastDistance2D(dx, dy);
 987
 988                         float sm, os, ow;
 989                         if (d)
 990                         {
 991                                 sm = w/d;
 992                         }
 993                         else
 994                         {
 995                                 sm = 0.0f;
 996                         }
 997                         ow = MAX_(1.0f,thisWidth/oldz);
 998                         if (d)
 999                         {
1000                                 os = ow/d;
1001                         }
1002                         else
1003                         {
1004                                 os = 0.0f;
1005                         }
1006
1007                         {
1008                                 floatToVector cmv;
1009                                 float cm;
1010                                 float m = 1.0f + sm;
1011
1012                                 float dxs = dx*sm;
1013                                 float dys = dy*sm;
1014                                 float dxos = dx*os;
1015                                 float dyos = dy*os;
1016                                 float dxm = dx*m;
1017                                 float dym = dy*m;
1018
1019                                 s->p[i].animFrame.i[k]++;
1020                                 if (s->p[i].animFrame.i[k] >= 64)
1021                                 {
1022                                         s->p[i].animFrame.i[k] = 0;
1023                                 }
1024
1025                                 u0 = (s->p[i].animFrame.i[k]& 7) * 0.125f;
1026                                 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
1027                                 u1 = u0 + 0.125f;
1028                                 v1 = v0 + 0.125f;
1029                                 cm = (1.375f - thisWidth/width);
1030                                 if (s->p[i].dead.i[k] == 3)
1031                                 {
1032                                         cm *= 0.125f;
1033                                         s->p[i].dead.i[k] = 1;
1034                                 }
1035                                 si++;
1036                                 cm *= brightness;
1037                                 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1038                                 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1039                                 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1040                                 cmv.f[3] = s->p[i].color[3].f[k]*cm;
1041
1042 #if 0
1043                                 /* MDT we can't use vectors in the Scalar routine */
1044                                 s->seraphimColors[sci++].v = cmv.v;
1045                                 s->seraphimColors[sci++].v = cmv.v;
1046                                 s->seraphimColors[sci++].v = cmv.v;
1047                                 s->seraphimColors[sci++].v = cmv.v;
1048 #else
1049                                 {
1050                                     int ii, jj;
1051                                     for (jj = 0; jj < 4; jj++) {
1052                                         for (ii = 0; ii < 4; ii++) {
1053                                             s->seraphimColors[sci].f[ii] = cmv.f[ii];
1054                                         }
1055                                         sci += 1;
1056                                     }
1057                                 }
1058 #endif
1059
1060                                 s->seraphimTextures[sti++] = u0;
1061                                 s->seraphimTextures[sti++] = v0;
1062                                 s->seraphimTextures[sti++] = u0;
1063                                 s->seraphimTextures[sti++] = v1;
1064
1065                                 s->seraphimTextures[sti++] = u1;
1066                                 s->seraphimTextures[sti++] = v1;
1067                                 s->seraphimTextures[sti++] = u1;
1068                                 s->seraphimTextures[sti++] = v0;
1069
1070                                 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1071                                 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1072                                 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1073                                 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1074                                 svi++;
1075
1076                                 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1077                                 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1078                                 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1079                                 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
1080                                 svi++;
1081                         }
1082                 }
1083             }
1084         }
1085         glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1086         glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1087         glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1088         glDrawArrays(GL_QUADS,0,si*4);
1089 }
1090
1091 #if 0
1092 #ifdef __VEC__
1093
1094 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1095 {
1096     const vector float zero = (vector float)(0.0);
1097     int svi = 0;
1098     int sci = 0;
1099     int sti = 0;
1100     int si = 0;
1101     floatToVector width;
1102     vector float sx,sy;
1103     floatToVector u0,v0,u1,v1;
1104     vector float one_over_z;
1105     vector float w;
1106     floatToVector z;
1107     float screenRatio = global->sys_glWidth / 1024.0f;
1108     float hslash2 = global->sys_glHeight * 0.5f;
1109     float wslash2 = global->sys_glWidth * 0.5f;
1110     int i,kk;
1111     floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1112     floatToVector glWidthV;
1113     floatToVector cm;
1114     vector float cmv[4];
1115     vector float svec[4], ovec[4];
1116     vector float oldscreenx, oldscreeny;
1117     vector float sm;
1118     vector float frameAnd7;
1119     vector float frameShift3;
1120     vector float one_over_width;
1121     vector float dx, dy;
1122     vector float os;
1123     vector unsigned int vSi = vec_splat_u32(0);
1124     const vector float eighth = (vector float)(0.125);
1125     float glWidth50 = global->sys_glWidth + 50.0f;
1126     float glHeight50 = global->sys_glHeight + 50.0f;
1127     vector float vGLWidth50, vGLHeight50;
1128     unsigned int blitBool;
1129
1130     vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
1131
1132     {
1133         vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1134         vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1135         permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1136         permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1137         vGLWidth50 = vec_lde( 0, &glWidth50 );
1138         vGLHeight50 = vec_lde( 0, &glHeight50 );
1139         vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1140         vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
1141     }
1142
1143     width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1144     width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1145
1146     briteV.f[0] = brightness;
1147     briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1148
1149     fTimeV.f[0] = (float) flurry->fTime;
1150     fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1151
1152     expansionV.f[0] = flurry->streamExpansion;
1153     expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1154
1155     screenRatioV.f[0] = screenRatio;
1156     screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1157
1158     hslash2V.f[0] = hslash2;
1159     hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1160
1161     wslash2V.f[0] = wslash2;
1162     wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1163
1164     streamSizeV.f[0] = streamSize;
1165     streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1166
1167     glWidthV.f[0] = global->sys_glWidth;
1168     glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
1169
1170     for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1171         vector float thisWidth;
1172         vector float oldz;
1173         vector float oldx, oldy, one_over_oldz;
1174         vector float xabs, yabs, mn;
1175         vector float d;
1176         vector float one_over_d;
1177         vector bool int dnz;
1178         vector float ow;
1179
1180         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
1181
1182         if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1183
1184         blitBool = 0; /* keep track of particles that actually need to be drawn */
1185
1186         thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1187         thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1188         thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
1189
1190         z.v = s->p[i].position[2].v;
1191         one_over_z = vec_re(z.v);
1192
1193         sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1194         sx = vec_madd(sx, one_over_z, wslash2V.v);
1195         sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1196         sy = vec_madd(sy, one_over_z, hslash2V.v);
1197
1198         oldz = s->p[i].oldposition[2].v;
1199
1200         w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
1201
1202         oldx = s->p[i].oldposition[0].v;
1203         oldy = s->p[i].oldposition[1].v;
1204         one_over_oldz = vec_re(oldz);
1205         oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1206         oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1207         oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1208         oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1209         dx = vec_sub(sx,oldscreenx);
1210         dy = vec_sub(sy,oldscreeny);
1211
1212         xabs = vec_abs(dx);
1213         yabs = vec_abs(dy);
1214         mn = vec_min(xabs,yabs);
1215         d = vec_add(xabs,yabs);
1216         d = vec_madd(mn, (vector float)(-0.6875), d);
1217
1218         ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1219         one_over_d = vec_re(d);
1220         dnz = vec_cmpgt(d, zero);
1221         sm = vec_madd(w, one_over_d, zero);
1222         sm = vec_and(sm, dnz);
1223         os = vec_madd(ow, one_over_d, zero);
1224         os = vec_and(os, dnz);
1225
1226         {
1227             intToVector tempMask;
1228             vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1229             vector bool int  gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1230             vector bool int  glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1231             vector bool int  glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1232             vector bool int  test50x    = vec_cmplt( sx, (vector float) (-50.0) );
1233             vector bool int  test50y    = vec_cmplt( sy, (vector float) (-50.0) );
1234             vector bool int  testz      = vec_cmplt( z.v, (vector float) (25.0) );
1235             vector bool int  testoldz   = vec_cmplt( oldz, (vector float) (25.0) );
1236             mask = vec_or( mask, gtMask );
1237             s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1238             mask = vec_or( mask, glWidth50Test );
1239             mask = vec_or( mask, glHeight50Test );
1240             mask = vec_or( mask, test50x );
1241             mask = vec_or( mask, test50y );
1242             mask = vec_or( mask, testz );
1243             mask = vec_or( mask, testoldz );
1244             tempMask.v = (vector unsigned int)mask;
1245
1246             s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1247             s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
1248
1249             frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1250             u0.v = vec_madd(frameAnd7, eighth, zero);
1251
1252             frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1253             v0.v = vec_madd(frameAnd7, eighth, zero);
1254
1255             u1.v = vec_add(u0.v, eighth);
1256             v1.v = vec_add(v0.v, eighth);
1257
1258             one_over_width = vec_re(width.v);
1259             cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1260             cm.v = vec_madd(cm.v, briteV.v, zero);
1261
1262             vSi = vec_sub( vSi, vec_nor( mask, mask ) );
1263             {
1264                 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1265                 vector unsigned int temp = (vector unsigned int)mask;
1266                 temp = vec_andc( blitMask, temp  );
1267                 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1268                 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1269                 vec_ste( temp, 0, &blitBool );
1270
1271             }
1272
1273             {
1274                 vector float temp1, temp2, temp3, temp4;
1275                 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1276
1277                 temp1 = vec_mergeh( u0.v, u0.v );
1278                 temp2 = vec_mergel( u0.v, u0.v );
1279                 temp3 = vec_mergeh( v0.v, v1.v );
1280                 temp4 = vec_mergel( v0.v, v1.v );
1281
1282                 result1a = vec_mergeh( temp1, temp3 );
1283                 result1b = vec_mergel( temp1, temp3 );
1284                 result2a = vec_mergeh( temp2, temp4 );
1285                 result2b = vec_mergel( temp2, temp4 );
1286
1287                 temp1 = vec_mergeh( u1.v, u1.v );
1288                 temp2 = vec_mergel( u1.v, u1.v );
1289                 temp3 = vec_mergeh( v1.v, v0.v );
1290                 temp4 = vec_mergel( v1.v, v0.v );
1291
1292                 result3a = vec_mergeh( temp1, temp3 );
1293                 result3b = vec_mergel( temp1, temp3 );
1294                 result4a = vec_mergeh( temp2, temp4 );
1295                 result4b = vec_mergel( temp2, temp4 );
1296
1297                 if( blitBool & 1 )
1298                 {
1299                     vec_st( result1a, 0, &s->seraphimTextures[sti] );
1300                     vec_st( result3a, 16, &s->seraphimTextures[sti]);
1301                     sti+= 8;
1302                 }
1303                 if( blitBool & 2 )
1304                 {
1305                     vec_st( result1b, 0, &s->seraphimTextures[sti]);
1306                     vec_st( result3b, 16, &s->seraphimTextures[sti]);
1307                     sti+= 8;
1308                 }
1309                 if( blitBool & 4 )
1310                 {
1311                     vec_st( result2a, 0, &s->seraphimTextures[sti]);
1312                     vec_st( result4a, 16, &s->seraphimTextures[sti]);
1313                     sti+= 8;
1314                 }
1315                 if( blitBool & 8 )
1316                 {
1317                     vec_st( result2b, 0, &s->seraphimTextures[sti]);
1318                     vec_st( result4b, 16, &s->seraphimTextures[sti]);
1319                     sti+= 8;
1320                 }
1321             }
1322         }
1323
1324         cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1325         cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1326         cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1327         cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1328         {
1329             vector float vI0, vI1, vI2, vI3;
1330
1331             vI0 = vec_mergeh ( cmv[0], cmv[2] );
1332             vI1 = vec_mergeh ( cmv[1], cmv[3] );
1333             vI2 = vec_mergel ( cmv[0], cmv[2] );
1334             vI3 = vec_mergel ( cmv[1], cmv[3] );
1335
1336             cmv[0] = vec_mergeh ( vI0, vI1 );
1337             cmv[1] = vec_mergel ( vI0, vI1 );
1338             cmv[2] = vec_mergeh ( vI2, vI3 );
1339             cmv[3] = vec_mergel ( vI2, vI3 );
1340         }
1341
1342         vec_dst( cmv, 0x0D0100D0, 1 );
1343
1344         {
1345             vector float sxd, syd;
1346             vector float sxdm, sxdp, sydm, sydp;
1347             vector float oxd, oyd;
1348             vector float oxdm, oxdp, oydm, oydp;
1349             vector float vI0, vI1, vI2, vI3;
1350             vector float dxs, dys;
1351             vector float dxos, dyos;
1352             vector float dxm, dym;
1353             vector float m;
1354
1355             m = vec_add((vector float)(1.0), sm);
1356
1357             dxs = vec_madd(dx, sm, zero);
1358             dys = vec_madd(dy, sm, zero);
1359             dxos = vec_madd(dx, os, zero);
1360             dyos = vec_madd(dy, os, zero);
1361             dxm = vec_madd(dx, m, zero);
1362             dym = vec_madd(dy, m, zero);
1363
1364             sxd = vec_add(sx, dxm);
1365             sxdm = vec_sub(sxd, dys);
1366             sxdp = vec_add(sxd, dys);
1367
1368             syd = vec_add(sy, dym);
1369             sydm = vec_sub(syd, dxs);
1370             sydp = vec_add(syd, dxs);
1371
1372             oxd = vec_sub(oldscreenx, dxm);
1373             oxdm = vec_sub(oxd, dyos);
1374             oxdp = vec_add(oxd, dyos);
1375
1376             oyd = vec_sub(oldscreeny, dym);
1377             oydm = vec_sub(oyd, dxos);
1378             oydp = vec_add(oyd, dxos);
1379
1380             vI0 = vec_mergeh ( sxdm, sxdp );
1381             vI1 = vec_mergeh ( sydp, sydm );
1382             vI2 = vec_mergel ( sxdm, sxdp );
1383             vI3 = vec_mergel ( sydp, sydm );
1384
1385             svec[0] = vec_mergeh ( vI0, vI1 );
1386             svec[1] = vec_mergel ( vI0, vI1 );
1387             svec[2] = vec_mergeh ( vI2, vI3 );
1388             svec[3] = vec_mergel ( vI2, vI3 );
1389
1390             vI0 = vec_mergeh ( oxdp, oxdm );
1391             vI1 = vec_mergeh ( oydm, oydp );
1392             vI2 = vec_mergel ( oxdp, oxdm );
1393             vI3 = vec_mergel ( oydm, oydp );
1394
1395             ovec[0] = vec_mergeh ( vI0, vI1 );
1396             ovec[1] = vec_mergel ( vI0, vI1 );
1397             ovec[2] = vec_mergeh ( vI2, vI3 );
1398             ovec[3] = vec_mergel ( vI2, vI3 );
1399         }
1400
1401         {
1402             int offset0 = (sci + 0) * sizeof( vector float );
1403             int offset1 = (sci + 1) * sizeof( vector float );
1404             int offset2 = (sci + 2) * sizeof( vector float );
1405             int offset3 = (sci + 3) * sizeof( vector float );
1406             int offset4 = (svi + 0) * sizeof( vector float );
1407             int offset5 = (svi + 1) * sizeof( vector float );
1408             vector float *colors = (vector float *)s->seraphimColors;
1409             vector float *vertices = (vector float *)s->seraphimVertices;
1410             for (kk=0; kk<4; kk++) {
1411                 if (blitBool>>kk & 1) {
1412                     vector float vcmv = cmv[kk];
1413                     vector float vsvec = svec[kk];
1414                     vector float vovec = ovec[kk];
1415
1416                     vec_st( vcmv, offset0, colors );
1417                     vec_st( vcmv, offset1, colors );
1418                     vec_st( vcmv, offset2, colors );
1419                     vec_st( vcmv, offset3, colors );
1420                     vec_st( vsvec, offset4, vertices );
1421                     vec_st( vovec, offset5, vertices );
1422                     colors += 4;
1423                     vertices += 2;
1424                     sci += 4;
1425                     svi += 2;
1426                 }
1427             }
1428         }
1429     }
1430     vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1431     vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1432     vec_ste( (vector signed int) vSi, 0, &si );
1433
1434     glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1435     glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1436     glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1437     glDrawArrays(GL_QUADS,0,si*4);
1438 }
1439
1440 #endif
1441 #endif /* 0 */