git.hungrycats.org Git - xscreensaver/blob - hacks/glx/flurry-smoke.c

   1 /*
   2
   3 Copyright (c) 2002, Calum Robinson
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9 * Redistributions of source code must retain the above copyright notice, this
  10   list of conditions and the following disclaimer.
  11
  12 * Redistributions in binary form must reproduce the above copyright notice,
  13   this list of conditions and the following disclaimer in the documentation
  14   and/or other materials provided with the distribution.
  15
  16 * Neither the name of the author nor the names of its contributors may be used
  17   to endorse or promote products derived from this software without specific
  18   prior written permission.
  19
  20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 /* Smoke.cpp: implementation of the Smoke class. */
  34
  35 #include "flurry.h"
  36
  37 #define MAXANGLES 16384
  38 #define NOT_QUITE_DEAD 3
  39
  40 #define intensity 75000.0f;
  41
  42 void InitSmoke(SmokeV *s)
  43 {
  44     int i;
  45     s->nextParticle = 0;
  46     s->nextSubParticle = 0;
  47     s->lastParticleTime = 0.25f;
  48     s->firstTime = 1;
  49     s->frame = 0;
  50     for (i=0;i<3;i++) {
  51         s->old[i] = RandFlt(-100.0, 100.0);
  52     }
  53 }
  54
  55 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
  56 {
  57     int i,j,k;
  58     float sx = flurry->star->position[0];
  59     float sy = flurry->star->position[1];
  60     float sz = flurry->star->position[2];
  61     double frameRate;
  62     double frameRateModifier;
  63
  64
  65     s->frame++;
  66
  67     if(!s->firstTime) {
  68         /* release 12 puffs every frame */
  69         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
  70             float dx,dy,dz,deltax,deltay,deltaz;
  71             float f;
  72             float rsquared;
  73             float mag;
  74
  75             dx = s->old[0] - sx;
  76             dy = s->old[1] - sy;
  77             dz = s->old[2] - sz;
  78             mag = 5.0f;
  79             deltax = (dx * mag);
  80             deltay = (dy * mag);
  81             deltaz = (dz * mag);
  82             for(i=0;i<flurry->numStreams;i++) {
  83                 float streamSpeedCoherenceFactor;
  84
  85                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
  86                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
  87                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
  88                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
  89                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
  90                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
  91                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
  92                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
  93                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
  94                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
  95                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
  96                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
  97                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
  98                 rsquared = (dx*dx+dy*dy+dz*dz);
  99                 f = streamSpeed * streamSpeedCoherenceFactor;
 100
 101                 mag = f / (float) sqrt(rsquared);
 102
 103                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 104                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 105                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 106                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 107                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 108                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 109                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 110                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 111                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 112                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 113                 s->nextSubParticle++;
 114                 if (s->nextSubParticle==4) {
 115                     s->nextParticle++;
 116                     s->nextSubParticle=0;
 117                 }
 118                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 119                     s->nextParticle = 0;
 120                     s->nextSubParticle = 0;
 121                 }
 122             }
 123
 124             s->lastParticleTime = flurry->fTime;
 125         }
 126     } else {
 127         s->lastParticleTime = flurry->fTime;
 128         s->firstTime = 0;
 129     }
 130
 131     for(i=0;i<3;i++) {
 132         s->old[i] = flurry->star->position[i];
 133     }
 134
 135     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 136     frameRateModifier = 42.5f / frameRate;
 137
 138     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 139         for(k=0; k<4; k++) {
 140             float dx,dy,dz;
 141             float f;
 142             float rsquared;
 143             float mag;
 144             float deltax;
 145             float deltay;
 146             float deltaz;
 147
 148             if (s->p[i].dead.i[k]) {
 149                 continue;
 150             }
 151
 152             deltax = s->p[i].delta[0].f[k];
 153             deltay = s->p[i].delta[1].f[k];
 154             deltaz = s->p[i].delta[2].f[k];
 155
 156             for(j=0;j<flurry->numStreams;j++) {
 157                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 158                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 159                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 160                 rsquared = (dx*dx+dy*dy+dz*dz);
 161
 162                 f = (gravity/rsquared) * frameRateModifier;
 163
 164                 if ((((i*4)+k) % flurry->numStreams) == j) {
 165                     f *= 1.0f + streamBias;
 166                 }
 167
 168                 mag = f / (float) sqrt(rsquared);
 169
 170                 deltax -= (dx * mag);
 171                 deltay -= (dy * mag);
 172                 deltaz -= (dz * mag);
 173             }
 174
 175             /* slow this particle down by flurry->drag */
 176             deltax *= flurry->drag;
 177             deltay *= flurry->drag;
 178             deltaz *= flurry->drag;
 179
 180             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 181                 s->p[i].dead.i[k] = 1;
 182                 continue;
 183             }
 184
 185             /* update the position */
 186             s->p[i].delta[0].f[k] = deltax;
 187             s->p[i].delta[1].f[k] = deltay;
 188             s->p[i].delta[2].f[k] = deltaz;
 189             for(j=0;j<3;j++) {
 190                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 191                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 192             }
 193         }
 194     }
 195 }
 196
 197 #ifdef __ppc__
 198
 199 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 200 {
 201     int i,j,k;
 202     float sx = flurry->star->position[0];
 203     float sy = flurry->star->position[1];
 204     float sz = flurry->star->position[2];
 205     double frameRate;
 206     double frameRateModifier;
 207
 208
 209     s->frame++;
 210
 211     if(!s->firstTime) {
 212         /* release 12 puffs every frame */
 213         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 214             float dx,dy,dz,deltax,deltay,deltaz;
 215             float f;
 216             float rsquared;
 217             float mag;
 218
 219             dx = s->old[0] - sx;
 220             dy = s->old[1] - sy;
 221             dz = s->old[2] - sz;
 222             mag = 5.0f;
 223             deltax = (dx * mag);
 224             deltay = (dy * mag);
 225             deltaz = (dz * mag);
 226             for(i=0;i<flurry->numStreams;i++) {
 227                 float streamSpeedCoherenceFactor;
 228
 229                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 230                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 231                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 232                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 233                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 234                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 235                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 236                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 237                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 238                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 239                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 240                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 241                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 242                 rsquared = (dx*dx+dy*dy+dz*dz);
 243                 f = streamSpeed * streamSpeedCoherenceFactor;
 244
 245                 /* mag = f / (float) sqrt(rsquared); */
 246
 247                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 248                 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 249                 mag *= f;
 250
 251                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 252                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 253                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 254                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 255                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 256                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 257                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 258                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 259                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 260                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 261                 s->nextSubParticle++;
 262                 if (s->nextSubParticle==4) {
 263                     s->nextParticle++;
 264                     s->nextSubParticle=0;
 265                 }
 266                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 267                     s->nextParticle = 0;
 268                     s->nextSubParticle = 0;
 269                 }
 270             }
 271
 272             s->lastParticleTime = flurry->fTime;
 273         }
 274     } else {
 275         s->lastParticleTime = flurry->fTime;
 276         s->firstTime = 0;
 277     }
 278
 279     for(i=0;i<3;i++) {
 280         s->old[i] = flurry->star->position[i];
 281     }
 282
 283     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 284     frameRateModifier = 42.5f / frameRate;
 285
 286     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 287         for(k=0; k<4; k++) {
 288             float dx,dy,dz;
 289             float f;
 290             float rsquared;
 291             float mag;
 292             float deltax;
 293             float deltay;
 294             float deltaz;
 295
 296             if (s->p[i].dead.i[k]) {
 297                 continue;
 298             }
 299
 300             deltax = s->p[i].delta[0].f[k];
 301             deltay = s->p[i].delta[1].f[k];
 302             deltaz = s->p[i].delta[2].f[k];
 303
 304             for(j=0;j<flurry->numStreams;j++) {
 305                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 306                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 307                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 308                 rsquared = (dx*dx+dy*dy+dz*dz);
 309
 310                 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
 311                 f *= gravity*frameRateModifier;
 312
 313                 if((((i*4)+k) % flurry->numStreams) == j) {
 314                     f *= 1.0f + streamBias;
 315                 }
 316
 317                 /* mag = f / (float) sqrt(rsquared); */
 318
 319                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 320                 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 321                 mag *= f;
 322
 323                 deltax -= (dx * mag);
 324                 deltay -= (dy * mag);
 325                 deltaz -= (dz * mag);
 326             }
 327
 328             /* slow this particle down by flurry->drag */
 329             deltax *= flurry->drag;
 330             deltay *= flurry->drag;
 331             deltaz *= flurry->drag;
 332
 333             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 334                 s->p[i].dead.i[k] = 1;
 335                 continue;
 336             }
 337
 338             /* update the position */
 339             s->p[i].delta[0].f[k] = deltax;
 340             s->p[i].delta[1].f[k] = deltay;
 341             s->p[i].delta[2].f[k] = deltaz;
 342             for(j=0;j<3;j++) {
 343                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 344                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 345             }
 346         }
 347     }
 348 }
 349
 350 #endif
 351
 352 #ifdef __VEC__
 353
 354 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 355 {
 356     unsigned int i,j;
 357     float sx = flurry->star->position[0];
 358     float sy = flurry->star->position[1];
 359     float sz = flurry->star->position[2];
 360     double frameRate;
 361     floatToVector frameRateModifier;
 362     floatToVector gravityV;
 363     floatToVector dragV;
 364     floatToVector deltaTimeV;
 365     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 366     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 367     const vector float biasConst = (vector float)(streamBias);
 368
 369     gravityV.f[0] = gravity;
 370     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 371
 372     dragV.f[0] = flurry->drag;
 373     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 374
 375     deltaTimeV.f[0] = flurry->fDeltaTime;
 376     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 377
 378     s->frame++;
 379
 380     if(!s->firstTime) {
 381         /* release 12 puffs every frame */
 382         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 383             float dx,dy,dz,deltax,deltay,deltaz;
 384             float f;
 385             float rsquared;
 386             float mag;
 387
 388             dx = s->old[0] - sx;
 389             dy = s->old[1] - sy;
 390             dz = s->old[2] - sz;
 391             mag = 5.0f;
 392             deltax = (dx * mag);
 393             deltay = (dy * mag);
 394             deltaz = (dz * mag);
 395             for(i=0;i<flurry->numStreams;i++) {
 396                 float streamSpeedCoherenceFactor;
 397
 398                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 399                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 400                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 401                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 402                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 403                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 404                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 405                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 406                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 407                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 408                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 409                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 410                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 411                 rsquared = (dx*dx+dy*dy+dz*dz);
 412                 f = streamSpeed * streamSpeedCoherenceFactor;
 413
 414                 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 415                 mag *= f;
 416
 417                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 418                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 419                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 420                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 421                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 422                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 423                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 424                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 425                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 426                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 427                 s->nextSubParticle++;
 428                 if (s->nextSubParticle==4) {
 429                     s->nextParticle++;
 430                     s->nextSubParticle=0;
 431                 }
 432                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 433                     s->nextParticle = 0;
 434                     s->nextSubParticle = 0;
 435                 }
 436             }
 437
 438             s->lastParticleTime = flurry->fTime;
 439         }
 440     } else {
 441         s->lastParticleTime = flurry->fTime;
 442         s->firstTime = 0;
 443     }
 444
 445     for(i=0;i<3;i++) {
 446         s->old[i] = flurry->star->position[i];
 447     }
 448
 449     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 450     frameRateModifier.f[0] = 42.5f / frameRate;
 451     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 452
 453     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 454
 455     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 456         /* floatToVector f; */
 457         vector float deltax, deltay, deltaz;
 458         vector float distTemp;
 459         vector unsigned int deadTemp;
 460         /* floatToVector infopos0, infopos1, infopos2; */
 461         intToVector mod;
 462         vector unsigned int jVec;
 463
 464
 465         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 466
 467         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 468             continue;
 469         }
 470
 471         deltax = s->p[i].delta[0].v;
 472         deltay = s->p[i].delta[1].v;
 473         deltaz = s->p[i].delta[2].v;
 474
 475         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 476         if(mod.i[0]+1 == flurry->numStreams) {
 477             mod.i[1] = 0;
 478         } else {
 479             mod.i[1] = mod.i[0]+1;
 480         }
 481         if(mod.i[1]+1 == flurry->numStreams) {
 482             mod.i[2] = 0;
 483         } else {
 484             mod.i[2] = mod.i[1]+1;
 485         }
 486         if(mod.i[2]+1 == flurry->numStreams) {
 487             mod.i[3] = 0;
 488         } else {
 489             mod.i[3] = mod.i[2]+1;
 490         }
 491
 492         jVec = vec_xor(jVec, jVec);
 493
 494         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 495         for(j=0; j<flurry->numStreams;j++) {
 496             vector float ip0, ip1 = (vector float)(0.0), ip2;
 497             vector float dx, dy, dz;
 498             vector float rsquared, f;
 499             vector float one_over_rsquared;
 500             vector float biasTemp;
 501             vector float mag;
 502             vector bool int biasOr;
 503
 504             ip0 = vec_ld(0, flurry->spark[j]->position);
 505             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 506                 ip1 = vec_ld(16, flurry->spark[j]->position);
 507             }
 508
 509             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 510             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 511             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 512             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 513
 514             dx = vec_sub(s->p[i].position[0].v, ip0);
 515             dy = vec_sub(s->p[i].position[1].v, ip1);
 516             dz = vec_sub(s->p[i].position[2].v, ip2);
 517
 518             rsquared = vec_madd(dx, dx, zero);
 519             rsquared = vec_madd(dy, dy, rsquared);
 520             rsquared = vec_madd(dz, dz, rsquared);
 521
 522             biasOr = vec_cmpeq(jVec, mod.v);
 523             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 524
 525             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 526             one_over_rsquared = vec_re(rsquared);
 527             f = vec_madd(f, one_over_rsquared, zero);
 528
 529             mag = vec_rsqrte(rsquared);
 530             mag = vec_madd(mag, f, zero);
 531
 532             deltax = vec_nmsub(dx, mag, deltax);
 533             deltay = vec_nmsub(dy, mag, deltay);
 534             deltaz = vec_nmsub(dz, mag, deltaz);
 535
 536             jVec = vec_add(jVec, (vector unsigned int)(1));
 537         }
 538
 539         /* slow this particle down by flurry->drag */
 540         deltax = vec_madd(deltax, dragV.v, zero);
 541         deltay = vec_madd(deltay, dragV.v, zero);
 542         deltaz = vec_madd(deltaz, dragV.v, zero);
 543
 544         distTemp = vec_madd(deltax, deltax, zero);
 545         distTemp = vec_madd(deltay, deltay, distTemp);
 546         distTemp = vec_madd(deltaz, deltaz, distTemp);
 547
 548         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 549         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 550         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 551         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 552             continue;
 553         }
 554
 555         /* update the position */
 556         s->p[i].delta[0].v = deltax;
 557         s->p[i].delta[1].v = deltay;
 558         s->p[i].delta[2].v = deltaz;
 559         for(j=0;j<3;j++) {
 560             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 561             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 562         }
 563     }
 564 }
 565
 566 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
 567 {
 568     unsigned int i,j;
 569     float sx = flurry->star->position[0];
 570     float sy = flurry->star->position[1];
 571     float sz = flurry->star->position[2];
 572     double frameRate;
 573     floatToVector frameRateModifier;
 574     floatToVector gravityV;
 575     floatToVector dragV;
 576     floatToVector deltaTimeV;
 577     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 578     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 579     const vector float biasConst = (vector float)(streamBias);
 580
 581     gravityV.f[0] = gravity;
 582     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 583
 584     dragV.f[0] = flurry->drag;
 585     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 586
 587     deltaTimeV.f[0] = flurry->fDeltaTime;
 588     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 589
 590     s->frame++;
 591
 592     if(!s->firstTime) {
 593         /* release 12 puffs every frame */
 594         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 595             float dx,dy,dz,deltax,deltay,deltaz;
 596             float f;
 597             float rsquared;
 598             float mag;
 599
 600             dx = s->old[0] - sx;
 601             dy = s->old[1] - sy;
 602             dz = s->old[2] - sz;
 603             mag = 5.0f;
 604             deltax = (dx * mag);
 605             deltay = (dy * mag);
 606             deltaz = (dz * mag);
 607             for(i=0;i<flurry->numStreams;i++) {
 608                 float streamSpeedCoherenceFactor;
 609
 610                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 611                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 612                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 613                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 614                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 615                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 616                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 617                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 618                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 619                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 620                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 621                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 622                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 623                 rsquared = (dx*dx+dy*dy+dz*dz);
 624                 f = streamSpeed * streamSpeedCoherenceFactor;
 625
 626                 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 627                 mag *= f;
 628
 629                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 630                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 631                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 632                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 633                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 634                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 635                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 636                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 637                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 638                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 639                 s->nextSubParticle++;
 640                 if (s->nextSubParticle==4) {
 641                     s->nextParticle++;
 642                     s->nextSubParticle=0;
 643                 }
 644                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 645                     s->nextParticle = 0;
 646                     s->nextSubParticle = 0;
 647                 }
 648             }
 649
 650             s->lastParticleTime = flurry->fTime;
 651         }
 652     } else {
 653         s->lastParticleTime = flurry->fTime;
 654         s->firstTime = 0;
 655     }
 656
 657     for(i=0;i<3;i++) {
 658         s->old[i] = flurry->star->position[i];
 659     }
 660
 661     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 662     frameRateModifier.f[0] = 42.5f / frameRate;
 663     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 664
 665     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 666
 667     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 668         /* floatToVector f; */
 669         vector float deltax, deltay, deltaz;
 670         vector float distTemp;
 671         vector unsigned int deadTemp;
 672         /* floatToVector infopos0, infopos1, infopos2; */
 673         intToVector mod;
 674         vector unsigned int jVec;
 675         vector unsigned int intOne = vec_splat_u32(1);
 676         vector float floatOne = vec_ctf(intOne, 0);
 677
 678
 679         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 680
 681         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 682             continue;
 683         }
 684
 685         deltax = s->p[i].delta[0].v;
 686         deltay = s->p[i].delta[1].v;
 687         deltaz = s->p[i].delta[2].v;
 688
 689         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 690         if(mod.i[0]+1 == flurry->numStreams) {
 691             mod.i[1] = 0;
 692         } else {
 693             mod.i[1] = mod.i[0]+1;
 694         }
 695         if(mod.i[1]+1 == flurry->numStreams) {
 696             mod.i[2] = 0;
 697         } else {
 698             mod.i[2] = mod.i[1]+1;
 699         }
 700         if(mod.i[2]+1 == flurry->numStreams) {
 701             mod.i[3] = 0;
 702         } else {
 703             mod.i[3] = mod.i[2]+1;
 704         }
 705
 706         jVec = vec_xor(jVec, jVec);
 707
 708         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 709         for(j=0; j + 3 < flurry->numStreams;j+=4)
 710         {
 711             vector float dxa, dya, dza;
 712             vector float dxb, dyb, dzb;
 713             vector float dxc, dyc, dzc;
 714             vector float dxd, dyd, dzd;
 715             vector float ip0a, ip1a;
 716             vector float ip0b, ip1b;
 717             vector float ip0c, ip1c;
 718             vector float ip0d, ip1d;
 719             vector float rsquaredA;
 720             vector float rsquaredB;
 721             vector float rsquaredC;
 722             vector float rsquaredD;
 723             vector float fA, fB, fC, fD;
 724             vector float biasTempA;
 725             vector float biasTempB;
 726             vector float biasTempC;
 727             vector float biasTempD;
 728             vector float magA;
 729             vector float magB;
 730             vector float magC;
 731             vector float magD;
 732
 733             vector float one_over_rsquaredA;
 734             vector float one_over_rsquaredB;
 735             vector float one_over_rsquaredC;
 736             vector float one_over_rsquaredD;
 737             vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
 738
 739             /* load vectors */
 740             ip0a = vec_ld(0, flurry->spark[j]->position);
 741             ip0b = vec_ld(0, flurry->spark[j+1]->position);
 742             ip0c = vec_ld(0, flurry->spark[j+2]->position);
 743             ip0d = vec_ld(0, flurry->spark[j+3]->position);
 744             ip1a = vec_ld( 12, flurry->spark[j]->position );
 745             ip1b = vec_ld( 12, flurry->spark[j+1]->position );
 746             ip1c = vec_ld( 12, flurry->spark[j+2]->position );
 747             ip1d = vec_ld( 12, flurry->spark[j+3]->position );
 748
 749             /* align them */
 750             ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
 751             ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
 752             ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
 753             ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
 754
 755             dxa = vec_splat( ip0a, 0  );
 756             dxb = vec_splat( ip0b, 0  );
 757             dxc = vec_splat( ip0c, 0  );
 758             dxd = vec_splat( ip0d, 0  );
 759             dxa = vec_sub( s->p[i].position[0].v, dxa );
 760             dxb = vec_sub( s->p[i].position[0].v, dxb );
 761             dxc = vec_sub( s->p[i].position[0].v, dxc );
 762             dxd = vec_sub( s->p[i].position[0].v, dxd );
 763
 764             dya = vec_splat( ip0a, 1  );
 765             dyb = vec_splat( ip0b, 1  );
 766             dyc = vec_splat( ip0c, 1  );
 767             dyd = vec_splat( ip0d, 1  );
 768             dya = vec_sub( s->p[i].position[1].v, dya );
 769             dyb = vec_sub( s->p[i].position[1].v, dyb );
 770             dyc = vec_sub( s->p[i].position[1].v, dyc );
 771             dyd = vec_sub( s->p[i].position[1].v, dyd );
 772
 773             dza = vec_splat( ip0a, 2  );
 774             dzb = vec_splat( ip0b, 2  );
 775             dzc = vec_splat( ip0c, 2  );
 776             dzd = vec_splat( ip0d, 2  );
 777             dza = vec_sub( s->p[i].position[2].v, dza );
 778             dzb = vec_sub( s->p[i].position[2].v, dzb );
 779             dzc = vec_sub( s->p[i].position[2].v, dzc );
 780             dzd = vec_sub( s->p[i].position[2].v, dzd );
 781
 782             rsquaredA = vec_madd( dxa, dxa, zero );
 783             rsquaredB = vec_madd( dxb, dxb, zero );
 784             rsquaredC = vec_madd( dxc, dxc, zero );
 785             rsquaredD = vec_madd( dxd, dxd, zero );
 786
 787             rsquaredA = vec_madd( dya, dya, rsquaredA );
 788             rsquaredB = vec_madd( dyb, dyb, rsquaredB );
 789             rsquaredC = vec_madd( dyc, dyc, rsquaredC );
 790             rsquaredD = vec_madd( dyd, dyd, rsquaredD );
 791
 792             rsquaredA = vec_madd( dza, dza, rsquaredA );
 793             rsquaredB = vec_madd( dzb, dzb, rsquaredB );
 794             rsquaredC = vec_madd( dzc, dzc, rsquaredC );
 795             rsquaredD = vec_madd( dzd, dzd, rsquaredD );
 796
 797             biasOrA = vec_cmpeq( jVec, mod.v );
 798             jVec = vec_add(jVec, intOne);
 799             biasOrB = vec_cmpeq( jVec, mod.v );
 800             jVec = vec_add(jVec, intOne);
 801             biasOrC = vec_cmpeq( jVec, mod.v );
 802             jVec = vec_add(jVec, intOne);
 803             biasOrD = vec_cmpeq( jVec, mod.v );
 804             jVec = vec_add(jVec, intOne);
 805
 806             biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
 807             biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
 808             biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
 809             biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
 810
 811             fA = vec_madd( biasTempA, frameRateModifier.v, zero);
 812             fB = vec_madd( biasTempB, frameRateModifier.v, zero);
 813             fC = vec_madd( biasTempC, frameRateModifier.v, zero);
 814             fD = vec_madd( biasTempD, frameRateModifier.v, zero);
 815             one_over_rsquaredA = vec_re( rsquaredA );
 816             one_over_rsquaredB = vec_re( rsquaredB );
 817             one_over_rsquaredC = vec_re( rsquaredC );
 818             one_over_rsquaredD = vec_re( rsquaredD );
 819             fA = vec_madd( fA, one_over_rsquaredA, zero);
 820             fB = vec_madd( fB, one_over_rsquaredB, zero);
 821             fC = vec_madd( fC, one_over_rsquaredC, zero);
 822             fD = vec_madd( fD, one_over_rsquaredD, zero);
 823             magA = vec_rsqrte( rsquaredA );
 824             magB = vec_rsqrte( rsquaredB );
 825             magC = vec_rsqrte( rsquaredC );
 826             magD = vec_rsqrte( rsquaredD );
 827             magA = vec_madd( magA, fA, zero );
 828             magB = vec_madd( magB, fB, zero );
 829             magC = vec_madd( magC, fC, zero );
 830             magD = vec_madd( magD, fD, zero );
 831             deltax = vec_nmsub( dxa, magA, deltax );
 832             deltay = vec_nmsub( dya, magA, deltay );
 833             deltaz = vec_nmsub( dza, magA, deltaz );
 834
 835             deltax = vec_nmsub( dxb, magB, deltax );
 836             deltay = vec_nmsub( dyb, magB, deltay );
 837             deltaz = vec_nmsub( dzb, magB, deltaz );
 838
 839             deltax = vec_nmsub( dxc, magC, deltax );
 840             deltay = vec_nmsub( dyc, magC, deltay );
 841             deltaz = vec_nmsub( dzc, magC, deltaz );
 842
 843             deltax = vec_nmsub( dxd, magD, deltax );
 844             deltay = vec_nmsub( dyd, magD, deltay );
 845             deltaz = vec_nmsub( dzd, magD, deltaz );
 846         }
 847
 848
 849         for(;j<flurry->numStreams;j++) {
 850             vector float ip0, ip1 = (vector float)(0.0), ip2;
 851             vector float dx, dy, dz;
 852             vector float rsquared, f;
 853             vector float one_over_rsquared;
 854             vector float biasTemp;
 855             vector float mag;
 856             vector bool int biasOr;
 857
 858             ip0 = vec_ld(0, flurry->spark[j]->position);
 859             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 860                 ip1 = vec_ld(16, flurry->spark[j]->position);
 861             }
 862
 863             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 864             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 865             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 866             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 867
 868             dx = vec_sub(s->p[i].position[0].v, ip0);
 869             dy = vec_sub(s->p[i].position[1].v, ip1);
 870             dz = vec_sub(s->p[i].position[2].v, ip2);
 871
 872             rsquared = vec_madd(dx, dx, zero);
 873             rsquared = vec_madd(dy, dy, rsquared);
 874             rsquared = vec_madd(dz, dz, rsquared);
 875
 876             biasOr = vec_cmpeq(jVec, mod.v);
 877             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 878
 879             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 880             one_over_rsquared = vec_re(rsquared);
 881             f = vec_madd(f, one_over_rsquared, zero);
 882
 883             mag = vec_rsqrte(rsquared);
 884             mag = vec_madd(mag, f, zero);
 885
 886             deltax = vec_nmsub(dx, mag, deltax);
 887             deltay = vec_nmsub(dy, mag, deltay);
 888             deltaz = vec_nmsub(dz, mag, deltaz);
 889
 890             jVec = vec_add(jVec, (vector unsigned int)(1));
 891         }
 892
 893         /* slow this particle down by flurry->drag */
 894         deltax = vec_madd(deltax, dragV.v, zero);
 895         deltay = vec_madd(deltay, dragV.v, zero);
 896         deltaz = vec_madd(deltaz, dragV.v, zero);
 897
 898         distTemp = vec_madd(deltax, deltax, zero);
 899         distTemp = vec_madd(deltay, deltay, distTemp);
 900         distTemp = vec_madd(deltaz, deltaz, distTemp);
 901
 902         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 903         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 904         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 905         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 906             continue;
 907         }
 908
 909         /* update the position */
 910         s->p[i].delta[0].v = deltax;
 911         s->p[i].delta[1].v = deltay;
 912         s->p[i].delta[2].v = deltaz;
 913         for(j=0;j<3;j++) {
 914             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 915             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 916         }
 917     }
 918 }
 919
 920 #endif
 921
 922 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
 923 {
 924         int svi = 0;
 925         int sci = 0;
 926         int sti = 0;
 927         int si = 0;
 928         float width;
 929         float sx,sy;
 930         float u0,v0,u1,v1;
 931         float w,z;
 932         float screenRatio = global->sys_glWidth / 1024.0f;
 933         float hslash2 = global->sys_glHeight * 0.5f;
 934         float wslash2 = global->sys_glWidth * 0.5f;
 935         int i,k;
 936
 937         width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
 938
 939         for (i=0;i<NUMSMOKEPARTICLES/4;i++)
 940         {
 941             for (k=0; k<4; k++) {
 942                 float thisWidth;
 943                 float oldz;
 944
 945                 if (s->p[i].dead.i[k]) {
 946                     continue;
 947                 }
 948                 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
 949                 if (thisWidth >= width)
 950                 {
 951                         s->p[i].dead.i[k] = 1;
 952                         continue;
 953                 }
 954                 z = s->p[i].position[2].f[k];
 955                 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
 956                 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
 957                 oldz = s->p[i].oldposition[2].f[k];
 958                 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
 959                 {
 960                         continue;
 961                 }
 962
 963                 w = MAX_(1.0f,thisWidth/z);
 964                 {
 965                         float oldx = s->p[i].oldposition[0].f[k];
 966                         float oldy = s->p[i].oldposition[1].f[k];
 967                         float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
 968                         float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
 969                         float dx = (sx-oldscreenx);
 970                         float dy = (sy-oldscreeny);
 971
 972                         float d = FastDistance2D(dx, dy);
 973
 974                         float sm, os, ow;
 975                         if (d)
 976                         {
 977                                 sm = w/d;
 978                         }
 979                         else
 980                         {
 981                                 sm = 0.0f;
 982                         }
 983                         ow = MAX_(1.0f,thisWidth/oldz);
 984                         if (d)
 985                         {
 986                                 os = ow/d;
 987                         }
 988                         else
 989                         {
 990                                 os = 0.0f;
 991                         }
 992
 993                         {
 994                                 floatToVector cmv;
 995                                 float cm;
 996                                 float m = 1.0f + sm;
 997
 998                                 float dxs = dx*sm;
 999                                 float dys = dy*sm;
1000                                 float dxos = dx*os;
1001                                 float dyos = dy*os;
1002                                 float dxm = dx*m;
1003                                 float dym = dy*m;
1004
1005                                 s->p[i].animFrame.i[k]++;
1006                                 if (s->p[i].animFrame.i[k] >= 64)
1007                                 {
1008                                         s->p[i].animFrame.i[k] = 0;
1009                                 }
1010
1011                                 u0 = (s->p[i].animFrame.i[k]&&7) * 0.125f;
1012                                 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
1013                                 u1 = u0 + 0.125f;
1014                                 v1 = v0 + 0.125f;
1015                                 u1 = u0 + 0.125f;
1016                                 v1 = v0 + 0.125f;
1017                                 cm = (1.375f - thisWidth/width);
1018                                 if (s->p[i].dead.i[k] == 3)
1019                                 {
1020                                         cm *= 0.125f;
1021                                         s->p[i].dead.i[k] = 1;
1022                                 }
1023                                 si++;
1024                                 cm *= brightness;
1025                                 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1026                                 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1027                                 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1028                                 cmv.f[3] = s->p[i].color[3].f[k]*cm;
1029
1030 #if 0
1031                                 /* MDT we can't use vectors in the Scalar routine */
1032                                 s->seraphimColors[sci++].v = cmv.v;
1033                                 s->seraphimColors[sci++].v = cmv.v;
1034                                 s->seraphimColors[sci++].v = cmv.v;
1035                                 s->seraphimColors[sci++].v = cmv.v;
1036 #else
1037                                 {
1038                                     int ii, jj;
1039                                     for (jj = 0; jj < 4; jj++) {
1040                                         for (ii = 0; ii < 4; ii++) {
1041                                             s->seraphimColors[sci].f[ii] = cmv.f[ii];
1042                                         }
1043                                         sci += 1;
1044                                     }
1045                                 }
1046 #endif
1047
1048                                 s->seraphimTextures[sti++] = u0;
1049                                 s->seraphimTextures[sti++] = v0;
1050                                 s->seraphimTextures[sti++] = u0;
1051                                 s->seraphimTextures[sti++] = v1;
1052
1053                                 s->seraphimTextures[sti++] = u1;
1054                                 s->seraphimTextures[sti++] = v1;
1055                                 s->seraphimTextures[sti++] = u1;
1056                                 s->seraphimTextures[sti++] = v0;
1057
1058                                 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1059                                 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1060                                 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1061                                 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1062                                 svi++;
1063
1064                                 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1065                                 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1066                                 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1067                                 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
1068                                 svi++;
1069                         }
1070                 }
1071             }
1072         }
1073         glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1074         glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1075         glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1076         glDrawArrays(GL_QUADS,0,si*4);
1077 }
1078
1079 #ifdef __VEC__
1080
1081 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1082 {
1083     const vector float zero = (vector float)(0.0);
1084     int svi = 0;
1085     int sci = 0;
1086     int sti = 0;
1087     int si = 0;
1088     floatToVector width;
1089     vector float sx,sy;
1090     floatToVector u0,v0,u1,v1;
1091     vector float one_over_z;
1092     vector float w;
1093     floatToVector z;
1094     float screenRatio = global->sys_glWidth / 1024.0f;
1095     float hslash2 = global->sys_glHeight * 0.5f;
1096     float wslash2 = global->sys_glWidth * 0.5f;
1097     int i,kk;
1098     floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1099     floatToVector glWidthV;
1100     floatToVector cm;
1101     vector float cmv[4];
1102     vector float svec[4], ovec[4];
1103     vector float oldscreenx, oldscreeny;
1104     vector float sm;
1105     vector float frameAnd7;
1106     vector float frameShift3;
1107     vector float one_over_width;
1108     vector float dx, dy;
1109     vector float os;
1110     vector unsigned int vSi = vec_splat_u32(0);
1111     const vector float eighth = (vector float)(0.125);
1112     float glWidth50 = global->sys_glWidth + 50.0f;
1113     float glHeight50 = global->sys_glHeight + 50.0f;
1114     vector float vGLWidth50, vGLHeight50;
1115     unsigned int blitBool;
1116
1117     vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
1118
1119     {
1120         vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1121         vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1122         permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1123         permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1124         vGLWidth50 = vec_lde( 0, &glWidth50 );
1125         vGLHeight50 = vec_lde( 0, &glHeight50 );
1126         vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1127         vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
1128     }
1129
1130     width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1131     width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1132
1133     briteV.f[0] = brightness;
1134     briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1135
1136     fTimeV.f[0] = (float) flurry->fTime;
1137     fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1138
1139     expansionV.f[0] = flurry->streamExpansion;
1140     expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1141
1142     screenRatioV.f[0] = screenRatio;
1143     screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1144
1145     hslash2V.f[0] = hslash2;
1146     hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1147
1148     wslash2V.f[0] = wslash2;
1149     wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1150
1151     streamSizeV.f[0] = streamSize;
1152     streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1153
1154     glWidthV.f[0] = global->sys_glWidth;
1155     glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
1156
1157     for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1158         vector float thisWidth;
1159         vector float oldz;
1160         vector float oldx, oldy, one_over_oldz;
1161         vector float xabs, yabs, mn;
1162         vector float d;
1163         vector float one_over_d;
1164         vector bool int dnz;
1165         vector float ow;
1166
1167         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
1168
1169         if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1170
1171         blitBool = 0; /* keep track of particles that actually need to be drawn */
1172
1173         thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1174         thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1175         thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
1176
1177         z.v = s->p[i].position[2].v;
1178         one_over_z = vec_re(z.v);
1179
1180         sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1181         sx = vec_madd(sx, one_over_z, wslash2V.v);
1182         sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1183         sy = vec_madd(sy, one_over_z, hslash2V.v);
1184
1185         oldz = s->p[i].oldposition[2].v;
1186
1187         w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
1188
1189         oldx = s->p[i].oldposition[0].v;
1190         oldy = s->p[i].oldposition[1].v;
1191         one_over_oldz = vec_re(oldz);
1192         oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1193         oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1194         oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1195         oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1196         dx = vec_sub(sx,oldscreenx);
1197         dy = vec_sub(sy,oldscreeny);
1198
1199         xabs = vec_abs(dx);
1200         yabs = vec_abs(dy);
1201         mn = vec_min(xabs,yabs);
1202         d = vec_add(xabs,yabs);
1203         d = vec_madd(mn, (vector float)(-0.6875), d);
1204
1205         ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1206         one_over_d = vec_re(d);
1207         dnz = vec_cmpgt(d, zero);
1208         sm = vec_madd(w, one_over_d, zero);
1209         sm = vec_and(sm, dnz);
1210         os = vec_madd(ow, one_over_d, zero);
1211         os = vec_and(os, dnz);
1212
1213         {
1214             intToVector tempMask;
1215             vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1216             vector bool int  gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1217             vector bool int  glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1218             vector bool int  glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1219             vector bool int  test50x    = vec_cmplt( sx, (vector float) (-50.0) );
1220             vector bool int  test50y    = vec_cmplt( sy, (vector float) (-50.0) );
1221             vector bool int  testz      = vec_cmplt( z.v, (vector float) (25.0) );
1222             vector bool int  testoldz   = vec_cmplt( oldz, (vector float) (25.0) );
1223             mask = vec_or( mask, gtMask );
1224             s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1225             mask = vec_or( mask, glWidth50Test );
1226             mask = vec_or( mask, glHeight50Test );
1227             mask = vec_or( mask, test50x );
1228             mask = vec_or( mask, test50y );
1229             mask = vec_or( mask, testz );
1230             mask = vec_or( mask, testoldz );
1231             tempMask.v = (vector unsigned int)mask;
1232
1233             s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1234             s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
1235
1236             frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1237             u0.v = vec_madd(frameAnd7, eighth, zero);
1238
1239             frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1240             v0.v = vec_madd(frameAnd7, eighth, zero);
1241
1242             u1.v = vec_add(u0.v, eighth);
1243             v1.v = vec_add(v0.v, eighth);
1244
1245             one_over_width = vec_re(width.v);
1246             cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1247             cm.v = vec_madd(cm.v, briteV.v, zero);
1248
1249             vSi = vec_sub( vSi, vec_nor( mask, mask ) );
1250             {
1251                 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1252                 vector unsigned int temp = (vector unsigned int)mask;
1253                 temp = vec_andc( blitMask, temp  );
1254                 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1255                 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1256                 vec_ste( temp, 0, &blitBool );
1257
1258             }
1259
1260             {
1261                 vector float temp1, temp2, temp3, temp4;
1262                 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1263
1264                 temp1 = vec_mergeh( u0.v, u0.v );
1265                 temp2 = vec_mergel( u0.v, u0.v );
1266                 temp3 = vec_mergeh( v0.v, v1.v );
1267                 temp4 = vec_mergel( v0.v, v1.v );
1268
1269                 result1a = vec_mergeh( temp1, temp3 );
1270                 result1b = vec_mergel( temp1, temp3 );
1271                 result2a = vec_mergeh( temp2, temp4 );
1272                 result2b = vec_mergel( temp2, temp4 );
1273
1274                 temp1 = vec_mergeh( u1.v, u1.v );
1275                 temp2 = vec_mergel( u1.v, u1.v );
1276                 temp3 = vec_mergeh( v1.v, v0.v );
1277                 temp4 = vec_mergel( v1.v, v0.v );
1278
1279                 result3a = vec_mergeh( temp1, temp3 );
1280                 result3b = vec_mergel( temp1, temp3 );
1281                 result4a = vec_mergeh( temp2, temp4 );
1282                 result4b = vec_mergel( temp2, temp4 );
1283
1284                 if( blitBool & 1 )
1285                 {
1286                     vec_st( result1a, 0, &s->seraphimTextures[sti] );
1287                     vec_st( result3a, 16, &s->seraphimTextures[sti]);
1288                     sti+= 8;
1289                 }
1290                 if( blitBool & 2 )
1291                 {
1292                     vec_st( result1b, 0, &s->seraphimTextures[sti]);
1293                     vec_st( result3b, 16, &s->seraphimTextures[sti]);
1294                     sti+= 8;
1295                 }
1296                 if( blitBool & 4 )
1297                 {
1298                     vec_st( result2a, 0, &s->seraphimTextures[sti]);
1299                     vec_st( result4a, 16, &s->seraphimTextures[sti]);
1300                     sti+= 8;
1301                 }
1302                 if( blitBool & 8 )
1303                 {
1304                     vec_st( result2b, 0, &s->seraphimTextures[sti]);
1305                     vec_st( result4b, 16, &s->seraphimTextures[sti]);
1306                     sti+= 8;
1307                 }
1308             }
1309         }
1310
1311         cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1312         cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1313         cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1314         cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1315         {
1316             vector float vI0, vI1, vI2, vI3;
1317
1318             vI0 = vec_mergeh ( cmv[0], cmv[2] );
1319             vI1 = vec_mergeh ( cmv[1], cmv[3] );
1320             vI2 = vec_mergel ( cmv[0], cmv[2] );
1321             vI3 = vec_mergel ( cmv[1], cmv[3] );
1322
1323             cmv[0] = vec_mergeh ( vI0, vI1 );
1324             cmv[1] = vec_mergel ( vI0, vI1 );
1325             cmv[2] = vec_mergeh ( vI2, vI3 );
1326             cmv[3] = vec_mergel ( vI2, vI3 );
1327         }
1328
1329         vec_dst( cmv, 0x0D0100D0, 1 );
1330
1331         {
1332             vector float sxd, syd;
1333             vector float sxdm, sxdp, sydm, sydp;
1334             vector float oxd, oyd;
1335             vector float oxdm, oxdp, oydm, oydp;
1336             vector float vI0, vI1, vI2, vI3;
1337             vector float dxs, dys;
1338             vector float dxos, dyos;
1339             vector float dxm, dym;
1340             vector float m;
1341
1342             m = vec_add((vector float)(1.0), sm);
1343
1344             dxs = vec_madd(dx, sm, zero);
1345             dys = vec_madd(dy, sm, zero);
1346             dxos = vec_madd(dx, os, zero);
1347             dyos = vec_madd(dy, os, zero);
1348             dxm = vec_madd(dx, m, zero);
1349             dym = vec_madd(dy, m, zero);
1350
1351             sxd = vec_add(sx, dxm);
1352             sxdm = vec_sub(sxd, dys);
1353             sxdp = vec_add(sxd, dys);
1354
1355             syd = vec_add(sy, dym);
1356             sydm = vec_sub(syd, dxs);
1357             sydp = vec_add(syd, dxs);
1358
1359             oxd = vec_sub(oldscreenx, dxm);
1360             oxdm = vec_sub(oxd, dyos);
1361             oxdp = vec_add(oxd, dyos);
1362
1363             oyd = vec_sub(oldscreeny, dym);
1364             oydm = vec_sub(oyd, dxos);
1365             oydp = vec_add(oyd, dxos);
1366
1367             vI0 = vec_mergeh ( sxdm, sxdp );
1368             vI1 = vec_mergeh ( sydp, sydm );
1369             vI2 = vec_mergel ( sxdm, sxdp );
1370             vI3 = vec_mergel ( sydp, sydm );
1371
1372             svec[0] = vec_mergeh ( vI0, vI1 );
1373             svec[1] = vec_mergel ( vI0, vI1 );
1374             svec[2] = vec_mergeh ( vI2, vI3 );
1375             svec[3] = vec_mergel ( vI2, vI3 );
1376
1377             vI0 = vec_mergeh ( oxdp, oxdm );
1378             vI1 = vec_mergeh ( oydm, oydp );
1379             vI2 = vec_mergel ( oxdp, oxdm );
1380             vI3 = vec_mergel ( oydm, oydp );
1381
1382             ovec[0] = vec_mergeh ( vI0, vI1 );
1383             ovec[1] = vec_mergel ( vI0, vI1 );
1384             ovec[2] = vec_mergeh ( vI2, vI3 );
1385             ovec[3] = vec_mergel ( vI2, vI3 );
1386         }
1387
1388         {
1389             int offset0 = (sci + 0) * sizeof( vector float );
1390             int offset1 = (sci + 1) * sizeof( vector float );
1391             int offset2 = (sci + 2) * sizeof( vector float );
1392             int offset3 = (sci + 3) * sizeof( vector float );
1393             int offset4 = (svi + 0) * sizeof( vector float );
1394             int offset5 = (svi + 1) * sizeof( vector float );
1395             vector float *colors = (vector float *)s->seraphimColors;
1396             vector float *vertices = (vector float *)s->seraphimVertices;
1397             for (kk=0; kk<4; kk++) {
1398                 if (blitBool>>kk & 1) {
1399                     vector float vcmv = cmv[kk];
1400                     vector float vsvec = svec[kk];
1401                     vector float vovec = ovec[kk];
1402
1403                     vec_st( vcmv, offset0, colors );
1404                     vec_st( vcmv, offset1, colors );
1405                     vec_st( vcmv, offset2, colors );
1406                     vec_st( vcmv, offset3, colors );
1407                     vec_st( vsvec, offset4, vertices );
1408                     vec_st( vovec, offset5, vertices );
1409                     colors += 4;
1410                     vertices += 2;
1411                     sci += 4;
1412                     svi += 2;
1413                 }
1414             }
1415         }
1416     }
1417     vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1418     vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1419     vec_ste( (vector signed int) vSi, 0, &si );
1420
1421     glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1422     glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1423     glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1424     glDrawArrays(GL_QUADS,0,si*4);
1425 }
1426
1427 #endif