git.hungrycats.org Git - xscreensaver/blob - hacks/glx/flurry-smoke.c

   1 /*
   2
   3 Copyright (c) 2002, Calum Robinson
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9 * Redistributions of source code must retain the above copyright notice, this
  10   list of conditions and the following disclaimer.
  11
  12 * Redistributions in binary form must reproduce the above copyright notice,
  13   this list of conditions and the following disclaimer in the documentation
  14   and/or other materials provided with the distribution.
  15
  16 * Neither the name of the author nor the names of its contributors may be used
  17   to endorse or promote products derived from this software without specific
  18   prior written permission.
  19
  20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 /* flurry-smoke.c (originally Smoke.cpp): implementation of the Smoke class. */
  34
  35 #include "flurry.h"
  36
  37 #define MAXANGLES 16384
  38 #define NOT_QUITE_DEAD 3
  39
  40 #define intensity 75000.0f;
  41
  42 void InitSmoke(SmokeV *s)
  43 {
  44     int i;
  45     s->nextParticle = 0;
  46     s->nextSubParticle = 0;
  47     s->lastParticleTime = 0.25f;
  48     s->firstTime = 1;
  49     s->frame = 0;
  50     for (i=0;i<3;i++) {
  51         s->old[i] = RandFlt(-100.0, 100.0);
  52     }
  53 }
  54
  55 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
  56 {
  57     int i,j,k;
  58     float sx = flurry->star->position[0];
  59     float sy = flurry->star->position[1];
  60     float sz = flurry->star->position[2];
  61     double frameRate;
  62     double frameRateModifier;
  63
  64
  65     s->frame++;
  66
  67     if(!s->firstTime) {
  68         /* release 12 puffs every frame */
  69         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
  70             float dx,dy,dz,deltax,deltay,deltaz;
  71             float f;
  72             float rsquared;
  73             float mag;
  74
  75             dx = s->old[0] - sx;
  76             dy = s->old[1] - sy;
  77             dz = s->old[2] - sz;
  78             mag = 5.0f;
  79             deltax = (dx * mag);
  80             deltay = (dy * mag);
  81             deltaz = (dz * mag);
  82             for(i=0;i<flurry->numStreams;i++) {
  83                 float streamSpeedCoherenceFactor;
  84
  85                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
  86                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
  87                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
  88                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
  89                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
  90                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
  91                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
  92                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
  93                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
  94                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
  95                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
  96                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
  97                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
  98                 rsquared = (dx*dx+dy*dy+dz*dz);
  99                 f = streamSpeed * streamSpeedCoherenceFactor;
 100
 101                 mag = f / (float) sqrt(rsquared);
 102
 103                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 104                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 105                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 106                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 107                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 108                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 109                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 110                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 111                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 112                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 113                 s->nextSubParticle++;
 114                 if (s->nextSubParticle==4) {
 115                     s->nextParticle++;
 116                     s->nextSubParticle=0;
 117                 }
 118                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 119                     s->nextParticle = 0;
 120                     s->nextSubParticle = 0;
 121                 }
 122             }
 123
 124             s->lastParticleTime = flurry->fTime;
 125         }
 126     } else {
 127         s->lastParticleTime = flurry->fTime;
 128         s->firstTime = 0;
 129     }
 130
 131     for(i=0;i<3;i++) {
 132         s->old[i] = flurry->star->position[i];
 133     }
 134
 135     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 136     frameRateModifier = 42.5f / frameRate;
 137
 138     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 139         for(k=0; k<4; k++) {
 140             float dx,dy,dz;
 141             float f;
 142             float rsquared;
 143             float mag;
 144             float deltax;
 145             float deltay;
 146             float deltaz;
 147
 148             if (s->p[i].dead.i[k]) {
 149                 continue;
 150             }
 151
 152             deltax = s->p[i].delta[0].f[k];
 153             deltay = s->p[i].delta[1].f[k];
 154             deltaz = s->p[i].delta[2].f[k];
 155
 156             for(j=0;j<flurry->numStreams;j++) {
 157                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 158                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 159                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 160                 rsquared = (dx*dx+dy*dy+dz*dz);
 161
 162                 f = (gravity/rsquared) * frameRateModifier;
 163
 164                 if ((((i*4)+k) % flurry->numStreams) == j) {
 165                     f *= 1.0f + streamBias;
 166                 }
 167
 168                 mag = f / (float) sqrt(rsquared);
 169
 170                 deltax -= (dx * mag);
 171                 deltay -= (dy * mag);
 172                 deltaz -= (dz * mag);
 173             }
 174
 175             /* slow this particle down by flurry->drag */
 176             deltax *= flurry->drag;
 177             deltay *= flurry->drag;
 178             deltaz *= flurry->drag;
 179
 180             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 181                 s->p[i].dead.i[k] = 1;
 182                 continue;
 183             }
 184
 185             /* update the position */
 186             s->p[i].delta[0].f[k] = deltax;
 187             s->p[i].delta[1].f[k] = deltay;
 188             s->p[i].delta[2].f[k] = deltaz;
 189             for(j=0;j<3;j++) {
 190                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 191                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 192             }
 193         }
 194     }
 195 }
 196
 197 #if 0
 198 #ifdef __ppc__
 199
 200 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 201 {
 202     int i,j,k;
 203     float sx = flurry->star->position[0];
 204     float sy = flurry->star->position[1];
 205     float sz = flurry->star->position[2];
 206     double frameRate;
 207     double frameRateModifier;
 208
 209
 210     s->frame++;
 211
 212     if(!s->firstTime) {
 213         /* release 12 puffs every frame */
 214         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 215             float dx,dy,dz,deltax,deltay,deltaz;
 216             float f;
 217             float rsquared;
 218             float mag;
 219
 220             dx = s->old[0] - sx;
 221             dy = s->old[1] - sy;
 222             dz = s->old[2] - sz;
 223             mag = 5.0f;
 224             deltax = (dx * mag);
 225             deltay = (dy * mag);
 226             deltaz = (dz * mag);
 227             for(i=0;i<flurry->numStreams;i++) {
 228                 float streamSpeedCoherenceFactor;
 229
 230                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 231                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 232                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 233                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 234                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 235                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 236                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 237                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 238                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 239                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 240                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 241                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 242                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 243                 rsquared = (dx*dx+dy*dy+dz*dz);
 244                 f = streamSpeed * streamSpeedCoherenceFactor;
 245
 246                 mag = f / (float) sqrt(rsquared);
 247                 /*
 248                     reciprocal square-root estimate replaced above divide and call to system sqrt()
 249
 250                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 251                     mag *= f;
 252                 */
 253
 254                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 255                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 256                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 257                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 258                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 259                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 260                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 261                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 262                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 263                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 264                 s->nextSubParticle++;
 265                 if (s->nextSubParticle==4) {
 266                     s->nextParticle++;
 267                     s->nextSubParticle=0;
 268                 }
 269                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 270                     s->nextParticle = 0;
 271                     s->nextSubParticle = 0;
 272                 }
 273             }
 274
 275             s->lastParticleTime = flurry->fTime;
 276         }
 277     } else {
 278         s->lastParticleTime = flurry->fTime;
 279         s->firstTime = 0;
 280     }
 281
 282     for(i=0;i<3;i++) {
 283         s->old[i] = flurry->star->position[i];
 284     }
 285
 286     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 287     frameRateModifier = 42.5f / frameRate;
 288
 289     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 290         for(k=0; k<4; k++) {
 291             float dx,dy,dz;
 292             float f;
 293             float rsquared;
 294             float mag;
 295             float deltax;
 296             float deltay;
 297             float deltaz;
 298
 299             if (s->p[i].dead.i[k]) {
 300                 continue;
 301             }
 302
 303             deltax = s->p[i].delta[0].f[k];
 304             deltay = s->p[i].delta[1].f[k];
 305             deltaz = s->p[i].delta[2].f[k];
 306
 307             for(j=0;j<flurry->numStreams;j++) {
 308                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 309                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 310                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 311                 rsquared = (dx*dx+dy*dy+dz*dz);
 312
 313                 /*
 314                     asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
 315                     f *= gravity*frameRateModifier;
 316                 */
 317                 f = ( gravity  * frameRateModifier ) / rsquared;
 318
 319                 if((((i*4)+k) % flurry->numStreams) == j) {
 320                     f *= 1.0f + streamBias;
 321                 }
 322
 323                 mag = f / (float) sqrt(rsquared);
 324
 325                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 326
 327                 deltax -= (dx * mag);
 328                 deltay -= (dy * mag);
 329                 deltaz -= (dz * mag);
 330             }
 331
 332             /* slow this particle down by flurry->drag */
 333             deltax *= flurry->drag;
 334             deltay *= flurry->drag;
 335             deltaz *= flurry->drag;
 336
 337             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 338                 s->p[i].dead.i[k] = 1;
 339                 continue;
 340             }
 341
 342             /* update the position */
 343             s->p[i].delta[0].f[k] = deltax;
 344             s->p[i].delta[1].f[k] = deltay;
 345             s->p[i].delta[2].f[k] = deltaz;
 346             for(j=0;j<3;j++) {
 347                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 348                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 349             }
 350         }
 351     }
 352 }
 353
 354 #endif
 355
 356 #ifdef __VEC__
 357
 358 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 359 {
         /*
          * AltiVec version of the smoke update: emission is done with scalar
          * code, then each group of four sub-particles is integrated at once
          * using the .v (vector) view of the particle fields.
          *
          *   global - not referenced directly in this function.
          *   flurry - star/spark positions and colors, numStreams, fTime,
          *            fDeltaTime, dframe and drag are read from it.
          *   s      - smoke state; updated in place.
          */
 360     unsigned int i,j;
 361     float sx = flurry->star->position[0];
 362     float sy = flurry->star->position[1];
 363     float sz = flurry->star->position[2];
 364     double frameRate;
 365     floatToVector frameRateModifier;
 366     floatToVector gravityV;
 367     floatToVector dragV;
 368     floatToVector deltaTimeV;
 369     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 370     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 371     const vector float biasConst = (vector float)(streamBias);
 372
         /* Broadcast idiom used three times below: store the scalar in
            element 0, then vec_splat copies element 0 to all four. */
 373     gravityV.f[0] = gravity;
 374     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 375
 376     dragV.f[0] = flurry->drag;
 377     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 378
 379     deltaTimeV.f[0] = flurry->fDeltaTime;
 380     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 381
 382     s->frame++;
 383
         /* The first call only starts the emission clock (else branch). */
 384     if(!s->firstTime) {
 385         /* emit one puff per stream once at least 1/121 of an fTime unit
                has passed since the last emission
                (original note: "release 12 puffs every frame") */
 386         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 387             float dx,dy,dz,deltax,deltay,deltaz;
 388             float f;
 389             float rsquared;
 390             float mag;
 391
                 /* base velocity: 5 * (previous star position - current) */
 392             dx = s->old[0] - sx;
 393             dy = s->old[1] - sy;
 394             dz = s->old[2] - sz;
 395             mag = 5.0f;
 396             deltax = (dx * mag);
 397             deltay = (dy * mag);
 398             deltaz = (dz * mag);
 399             for(i=0;i<flurry->numStreams;i++) {
 400                 float streamSpeedCoherenceFactor;
 401
 402                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 403                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 404                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 405                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 406                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 407                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 408                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 409                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 410                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 411                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 412                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 413                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 414                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 415                 rsquared = (dx*dx+dy*dy+dz*dz);
 416                 f = streamSpeed * streamSpeedCoherenceFactor;
 417
                     /* NOTE(review): rsquared == 0 (star exactly on the
                        spark) divides by zero here and yields NaN deltas. */
 418                 mag = f / (float) sqrt(rsquared);
 419                 /*
 420                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 421                     mag *= f;
 422                 */
 423
 424                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 425                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 426                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 427                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 428                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 429                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 430                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 431                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 432                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 433                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
                     /* advance the write cursor; wrap at the end of the pool */
 434                 s->nextSubParticle++;
 435                 if (s->nextSubParticle==4) {
 436                     s->nextParticle++;
 437                     s->nextSubParticle=0;
 438                 }
 439                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 440                     s->nextParticle = 0;
 441                     s->nextSubParticle = 0;
 442                 }
 443             }
 444
 445             s->lastParticleTime = flurry->fTime;
 446         }
 447     } else {
 448         s->lastParticleTime = flurry->fTime;
 449         s->firstTime = 0;
 450     }
 451
         /* remember the star position for the next call's base velocity */
 452     for(i=0;i<3;i++) {
 453         s->old[i] = flurry->star->position[i];
 454     }
 455
         /* After the next four statements every element of
            frameRateModifier.v holds (42.5 / frameRate) * gravity. */
 456     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 457     frameRateModifier.f[0] = 42.5f / frameRate;
 458     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 459
 460     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 461
         /* One iteration handles the four sub-particles of group i. */
 462     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 463         /* floatToVector f; */
 464         vector float deltax, deltay, deltaz;
 465         vector float distTemp;
 466         vector unsigned int deadTemp;
 467         /* floatToVector infopos0, infopos1, infopos2; */
 468         intToVector mod;
 469         vector unsigned int jVec;
 470
 471
             /* Prefetch hint for a group four ahead.
                NOTE(review): for the last iterations &s->p[i+4] lies past
                index NUMSMOKEPARTICLES/4 - 1; p's declared length is not
                visible here - confirm the address is harmless. */
 472         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 473
             /* skip the group only when all four dead flags are non-zero */
 474         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 475             continue;
 476         }
 477
 478         deltax = s->p[i].delta[0].v;
 479         deltay = s->p[i].delta[1].v;
 480         deltaz = s->p[i].delta[2].v;
 481
             /* mod.i[n] = ((i*4)+n) % numStreams: the stream whose spark
                gets the extra bias for sub-particle n.
                NOTE(review): "i<<2 + 0" parses as i << (2 + 0) because +
                binds tighter than <<; the value is still i*4. */
 482         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 483         if(mod.i[0]+1 == flurry->numStreams) {
 484             mod.i[1] = 0;
 485         } else {
 486             mod.i[1] = mod.i[0]+1;
 487         }
 488         if(mod.i[1]+1 == flurry->numStreams) {
 489             mod.i[2] = 0;
 490         } else {
 491             mod.i[2] = mod.i[1]+1;
 492         }
 493         if(mod.i[2]+1 == flurry->numStreams) {
 494             mod.i[3] = 0;
 495         } else {
 496             mod.i[3] = mod.i[2]+1;
 497         }
 498
             /* jVec = 0 in every element (x XOR x); it counts j as a vector.
                NOTE(review): this reads jVec before it is initialised. */
 499         jVec = vec_xor(jVec, jVec);
 500
 501         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 502         for(j=0; j<flurry->numStreams;j++) {
 503             vector float ip0, ip1 = (vector float)(0.0), ip2;
 504             vector float dx, dy, dz;
 505             vector float rsquared, f;
 506             vector float one_over_rsquared;
 507             vector float biasTemp;
 508             vector float mag;
 509             vector bool int biasOr;
 510
                 /* Unaligned load of spark j's position: fetch the 16-byte
                    block containing it and, only when the data starts 8 or
                    more bytes into that block, the following block as well;
                    vec_perm with the vec_lvsl mask then shifts the position
                    to the front of ip0. */
 511             ip0 = vec_ld(0, flurry->spark[j]->position);
 512             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 513                 ip1 = vec_ld(16, flurry->spark[j]->position);
 514             }
 515
 516             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
                 /* broadcast y, z and x of the spark into ip1, ip2, ip0 */
 517             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 518             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 519             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 520
 521             dx = vec_sub(s->p[i].position[0].v, ip0);
 522             dy = vec_sub(s->p[i].position[1].v, ip1);
 523             dz = vec_sub(s->p[i].position[2].v, ip2);
 524
                 /* rsquared = dx*dx + dy*dy + dz*dz */
 525             rsquared = vec_madd(dx, dx, zero);
 526             rsquared = vec_madd(dy, dy, rsquared);
 527             rsquared = vec_madd(dz, dz, rsquared);
 528
                 /* biasTemp = 1 + streamBias where j equals the element's
                    mod value, 1 elsewhere */
 529             biasOr = vec_cmpeq(jVec, mod.v);
 530             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 531
                 /* f = bias * gravity * (42.5 / frameRate) / rsquared, using
                    the vec_re reciprocal estimate */
 532             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 533             one_over_rsquared = vec_re(rsquared);
 534             f = vec_madd(f, one_over_rsquared, zero);
 535
                 /* mag = f / sqrt(rsquared), using the vec_rsqrte estimate */
 536             mag = vec_rsqrte(rsquared);
 537             mag = vec_madd(mag, f, zero);
 538
                 /* delta -= d * mag  (vec_nmsub(a,b,c) computes c - a*b) */
 539             deltax = vec_nmsub(dx, mag, deltax);
 540             deltay = vec_nmsub(dy, mag, deltay);
 541             deltaz = vec_nmsub(dz, mag, deltaz);
 542
 543             jVec = vec_add(jVec, (vector unsigned int)(1));
 544         }
 545
 546         /* slow this particle down by flurry->drag */
 547         deltax = vec_madd(deltax, dragV.v, zero);
 548         deltay = vec_madd(deltay, dragV.v, zero);
 549         deltaz = vec_madd(deltaz, dragV.v, zero);
 550
             /* distTemp = squared speed of each sub-particle */
 551         distTemp = vec_madd(deltax, deltax, zero);
 552         distTemp = vec_madd(deltay, deltay, distTemp);
 553         distTemp = vec_madd(deltaz, deltaz, distTemp);
 554
             /* set the dead flag (OR in 1) where squared speed >= 25000000 */
 555         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 556         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 557         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
             /* NOTE(review): the store below is skipped only when all four
                are dead; otherwise delta and position are written for every
                element, dead ones included, unlike the scalar versions which
                skip each dead sub-particle individually. */
 558         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 559             continue;
 560         }
 561
 562         /* update the position */
 563         s->p[i].delta[0].v = deltax;
 564         s->p[i].delta[1].v = deltay;
 565         s->p[i].delta[2].v = deltaz;
             /* position += delta * fDeltaTime, keeping the old position */
 566         for(j=0;j<3;j++) {
 567             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 568             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 569         }
 570     }
 571 }
 572
 573 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
 574 {
 575     unsigned int i,j;
 576     float sx = flurry->star->position[0];
 577     float sy = flurry->star->position[1];
 578     float sz = flurry->star->position[2];
 579     double frameRate;
 580     floatToVector frameRateModifier;
 581     floatToVector gravityV;
 582     floatToVector dragV;
 583     floatToVector deltaTimeV;
 584     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 585     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 586     const vector float biasConst = (vector float)(streamBias);
 587
 588     gravityV.f[0] = gravity;
 589     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 590
 591     dragV.f[0] = flurry->drag;
 592     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 593
 594     deltaTimeV.f[0] = flurry->fDeltaTime;
 595     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 596
 597     s->frame++;
 598
 599     if(!s->firstTime) {
 600         /* release 12 puffs every frame */
 601         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 602             float dx,dy,dz,deltax,deltay,deltaz;
 603             float f;
 604             float rsquared;
 605             float mag;
 606
 607             dx = s->old[0] - sx;
 608             dy = s->old[1] - sy;
 609             dz = s->old[2] - sz;
 610             mag = 5.0f;
 611             deltax = (dx * mag);
 612             deltay = (dy * mag);
 613             deltaz = (dz * mag);
 614             for(i=0;i<flurry->numStreams;i++) {
 615                 float streamSpeedCoherenceFactor;
 616
 617                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 618                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 619                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 620                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 621                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 622                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 623                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 624                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 625                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 626                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 627                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 628                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 629                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 630                 rsquared = (dx*dx+dy*dy+dz*dz);
 631                 f = streamSpeed * streamSpeedCoherenceFactor;
 632
 633                 mag = f / (float) sqrt(rsquared);
 634                 /*
 635                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 636                     mag *= f;
 637                 */
 638
 639                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 640                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 641                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 642                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 643                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 644                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 645                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 646                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 647                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 648                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 649                 s->nextSubParticle++;
 650                 if (s->nextSubParticle==4) {
 651                     s->nextParticle++;
 652                     s->nextSubParticle=0;
 653                 }
 654                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 655                     s->nextParticle = 0;
 656                     s->nextSubParticle = 0;
 657                 }
 658             }
 659
 660             s->lastParticleTime = flurry->fTime;
 661         }
 662     } else {
 663         s->lastParticleTime = flurry->fTime;
 664         s->firstTime = 0;
 665     }
 666
 667     for(i=0;i<3;i++) {
 668         s->old[i] = flurry->star->position[i];
 669     }
 670
 671     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 672     frameRateModifier.f[0] = 42.5f / frameRate;
 673     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 674
 675     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 676
 677     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 678         /* floatToVector f; */
 679         vector float deltax, deltay, deltaz;
 680         vector float distTemp;
 681         vector unsigned int deadTemp;
 682         /* floatToVector infopos0, infopos1, infopos2; */
 683         intToVector mod;
 684         vector unsigned int jVec;
 685         vector unsigned int intOne = vec_splat_u32(1);
 686         vector float floatOne = vec_ctf(intOne, 0);
 687
 688
 689         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 690
 691         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 692             continue;
 693         }
 694
 695         deltax = s->p[i].delta[0].v;
 696         deltay = s->p[i].delta[1].v;
 697         deltaz = s->p[i].delta[2].v;
 698
 699         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 700         if(mod.i[0]+1 == flurry->numStreams) {
 701             mod.i[1] = 0;
 702         } else {
 703             mod.i[1] = mod.i[0]+1;
 704         }
 705         if(mod.i[1]+1 == flurry->numStreams) {
 706             mod.i[2] = 0;
 707         } else {
 708             mod.i[2] = mod.i[1]+1;
 709         }
 710         if(mod.i[2]+1 == flurry->numStreams) {
 711             mod.i[3] = 0;
 712         } else {
 713             mod.i[3] = mod.i[2]+1;
 714         }
 715
 716         jVec = vec_xor(jVec, jVec);
 717
 718         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 719         for(j=0; j + 3 < flurry->numStreams;j+=4)
 720         {
 721             vector float dxa, dya, dza;
 722             vector float dxb, dyb, dzb;
 723             vector float dxc, dyc, dzc;
 724             vector float dxd, dyd, dzd;
 725             vector float ip0a, ip1a;
 726             vector float ip0b, ip1b;
 727             vector float ip0c, ip1c;
 728             vector float ip0d, ip1d;
 729             vector float rsquaredA;
 730             vector float rsquaredB;
 731             vector float rsquaredC;
 732             vector float rsquaredD;
 733             vector float fA, fB, fC, fD;
 734             vector float biasTempA;
 735             vector float biasTempB;
 736             vector float biasTempC;
 737             vector float biasTempD;
 738             vector float magA;
 739             vector float magB;
 740             vector float magC;
 741             vector float magD;
 742
 743             vector float one_over_rsquaredA;
 744             vector float one_over_rsquaredB;
 745             vector float one_over_rsquaredC;
 746             vector float one_over_rsquaredD;
 747             vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
 748
 749             /* load vectors */
 750             ip0a = vec_ld(0, flurry->spark[j]->position);
 751             ip0b = vec_ld(0, flurry->spark[j+1]->position);
 752             ip0c = vec_ld(0, flurry->spark[j+2]->position);
 753             ip0d = vec_ld(0, flurry->spark[j+3]->position);
 754             ip1a = vec_ld( 12, flurry->spark[j]->position );
 755             ip1b = vec_ld( 12, flurry->spark[j+1]->position );
 756             ip1c = vec_ld( 12, flurry->spark[j+2]->position );
 757             ip1d = vec_ld( 12, flurry->spark[j+3]->position );
 758
 759             /* align them */
 760             ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
 761             ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
 762             ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
 763             ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
 764
 765             dxa = vec_splat( ip0a, 0  );
 766             dxb = vec_splat( ip0b, 0  );
 767             dxc = vec_splat( ip0c, 0  );
 768             dxd = vec_splat( ip0d, 0  );
 769             dxa = vec_sub( s->p[i].position[0].v, dxa );
 770             dxb = vec_sub( s->p[i].position[0].v, dxb );
 771             dxc = vec_sub( s->p[i].position[0].v, dxc );
 772             dxd = vec_sub( s->p[i].position[0].v, dxd );
 773
 774             dya = vec_splat( ip0a, 1  );
 775             dyb = vec_splat( ip0b, 1  );
 776             dyc = vec_splat( ip0c, 1  );
 777             dyd = vec_splat( ip0d, 1  );
 778             dya = vec_sub( s->p[i].position[1].v, dya );
 779             dyb = vec_sub( s->p[i].position[1].v, dyb );
 780             dyc = vec_sub( s->p[i].position[1].v, dyc );
 781             dyd = vec_sub( s->p[i].position[1].v, dyd );
 782
 783             dza = vec_splat( ip0a, 2  );
 784             dzb = vec_splat( ip0b, 2  );
 785             dzc = vec_splat( ip0c, 2  );
 786             dzd = vec_splat( ip0d, 2  );
 787             dza = vec_sub( s->p[i].position[2].v, dza );
 788             dzb = vec_sub( s->p[i].position[2].v, dzb );
 789             dzc = vec_sub( s->p[i].position[2].v, dzc );
 790             dzd = vec_sub( s->p[i].position[2].v, dzd );
 791
 792             rsquaredA = vec_madd( dxa, dxa, zero );
 793             rsquaredB = vec_madd( dxb, dxb, zero );
 794             rsquaredC = vec_madd( dxc, dxc, zero );
 795             rsquaredD = vec_madd( dxd, dxd, zero );
 796
 797             rsquaredA = vec_madd( dya, dya, rsquaredA );
 798             rsquaredB = vec_madd( dyb, dyb, rsquaredB );
 799             rsquaredC = vec_madd( dyc, dyc, rsquaredC );
 800             rsquaredD = vec_madd( dyd, dyd, rsquaredD );
 801
 802             rsquaredA = vec_madd( dza, dza, rsquaredA );
 803             rsquaredB = vec_madd( dzb, dzb, rsquaredB );
 804             rsquaredC = vec_madd( dzc, dzc, rsquaredC );
 805             rsquaredD = vec_madd( dzd, dzd, rsquaredD );
 806
 807             biasOrA = vec_cmpeq( jVec, mod.v );
 808             jVec = vec_add(jVec, intOne);
 809             biasOrB = vec_cmpeq( jVec, mod.v );
 810             jVec = vec_add(jVec, intOne);
 811             biasOrC = vec_cmpeq( jVec, mod.v );
 812             jVec = vec_add(jVec, intOne);
 813             biasOrD = vec_cmpeq( jVec, mod.v );
 814             jVec = vec_add(jVec, intOne);
 815
 816             biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
 817             biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
 818             biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
 819             biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
 820
 821             fA = vec_madd( biasTempA, frameRateModifier.v, zero);
 822             fB = vec_madd( biasTempB, frameRateModifier.v, zero);
 823             fC = vec_madd( biasTempC, frameRateModifier.v, zero);
 824             fD = vec_madd( biasTempD, frameRateModifier.v, zero);
 825             one_over_rsquaredA = vec_re( rsquaredA );
 826             one_over_rsquaredB = vec_re( rsquaredB );
 827             one_over_rsquaredC = vec_re( rsquaredC );
 828             one_over_rsquaredD = vec_re( rsquaredD );
 829             fA = vec_madd( fA, one_over_rsquaredA, zero);
 830             fB = vec_madd( fB, one_over_rsquaredB, zero);
 831             fC = vec_madd( fC, one_over_rsquaredC, zero);
 832             fD = vec_madd( fD, one_over_rsquaredD, zero);
 833             magA = vec_rsqrte( rsquaredA );
 834             magB = vec_rsqrte( rsquaredB );
 835             magC = vec_rsqrte( rsquaredC );
 836             magD = vec_rsqrte( rsquaredD );
 837             magA = vec_madd( magA, fA, zero );
 838             magB = vec_madd( magB, fB, zero );
 839             magC = vec_madd( magC, fC, zero );
 840             magD = vec_madd( magD, fD, zero );
 841             deltax = vec_nmsub( dxa, magA, deltax );
 842             deltay = vec_nmsub( dya, magA, deltay );
 843             deltaz = vec_nmsub( dza, magA, deltaz );
 844
 845             deltax = vec_nmsub( dxb, magB, deltax );
 846             deltay = vec_nmsub( dyb, magB, deltay );
 847             deltaz = vec_nmsub( dzb, magB, deltaz );
 848
 849             deltax = vec_nmsub( dxc, magC, deltax );
 850             deltay = vec_nmsub( dyc, magC, deltay );
 851             deltaz = vec_nmsub( dzc, magC, deltaz );
 852
 853             deltax = vec_nmsub( dxd, magD, deltax );
 854             deltay = vec_nmsub( dyd, magD, deltay );
 855             deltaz = vec_nmsub( dzd, magD, deltaz );
 856         }
 857
 858
 859         for(;j<flurry->numStreams;j++) {
 860             vector float ip0, ip1 = (vector float)(0.0), ip2;
 861             vector float dx, dy, dz;
 862             vector float rsquared, f;
 863             vector float one_over_rsquared;
 864             vector float biasTemp;
 865             vector float mag;
 866             vector bool int biasOr;
 867
 868             ip0 = vec_ld(0, flurry->spark[j]->position);
 869             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 870                 ip1 = vec_ld(16, flurry->spark[j]->position);
 871             }
 872
 873             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 874             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 875             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 876             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 877
 878             dx = vec_sub(s->p[i].position[0].v, ip0);
 879             dy = vec_sub(s->p[i].position[1].v, ip1);
 880             dz = vec_sub(s->p[i].position[2].v, ip2);
 881
 882             rsquared = vec_madd(dx, dx, zero);
 883             rsquared = vec_madd(dy, dy, rsquared);
 884             rsquared = vec_madd(dz, dz, rsquared);
 885
 886             biasOr = vec_cmpeq(jVec, mod.v);
 887             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 888
 889             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 890             one_over_rsquared = vec_re(rsquared);
 891             f = vec_madd(f, one_over_rsquared, zero);
 892
 893             mag = vec_rsqrte(rsquared);
 894             mag = vec_madd(mag, f, zero);
 895
 896             deltax = vec_nmsub(dx, mag, deltax);
 897             deltay = vec_nmsub(dy, mag, deltay);
 898             deltaz = vec_nmsub(dz, mag, deltaz);
 899
 900             jVec = vec_add(jVec, (vector unsigned int)(1));
 901         }
 902
 903         /* slow this particle down by flurry->drag */
 904         deltax = vec_madd(deltax, dragV.v, zero);
 905         deltay = vec_madd(deltay, dragV.v, zero);
 906         deltaz = vec_madd(deltaz, dragV.v, zero);
 907
 908         distTemp = vec_madd(deltax, deltax, zero);
 909         distTemp = vec_madd(deltay, deltay, distTemp);
 910         distTemp = vec_madd(deltaz, deltaz, distTemp);
 911
 912         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 913         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 914         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 915         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 916             continue;
 917         }
 918
 919         /* update the position */
 920         s->p[i].delta[0].v = deltax;
 921         s->p[i].delta[1].v = deltay;
 922         s->p[i].delta[2].v = deltaz;
 923         for(j=0;j<3;j++) {
 924             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 925             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 926         }
 927     }
 928 }
 929
 930 #endif
 931 #endif /* 0 */
 932
 933 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
 934 {
 935         int svi = 0;
 936         int sci = 0;
 937         int sti = 0;
 938         int si = 0;
 939         float width;
 940         float sx,sy;
 941         float u0,v0,u1,v1;
 942         float w,z;
 943         float screenRatio = global->sys_glWidth / 1024.0f;
 944         float hslash2 = global->sys_glHeight * 0.5f;
 945         float wslash2 = global->sys_glWidth * 0.5f;
 946         int i,k;
 947
 948         width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
 949
 950         for (i=0;i<NUMSMOKEPARTICLES/4;i++)
 951         {
 952             for (k=0; k<4; k++) {
 953                 float thisWidth;
 954                 float oldz;
 955
 956                 if (s->p[i].dead.i[k]) {
 957                     continue;
 958                 }
 959                 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
 960                 if (thisWidth >= width)
 961                 {
 962                         s->p[i].dead.i[k] = 1;
 963                         continue;
 964                 }
 965                 z = s->p[i].position[2].f[k];
 966                 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
 967                 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
 968                 oldz = s->p[i].oldposition[2].f[k];
 969                 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
 970                 {
 971                         continue;
 972                 }
 973
 974                 w = MAX_(1.0f,thisWidth/z);
 975                 {
 976                         float oldx = s->p[i].oldposition[0].f[k];
 977                         float oldy = s->p[i].oldposition[1].f[k];
 978                         float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
 979                         float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
 980                         float dx = (sx-oldscreenx);
 981                         float dy = (sy-oldscreeny);
 982
 983                         float d = FastDistance2D(dx, dy);
 984
 985                         float sm, os, ow;
 986                         if (d)
 987                         {
 988                                 sm = w/d;
 989                         }
 990                         else
 991                         {
 992                                 sm = 0.0f;
 993                         }
 994                         ow = MAX_(1.0f,thisWidth/oldz);
 995                         if (d)
 996                         {
 997                                 os = ow/d;
 998                         }
 999                         else
1000                         {
1001                                 os = 0.0f;
1002                         }
1003
1004                         {
1005                                 floatToVector cmv;
1006                                 float cm;
1007                                 float m = 1.0f + sm;
1008
1009                                 float dxs = dx*sm;
1010                                 float dys = dy*sm;
1011                                 float dxos = dx*os;
1012                                 float dyos = dy*os;
1013                                 float dxm = dx*m;
1014                                 float dym = dy*m;
1015
1016                                 s->p[i].animFrame.i[k]++;
1017                                 if (s->p[i].animFrame.i[k] >= 64)
1018                                 {
1019                                         s->p[i].animFrame.i[k] = 0;
1020                                 }
1021
1022                                 u0 = (s->p[i].animFrame.i[k]&&7) * 0.125f;
1023                                 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
1024                                 u1 = u0 + 0.125f;
1025                                 v1 = v0 + 0.125f;
1026                                 u1 = u0 + 0.125f;
1027                                 v1 = v0 + 0.125f;
1028                                 cm = (1.375f - thisWidth/width);
1029                                 if (s->p[i].dead.i[k] == 3)
1030                                 {
1031                                         cm *= 0.125f;
1032                                         s->p[i].dead.i[k] = 1;
1033                                 }
1034                                 si++;
1035                                 cm *= brightness;
1036                                 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1037                                 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1038                                 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1039                                 cmv.f[3] = s->p[i].color[3].f[k]*cm;
1040
1041 #if 0
1042                                 /* MDT we can't use vectors in the Scalar routine */
1043                                 s->seraphimColors[sci++].v = cmv.v;
1044                                 s->seraphimColors[sci++].v = cmv.v;
1045                                 s->seraphimColors[sci++].v = cmv.v;
1046                                 s->seraphimColors[sci++].v = cmv.v;
1047 #else
1048                                 {
1049                                     int ii, jj;
1050                                     for (jj = 0; jj < 4; jj++) {
1051                                         for (ii = 0; ii < 4; ii++) {
1052                                             s->seraphimColors[sci].f[ii] = cmv.f[ii];
1053                                         }
1054                                         sci += 1;
1055                                     }
1056                                 }
1057 #endif
1058
1059                                 s->seraphimTextures[sti++] = u0;
1060                                 s->seraphimTextures[sti++] = v0;
1061                                 s->seraphimTextures[sti++] = u0;
1062                                 s->seraphimTextures[sti++] = v1;
1063
1064                                 s->seraphimTextures[sti++] = u1;
1065                                 s->seraphimTextures[sti++] = v1;
1066                                 s->seraphimTextures[sti++] = u1;
1067                                 s->seraphimTextures[sti++] = v0;
1068
1069                                 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1070                                 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1071                                 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1072                                 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1073                                 svi++;
1074
1075                                 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1076                                 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1077                                 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1078                                 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
1079                                 svi++;
1080                         }
1081                 }
1082             }
1083         }
1084         glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1085         glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1086         glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1087         glDrawArrays(GL_QUADS,0,si*4);
1088 }
1089
1090 #if 0
1091 #ifdef __VEC__
1092
1093 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1094 {
1095     const vector float zero = (vector float)(0.0);
1096     int svi = 0;
1097     int sci = 0;
1098     int sti = 0;
1099     int si = 0;
1100     floatToVector width;
1101     vector float sx,sy;
1102     floatToVector u0,v0,u1,v1;
1103     vector float one_over_z;
1104     vector float w;
1105     floatToVector z;
1106     float screenRatio = global->sys_glWidth / 1024.0f;
1107     float hslash2 = global->sys_glHeight * 0.5f;
1108     float wslash2 = global->sys_glWidth * 0.5f;
1109     int i,kk;
1110     floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1111     floatToVector glWidthV;
1112     floatToVector cm;
1113     vector float cmv[4];
1114     vector float svec[4], ovec[4];
1115     vector float oldscreenx, oldscreeny;
1116     vector float sm;
1117     vector float frameAnd7;
1118     vector float frameShift3;
1119     vector float one_over_width;
1120     vector float dx, dy;
1121     vector float os;
1122     vector unsigned int vSi = vec_splat_u32(0);
1123     const vector float eighth = (vector float)(0.125);
1124     float glWidth50 = global->sys_glWidth + 50.0f;
1125     float glHeight50 = global->sys_glHeight + 50.0f;
1126     vector float vGLWidth50, vGLHeight50;
1127     unsigned int blitBool;
1128
1129     vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
1130
1131     {
1132         vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1133         vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1134         permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1135         permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1136         vGLWidth50 = vec_lde( 0, &glWidth50 );
1137         vGLHeight50 = vec_lde( 0, &glHeight50 );
1138         vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1139         vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
1140     }
1141
1142     width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1143     width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1144
1145     briteV.f[0] = brightness;
1146     briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1147
1148     fTimeV.f[0] = (float) flurry->fTime;
1149     fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1150
1151     expansionV.f[0] = flurry->streamExpansion;
1152     expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1153
1154     screenRatioV.f[0] = screenRatio;
1155     screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1156
1157     hslash2V.f[0] = hslash2;
1158     hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1159
1160     wslash2V.f[0] = wslash2;
1161     wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1162
1163     streamSizeV.f[0] = streamSize;
1164     streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1165
1166     glWidthV.f[0] = global->sys_glWidth;
1167     glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
1168
1169     for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1170         vector float thisWidth;
1171         vector float oldz;
1172         vector float oldx, oldy, one_over_oldz;
1173         vector float xabs, yabs, mn;
1174         vector float d;
1175         vector float one_over_d;
1176         vector bool int dnz;
1177         vector float ow;
1178
1179         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
1180
1181         if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1182
1183         blitBool = 0; /* keep track of particles that actually need to be drawn */
1184
1185         thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1186         thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1187         thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
1188
1189         z.v = s->p[i].position[2].v;
1190         one_over_z = vec_re(z.v);
1191
1192         sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1193         sx = vec_madd(sx, one_over_z, wslash2V.v);
1194         sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1195         sy = vec_madd(sy, one_over_z, hslash2V.v);
1196
1197         oldz = s->p[i].oldposition[2].v;
1198
1199         w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
1200
1201         oldx = s->p[i].oldposition[0].v;
1202         oldy = s->p[i].oldposition[1].v;
1203         one_over_oldz = vec_re(oldz);
1204         oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1205         oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1206         oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1207         oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1208         dx = vec_sub(sx,oldscreenx);
1209         dy = vec_sub(sy,oldscreeny);
1210
1211         xabs = vec_abs(dx);
1212         yabs = vec_abs(dy);
1213         mn = vec_min(xabs,yabs);
1214         d = vec_add(xabs,yabs);
1215         d = vec_madd(mn, (vector float)(-0.6875), d);
1216
1217         ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1218         one_over_d = vec_re(d);
1219         dnz = vec_cmpgt(d, zero);
1220         sm = vec_madd(w, one_over_d, zero);
1221         sm = vec_and(sm, dnz);
1222         os = vec_madd(ow, one_over_d, zero);
1223         os = vec_and(os, dnz);
1224
1225         {
1226             intToVector tempMask;
1227             vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1228             vector bool int  gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1229             vector bool int  glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1230             vector bool int  glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1231             vector bool int  test50x    = vec_cmplt( sx, (vector float) (-50.0) );
1232             vector bool int  test50y    = vec_cmplt( sy, (vector float) (-50.0) );
1233             vector bool int  testz      = vec_cmplt( z.v, (vector float) (25.0) );
1234             vector bool int  testoldz   = vec_cmplt( oldz, (vector float) (25.0) );
1235             mask = vec_or( mask, gtMask );
1236             s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1237             mask = vec_or( mask, glWidth50Test );
1238             mask = vec_or( mask, glHeight50Test );
1239             mask = vec_or( mask, test50x );
1240             mask = vec_or( mask, test50y );
1241             mask = vec_or( mask, testz );
1242             mask = vec_or( mask, testoldz );
1243             tempMask.v = (vector unsigned int)mask;
1244
1245             s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1246             s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
1247
1248             frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1249             u0.v = vec_madd(frameAnd7, eighth, zero);
1250
1251             frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1252             v0.v = vec_madd(frameAnd7, eighth, zero);
1253
1254             u1.v = vec_add(u0.v, eighth);
1255             v1.v = vec_add(v0.v, eighth);
1256
1257             one_over_width = vec_re(width.v);
1258             cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1259             cm.v = vec_madd(cm.v, briteV.v, zero);
1260
1261             vSi = vec_sub( vSi, vec_nor( mask, mask ) );
1262             {
1263                 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1264                 vector unsigned int temp = (vector unsigned int)mask;
1265                 temp = vec_andc( blitMask, temp  );
1266                 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1267                 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1268                 vec_ste( temp, 0, &blitBool );
1269
1270             }
1271
1272             {
1273                 vector float temp1, temp2, temp3, temp4;
1274                 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1275
1276                 temp1 = vec_mergeh( u0.v, u0.v );
1277                 temp2 = vec_mergel( u0.v, u0.v );
1278                 temp3 = vec_mergeh( v0.v, v1.v );
1279                 temp4 = vec_mergel( v0.v, v1.v );
1280
1281                 result1a = vec_mergeh( temp1, temp3 );
1282                 result1b = vec_mergel( temp1, temp3 );
1283                 result2a = vec_mergeh( temp2, temp4 );
1284                 result2b = vec_mergel( temp2, temp4 );
1285
1286                 temp1 = vec_mergeh( u1.v, u1.v );
1287                 temp2 = vec_mergel( u1.v, u1.v );
1288                 temp3 = vec_mergeh( v1.v, v0.v );
1289                 temp4 = vec_mergel( v1.v, v0.v );
1290
1291                 result3a = vec_mergeh( temp1, temp3 );
1292                 result3b = vec_mergel( temp1, temp3 );
1293                 result4a = vec_mergeh( temp2, temp4 );
1294                 result4b = vec_mergel( temp2, temp4 );
1295
1296                 if( blitBool & 1 )
1297                 {
1298                     vec_st( result1a, 0, &s->seraphimTextures[sti] );
1299                     vec_st( result3a, 16, &s->seraphimTextures[sti]);
1300                     sti+= 8;
1301                 }
1302                 if( blitBool & 2 )
1303                 {
1304                     vec_st( result1b, 0, &s->seraphimTextures[sti]);
1305                     vec_st( result3b, 16, &s->seraphimTextures[sti]);
1306                     sti+= 8;
1307                 }
1308                 if( blitBool & 4 )
1309                 {
1310                     vec_st( result2a, 0, &s->seraphimTextures[sti]);
1311                     vec_st( result4a, 16, &s->seraphimTextures[sti]);
1312                     sti+= 8;
1313                 }
1314                 if( blitBool & 8 )
1315                 {
1316                     vec_st( result2b, 0, &s->seraphimTextures[sti]);
1317                     vec_st( result4b, 16, &s->seraphimTextures[sti]);
1318                     sti+= 8;
1319                 }
1320             }
1321         }
1322
1323         cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1324         cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1325         cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1326         cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1327         {
1328             vector float vI0, vI1, vI2, vI3;
1329
1330             vI0 = vec_mergeh ( cmv[0], cmv[2] );
1331             vI1 = vec_mergeh ( cmv[1], cmv[3] );
1332             vI2 = vec_mergel ( cmv[0], cmv[2] );
1333             vI3 = vec_mergel ( cmv[1], cmv[3] );
1334
1335             cmv[0] = vec_mergeh ( vI0, vI1 );
1336             cmv[1] = vec_mergel ( vI0, vI1 );
1337             cmv[2] = vec_mergeh ( vI2, vI3 );
1338             cmv[3] = vec_mergel ( vI2, vI3 );
1339         }
1340
1341         vec_dst( cmv, 0x0D0100D0, 1 );
1342
1343         {
1344             vector float sxd, syd;
1345             vector float sxdm, sxdp, sydm, sydp;
1346             vector float oxd, oyd;
1347             vector float oxdm, oxdp, oydm, oydp;
1348             vector float vI0, vI1, vI2, vI3;
1349             vector float dxs, dys;
1350             vector float dxos, dyos;
1351             vector float dxm, dym;
1352             vector float m;
1353
1354             m = vec_add((vector float)(1.0), sm);
1355
1356             dxs = vec_madd(dx, sm, zero);
1357             dys = vec_madd(dy, sm, zero);
1358             dxos = vec_madd(dx, os, zero);
1359             dyos = vec_madd(dy, os, zero);
1360             dxm = vec_madd(dx, m, zero);
1361             dym = vec_madd(dy, m, zero);
1362
1363             sxd = vec_add(sx, dxm);
1364             sxdm = vec_sub(sxd, dys);
1365             sxdp = vec_add(sxd, dys);
1366
1367             syd = vec_add(sy, dym);
1368             sydm = vec_sub(syd, dxs);
1369             sydp = vec_add(syd, dxs);
1370
1371             oxd = vec_sub(oldscreenx, dxm);
1372             oxdm = vec_sub(oxd, dyos);
1373             oxdp = vec_add(oxd, dyos);
1374
1375             oyd = vec_sub(oldscreeny, dym);
1376             oydm = vec_sub(oyd, dxos);
1377             oydp = vec_add(oyd, dxos);
1378
1379             vI0 = vec_mergeh ( sxdm, sxdp );
1380             vI1 = vec_mergeh ( sydp, sydm );
1381             vI2 = vec_mergel ( sxdm, sxdp );
1382             vI3 = vec_mergel ( sydp, sydm );
1383
1384             svec[0] = vec_mergeh ( vI0, vI1 );
1385             svec[1] = vec_mergel ( vI0, vI1 );
1386             svec[2] = vec_mergeh ( vI2, vI3 );
1387             svec[3] = vec_mergel ( vI2, vI3 );
1388
1389             vI0 = vec_mergeh ( oxdp, oxdm );
1390             vI1 = vec_mergeh ( oydm, oydp );
1391             vI2 = vec_mergel ( oxdp, oxdm );
1392             vI3 = vec_mergel ( oydm, oydp );
1393
1394             ovec[0] = vec_mergeh ( vI0, vI1 );
1395             ovec[1] = vec_mergel ( vI0, vI1 );
1396             ovec[2] = vec_mergeh ( vI2, vI3 );
1397             ovec[3] = vec_mergel ( vI2, vI3 );
1398         }
1399
1400         {
1401             int offset0 = (sci + 0) * sizeof( vector float );
1402             int offset1 = (sci + 1) * sizeof( vector float );
1403             int offset2 = (sci + 2) * sizeof( vector float );
1404             int offset3 = (sci + 3) * sizeof( vector float );
1405             int offset4 = (svi + 0) * sizeof( vector float );
1406             int offset5 = (svi + 1) * sizeof( vector float );
1407             vector float *colors = (vector float *)s->seraphimColors;
1408             vector float *vertices = (vector float *)s->seraphimVertices;
1409             for (kk=0; kk<4; kk++) {
1410                 if (blitBool>>kk & 1) {
1411                     vector float vcmv = cmv[kk];
1412                     vector float vsvec = svec[kk];
1413                     vector float vovec = ovec[kk];
1414
1415                     vec_st( vcmv, offset0, colors );
1416                     vec_st( vcmv, offset1, colors );
1417                     vec_st( vcmv, offset2, colors );
1418                     vec_st( vcmv, offset3, colors );
1419                     vec_st( vsvec, offset4, vertices );
1420                     vec_st( vovec, offset5, vertices );
1421                     colors += 4;
1422                     vertices += 2;
1423                     sci += 4;
1424                     svi += 2;
1425                 }
1426             }
1427         }
1428     }
1429     vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1430     vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1431     vec_ste( (vector signed int) vSi, 0, &si );
1432
1433     glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1434     glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1435     glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1436     glDrawArrays(GL_QUADS,0,si*4);
1437 }
1438
1439 #endif
1440 #endif /* 0 */