git.hungrycats.org Git - xscreensaver/blob - hacks/glx/flurry-smoke.c

   1 /*
   2
   3 Copyright (c) 2002, Calum Robinson
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions are met:
   8
   9 * Redistributions of source code must retain the above copyright notice, this
  10   list of conditions and the following disclaimer.
  11
  12 * Redistributions in binary form must reproduce the above copyright notice,
  13   this list of conditions and the following disclaimer in the documentation
  14   and/or other materials provided with the distribution.
  15
  16 * Neither the name of the author nor the names of its contributors may be used
  17   to endorse or promote products derived from this software without specific
  18   prior written permission.
  19
  20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 /* Smoke.cpp: implementation of the Smoke class. */
  34
  35 #ifdef HAVE_CONFIG_H
  36 # include "config.h"
  37 #endif
  38
  39 #include "flurry.h"
  40
  41 #define MAXANGLES 16384
  42 #define NOT_QUITE_DEAD 3
  43
  44 #define intensity 75000.0f;
  45
  46 void InitSmoke(SmokeV *s)
  47 {
  48     int i;
  49     s->nextParticle = 0;
  50     s->nextSubParticle = 0;
  51     s->lastParticleTime = 0.25f;
  52     s->firstTime = 1;
  53     s->frame = 0;
  54     for (i=0;i<3;i++) {
  55         s->old[i] = RandFlt(-100.0, 100.0);
  56     }
  57 }
  58
  59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
  60 {
  61     int i,j,k;
  62     float sx = flurry->star->position[0];
  63     float sy = flurry->star->position[1];
  64     float sz = flurry->star->position[2];
  65     double frameRate;
  66     double frameRateModifier;
  67
  68
  69     s->frame++;
  70
  71     if(!s->firstTime) {
  72         /* release 12 puffs every frame */
  73         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
  74             float dx,dy,dz,deltax,deltay,deltaz;
  75             float f;
  76             float rsquared;
  77             float mag;
  78
  79             dx = s->old[0] - sx;
  80             dy = s->old[1] - sy;
  81             dz = s->old[2] - sz;
  82             mag = 5.0f;
  83             deltax = (dx * mag);
  84             deltay = (dy * mag);
  85             deltaz = (dz * mag);
  86             for(i=0;i<flurry->numStreams;i++) {
  87                 float streamSpeedCoherenceFactor;
  88
  89                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
  90                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
  91                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
  92                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
  93                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
  94                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
  95                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
  96                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
  97                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
  98                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
  99                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 100                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 101                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 102                 rsquared = (dx*dx+dy*dy+dz*dz);
 103                 f = streamSpeed * streamSpeedCoherenceFactor;
 104
 105                 mag = f / (float) sqrt(rsquared);
 106
 107                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 108                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 109                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 110                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 111                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 112                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 113                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 114                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 115                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 116                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 117                 s->nextSubParticle++;
 118                 if (s->nextSubParticle==4) {
 119                     s->nextParticle++;
 120                     s->nextSubParticle=0;
 121                 }
 122                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 123                     s->nextParticle = 0;
 124                     s->nextSubParticle = 0;
 125                 }
 126             }
 127
 128             s->lastParticleTime = flurry->fTime;
 129         }
 130     } else {
 131         s->lastParticleTime = flurry->fTime;
 132         s->firstTime = 0;
 133     }
 134
 135     for(i=0;i<3;i++) {
 136         s->old[i] = flurry->star->position[i];
 137     }
 138
 139     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 140     frameRateModifier = 42.5f / frameRate;
 141
 142     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 143         for(k=0; k<4; k++) {
 144             float dx,dy,dz;
 145             float f;
 146             float rsquared;
 147             float mag;
 148             float deltax;
 149             float deltay;
 150             float deltaz;
 151
 152             if (s->p[i].dead.i[k]) {
 153                 continue;
 154             }
 155
 156             deltax = s->p[i].delta[0].f[k];
 157             deltay = s->p[i].delta[1].f[k];
 158             deltaz = s->p[i].delta[2].f[k];
 159
 160             for(j=0;j<flurry->numStreams;j++) {
 161                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 162                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 163                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 164                 rsquared = (dx*dx+dy*dy+dz*dz);
 165
 166                 f = (gravity/rsquared) * frameRateModifier;
 167
 168                 if ((((i*4)+k) % flurry->numStreams) == j) {
 169                     f *= 1.0f + streamBias;
 170                 }
 171
 172                 mag = f / (float) sqrt(rsquared);
 173
 174                 deltax -= (dx * mag);
 175                 deltay -= (dy * mag);
 176                 deltaz -= (dz * mag);
 177             }
 178
 179             /* slow this particle down by flurry->drag */
 180             deltax *= flurry->drag;
 181             deltay *= flurry->drag;
 182             deltaz *= flurry->drag;
 183
 184             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 185                 s->p[i].dead.i[k] = 1;
 186                 continue;
 187             }
 188
 189             /* update the position */
 190             s->p[i].delta[0].f[k] = deltax;
 191             s->p[i].delta[1].f[k] = deltay;
 192             s->p[i].delta[2].f[k] = deltaz;
 193             for(j=0;j<3;j++) {
 194                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 195                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 196             }
 197         }
 198     }
 199 }
 200
 201 #if 0
 202 #ifdef __ppc__
 203
 204 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 205 {
 206     int i,j,k;
 207     float sx = flurry->star->position[0];
 208     float sy = flurry->star->position[1];
 209     float sz = flurry->star->position[2];
 210     double frameRate;
 211     double frameRateModifier;
 212
 213
 214     s->frame++;
 215
 216     if(!s->firstTime) {
 217         /* release 12 puffs every frame */
 218         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 219             float dx,dy,dz,deltax,deltay,deltaz;
 220             float f;
 221             float rsquared;
 222             float mag;
 223
 224             dx = s->old[0] - sx;
 225             dy = s->old[1] - sy;
 226             dz = s->old[2] - sz;
 227             mag = 5.0f;
 228             deltax = (dx * mag);
 229             deltay = (dy * mag);
 230             deltaz = (dz * mag);
 231             for(i=0;i<flurry->numStreams;i++) {
 232                 float streamSpeedCoherenceFactor;
 233
 234                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 235                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 236                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 237                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 238                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 239                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 240                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 241                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 242                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 243                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 244                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 245                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 246                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 247                 rsquared = (dx*dx+dy*dy+dz*dz);
 248                 f = streamSpeed * streamSpeedCoherenceFactor;
 249
 250                 mag = f / (float) sqrt(rsquared);
 251                 /*
 252                     reciprocal square-root estimate replaced above divide and call to system sqrt()
 253
 254                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 255                     mag *= f;
 256                 */
 257
 258                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 259                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 260                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 261                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 262                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 263                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 264                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 265                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 266                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 267                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 268                 s->nextSubParticle++;
 269                 if (s->nextSubParticle==4) {
 270                     s->nextParticle++;
 271                     s->nextSubParticle=0;
 272                 }
 273                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 274                     s->nextParticle = 0;
 275                     s->nextSubParticle = 0;
 276                 }
 277             }
 278
 279             s->lastParticleTime = flurry->fTime;
 280         }
 281     } else {
 282         s->lastParticleTime = flurry->fTime;
 283         s->firstTime = 0;
 284     }
 285
 286     for(i=0;i<3;i++) {
 287         s->old[i] = flurry->star->position[i];
 288     }
 289
 290     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 291     frameRateModifier = 42.5f / frameRate;
 292
 293     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 294         for(k=0; k<4; k++) {
 295             float dx,dy,dz;
 296             float f;
 297             float rsquared;
 298             float mag;
 299             float deltax;
 300             float deltay;
 301             float deltaz;
 302
 303             if (s->p[i].dead.i[k]) {
 304                 continue;
 305             }
 306
 307             deltax = s->p[i].delta[0].f[k];
 308             deltay = s->p[i].delta[1].f[k];
 309             deltaz = s->p[i].delta[2].f[k];
 310
 311             for(j=0;j<flurry->numStreams;j++) {
 312                 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
 313                 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
 314                 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
 315                 rsquared = (dx*dx+dy*dy+dz*dz);
 316
 317                 /*
 318                     asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
 319                     f *= gravity*frameRateModifier;
 320                 */
 321                 f = ( gravity  * frameRateModifier ) / rsquared;
 322
 323                 if((((i*4)+k) % flurry->numStreams) == j) {
 324                     f *= 1.0f + streamBias;
 325                 }
 326
 327                 mag = f / (float) sqrt(rsquared);
 328
 329                 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
 330
 331                 deltax -= (dx * mag);
 332                 deltay -= (dy * mag);
 333                 deltaz -= (dz * mag);
 334             }
 335
 336             /* slow this particle down by flurry->drag */
 337             deltax *= flurry->drag;
 338             deltay *= flurry->drag;
 339             deltaz *= flurry->drag;
 340
 341             if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
 342                 s->p[i].dead.i[k] = 1;
 343                 continue;
 344             }
 345
 346             /* update the position */
 347             s->p[i].delta[0].f[k] = deltax;
 348             s->p[i].delta[1].f[k] = deltay;
 349             s->p[i].delta[2].f[k] = deltaz;
 350             for(j=0;j<3;j++) {
 351                 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
 352                 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
 353             }
 354         }
 355     }
 356 }
 357
 358 #endif
 359
 360 #ifdef __VEC__
 361
 362 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
 363 {
 364     unsigned int i,j;
 365     float sx = flurry->star->position[0];
 366     float sy = flurry->star->position[1];
 367     float sz = flurry->star->position[2];
 368     double frameRate;
 369     floatToVector frameRateModifier;
 370     floatToVector gravityV;
 371     floatToVector dragV;
 372     floatToVector deltaTimeV;
 373     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 374     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 375     const vector float biasConst = (vector float)(streamBias);
 376
 377     gravityV.f[0] = gravity;
 378     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 379
 380     dragV.f[0] = flurry->drag;
 381     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 382
 383     deltaTimeV.f[0] = flurry->fDeltaTime;
 384     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 385
 386     s->frame++;
 387
 388     if(!s->firstTime) {
 389         /* release 12 puffs every frame */
 390         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 391             float dx,dy,dz,deltax,deltay,deltaz;
 392             float f;
 393             float rsquared;
 394             float mag;
 395
 396             dx = s->old[0] - sx;
 397             dy = s->old[1] - sy;
 398             dz = s->old[2] - sz;
 399             mag = 5.0f;
 400             deltax = (dx * mag);
 401             deltay = (dy * mag);
 402             deltaz = (dz * mag);
 403             for(i=0;i<flurry->numStreams;i++) {
 404                 float streamSpeedCoherenceFactor;
 405
 406                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 407                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 408                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 409                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 410                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 411                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 412                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 413                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 414                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 415                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 416                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 417                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 418                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 419                 rsquared = (dx*dx+dy*dy+dz*dz);
 420                 f = streamSpeed * streamSpeedCoherenceFactor;
 421
 422                 mag = f / (float) sqrt(rsquared);
 423                 /*
 424                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 425                     mag *= f;
 426                 */
 427
 428                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 429                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 430                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 431                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 432                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 433                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 434                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 435                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 436                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 437                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 438                 s->nextSubParticle++;
 439                 if (s->nextSubParticle==4) {
 440                     s->nextParticle++;
 441                     s->nextSubParticle=0;
 442                 }
 443                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 444                     s->nextParticle = 0;
 445                     s->nextSubParticle = 0;
 446                 }
 447             }
 448
 449             s->lastParticleTime = flurry->fTime;
 450         }
 451     } else {
 452         s->lastParticleTime = flurry->fTime;
 453         s->firstTime = 0;
 454     }
 455
 456     for(i=0;i<3;i++) {
 457         s->old[i] = flurry->star->position[i];
 458     }
 459
 460     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 461     frameRateModifier.f[0] = 42.5f / frameRate;
 462     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 463
 464     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 465
 466     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 467         /* floatToVector f; */
 468         vector float deltax, deltay, deltaz;
 469         vector float distTemp;
 470         vector unsigned int deadTemp;
 471         /* floatToVector infopos0, infopos1, infopos2; */
 472         intToVector mod;
 473         vector unsigned int jVec;
 474
 475
 476         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 477
 478         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 479             continue;
 480         }
 481
 482         deltax = s->p[i].delta[0].v;
 483         deltay = s->p[i].delta[1].v;
 484         deltaz = s->p[i].delta[2].v;
 485
 486         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 487         if(mod.i[0]+1 == flurry->numStreams) {
 488             mod.i[1] = 0;
 489         } else {
 490             mod.i[1] = mod.i[0]+1;
 491         }
 492         if(mod.i[1]+1 == flurry->numStreams) {
 493             mod.i[2] = 0;
 494         } else {
 495             mod.i[2] = mod.i[1]+1;
 496         }
 497         if(mod.i[2]+1 == flurry->numStreams) {
 498             mod.i[3] = 0;
 499         } else {
 500             mod.i[3] = mod.i[2]+1;
 501         }
 502
 503         jVec = vec_xor(jVec, jVec);
 504
 505         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 506         for(j=0; j<flurry->numStreams;j++) {
 507             vector float ip0, ip1 = (vector float)(0.0), ip2;
 508             vector float dx, dy, dz;
 509             vector float rsquared, f;
 510             vector float one_over_rsquared;
 511             vector float biasTemp;
 512             vector float mag;
 513             vector bool int biasOr;
 514
 515             ip0 = vec_ld(0, flurry->spark[j]->position);
 516             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 517                 ip1 = vec_ld(16, flurry->spark[j]->position);
 518             }
 519
 520             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 521             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 522             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 523             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 524
 525             dx = vec_sub(s->p[i].position[0].v, ip0);
 526             dy = vec_sub(s->p[i].position[1].v, ip1);
 527             dz = vec_sub(s->p[i].position[2].v, ip2);
 528
 529             rsquared = vec_madd(dx, dx, zero);
 530             rsquared = vec_madd(dy, dy, rsquared);
 531             rsquared = vec_madd(dz, dz, rsquared);
 532
 533             biasOr = vec_cmpeq(jVec, mod.v);
 534             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 535
 536             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 537             one_over_rsquared = vec_re(rsquared);
 538             f = vec_madd(f, one_over_rsquared, zero);
 539
 540             mag = vec_rsqrte(rsquared);
 541             mag = vec_madd(mag, f, zero);
 542
 543             deltax = vec_nmsub(dx, mag, deltax);
 544             deltay = vec_nmsub(dy, mag, deltay);
 545             deltaz = vec_nmsub(dz, mag, deltaz);
 546
 547             jVec = vec_add(jVec, (vector unsigned int)(1));
 548         }
 549
 550         /* slow this particle down by flurry->drag */
 551         deltax = vec_madd(deltax, dragV.v, zero);
 552         deltay = vec_madd(deltay, dragV.v, zero);
 553         deltaz = vec_madd(deltaz, dragV.v, zero);
 554
 555         distTemp = vec_madd(deltax, deltax, zero);
 556         distTemp = vec_madd(deltay, deltay, distTemp);
 557         distTemp = vec_madd(deltaz, deltaz, distTemp);
 558
 559         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 560         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 561         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 562         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 563             continue;
 564         }
 565
 566         /* update the position */
 567         s->p[i].delta[0].v = deltax;
 568         s->p[i].delta[1].v = deltay;
 569         s->p[i].delta[2].v = deltaz;
 570         for(j=0;j<3;j++) {
 571             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 572             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 573         }
 574     }
 575 }
 576
 577 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
 578 {
 579     unsigned int i,j;
 580     float sx = flurry->star->position[0];
 581     float sy = flurry->star->position[1];
 582     float sz = flurry->star->position[2];
 583     double frameRate;
 584     floatToVector frameRateModifier;
 585     floatToVector gravityV;
 586     floatToVector dragV;
 587     floatToVector deltaTimeV;
 588     const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
 589     const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
 590     const vector float biasConst = (vector float)(streamBias);
 591
 592     gravityV.f[0] = gravity;
 593     gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
 594
 595     dragV.f[0] = flurry->drag;
 596     dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
 597
 598     deltaTimeV.f[0] = flurry->fDeltaTime;
 599     deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
 600
 601     s->frame++;
 602
 603     if(!s->firstTime) {
 604         /* release 12 puffs every frame */
 605         if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
 606             float dx,dy,dz,deltax,deltay,deltaz;
 607             float f;
 608             float rsquared;
 609             float mag;
 610
 611             dx = s->old[0] - sx;
 612             dy = s->old[1] - sy;
 613             dz = s->old[2] - sz;
 614             mag = 5.0f;
 615             deltax = (dx * mag);
 616             deltay = (dy * mag);
 617             deltaz = (dz * mag);
 618             for(i=0;i<flurry->numStreams;i++) {
 619                 float streamSpeedCoherenceFactor;
 620
 621                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
 622                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
 623                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
 624                 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
 625                 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
 626                 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
 627                 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
 628                 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
 629                 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
 630                 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
 631                 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
 632                 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
 633                 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
 634                 rsquared = (dx*dx+dy*dy+dz*dz);
 635                 f = streamSpeed * streamSpeedCoherenceFactor;
 636
 637                 mag = f / (float) sqrt(rsquared);
 638                 /*
 639                     asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
 640                     mag *= f;
 641                 */
 642
 643                 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
 644                 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
 645                 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
 646                 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
 647                 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
 648                 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
 649                 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
 650                 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
 651                 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
 652                 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
 653                 s->nextSubParticle++;
 654                 if (s->nextSubParticle==4) {
 655                     s->nextParticle++;
 656                     s->nextSubParticle=0;
 657                 }
 658                 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
 659                     s->nextParticle = 0;
 660                     s->nextSubParticle = 0;
 661                 }
 662             }
 663
 664             s->lastParticleTime = flurry->fTime;
 665         }
 666     } else {
 667         s->lastParticleTime = flurry->fTime;
 668         s->firstTime = 0;
 669     }
 670
 671     for(i=0;i<3;i++) {
 672         s->old[i] = flurry->star->position[i];
 673     }
 674
 675     frameRate = ((double) flurry->dframe)/(flurry->fTime);
 676     frameRateModifier.f[0] = 42.5f / frameRate;
 677     frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
 678
 679     frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
 680
 681     for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
 682         /* floatToVector f; */
 683         vector float deltax, deltay, deltaz;
 684         vector float distTemp;
 685         vector unsigned int deadTemp;
 686         /* floatToVector infopos0, infopos1, infopos2; */
 687         intToVector mod;
 688         vector unsigned int jVec;
 689         vector unsigned int intOne = vec_splat_u32(1);
 690         vector float floatOne = vec_ctf(intOne, 0);
 691
 692
 693         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
 694
 695         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 696             continue;
 697         }
 698
 699         deltax = s->p[i].delta[0].v;
 700         deltay = s->p[i].delta[1].v;
 701         deltaz = s->p[i].delta[2].v;
 702
 703         mod.i[0] = (i<<2 + 0) % flurry->numStreams;
 704         if(mod.i[0]+1 == flurry->numStreams) {
 705             mod.i[1] = 0;
 706         } else {
 707             mod.i[1] = mod.i[0]+1;
 708         }
 709         if(mod.i[1]+1 == flurry->numStreams) {
 710             mod.i[2] = 0;
 711         } else {
 712             mod.i[2] = mod.i[1]+1;
 713         }
 714         if(mod.i[2]+1 == flurry->numStreams) {
 715             mod.i[3] = 0;
 716         } else {
 717             mod.i[3] = mod.i[2]+1;
 718         }
 719
 720         jVec = vec_xor(jVec, jVec);
 721
 722         vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
 723         for(j=0; j + 3 < flurry->numStreams;j+=4)
 724         {
 725             vector float dxa, dya, dza;
 726             vector float dxb, dyb, dzb;
 727             vector float dxc, dyc, dzc;
 728             vector float dxd, dyd, dzd;
 729             vector float ip0a, ip1a;
 730             vector float ip0b, ip1b;
 731             vector float ip0c, ip1c;
 732             vector float ip0d, ip1d;
 733             vector float rsquaredA;
 734             vector float rsquaredB;
 735             vector float rsquaredC;
 736             vector float rsquaredD;
 737             vector float fA, fB, fC, fD;
 738             vector float biasTempA;
 739             vector float biasTempB;
 740             vector float biasTempC;
 741             vector float biasTempD;
 742             vector float magA;
 743             vector float magB;
 744             vector float magC;
 745             vector float magD;
 746
 747             vector float one_over_rsquaredA;
 748             vector float one_over_rsquaredB;
 749             vector float one_over_rsquaredC;
 750             vector float one_over_rsquaredD;
 751             vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
 752
 753             /* load vectors */
 754             ip0a = vec_ld(0, flurry->spark[j]->position);
 755             ip0b = vec_ld(0, flurry->spark[j+1]->position);
 756             ip0c = vec_ld(0, flurry->spark[j+2]->position);
 757             ip0d = vec_ld(0, flurry->spark[j+3]->position);
 758             ip1a = vec_ld( 12, flurry->spark[j]->position );
 759             ip1b = vec_ld( 12, flurry->spark[j+1]->position );
 760             ip1c = vec_ld( 12, flurry->spark[j+2]->position );
 761             ip1d = vec_ld( 12, flurry->spark[j+3]->position );
 762
 763             /* align them */
 764             ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
 765             ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
 766             ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
 767             ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
 768
 769             dxa = vec_splat( ip0a, 0  );
 770             dxb = vec_splat( ip0b, 0  );
 771             dxc = vec_splat( ip0c, 0  );
 772             dxd = vec_splat( ip0d, 0  );
 773             dxa = vec_sub( s->p[i].position[0].v, dxa );
 774             dxb = vec_sub( s->p[i].position[0].v, dxb );
 775             dxc = vec_sub( s->p[i].position[0].v, dxc );
 776             dxd = vec_sub( s->p[i].position[0].v, dxd );
 777
 778             dya = vec_splat( ip0a, 1  );
 779             dyb = vec_splat( ip0b, 1  );
 780             dyc = vec_splat( ip0c, 1  );
 781             dyd = vec_splat( ip0d, 1  );
 782             dya = vec_sub( s->p[i].position[1].v, dya );
 783             dyb = vec_sub( s->p[i].position[1].v, dyb );
 784             dyc = vec_sub( s->p[i].position[1].v, dyc );
 785             dyd = vec_sub( s->p[i].position[1].v, dyd );
 786
 787             dza = vec_splat( ip0a, 2  );
 788             dzb = vec_splat( ip0b, 2  );
 789             dzc = vec_splat( ip0c, 2  );
 790             dzd = vec_splat( ip0d, 2  );
 791             dza = vec_sub( s->p[i].position[2].v, dza );
 792             dzb = vec_sub( s->p[i].position[2].v, dzb );
 793             dzc = vec_sub( s->p[i].position[2].v, dzc );
 794             dzd = vec_sub( s->p[i].position[2].v, dzd );
 795
 796             rsquaredA = vec_madd( dxa, dxa, zero );
 797             rsquaredB = vec_madd( dxb, dxb, zero );
 798             rsquaredC = vec_madd( dxc, dxc, zero );
 799             rsquaredD = vec_madd( dxd, dxd, zero );
 800
 801             rsquaredA = vec_madd( dya, dya, rsquaredA );
 802             rsquaredB = vec_madd( dyb, dyb, rsquaredB );
 803             rsquaredC = vec_madd( dyc, dyc, rsquaredC );
 804             rsquaredD = vec_madd( dyd, dyd, rsquaredD );
 805
 806             rsquaredA = vec_madd( dza, dza, rsquaredA );
 807             rsquaredB = vec_madd( dzb, dzb, rsquaredB );
 808             rsquaredC = vec_madd( dzc, dzc, rsquaredC );
 809             rsquaredD = vec_madd( dzd, dzd, rsquaredD );
 810
 811             biasOrA = vec_cmpeq( jVec, mod.v );
 812             jVec = vec_add(jVec, intOne);
 813             biasOrB = vec_cmpeq( jVec, mod.v );
 814             jVec = vec_add(jVec, intOne);
 815             biasOrC = vec_cmpeq( jVec, mod.v );
 816             jVec = vec_add(jVec, intOne);
 817             biasOrD = vec_cmpeq( jVec, mod.v );
 818             jVec = vec_add(jVec, intOne);
 819
 820             biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
 821             biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
 822             biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
 823             biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
 824
 825             fA = vec_madd( biasTempA, frameRateModifier.v, zero);
 826             fB = vec_madd( biasTempB, frameRateModifier.v, zero);
 827             fC = vec_madd( biasTempC, frameRateModifier.v, zero);
 828             fD = vec_madd( biasTempD, frameRateModifier.v, zero);
 829             one_over_rsquaredA = vec_re( rsquaredA );
 830             one_over_rsquaredB = vec_re( rsquaredB );
 831             one_over_rsquaredC = vec_re( rsquaredC );
 832             one_over_rsquaredD = vec_re( rsquaredD );
 833             fA = vec_madd( fA, one_over_rsquaredA, zero);
 834             fB = vec_madd( fB, one_over_rsquaredB, zero);
 835             fC = vec_madd( fC, one_over_rsquaredC, zero);
 836             fD = vec_madd( fD, one_over_rsquaredD, zero);
 837             magA = vec_rsqrte( rsquaredA );
 838             magB = vec_rsqrte( rsquaredB );
 839             magC = vec_rsqrte( rsquaredC );
 840             magD = vec_rsqrte( rsquaredD );
 841             magA = vec_madd( magA, fA, zero );
 842             magB = vec_madd( magB, fB, zero );
 843             magC = vec_madd( magC, fC, zero );
 844             magD = vec_madd( magD, fD, zero );
 845             deltax = vec_nmsub( dxa, magA, deltax );
 846             deltay = vec_nmsub( dya, magA, deltay );
 847             deltaz = vec_nmsub( dza, magA, deltaz );
 848
 849             deltax = vec_nmsub( dxb, magB, deltax );
 850             deltay = vec_nmsub( dyb, magB, deltay );
 851             deltaz = vec_nmsub( dzb, magB, deltaz );
 852
 853             deltax = vec_nmsub( dxc, magC, deltax );
 854             deltay = vec_nmsub( dyc, magC, deltay );
 855             deltaz = vec_nmsub( dzc, magC, deltaz );
 856
 857             deltax = vec_nmsub( dxd, magD, deltax );
 858             deltay = vec_nmsub( dyd, magD, deltay );
 859             deltaz = vec_nmsub( dzd, magD, deltaz );
 860         }
 861
 862
 863         for(;j<flurry->numStreams;j++) {
 864             vector float ip0, ip1 = (vector float)(0.0), ip2;
 865             vector float dx, dy, dz;
 866             vector float rsquared, f;
 867             vector float one_over_rsquared;
 868             vector float biasTemp;
 869             vector float mag;
 870             vector bool int biasOr;
 871
 872             ip0 = vec_ld(0, flurry->spark[j]->position);
 873             if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
 874                 ip1 = vec_ld(16, flurry->spark[j]->position);
 875             }
 876
 877             ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
 878             ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
 879             ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
 880             ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
 881
 882             dx = vec_sub(s->p[i].position[0].v, ip0);
 883             dy = vec_sub(s->p[i].position[1].v, ip1);
 884             dz = vec_sub(s->p[i].position[2].v, ip2);
 885
 886             rsquared = vec_madd(dx, dx, zero);
 887             rsquared = vec_madd(dy, dy, rsquared);
 888             rsquared = vec_madd(dz, dz, rsquared);
 889
 890             biasOr = vec_cmpeq(jVec, mod.v);
 891             biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
 892
 893             f = vec_madd(biasTemp, frameRateModifier.v, zero);
 894             one_over_rsquared = vec_re(rsquared);
 895             f = vec_madd(f, one_over_rsquared, zero);
 896
 897             mag = vec_rsqrte(rsquared);
 898             mag = vec_madd(mag, f, zero);
 899
 900             deltax = vec_nmsub(dx, mag, deltax);
 901             deltay = vec_nmsub(dy, mag, deltay);
 902             deltaz = vec_nmsub(dz, mag, deltaz);
 903
 904             jVec = vec_add(jVec, (vector unsigned int)(1));
 905         }
 906
 907         /* slow this particle down by flurry->drag */
 908         deltax = vec_madd(deltax, dragV.v, zero);
 909         deltay = vec_madd(deltay, dragV.v, zero);
 910         deltaz = vec_madd(deltaz, dragV.v, zero);
 911
 912         distTemp = vec_madd(deltax, deltax, zero);
 913         distTemp = vec_madd(deltay, deltay, distTemp);
 914         distTemp = vec_madd(deltaz, deltaz, distTemp);
 915
 916         deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
 917         deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
 918         s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
 919         if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
 920             continue;
 921         }
 922
 923         /* update the position */
 924         s->p[i].delta[0].v = deltax;
 925         s->p[i].delta[1].v = deltay;
 926         s->p[i].delta[2].v = deltaz;
 927         for(j=0;j<3;j++) {
 928             s->p[i].oldposition[j].v = s->p[i].position[j].v;
 929             s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
 930         }
 931     }
 932 }
 933
 934 #endif
 935 #endif /* 0 */
 936
 937 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
 938 {
 939         int svi = 0;
 940         int sci = 0;
 941         int sti = 0;
 942         int si = 0;
 943         float width;
 944         float sx,sy;
 945         float u0,v0,u1,v1;
 946         float w,z;
 947         float screenRatio = global->sys_glWidth / 1024.0f;
 948         float hslash2 = global->sys_glHeight * 0.5f;
 949         float wslash2 = global->sys_glWidth * 0.5f;
 950         int i,k;
 951
 952         width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
 953
 954         for (i=0;i<NUMSMOKEPARTICLES/4;i++)
 955         {
 956             for (k=0; k<4; k++) {
 957                 float thisWidth;
 958                 float oldz;
 959
 960                 if (s->p[i].dead.i[k]) {
 961                     continue;
 962                 }
 963                 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
 964                 if (thisWidth >= width)
 965                 {
 966                         s->p[i].dead.i[k] = 1;
 967                         continue;
 968                 }
 969                 z = s->p[i].position[2].f[k];
 970                 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
 971                 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
 972                 oldz = s->p[i].oldposition[2].f[k];
 973                 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
 974                 {
 975                         continue;
 976                 }
 977
 978                 w = MAX_(1.0f,thisWidth/z);
 979                 {
 980                         float oldx = s->p[i].oldposition[0].f[k];
 981                         float oldy = s->p[i].oldposition[1].f[k];
 982                         float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
 983                         float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
 984                         float dx = (sx-oldscreenx);
 985                         float dy = (sy-oldscreeny);
 986
 987                         float d = FastDistance2D(dx, dy);
 988
 989                         float sm, os, ow;
 990                         if (d)
 991                         {
 992                                 sm = w/d;
 993                         }
 994                         else
 995                         {
 996                                 sm = 0.0f;
 997                         }
 998                         ow = MAX_(1.0f,thisWidth/oldz);
 999                         if (d)
1000                         {
1001                                 os = ow/d;
1002                         }
1003                         else
1004                         {
1005                                 os = 0.0f;
1006                         }
1007
1008                         {
1009                                 floatToVector cmv;
1010                                 float cm;
1011                                 float m = 1.0f + sm;
1012
1013                                 float dxs = dx*sm;
1014                                 float dys = dy*sm;
1015                                 float dxos = dx*os;
1016                                 float dyos = dy*os;
1017                                 float dxm = dx*m;
1018                                 float dym = dy*m;
1019
1020                                 s->p[i].animFrame.i[k]++;
1021                                 if (s->p[i].animFrame.i[k] >= 64)
1022                                 {
1023                                         s->p[i].animFrame.i[k] = 0;
1024                                 }
1025
1026                                 u0 = (s->p[i].animFrame.i[k]&&7) * 0.125f;
1027                                 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
1028                                 u1 = u0 + 0.125f;
1029                                 v1 = v0 + 0.125f;
1030                                 u1 = u0 + 0.125f;
1031                                 v1 = v0 + 0.125f;
1032                                 cm = (1.375f - thisWidth/width);
1033                                 if (s->p[i].dead.i[k] == 3)
1034                                 {
1035                                         cm *= 0.125f;
1036                                         s->p[i].dead.i[k] = 1;
1037                                 }
1038                                 si++;
1039                                 cm *= brightness;
1040                                 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1041                                 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1042                                 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1043                                 cmv.f[3] = s->p[i].color[3].f[k]*cm;
1044
1045 #if 0
1046                                 /* MDT we can't use vectors in the Scalar routine */
1047                                 s->seraphimColors[sci++].v = cmv.v;
1048                                 s->seraphimColors[sci++].v = cmv.v;
1049                                 s->seraphimColors[sci++].v = cmv.v;
1050                                 s->seraphimColors[sci++].v = cmv.v;
1051 #else
1052                                 {
1053                                     int ii, jj;
1054                                     for (jj = 0; jj < 4; jj++) {
1055                                         for (ii = 0; ii < 4; ii++) {
1056                                             s->seraphimColors[sci].f[ii] = cmv.f[ii];
1057                                         }
1058                                         sci += 1;
1059                                     }
1060                                 }
1061 #endif
1062
1063                                 s->seraphimTextures[sti++] = u0;
1064                                 s->seraphimTextures[sti++] = v0;
1065                                 s->seraphimTextures[sti++] = u0;
1066                                 s->seraphimTextures[sti++] = v1;
1067
1068                                 s->seraphimTextures[sti++] = u1;
1069                                 s->seraphimTextures[sti++] = v1;
1070                                 s->seraphimTextures[sti++] = u1;
1071                                 s->seraphimTextures[sti++] = v0;
1072
1073                                 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1074                                 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1075                                 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1076                                 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1077                                 svi++;
1078
1079                                 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1080                                 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1081                                 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1082                                 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
1083                                 svi++;
1084                         }
1085                 }
1086             }
1087         }
1088         glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1089         glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1090         glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1091         glDrawArrays(GL_QUADS,0,si*4);
1092 }
1093
1094 #if 0
1095 #ifdef __VEC__
1096
1097 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1098 {
1099     const vector float zero = (vector float)(0.0);
1100     int svi = 0;
1101     int sci = 0;
1102     int sti = 0;
1103     int si = 0;
1104     floatToVector width;
1105     vector float sx,sy;
1106     floatToVector u0,v0,u1,v1;
1107     vector float one_over_z;
1108     vector float w;
1109     floatToVector z;
1110     float screenRatio = global->sys_glWidth / 1024.0f;
1111     float hslash2 = global->sys_glHeight * 0.5f;
1112     float wslash2 = global->sys_glWidth * 0.5f;
1113     int i,kk;
1114     floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1115     floatToVector glWidthV;
1116     floatToVector cm;
1117     vector float cmv[4];
1118     vector float svec[4], ovec[4];
1119     vector float oldscreenx, oldscreeny;
1120     vector float sm;
1121     vector float frameAnd7;
1122     vector float frameShift3;
1123     vector float one_over_width;
1124     vector float dx, dy;
1125     vector float os;
1126     vector unsigned int vSi = vec_splat_u32(0);
1127     const vector float eighth = (vector float)(0.125);
1128     float glWidth50 = global->sys_glWidth + 50.0f;
1129     float glHeight50 = global->sys_glHeight + 50.0f;
1130     vector float vGLWidth50, vGLHeight50;
1131     unsigned int blitBool;
1132
1133     vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
1134
1135     {
1136         vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1137         vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1138         permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1139         permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1140         vGLWidth50 = vec_lde( 0, &glWidth50 );
1141         vGLHeight50 = vec_lde( 0, &glHeight50 );
1142         vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1143         vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
1144     }
1145
1146     width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1147     width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1148
1149     briteV.f[0] = brightness;
1150     briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1151
1152     fTimeV.f[0] = (float) flurry->fTime;
1153     fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1154
1155     expansionV.f[0] = flurry->streamExpansion;
1156     expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1157
1158     screenRatioV.f[0] = screenRatio;
1159     screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1160
1161     hslash2V.f[0] = hslash2;
1162     hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1163
1164     wslash2V.f[0] = wslash2;
1165     wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1166
1167     streamSizeV.f[0] = streamSize;
1168     streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1169
1170     glWidthV.f[0] = global->sys_glWidth;
1171     glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
1172
1173     for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1174         vector float thisWidth;
1175         vector float oldz;
1176         vector float oldx, oldy, one_over_oldz;
1177         vector float xabs, yabs, mn;
1178         vector float d;
1179         vector float one_over_d;
1180         vector bool int dnz;
1181         vector float ow;
1182
1183         vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
1184
1185         if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1186
1187         blitBool = 0; /* keep track of particles that actually need to be drawn */
1188
1189         thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1190         thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1191         thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
1192
1193         z.v = s->p[i].position[2].v;
1194         one_over_z = vec_re(z.v);
1195
1196         sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1197         sx = vec_madd(sx, one_over_z, wslash2V.v);
1198         sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1199         sy = vec_madd(sy, one_over_z, hslash2V.v);
1200
1201         oldz = s->p[i].oldposition[2].v;
1202
1203         w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
1204
1205         oldx = s->p[i].oldposition[0].v;
1206         oldy = s->p[i].oldposition[1].v;
1207         one_over_oldz = vec_re(oldz);
1208         oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1209         oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1210         oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1211         oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1212         dx = vec_sub(sx,oldscreenx);
1213         dy = vec_sub(sy,oldscreeny);
1214
1215         xabs = vec_abs(dx);
1216         yabs = vec_abs(dy);
1217         mn = vec_min(xabs,yabs);
1218         d = vec_add(xabs,yabs);
1219         d = vec_madd(mn, (vector float)(-0.6875), d);
1220
1221         ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1222         one_over_d = vec_re(d);
1223         dnz = vec_cmpgt(d, zero);
1224         sm = vec_madd(w, one_over_d, zero);
1225         sm = vec_and(sm, dnz);
1226         os = vec_madd(ow, one_over_d, zero);
1227         os = vec_and(os, dnz);
1228
1229         {
1230             intToVector tempMask;
1231             vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1232             vector bool int  gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1233             vector bool int  glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1234             vector bool int  glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1235             vector bool int  test50x    = vec_cmplt( sx, (vector float) (-50.0) );
1236             vector bool int  test50y    = vec_cmplt( sy, (vector float) (-50.0) );
1237             vector bool int  testz      = vec_cmplt( z.v, (vector float) (25.0) );
1238             vector bool int  testoldz   = vec_cmplt( oldz, (vector float) (25.0) );
1239             mask = vec_or( mask, gtMask );
1240             s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1241             mask = vec_or( mask, glWidth50Test );
1242             mask = vec_or( mask, glHeight50Test );
1243             mask = vec_or( mask, test50x );
1244             mask = vec_or( mask, test50y );
1245             mask = vec_or( mask, testz );
1246             mask = vec_or( mask, testoldz );
1247             tempMask.v = (vector unsigned int)mask;
1248
1249             s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1250             s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
1251
1252             frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1253             u0.v = vec_madd(frameAnd7, eighth, zero);
1254
1255             frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1256             v0.v = vec_madd(frameAnd7, eighth, zero);
1257
1258             u1.v = vec_add(u0.v, eighth);
1259             v1.v = vec_add(v0.v, eighth);
1260
1261             one_over_width = vec_re(width.v);
1262             cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1263             cm.v = vec_madd(cm.v, briteV.v, zero);
1264
1265             vSi = vec_sub( vSi, vec_nor( mask, mask ) );
1266             {
1267                 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1268                 vector unsigned int temp = (vector unsigned int)mask;
1269                 temp = vec_andc( blitMask, temp  );
1270                 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1271                 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1272                 vec_ste( temp, 0, &blitBool );
1273
1274             }
1275
1276             {
1277                 vector float temp1, temp2, temp3, temp4;
1278                 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1279
1280                 temp1 = vec_mergeh( u0.v, u0.v );
1281                 temp2 = vec_mergel( u0.v, u0.v );
1282                 temp3 = vec_mergeh( v0.v, v1.v );
1283                 temp4 = vec_mergel( v0.v, v1.v );
1284
1285                 result1a = vec_mergeh( temp1, temp3 );
1286                 result1b = vec_mergel( temp1, temp3 );
1287                 result2a = vec_mergeh( temp2, temp4 );
1288                 result2b = vec_mergel( temp2, temp4 );
1289
1290                 temp1 = vec_mergeh( u1.v, u1.v );
1291                 temp2 = vec_mergel( u1.v, u1.v );
1292                 temp3 = vec_mergeh( v1.v, v0.v );
1293                 temp4 = vec_mergel( v1.v, v0.v );
1294
1295                 result3a = vec_mergeh( temp1, temp3 );
1296                 result3b = vec_mergel( temp1, temp3 );
1297                 result4a = vec_mergeh( temp2, temp4 );
1298                 result4b = vec_mergel( temp2, temp4 );
1299
1300                 if( blitBool & 1 )
1301                 {
1302                     vec_st( result1a, 0, &s->seraphimTextures[sti] );
1303                     vec_st( result3a, 16, &s->seraphimTextures[sti]);
1304                     sti+= 8;
1305                 }
1306                 if( blitBool & 2 )
1307                 {
1308                     vec_st( result1b, 0, &s->seraphimTextures[sti]);
1309                     vec_st( result3b, 16, &s->seraphimTextures[sti]);
1310                     sti+= 8;
1311                 }
1312                 if( blitBool & 4 )
1313                 {
1314                     vec_st( result2a, 0, &s->seraphimTextures[sti]);
1315                     vec_st( result4a, 16, &s->seraphimTextures[sti]);
1316                     sti+= 8;
1317                 }
1318                 if( blitBool & 8 )
1319                 {
1320                     vec_st( result2b, 0, &s->seraphimTextures[sti]);
1321                     vec_st( result4b, 16, &s->seraphimTextures[sti]);
1322                     sti+= 8;
1323                 }
1324             }
1325         }
1326
1327         cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1328         cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1329         cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1330         cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1331         {
1332             vector float vI0, vI1, vI2, vI3;
1333
1334             vI0 = vec_mergeh ( cmv[0], cmv[2] );
1335             vI1 = vec_mergeh ( cmv[1], cmv[3] );
1336             vI2 = vec_mergel ( cmv[0], cmv[2] );
1337             vI3 = vec_mergel ( cmv[1], cmv[3] );
1338
1339             cmv[0] = vec_mergeh ( vI0, vI1 );
1340             cmv[1] = vec_mergel ( vI0, vI1 );
1341             cmv[2] = vec_mergeh ( vI2, vI3 );
1342             cmv[3] = vec_mergel ( vI2, vI3 );
1343         }
1344
1345         vec_dst( cmv, 0x0D0100D0, 1 );
1346
1347         {
1348             vector float sxd, syd;
1349             vector float sxdm, sxdp, sydm, sydp;
1350             vector float oxd, oyd;
1351             vector float oxdm, oxdp, oydm, oydp;
1352             vector float vI0, vI1, vI2, vI3;
1353             vector float dxs, dys;
1354             vector float dxos, dyos;
1355             vector float dxm, dym;
1356             vector float m;
1357
1358             m = vec_add((vector float)(1.0), sm);
1359
1360             dxs = vec_madd(dx, sm, zero);
1361             dys = vec_madd(dy, sm, zero);
1362             dxos = vec_madd(dx, os, zero);
1363             dyos = vec_madd(dy, os, zero);
1364             dxm = vec_madd(dx, m, zero);
1365             dym = vec_madd(dy, m, zero);
1366
1367             sxd = vec_add(sx, dxm);
1368             sxdm = vec_sub(sxd, dys);
1369             sxdp = vec_add(sxd, dys);
1370
1371             syd = vec_add(sy, dym);
1372             sydm = vec_sub(syd, dxs);
1373             sydp = vec_add(syd, dxs);
1374
1375             oxd = vec_sub(oldscreenx, dxm);
1376             oxdm = vec_sub(oxd, dyos);
1377             oxdp = vec_add(oxd, dyos);
1378
1379             oyd = vec_sub(oldscreeny, dym);
1380             oydm = vec_sub(oyd, dxos);
1381             oydp = vec_add(oyd, dxos);
1382
1383             vI0 = vec_mergeh ( sxdm, sxdp );
1384             vI1 = vec_mergeh ( sydp, sydm );
1385             vI2 = vec_mergel ( sxdm, sxdp );
1386             vI3 = vec_mergel ( sydp, sydm );
1387
1388             svec[0] = vec_mergeh ( vI0, vI1 );
1389             svec[1] = vec_mergel ( vI0, vI1 );
1390             svec[2] = vec_mergeh ( vI2, vI3 );
1391             svec[3] = vec_mergel ( vI2, vI3 );
1392
1393             vI0 = vec_mergeh ( oxdp, oxdm );
1394             vI1 = vec_mergeh ( oydm, oydp );
1395             vI2 = vec_mergel ( oxdp, oxdm );
1396             vI3 = vec_mergel ( oydm, oydp );
1397
1398             ovec[0] = vec_mergeh ( vI0, vI1 );
1399             ovec[1] = vec_mergel ( vI0, vI1 );
1400             ovec[2] = vec_mergeh ( vI2, vI3 );
1401             ovec[3] = vec_mergel ( vI2, vI3 );
1402         }
1403
1404         {
1405             int offset0 = (sci + 0) * sizeof( vector float );
1406             int offset1 = (sci + 1) * sizeof( vector float );
1407             int offset2 = (sci + 2) * sizeof( vector float );
1408             int offset3 = (sci + 3) * sizeof( vector float );
1409             int offset4 = (svi + 0) * sizeof( vector float );
1410             int offset5 = (svi + 1) * sizeof( vector float );
1411             vector float *colors = (vector float *)s->seraphimColors;
1412             vector float *vertices = (vector float *)s->seraphimVertices;
1413             for (kk=0; kk<4; kk++) {
1414                 if (blitBool>>kk & 1) {
1415                     vector float vcmv = cmv[kk];
1416                     vector float vsvec = svec[kk];
1417                     vector float vovec = ovec[kk];
1418
1419                     vec_st( vcmv, offset0, colors );
1420                     vec_st( vcmv, offset1, colors );
1421                     vec_st( vcmv, offset2, colors );
1422                     vec_st( vcmv, offset3, colors );
1423                     vec_st( vsvec, offset4, vertices );
1424                     vec_st( vovec, offset5, vertices );
1425                     colors += 4;
1426                     vertices += 2;
1427                     sci += 4;
1428                     svi += 2;
1429                 }
1430             }
1431         }
1432     }
1433     vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1434     vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1435     vec_ste( (vector signed int) vSi, 0, &si );
1436
1437     glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1438     glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1439     glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1440     glDrawArrays(GL_QUADS,0,si*4);
1441 }
1442
1443 #endif
1444 #endif /* 0 */