3 Copyright (c) 2002, Calum Robinson
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 * Neither the name of the author nor the names of its contributors may be used
17 to endorse or promote products derived from this software without specific
18 prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Smoke.cpp: implementation of the Smoke class. */

/* NOTE(review): MAXANGLES is not referenced in the visible part of this file. */
#define MAXANGLES 16384

/*
 * Value of a particle's `dead` flag meaning "fading out, not yet removed".
 * NOTE(review): DrawSmoke_Scalar compares the flag against the literal 3,
 * which presumably stands for this constant - confirm.
 */
#define NOT_QUITE_DEAD 3

/*
 * No trailing semicolon: with one, the macro could only be used as the last
 * token of a statement and broke inside any larger expression or argument
 * list.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define intensity 75000.0f
/*
 * InitSmoke: put a SmokeV emitter into its starting state.
 *
 * s - smoke state to initialise; only written through in the visible lines.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers jump), so
 * the braces, the declaration of i and the loop that drives it are not
 * shown; only the assignments below can be vouched for.
 */
42 void InitSmoke(SmokeV *s)
/* Restart the sub-particle cursor and preset the last-emission timestamp. */
46 s->nextSubParticle = 0;
47 s->lastParticleTime = 0.25f;
/*
 * Seed s->old[i] with RandFlt(-100.0, 100.0); the update routines later
 * overwrite s->old[] with the star position.
 */
51 s->old[i] = RandFlt(-100.0, 100.0);
/*
 * UpdateSmoke_ScalarBase: plain-C smoke update for one frame.
 *
 * Two phases are visible in this listing:
 *   1. Emission - once flurry->fTime has advanced by at least 1/121 past
 *      s->lastParticleTime, one new sub-particle per stream is written at
 *      the star's position, its velocity adjusted along the line between
 *      the star and that stream's spark, its colour taken from the spark.
 *   2. Integration - each sub-particle's velocity is adjusted along the
 *      line to every spark by a term proportional to gravity / r^2, scaled
 *      by flurry->drag, the sub-particle is flagged dead once its squared
 *      speed reaches 25000000, and the position is advanced by
 *      delta * flurry->fDeltaTime.
 *
 * global - not referenced in the visible lines.
 * flurry - supplies star, spark[], numStreams, fTime, dframe, drag and
 *          fDeltaTime.
 * s      - smoke state; particles and emission bookkeeping are updated in
 *          place.
 *
 * NOTE(review): the listing has gaps (the embedded line numbers jump), so
 * braces, several declarations, loop headers and some statements are not
 * shown here.
 */
55 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* New sub-particles are spawned at the star's current position. */
58 float sx = flurry->star->position[0];
59 float sy = flurry->star->position[1];
60 float sz = flurry->star->position[2];
62 double frameRateModifier;
/*
 * The loop below emits one sub-particle per stream, so the "12" in the
 * next comment only holds when flurry->numStreams is 12.
 */
68 /* release 12 puffs every frame */
69 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
70 float dx,dy,dz,deltax,deltay,deltaz;
82 for(i=0;i<flurry->numStreams;i++) {
83 float streamSpeedCoherenceFactor;
/* Initial velocity; deltax/deltay/deltaz are computed on lines not shown. */
85 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
86 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
87 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
88 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
89 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
90 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
91 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
92 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
93 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-particle speed factor; MAX_ presumably clamps it at 0 - confirm. */
94 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* Offset from this stream's spark to the new particle. */
95 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
96 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
97 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
98 rsquared = (dx*dx+dy*dy+dz*dz);
99 f = streamSpeed * streamSpeedCoherenceFactor;
/*
 * mag scales the offset to length f.
 * NOTE(review): rsquared is 0 when the star coincides with the spark,
 * which makes this a division by zero.
 */
101 mag = f / (float) sqrt(rsquared);
103 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
104 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
105 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with independent random variation per channel. */
106 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
107 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
108 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
109 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a random start frame in 0..63. */
110 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
111 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
112 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/*
 * Four sub-particles share one s->p[] entry. The cursor wraps at 4 and the
 * entry index is checked against NUMSMOKEPARTICLES/4; the statements that
 * advance and reset s->nextParticle are among the lines not shown.
 */
113 s->nextSubParticle++;
114 if (s->nextSubParticle==4) {
116 s->nextSubParticle=0;
118 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
120 s->nextSubParticle = 0;
/*
 * Both assignments below record the current time; the branch structure
 * that separates them is not visible in this listing.
 */
124 s->lastParticleTime = flurry->fTime;
127 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (loop header not shown). */
132 s->old[i] = flurry->star->position[i];
/*
 * frameRate is dframe / fTime; the force term below is scaled by
 * 42.5 / frameRate.
 * NOTE(review): the result is not finite if flurry->fTime is 0 - confirm
 * callers never pass that.
 */
135 frameRate = ((double) flurry->dframe)/(flurry->fTime);
136 frameRateModifier = 42.5f / frameRate;
/* Walk every s->p[] entry; k (loop header not shown) selects the sub-particle. */
138 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead sub-particles are tested for here; the branch body is not shown. */
148 if (s->p[i].dead.i[k]) {
152 deltax = s->p[i].delta[0].f[k];
153 deltay = s->p[i].delta[1].f[k];
154 deltaz = s->p[i].delta[2].f[k];
156 for(j=0;j<flurry->numStreams;j++) {
157 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
158 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
159 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
160 rsquared = (dx*dx+dy*dy+dz*dz);
/*
 * Inverse-square term.
 * NOTE(review): rsquared == 0 (particle exactly on a spark) divides by zero
 * here and in the sqrt division below.
 */
162 f = (gravity/rsquared) * frameRateModifier;
/* The stream whose index equals (i*4+k) mod numStreams is scaled by 1 + streamBias. */
164 if ((((i*4)+k) % flurry->numStreams) == j) {
165 f *= 1.0f + streamBias;
168 mag = f / (float) sqrt(rsquared);
170 deltax -= (dx * mag);
171 deltay -= (dy * mag);
172 deltaz -= (dz * mag);
175 /* slow this particle down by flurry->drag */
176 deltax *= flurry->drag;
177 deltay *= flurry->drag;
178 deltaz *= flurry->drag;
/* A squared speed of 25000000 or more marks the sub-particle dead. */
180 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
181 s->p[i].dead.i[k] = 1;
185 /* update the position */
186 s->p[i].delta[0].f[k] = deltax;
187 s->p[i].delta[1].f[k] = deltay;
188 s->p[i].delta[2].f[k] = deltaz;
/*
 * j indexes the coordinate here (its loop header is not shown): save the
 * previous position, then advance by delta * fDeltaTime.
 */
190 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
191 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_ScalarFrsqrte: scalar smoke update that, besides the sqrt()
 * based arithmetic, contains PowerPC estimate instructions (frsqrte for
 * 1/sqrt(x), fres for 1/x) issued through inline asm.
 *
 * The visible structure matches UpdateSmoke_ScalarBase: an emission phase
 * gated on 1/121 of elapsed flurry->fTime, followed by a pass over every
 * sub-particle that applies the pull of each spark, drag, the speed-based
 * dead test and the position step.
 *
 * global - not referenced in the visible lines.
 * flurry - supplies star, spark[], numStreams, fTime, dframe, drag and
 *          fDeltaTime.
 * s      - smoke state, updated in place.
 *
 * NOTE(review): the listing has gaps (the embedded line numbers jump). Where
 * a sqrt form and an asm form of the same computation both appear, they are
 * presumably alternatives chosen by preprocessor conditionals on the missing
 * lines - confirm against the full source.
 */
200 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* New sub-particles are spawned at the star's current position. */
203 float sx = flurry->star->position[0];
204 float sy = flurry->star->position[1];
205 float sz = flurry->star->position[2];
207 double frameRateModifier;
/* The loop below emits one sub-particle per stream (flurry->numStreams). */
213 /* release 12 puffs every frame */
214 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
215 float dx,dy,dz,deltax,deltay,deltaz;
227 for(i=0;i<flurry->numStreams;i++) {
228 float streamSpeedCoherenceFactor;
/* Initial velocity; deltax/deltay/deltaz are computed on lines not shown. */
230 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
231 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
232 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
233 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
234 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
235 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
236 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
237 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
238 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-particle speed factor; MAX_ presumably clamps it at 0 - confirm. */
239 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* Offset from this stream's spark to the new particle. */
240 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
241 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
242 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
243 rsquared = (dx*dx+dy*dy+dz*dz);
244 f = streamSpeed * streamSpeedCoherenceFactor;
/*
 * Two ways of obtaining mag follow. As shown, the asm writes only the
 * estimate of 1/sqrt(rsquared) into mag; any multiplication by f would have
 * to be on a line missing from this listing. The unprefixed text line
 * between them is the interior of a comment whose delimiters are missing.
 * NOTE(review): rsquared is 0 when the star coincides with the spark.
 */
246 mag = f / (float) sqrt(rsquared);
248 reciprocal square-root estimate replaced above divide and call to system sqrt()
250 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
254 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
255 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
256 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with independent random variation per channel. */
257 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
258 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
259 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
260 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a random start frame in 0..63. */
261 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
262 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
263 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/*
 * Four sub-particles share one s->p[] entry; the statements that advance
 * and reset s->nextParticle are among the lines not shown.
 */
264 s->nextSubParticle++;
265 if (s->nextSubParticle==4) {
267 s->nextSubParticle=0;
269 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
271 s->nextSubParticle = 0;
/*
 * Both assignments below record the current time; the branch structure
 * that separates them is not visible in this listing.
 */
275 s->lastParticleTime = flurry->fTime;
278 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (loop header not shown). */
283 s->old[i] = flurry->star->position[i];
/*
 * frameRate is dframe / fTime; the force term is scaled by 42.5 / frameRate.
 * NOTE(review): not finite if flurry->fTime is 0.
 */
286 frameRate = ((double) flurry->dframe)/(flurry->fTime);
287 frameRateModifier = 42.5f / frameRate;
/* Walk every s->p[] entry; k (loop header not shown) selects the sub-particle. */
289 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead sub-particles are tested for here; the branch body is not shown. */
299 if (s->p[i].dead.i[k]) {
303 deltax = s->p[i].delta[0].f[k];
304 deltay = s->p[i].delta[1].f[k];
305 deltaz = s->p[i].delta[2].f[k];
307 for(j=0;j<flurry->numStreams;j++) {
308 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
309 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
310 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
311 rsquared = (dx*dx+dy*dy+dz*dz);
/*
 * Two forms of f = gravity * frameRateModifier / rsquared: the fres
 * reciprocal estimate followed by a multiply, and an exact division.
 * NOTE(review): rsquared == 0 (particle exactly on a spark) is not guarded.
 */
314 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
315 f *= gravity*frameRateModifier;
317 f = ( gravity * frameRateModifier ) / rsquared;
/* The stream whose index equals (i*4+k) mod numStreams is scaled by 1 + streamBias. */
319 if((((i*4)+k) % flurry->numStreams) == j) {
320 f *= 1.0f + streamBias;
323 mag = f / (float) sqrt(rsquared);
325 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
327 deltax -= (dx * mag);
328 deltay -= (dy * mag);
329 deltaz -= (dz * mag);
332 /* slow this particle down by flurry->drag */
333 deltax *= flurry->drag;
334 deltay *= flurry->drag;
335 deltaz *= flurry->drag;
/* A squared speed of 25000000 or more marks the sub-particle dead. */
337 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
338 s->p[i].dead.i[k] = 1;
342 /* update the position */
343 s->p[i].delta[0].f[k] = deltax;
344 s->p[i].delta[1].f[k] = deltay;
345 s->p[i].delta[2].f[k] = deltaz;
/*
 * j indexes the coordinate here (its loop header is not shown): save the
 * previous position, then advance by delta * fDeltaTime.
 */
347 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
348 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_VectorBase: AltiVec smoke update for one frame.
 *
 * Emission is still scalar (one new sub-particle per stream, gated on 1/121
 * of elapsed flurry->fTime). The integration pass then handles the four
 * sub-particles of each s->p[] entry together, one per vector lane: for
 * every stream it accumulates the pull of that stream's spark, then applies
 * drag, the speed-based dead test and the position step.
 *
 * global - not referenced in the visible lines.
 * flurry - supplies star, spark[], numStreams, fTime, dframe, drag and
 *          fDeltaTime.
 * s      - smoke state, updated in place.
 *
 * NOTE(review): the listing has gaps (the embedded line numbers jump), so
 * braces, some declarations (e.g. dragV, mod, mag), loop headers and branch
 * bodies are not shown here.
 */
358 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* New sub-particles are spawned at the star's current position. */
361 float sx = flurry->star->position[0];
362 float sy = flurry->star->position[1];
363 float sz = flurry->star->position[2];
365 floatToVector frameRateModifier;
366 floatToVector gravityV;
368 floatToVector deltaTimeV;
369 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
370 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
371 const vector float biasConst = (vector float)(streamBias);
/*
 * Broadcast scalars to all four lanes: store into element 0 through the
 * union, then vec_splat element 0 across the vector.
 */
373 gravityV.f[0] = gravity;
374 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
376 dragV.f[0] = flurry->drag;
377 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
379 deltaTimeV.f[0] = flurry->fDeltaTime;
380 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
/* The loop below emits one sub-particle per stream (flurry->numStreams). */
385 /* release 12 puffs every frame */
386 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
387 float dx,dy,dz,deltax,deltay,deltaz;
399 for(i=0;i<flurry->numStreams;i++) {
400 float streamSpeedCoherenceFactor;
/* Initial velocity; deltax/deltay/deltaz are computed on lines not shown. */
402 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
403 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
404 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
405 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
406 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
407 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
408 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
409 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
410 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-particle speed factor; MAX_ presumably clamps it at 0 - confirm. */
411 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* Offset from this stream's spark to the new particle. */
412 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
413 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
414 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
415 rsquared = (dx*dx+dy*dy+dz*dz);
416 f = streamSpeed * streamSpeedCoherenceFactor;
/*
 * Both a sqrt form and a frsqrte (reciprocal square-root estimate) form of
 * mag appear; presumably preprocessor conditionals on the missing lines
 * pick one. As shown, the asm stores only the estimate into mag.
 * NOTE(review): rsquared is 0 when the star coincides with the spark.
 */
418 mag = f / (float) sqrt(rsquared);
420 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
424 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
425 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
426 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with independent random variation per channel. */
427 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
428 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
429 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
430 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a random start frame in 0..63. */
431 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
432 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
433 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/*
 * Four sub-particles share one s->p[] entry; the statements that advance
 * and reset s->nextParticle are among the lines not shown.
 */
434 s->nextSubParticle++;
435 if (s->nextSubParticle==4) {
437 s->nextSubParticle=0;
439 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
441 s->nextSubParticle = 0;
/*
 * Both assignments below record the current time; the branch structure
 * that separates them is not visible in this listing.
 */
445 s->lastParticleTime = flurry->fTime;
448 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (loop header not shown). */
453 s->old[i] = flurry->star->position[i];
/*
 * frameRate is dframe / fTime. The splatted 42.5 / frameRate is then
 * multiplied by gravity once (vec_madd computes a*b + c), so the inner
 * loop does not have to.
 * NOTE(review): not finite if flurry->fTime is 0.
 */
456 frameRate = ((double) flurry->dframe)/(flurry->fTime);
457 frameRateModifier.f[0] = 42.5f / frameRate;
458 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
460 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* One iteration per s->p[] entry, i.e. four sub-particles at a time. */
462 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
463 /* floatToVector f; */
464 vector float deltax, deltay, deltaz;
465 vector float distTemp;
466 vector unsigned int deadTemp;
467 /* floatToVector infopos0, infopos1, infopos2; */
469 vector unsigned int jVec;
/* Data-stream touch: a prefetch hint for the entry four ahead. */
472 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True only when all four lanes are dead; the branch body is not shown. */
474 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
478 deltax = s->p[i].delta[0].v;
479 deltay = s->p[i].delta[1].v;
480 deltaz = s->p[i].delta[2].v;
/*
 * mod holds, per lane, the index of the stream that gets the bias for that
 * sub-particle: lane 0 is (4*i) mod numStreams, each later lane is the
 * previous one plus 1, with a wrap test whose body is not shown.
 * NOTE(review): "+" binds tighter than "<<", so i<<2 + 0 parses as
 * i << (2 + 0); the value is as intended only because the addend is 0.
 */
482 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
483 if(mod.i[0]+1 == flurry->numStreams) {
486 mod.i[1] = mod.i[0]+1;
488 if(mod.i[1]+1 == flurry->numStreams) {
491 mod.i[2] = mod.i[1]+1;
493 if(mod.i[2]+1 == flurry->numStreams) {
496 mod.i[3] = mod.i[2]+1;
/* x ^ x zeroes jVec, the per-lane copy of the stream counter j. */
499 jVec = vec_xor(jVec, jVec);
501 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
502 for(j=0; j<flurry->numStreams;j++) {
503 vector float ip0, ip1 = (vector float)(0.0), ip2;
504 vector float dx, dy, dz;
505 vector float rsquared, f;
506 vector float one_over_rsquared;
507 vector float biasTemp;
509 vector bool int biasOr;
/*
 * Load the spark position from a possibly unaligned address: vec_ld reads
 * the enclosing 16-byte block, a second block is fetched only when the
 * address sits 8 or more bytes into its block, and vec_perm with the
 * vec_lvsl mask shifts the wanted bytes to the front.
 */
511 ip0 = vec_ld(0, flurry->spark[j]->position);
512 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
513 ip1 = vec_ld(16, flurry->spark[j]->position);
516 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast y, z and x of the spark across all four lanes. */
517 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
518 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
519 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
521 dx = vec_sub(s->p[i].position[0].v, ip0);
522 dy = vec_sub(s->p[i].position[1].v, ip1);
523 dz = vec_sub(s->p[i].position[2].v, ip2);
/* rsquared = dx*dx + dy*dy + dz*dz, built from chained multiply-adds. */
525 rsquared = vec_madd(dx, dx, zero);
526 rsquared = vec_madd(dy, dy, rsquared);
527 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose biased stream is j get 1 + streamBias, the others 1.0. */
529 biasOr = vec_cmpeq(jVec, mod.v);
530 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/*
 * f = bias * gravity * frameRateModifier / rsquared, using the vec_re
 * reciprocal estimate; mag = f / sqrt(rsquared) via the vec_rsqrte estimate.
 */
532 f = vec_madd(biasTemp, frameRateModifier.v, zero);
533 one_over_rsquared = vec_re(rsquared);
534 f = vec_madd(f, one_over_rsquared, zero);
536 mag = vec_rsqrte(rsquared);
537 mag = vec_madd(mag, f, zero);
/* vec_nmsub(a, b, c) is c - a*b, so each delta loses d * mag. */
539 deltax = vec_nmsub(dx, mag, deltax);
540 deltay = vec_nmsub(dy, mag, deltay);
541 deltaz = vec_nmsub(dz, mag, deltaz);
543 jVec = vec_add(jVec, (vector unsigned int)(1));
546 /* slow this particle down by flurry->drag */
547 deltax = vec_madd(deltax, dragV.v, zero);
548 deltay = vec_madd(deltay, dragV.v, zero);
549 deltaz = vec_madd(deltaz, dragV.v, zero);
/* Squared speed per lane. */
551 distTemp = vec_madd(deltax, deltax, zero);
552 distTemp = vec_madd(deltay, deltay, distTemp);
553 distTemp = vec_madd(deltaz, deltaz, distTemp);
/*
 * Turn the all-ones compare mask into 1 for lanes at or above the limit and
 * OR it into the dead flags, leaving already-set flags untouched.
 */
555 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
556 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
557 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* True only when all four lanes are now dead; the branch body is not shown. */
558 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
562 /* update the position */
563 s->p[i].delta[0].v = deltax;
564 s->p[i].delta[1].v = deltay;
565 s->p[i].delta[2].v = deltaz;
/*
 * j indexes the coordinate here (its loop header is not shown): save the
 * previous position, then position += delta * fDeltaTime.
 */
567 s->p[i].oldposition[j].v = s->p[i].position[j].v;
568 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * UpdateSmoke_VectorUnrolled: AltiVec smoke update whose inner stream loop
 * is unrolled four times.
 *
 * Emission is scalar (one new sub-particle per stream, gated on 1/121 of
 * elapsed fTime). The integration pass handles the four sub-particles of
 * each s->p[] entry in one vector; streams are consumed four per iteration
 * (suffixes A-D / a-d), and a one-stream-at-a-time loop mops up whatever is
 * left when numStreams is not a multiple of four.
 *
 * info - not referenced in the visible lines.
 * s    - smoke state, updated in place.
 *
 * NOTE(review): the body reads flurry-> throughout, but the visible
 * signature has no flurry parameter and no declaration of it appears in
 * this listing; as shown this would not compile. Confirm where flurry is
 * meant to come from.
 * NOTE(review): the listing has gaps (the embedded line numbers jump), so
 * braces, some declarations (e.g. dragV, mod, mag, magA-magD), loop headers
 * and branch bodies are not shown here.
 */
573 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
/* New sub-particles are spawned at the star's current position. */
576 float sx = flurry->star->position[0];
577 float sy = flurry->star->position[1];
578 float sz = flurry->star->position[2];
580 floatToVector frameRateModifier;
581 floatToVector gravityV;
583 floatToVector deltaTimeV;
584 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
585 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
586 const vector float biasConst = (vector float)(streamBias);
/*
 * Broadcast scalars to all four lanes: store into element 0 through the
 * union, then vec_splat element 0 across the vector.
 */
588 gravityV.f[0] = gravity;
589 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
591 dragV.f[0] = flurry->drag;
592 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
594 deltaTimeV.f[0] = flurry->fDeltaTime;
595 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
/* The loop below emits one sub-particle per stream (flurry->numStreams). */
600 /* release 12 puffs every frame */
601 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
602 float dx,dy,dz,deltax,deltay,deltaz;
614 for(i=0;i<flurry->numStreams;i++) {
615 float streamSpeedCoherenceFactor;
/* Initial velocity; deltax/deltay/deltaz are computed on lines not shown. */
617 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
618 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
619 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
620 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
621 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
622 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
623 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
624 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
625 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-particle speed factor; MAX_ presumably clamps it at 0 - confirm. */
626 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* Offset from this stream's spark to the new particle. */
627 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
628 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
629 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
630 rsquared = (dx*dx+dy*dy+dz*dz);
631 f = streamSpeed * streamSpeedCoherenceFactor;
/*
 * Both a sqrt form and a frsqrte (reciprocal square-root estimate) form of
 * mag appear; presumably preprocessor conditionals on the missing lines
 * pick one. As shown, the asm stores only the estimate into mag.
 * NOTE(review): rsquared is 0 when the star coincides with the spark.
 */
633 mag = f / (float) sqrt(rsquared);
635 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
639 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
640 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
641 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with independent random variation per channel. */
642 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
643 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
644 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
645 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a random start frame in 0..63. */
646 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
647 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
648 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/*
 * Four sub-particles share one s->p[] entry; the statements that advance
 * and reset s->nextParticle are among the lines not shown.
 */
649 s->nextSubParticle++;
650 if (s->nextSubParticle==4) {
652 s->nextSubParticle=0;
654 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
656 s->nextSubParticle = 0;
/*
 * Both assignments below record the current time; the branch structure
 * that separates them is not visible in this listing.
 */
660 s->lastParticleTime = flurry->fTime;
663 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (loop header not shown). */
668 s->old[i] = flurry->star->position[i];
/*
 * frameRate is dframe / fTime. The splatted 42.5 / frameRate is multiplied
 * by gravity once here (vec_madd computes a*b + c).
 * NOTE(review): not finite if flurry->fTime is 0.
 */
671 frameRate = ((double) flurry->dframe)/(flurry->fTime);
672 frameRateModifier.f[0] = 42.5f / frameRate;
673 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
675 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* One iteration per s->p[] entry, i.e. four sub-particles at a time. */
677 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
678 /* floatToVector f; */
679 vector float deltax, deltay, deltaz;
680 vector float distTemp;
681 vector unsigned int deadTemp;
682 /* floatToVector infopos0, infopos1, infopos2; */
684 vector unsigned int jVec;
/* Integer 1 in every lane, and the same value converted to float 1.0. */
685 vector unsigned int intOne = vec_splat_u32(1);
686 vector float floatOne = vec_ctf(intOne, 0);
/* Data-stream touch: a prefetch hint for the entry four ahead. */
689 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True only when all four lanes are dead; the branch body is not shown. */
691 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
695 deltax = s->p[i].delta[0].v;
696 deltay = s->p[i].delta[1].v;
697 deltaz = s->p[i].delta[2].v;
/*
 * mod holds, per lane, the index of the stream that gets the bias for that
 * sub-particle: lane 0 is (4*i) mod numStreams, each later lane is the
 * previous one plus 1, with a wrap test whose body is not shown.
 * NOTE(review): "+" binds tighter than "<<", so i<<2 + 0 parses as
 * i << (2 + 0); the value is as intended only because the addend is 0.
 */
699 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
700 if(mod.i[0]+1 == flurry->numStreams) {
703 mod.i[1] = mod.i[0]+1;
705 if(mod.i[1]+1 == flurry->numStreams) {
708 mod.i[2] = mod.i[1]+1;
710 if(mod.i[2]+1 == flurry->numStreams) {
713 mod.i[3] = mod.i[2]+1;
/* x ^ x zeroes jVec, the per-lane copy of the stream counter j. */
716 jVec = vec_xor(jVec, jVec);
718 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* Unrolled loop: streams j, j+1, j+2 and j+3 are processed together. */
719 for(j=0; j + 3 < flurry->numStreams;j+=4)
721 vector float dxa, dya, dza;
722 vector float dxb, dyb, dzb;
723 vector float dxc, dyc, dzc;
724 vector float dxd, dyd, dzd;
725 vector float ip0a, ip1a;
726 vector float ip0b, ip1b;
727 vector float ip0c, ip1c;
728 vector float ip0d, ip1d;
729 vector float rsquaredA;
730 vector float rsquaredB;
731 vector float rsquaredC;
732 vector float rsquaredD;
733 vector float fA, fB, fC, fD;
734 vector float biasTempA;
735 vector float biasTempB;
736 vector float biasTempC;
737 vector float biasTempD;
743 vector float one_over_rsquaredA;
744 vector float one_over_rsquaredB;
745 vector float one_over_rsquaredC;
746 vector float one_over_rsquaredD;
747 vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
/*
 * Unaligned load of four spark positions: read the 16-byte block holding
 * the address and the block holding address + 12 (unconditionally here,
 * unlike the remainder loop), then let vec_perm with the vec_lvsl mask
 * shift the wanted bytes to the front.
 */
750 ip0a = vec_ld(0, flurry->spark[j]->position);
751 ip0b = vec_ld(0, flurry->spark[j+1]->position);
752 ip0c = vec_ld(0, flurry->spark[j+2]->position);
753 ip0d = vec_ld(0, flurry->spark[j+3]->position);
754 ip1a = vec_ld( 12, flurry->spark[j]->position );
755 ip1b = vec_ld( 12, flurry->spark[j+1]->position );
756 ip1c = vec_ld( 12, flurry->spark[j+2]->position );
757 ip1d = vec_ld( 12, flurry->spark[j+3]->position );
760 ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
761 ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
762 ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
763 ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
/* dx = particle x - spark x, with spark x broadcast to every lane. */
765 dxa = vec_splat( ip0a, 0 );
766 dxb = vec_splat( ip0b, 0 );
767 dxc = vec_splat( ip0c, 0 );
768 dxd = vec_splat( ip0d, 0 );
769 dxa = vec_sub( s->p[i].position[0].v, dxa );
770 dxb = vec_sub( s->p[i].position[0].v, dxb );
771 dxc = vec_sub( s->p[i].position[0].v, dxc );
772 dxd = vec_sub( s->p[i].position[0].v, dxd );
/* Same for y. */
774 dya = vec_splat( ip0a, 1 );
775 dyb = vec_splat( ip0b, 1 );
776 dyc = vec_splat( ip0c, 1 );
777 dyd = vec_splat( ip0d, 1 );
778 dya = vec_sub( s->p[i].position[1].v, dya );
779 dyb = vec_sub( s->p[i].position[1].v, dyb );
780 dyc = vec_sub( s->p[i].position[1].v, dyc );
781 dyd = vec_sub( s->p[i].position[1].v, dyd );
/* Same for z. */
783 dza = vec_splat( ip0a, 2 );
784 dzb = vec_splat( ip0b, 2 );
785 dzc = vec_splat( ip0c, 2 );
786 dzd = vec_splat( ip0d, 2 );
787 dza = vec_sub( s->p[i].position[2].v, dza );
788 dzb = vec_sub( s->p[i].position[2].v, dzb );
789 dzc = vec_sub( s->p[i].position[2].v, dzc );
790 dzd = vec_sub( s->p[i].position[2].v, dzd );
/* rsquared = dx*dx + dy*dy + dz*dz for each of the four streams. */
792 rsquaredA = vec_madd( dxa, dxa, zero );
793 rsquaredB = vec_madd( dxb, dxb, zero );
794 rsquaredC = vec_madd( dxc, dxc, zero );
795 rsquaredD = vec_madd( dxd, dxd, zero );
797 rsquaredA = vec_madd( dya, dya, rsquaredA );
798 rsquaredB = vec_madd( dyb, dyb, rsquaredB );
799 rsquaredC = vec_madd( dyc, dyc, rsquaredC );
800 rsquaredD = vec_madd( dyd, dyd, rsquaredD );
802 rsquaredA = vec_madd( dza, dza, rsquaredA );
803 rsquaredB = vec_madd( dzb, dzb, rsquaredB );
804 rsquaredC = vec_madd( dzc, dzc, rsquaredC );
805 rsquaredD = vec_madd( dzd, dzd, rsquaredD );
/*
 * Bias masks for streams j..j+3: compare the stream counter with mod, then
 * step the counter, four times over.
 */
807 biasOrA = vec_cmpeq( jVec, mod.v );
808 jVec = vec_add(jVec, intOne);
809 biasOrB = vec_cmpeq( jVec, mod.v );
810 jVec = vec_add(jVec, intOne);
811 biasOrC = vec_cmpeq( jVec, mod.v );
812 jVec = vec_add(jVec, intOne);
813 biasOrD = vec_cmpeq( jVec, mod.v );
814 jVec = vec_add(jVec, intOne);
/* Masked lanes get 1 + streamBias, the others 1.0. */
816 biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
817 biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
818 biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
819 biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
/*
 * f = bias * gravity * frameRateModifier / rsquared (vec_re estimate), then
 * mag = f / sqrt(rsquared) (vec_rsqrte estimate).
 */
821 fA = vec_madd( biasTempA, frameRateModifier.v, zero);
822 fB = vec_madd( biasTempB, frameRateModifier.v, zero);
823 fC = vec_madd( biasTempC, frameRateModifier.v, zero);
824 fD = vec_madd( biasTempD, frameRateModifier.v, zero);
825 one_over_rsquaredA = vec_re( rsquaredA );
826 one_over_rsquaredB = vec_re( rsquaredB );
827 one_over_rsquaredC = vec_re( rsquaredC );
828 one_over_rsquaredD = vec_re( rsquaredD );
829 fA = vec_madd( fA, one_over_rsquaredA, zero);
830 fB = vec_madd( fB, one_over_rsquaredB, zero);
831 fC = vec_madd( fC, one_over_rsquaredC, zero);
832 fD = vec_madd( fD, one_over_rsquaredD, zero);
833 magA = vec_rsqrte( rsquaredA );
834 magB = vec_rsqrte( rsquaredB );
835 magC = vec_rsqrte( rsquaredC );
836 magD = vec_rsqrte( rsquaredD );
837 magA = vec_madd( magA, fA, zero );
838 magB = vec_madd( magB, fB, zero );
839 magC = vec_madd( magC, fC, zero );
840 magD = vec_madd( magD, fD, zero );
/* vec_nmsub(a, b, c) is c - a*b: subtract each stream's pull in turn. */
841 deltax = vec_nmsub( dxa, magA, deltax );
842 deltay = vec_nmsub( dya, magA, deltay );
843 deltaz = vec_nmsub( dza, magA, deltaz );
845 deltax = vec_nmsub( dxb, magB, deltax );
846 deltay = vec_nmsub( dyb, magB, deltay );
847 deltaz = vec_nmsub( dzb, magB, deltaz );
849 deltax = vec_nmsub( dxc, magC, deltax );
850 deltay = vec_nmsub( dyc, magC, deltay );
851 deltaz = vec_nmsub( dzc, magC, deltaz );
853 deltax = vec_nmsub( dxd, magD, deltax );
854 deltay = vec_nmsub( dyd, magD, deltay );
855 deltaz = vec_nmsub( dzd, magD, deltaz );
/* Remainder loop: the streams left over after the groups of four. */
859 for(;j<flurry->numStreams;j++) {
860 vector float ip0, ip1 = (vector float)(0.0), ip2;
861 vector float dx, dy, dz;
862 vector float rsquared, f;
863 vector float one_over_rsquared;
864 vector float biasTemp;
866 vector bool int biasOr;
/*
 * Unaligned load of one spark position; the second block is fetched only
 * when the address sits 8 or more bytes into its 16-byte block.
 */
868 ip0 = vec_ld(0, flurry->spark[j]->position);
869 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
870 ip1 = vec_ld(16, flurry->spark[j]->position);
873 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast y, z and x of the spark across all four lanes. */
874 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
875 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
876 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
878 dx = vec_sub(s->p[i].position[0].v, ip0);
879 dy = vec_sub(s->p[i].position[1].v, ip1);
880 dz = vec_sub(s->p[i].position[2].v, ip2);
882 rsquared = vec_madd(dx, dx, zero);
883 rsquared = vec_madd(dy, dy, rsquared);
884 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose biased stream is j get 1 + streamBias, the others 1.0. */
886 biasOr = vec_cmpeq(jVec, mod.v);
887 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
889 f = vec_madd(biasTemp, frameRateModifier.v, zero);
890 one_over_rsquared = vec_re(rsquared);
891 f = vec_madd(f, one_over_rsquared, zero);
893 mag = vec_rsqrte(rsquared);
894 mag = vec_madd(mag, f, zero);
896 deltax = vec_nmsub(dx, mag, deltax);
897 deltay = vec_nmsub(dy, mag, deltay);
898 deltaz = vec_nmsub(dz, mag, deltaz);
900 jVec = vec_add(jVec, (vector unsigned int)(1));
903 /* slow this particle down by flurry->drag */
904 deltax = vec_madd(deltax, dragV.v, zero);
905 deltay = vec_madd(deltay, dragV.v, zero);
906 deltaz = vec_madd(deltaz, dragV.v, zero);
/* Squared speed per lane. */
908 distTemp = vec_madd(deltax, deltax, zero);
909 distTemp = vec_madd(deltay, deltay, distTemp);
910 distTemp = vec_madd(deltaz, deltaz, distTemp);
/*
 * Turn the all-ones compare mask into 1 for lanes at or above the limit and
 * OR it into the dead flags, leaving already-set flags untouched.
 */
912 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
913 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
914 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* True only when all four lanes are now dead; the branch body is not shown. */
915 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
919 /* update the position */
920 s->p[i].delta[0].v = deltax;
921 s->p[i].delta[1].v = deltay;
922 s->p[i].delta[2].v = deltaz;
/*
 * j indexes the coordinate here (its loop header is not shown): save the
 * previous position, then position += delta * fDeltaTime.
 */
924 s->p[i].oldposition[j].v = s->p[i].position[j].v;
925 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * DrawSmoke_Scalar: build the colour, texture-coordinate and vertex arrays
 * for the smoke sub-particles in plain C and draw them as GL_QUADS.
 *
 * For each sub-particle the visible code: tests the dead flag, computes a
 * width that grows with age and kills the sub-particle once that width
 * reaches the maximum, projects the current and previous positions to the
 * screen, applies a screen/depth test, advances the animation frame, and
 * writes four colours, four texture coordinates and four vertices into
 * s->seraphimColors / s->seraphimTextures / s->seraphimVertices.
 *
 * global     - supplies sys_glWidth and sys_glHeight.
 * flurry     - supplies fTime and streamExpansion.
 * s          - smoke state; dead flags and animFrame are modified, and the
 *              seraphim* arrays are filled.
 * brightness - not referenced in the visible lines.
 *
 * NOTE(review): the listing has gaps (the embedded line numbers jump), so
 * braces, declarations, branch bodies, several intermediate values (sm, os,
 * dxs, dys, dxm, dym, dxos, dyos, u1, v1) and the index increments are not
 * shown here.
 */
933 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* Sizes are scaled by viewport width / 1024; the half extents are the screen centre. */
943 float screenRatio = global->sys_glWidth / 1024.0f;
944 float hslash2 = global->sys_glHeight * 0.5f;
945 float wslash2 = global->sys_glWidth * 0.5f;
/* Largest width a puff may reach before it is marked dead. */
948 width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
950 for (i=0;i<NUMSMOKEPARTICLES/4;i++)
952 for (k=0; k<4; k++) {
/* Dead sub-particles are tested for here; the branch body is not shown. */
956 if (s->p[i].dead.i[k]) {
/*
 * Width grows linearly with age (fTime - birth time); reaching the maximum
 * marks the sub-particle dead.
 */
959 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
960 if (thisWidth >= width)
962 s->p[i].dead.i[k] = 1;
/*
 * Perspective projection to screen space. Both axes are scaled by
 * sys_glWidth.
 */
965 z = s->p[i].position[2].f[k];
966 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
967 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
968 oldz = s->p[i].oldposition[2].f[k];
/*
 * Test for more than 50 units outside the viewport, or either depth below
 * 25; the branch body is not shown.
 */
969 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
/* On-screen width shrinks with depth; MAX_ presumably floors it at 1 - confirm. */
974 w = MAX_(1.0f,thisWidth/z);
/* Project the previous position and take the screen-space motion. */
976 float oldx = s->p[i].oldposition[0].f[k];
977 float oldy = s->p[i].oldposition[1].f[k];
978 float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
979 float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
980 float dx = (sx-oldscreenx);
981 float dy = (sy-oldscreeny);
/* Presumably the length of (dx, dy); FastDistance2D is defined elsewhere. */
983 float d = FastDistance2D(dx, dy);
994 ow = MAX_(1.0f,thisWidth/oldz);
1007 float m = 1.0f + sm;
/* Advance the animation frame, wrapping from 64 back to 0. */
1016 s->p[i].animFrame.i[k]++;
1017 if (s->p[i].animFrame.i[k] >= 64)
1019 s->p[i].animFrame.i[k] = 0;
/*
 * Texture origin in steps of 0.125.
 * NOTE(review): "&&" is the logical AND, so the u0 expression is only ever
 * 0 or 1 (u0 is 0.0 or 0.125) instead of the low three bits of the frame;
 * a bitwise "& 7" looks intended, matching the ">> 3" used for v0 - confirm
 * and fix in the full source.
 */
1022 u0 = (s->p[i].animFrame.i[k]&&7) * 0.125f;
1023 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
/* Colour multiplier; it drops as thisWidth approaches width. */
1028 cm = (1.375f - thisWidth/width);
/*
 * NOTE(review): 3 is the value of NOT_QUITE_DEAD; presumably that constant
 * is what is meant here - confirm.
 */
1029 if (s->p[i].dead.i[k] == 3)
1032 s->p[i].dead.i[k] = 1;
1036 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1037 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1038 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1039 cmv.f[3] = s->p[i].color[3].f[k]*cm;
/*
 * The same colour is stored four times. Both a vector-assignment form and
 * an element-wise form appear; presumably preprocessor conditionals on the
 * missing lines pick one.
 */
1042 /* MDT we can't use vectors in the Scalar routine */
1043 s->seraphimColors[sci++].v = cmv.v;
1044 s->seraphimColors[sci++].v = cmv.v;
1045 s->seraphimColors[sci++].v = cmv.v;
1046 s->seraphimColors[sci++].v = cmv.v;
1050 for (jj = 0; jj < 4; jj++) {
1051 for (ii = 0; ii < 4; ii++) {
1052 s->seraphimColors[sci].f[ii] = cmv.f[ii];
/* Texture coordinates in the order (u0,v0), (u0,v1), (u1,v1), (u1,v0). */
1059 s->seraphimTextures[sti++] = u0;
1060 s->seraphimTextures[sti++] = v0;
1061 s->seraphimTextures[sti++] = u0;
1062 s->seraphimTextures[sti++] = v1;
1064 s->seraphimTextures[sti++] = u1;
1065 s->seraphimTextures[sti++] = v1;
1066 s->seraphimTextures[sti++] = u1;
1067 s->seraphimTextures[sti++] = v0;
/*
 * Each seraphimVertices element holds two (x, y) pairs: first the pair
 * around the current screen position, then the pair around the previous
 * one. The increment of svi between the two groups is not shown.
 */
1069 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1070 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1071 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1072 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
1075 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1076 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1077 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1078 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
/* Hand the three arrays to OpenGL and draw si*4 vertices as quads. */
1084 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1085 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1086 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1087 glDrawArrays(GL_QUADS,0,si*4);
/*
 * DrawSmoke_Vector: AltiVec version of the smoke renderer.
 *
 * Processes the smoke particles four at a time (one vector lane per
 * particle, NUMSMOKEPARTICLES/4 iterations).  For each group it projects the
 * current and previous positions to screen space, builds one textured quad
 * per visible particle into s->seraphimColors / s->seraphimVertices /
 * s->seraphimTextures, and finally submits everything with a single
 * glDrawArrays(GL_QUADS, ...) call.
 *
 * global     - read for sys_glWidth and sys_glHeight.
 * flurry     - read for fTime and streamExpansion.
 * s          - particle state; dead and animFrame are updated in place and the
 *              seraphim* arrays are filled.
 * brightness - scales the per-particle colour multiplier.
 */
1093 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1095 const vector float zero = (vector float)(0.0);
1100 floatToVector width;
1102 floatToVector u0,v0,u1,v1;
1103 vector float one_over_z;
1106 float screenRatio = global->sys_glWidth / 1024.0f;
1107 float hslash2 = global->sys_glHeight * 0.5f;
1108 float wslash2 = global->sys_glWidth * 0.5f;
1110 floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1111 floatToVector glWidthV;
1113 vector float cmv[4];
1114 vector float svec[4], ovec[4];
1115 vector float oldscreenx, oldscreeny;
1117 vector float frameAnd7;
1118 vector float frameShift3;
1119 vector float one_over_width;
1120 vector float dx, dy;
1122 vector unsigned int vSi = vec_splat_u32(0);
1123 const vector float eighth = (vector float)(0.125);
1124 float glWidth50 = global->sys_glWidth + 50.0f;
1125 float glHeight50 = global->sys_glHeight + 50.0f;
1126 vector float vGLWidth50, vGLHeight50;
1127 unsigned int blitBool;
/* Data-stream touch (prefetch hint) for the first particle group. */
1129 vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
/* Broadcast the two off-screen limits (size + 50) to all four lanes:
   vec_lde loads the single float, and the vec_lvsl-derived permute replicates
   it across the vector. */
1132 vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1133 vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1134 permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1135 permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1136 vGLWidth50 = vec_lde( 0, &glWidth50 );
1137 vGLHeight50 = vec_lde( 0, &glHeight50 );
1138 vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1139 vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
/* Splat each scalar constant into a vector: write lane 0 through the union,
   then replicate lane 0 with vec_splat. */
1142 width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1143 width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1145 briteV.f[0] = brightness;
1146 briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1148 fTimeV.f[0] = (float) flurry->fTime;
1149 fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1151 expansionV.f[0] = flurry->streamExpansion;
1152 expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1154 screenRatioV.f[0] = screenRatio;
1155 screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1157 hslash2V.f[0] = hslash2;
1158 hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1160 wslash2V.f[0] = wslash2;
1161 wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1163 streamSizeV.f[0] = streamSize;
1164 streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1166 glWidthV.f[0] = global->sys_glWidth;
1167 glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
/* Main loop: each s->p[i] holds four particles, one per vector lane. */
1169 for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1170 vector float thisWidth;
1172 vector float oldx, oldy, one_over_oldz;
1173 vector float xabs, yabs, mn;
1175 vector float one_over_d;
1176 vector bool int dnz;
/* Prefetch hint for a group a few iterations ahead. */
1179 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
/* Nothing to do when all four lanes have dead == 1. */
1181 if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1183 blitBool = 0; /* keep track of particles that actually need to be drawn */
/* thisWidth = ((fTime - time) * streamExpansion + streamSize) * screenRatio */
1185 thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1186 thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1187 thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
/* Perspective projection of the current position, using the reciprocal
   estimate of z: screen = pos * glWidth / z + half-extent. */
1189 z.v = s->p[i].position[2].v;
1190 one_over_z = vec_re(z.v);
1192 sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1193 sx = vec_madd(sx, one_over_z, wslash2V.v);
1194 sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1195 sy = vec_madd(sy, one_over_z, hslash2V.v);
1197 oldz = s->p[i].oldposition[2].v;
/* Projected width at the current position, never below 1. */
1199 w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
/* Same projection for the previous position. */
1201 oldx = s->p[i].oldposition[0].v;
1202 oldy = s->p[i].oldposition[1].v;
1203 one_over_oldz = vec_re(oldz);
1204 oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1205 oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1206 oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1207 oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1208 dx = vec_sub(sx,oldscreenx);
1209 dy = vec_sub(sy,oldscreeny);
/* d = xabs + yabs - 0.6875 * min(xabs, yabs).
   NOTE(review): presumably xabs/yabs are |dx|,|dy| and this is the vector
   counterpart of the scalar FastDistance2D approximation - confirm. */
1213 mn = vec_min(xabs,yabs);
1214 d = vec_add(xabs,yabs);
1215 d = vec_madd(mn, (vector float)(-0.6875), d);
/* Projected width at the previous position, never below 1. */
1217 ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
/* sm = w / d and os = ow / d; lanes where d is not > 0 are forced to zero
   via the dnz mask so the reciprocal of zero is never used. */
1218 one_over_d = vec_re(d);
1219 dnz = vec_cmpgt(d, zero);
1220 sm = vec_madd(w, one_over_d, zero);
1221 sm = vec_and(sm, dnz);
1222 os = vec_madd(ow, one_over_d, zero);
1223 os = vec_and(os, dnz);
/* Build the per-lane "do not draw" mask (all ones where a test is true). */
1226 intToVector tempMask;
1227 vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1228 vector bool int gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1229 vector bool int glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1230 vector bool int glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1231 vector bool int test50x = vec_cmplt( sx, (vector float) (-50.0) );
1232 vector bool int test50y = vec_cmplt( sy, (vector float) (-50.0) );
1233 vector bool int testz = vec_cmplt( z.v, (vector float) (25.0) );
1234 vector bool int testoldz = vec_cmplt( oldz, (vector float) (25.0) );
/* dead becomes 1 for lanes that were already dead or whose width has reached
   the limit; this is written back before the visibility tests are ORed in,
   so off-screen or too-close lanes are skipped without being marked dead. */
1235 mask = vec_or( mask, gtMask );
1236 s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1237 mask = vec_or( mask, glWidth50Test );
1238 mask = vec_or( mask, glHeight50Test );
1239 mask = vec_or( mask, test50x );
1240 mask = vec_or( mask, test50y );
1241 mask = vec_or( mask, testz );
1242 mask = vec_or( mask, testoldz );
1243 tempMask.v = (vector unsigned int)mask;
/* ~mask is -1 in drawn lanes, so subtracting it adds 1 to animFrame there;
   the frame is then wrapped to 0..63. */
1245 s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1246 s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
/* Texture cell origin for the frame: u0 = (frame & 7) / 8 is the column,
   v0 = (frame >> 3) / 8 is the row. */
1248 frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1249 u0.v = vec_madd(frameAnd7, eighth, zero);
1251 frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
/* v0 must come from frameShift3 (the row), matching the scalar routine's
   (animFrame >> 3) * 0.125f; using frameAnd7 here would make v0 equal u0. */
1252 v0.v = vec_madd(frameShift3, eighth, zero);
1254 u1.v = vec_add(u0.v, eighth);
1255 v1.v = vec_add(v0.v, eighth);
/* Colour multiplier: 1.375 - thisWidth / width in drawn lanes (vec_nmsub
   yields c - a*b); masked lanes keep their previous cm value.  The result is
   then scaled by brightness. */
1257 one_over_width = vec_re(width.v);
1258 cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1259 cm.v = vec_madd(cm.v, briteV.v, zero);
/* Count drawn particles per lane (subtracting -1 adds one). */
1261 vSi = vec_sub( vSi, vec_nor( mask, mask ) );
/* blitBool gets bit k set when lane k is to be drawn: keep 1/2/4/8 in the
   unmasked lanes, sum across the vector with two rotates, store one element. */
1263 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1264 vector unsigned int temp = (vector unsigned int)mask;
1265 temp = vec_andc( blitMask, temp );
1266 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1267 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1268 vec_ste( temp, 0, &blitBool );
/* Interleave u/v into per-particle texture coordinates.  For particle k the
   "a/b" results hold (u0,v0,u0,v1) and the matching result3/result4 vectors
   hold (u1,v1,u1,v0), the same corner order as the scalar routine. */
1273 vector float temp1, temp2, temp3, temp4;
1274 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1276 temp1 = vec_mergeh( u0.v, u0.v );
1277 temp2 = vec_mergel( u0.v, u0.v );
1278 temp3 = vec_mergeh( v0.v, v1.v );
1279 temp4 = vec_mergel( v0.v, v1.v );
1281 result1a = vec_mergeh( temp1, temp3 );
1282 result1b = vec_mergel( temp1, temp3 );
1283 result2a = vec_mergeh( temp2, temp4 );
1284 result2b = vec_mergel( temp2, temp4 );
1286 temp1 = vec_mergeh( u1.v, u1.v );
1287 temp2 = vec_mergel( u1.v, u1.v );
1288 temp3 = vec_mergeh( v1.v, v0.v );
1289 temp4 = vec_mergel( v1.v, v0.v );
1291 result3a = vec_mergeh( temp1, temp3 );
1292 result3b = vec_mergel( temp1, temp3 );
1293 result4a = vec_mergeh( temp2, temp4 );
1294 result4b = vec_mergel( temp2, temp4 );
/* Store two vectors (eight floats) of texture coordinates per particle, in
   lane order 0,1,2,3, at the current sti offset. */
1298 vec_st( result1a, 0, &s->seraphimTextures[sti] );
1299 vec_st( result3a, 16, &s->seraphimTextures[sti]);
1304 vec_st( result1b, 0, &s->seraphimTextures[sti]);
1305 vec_st( result3b, 16, &s->seraphimTextures[sti]);
1310 vec_st( result2a, 0, &s->seraphimTextures[sti]);
1311 vec_st( result4a, 16, &s->seraphimTextures[sti]);
1316 vec_st( result2b, 0, &s->seraphimTextures[sti]);
1317 vec_st( result4b, 16, &s->seraphimTextures[sti]);
/* Scale the four colour channels by cm, then transpose the 4x4 block so each
   cmv[k] holds all four channels of particle k. */
1323 cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1324 cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1325 cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1326 cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
1328 vector float vI0, vI1, vI2, vI3;
1330 vI0 = vec_mergeh ( cmv[0], cmv[2] );
1331 vI1 = vec_mergeh ( cmv[1], cmv[3] );
1332 vI2 = vec_mergel ( cmv[0], cmv[2] );
1333 vI3 = vec_mergel ( cmv[1], cmv[3] );
1335 cmv[0] = vec_mergeh ( vI0, vI1 );
1336 cmv[1] = vec_mergel ( vI0, vI1 );
1337 cmv[2] = vec_mergeh ( vI2, vI3 );
1338 cmv[3] = vec_mergel ( vI2, vI3 );
1341 vec_dst( cmv, 0x0D0100D0, 1 );
/* Quad corners.  With m = 1 + sm the two corners at the current position are
   (sx + dx*m -/+ dy*sm, sy + dy*m +/- dx*sm) and the two at the previous
   position are (oldscreenx - dx*m +/- dy*os, oldscreeny - dy*m -/+ dx*os). */
1344 vector float sxd, syd;
1345 vector float sxdm, sxdp, sydm, sydp;
1346 vector float oxd, oyd;
1347 vector float oxdm, oxdp, oydm, oydp;
1348 vector float vI0, vI1, vI2, vI3;
1349 vector float dxs, dys;
1350 vector float dxos, dyos;
1351 vector float dxm, dym;
1354 m = vec_add((vector float)(1.0), sm);
1356 dxs = vec_madd(dx, sm, zero);
1357 dys = vec_madd(dy, sm, zero);
1358 dxos = vec_madd(dx, os, zero);
1359 dyos = vec_madd(dy, os, zero);
1360 dxm = vec_madd(dx, m, zero);
1361 dym = vec_madd(dy, m, zero);
1363 sxd = vec_add(sx, dxm);
1364 sxdm = vec_sub(sxd, dys);
1365 sxdp = vec_add(sxd, dys);
1367 syd = vec_add(sy, dym);
1368 sydm = vec_sub(syd, dxs);
1369 sydp = vec_add(syd, dxs);
1371 oxd = vec_sub(oldscreenx, dxm);
1372 oxdm = vec_sub(oxd, dyos);
1373 oxdp = vec_add(oxd, dyos);
1375 oyd = vec_sub(oldscreeny, dym);
1376 oydm = vec_sub(oyd, dxos);
1377 oydp = vec_add(oyd, dxos);
/* Interleave so that svec[k] = (x0,y0,x1,y1) holds the two current-position
   corners of particle k ... */
1379 vI0 = vec_mergeh ( sxdm, sxdp );
1380 vI1 = vec_mergeh ( sydp, sydm );
1381 vI2 = vec_mergel ( sxdm, sxdp );
1382 vI3 = vec_mergel ( sydp, sydm );
1384 svec[0] = vec_mergeh ( vI0, vI1 );
1385 svec[1] = vec_mergel ( vI0, vI1 );
1386 svec[2] = vec_mergeh ( vI2, vI3 );
1387 svec[3] = vec_mergel ( vI2, vI3 );
/* ... and ovec[k] holds the two previous-position corners. */
1389 vI0 = vec_mergeh ( oxdp, oxdm );
1390 vI1 = vec_mergeh ( oydm, oydp );
1391 vI2 = vec_mergel ( oxdp, oxdm );
1392 vI3 = vec_mergel ( oydm, oydp );
1394 ovec[0] = vec_mergeh ( vI0, vI1 );
1395 ovec[1] = vec_mergel ( vI0, vI1 );
1396 ovec[2] = vec_mergeh ( vI2, vI3 );
1397 ovec[3] = vec_mergel ( vI2, vI3 );
/* Byte offsets into the colour and vertex arrays: four colour vectors (one
   per quad corner) and two vertex vectors per particle. */
1401 int offset0 = (sci + 0) * sizeof( vector float );
1402 int offset1 = (sci + 1) * sizeof( vector float );
1403 int offset2 = (sci + 2) * sizeof( vector float );
1404 int offset3 = (sci + 3) * sizeof( vector float );
1405 int offset4 = (svi + 0) * sizeof( vector float );
1406 int offset5 = (svi + 1) * sizeof( vector float );
1407 vector float *colors = (vector float *)s->seraphimColors;
1408 vector float *vertices = (vector float *)s->seraphimVertices;
/* Emit only the lanes whose bit is set in blitBool. */
1409 for (kk=0; kk<4; kk++) {
1410 if (blitBool>>kk & 1) {
1411 vector float vcmv = cmv[kk];
1412 vector float vsvec = svec[kk];
1413 vector float vovec = ovec[kk];
1415 vec_st( vcmv, offset0, colors );
1416 vec_st( vcmv, offset1, colors );
1417 vec_st( vcmv, offset2, colors );
1418 vec_st( vcmv, offset3, colors );
1419 vec_st( vsvec, offset4, vertices );
1420 vec_st( vovec, offset5, vertices );
/* Sum the per-lane counts across the vector into si, the number of quads. */
1429 vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1430 vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1431 vec_ste( (vector signed int) vSi, 0, &si );
/* Submit all accumulated quads: four vertices per particle. */
1433 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1434 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1435 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1436 glDrawArrays(GL_QUADS,0,si*4);