3 Copyright (c) 2002, Calum Robinson
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 * Neither the name of the author nor the names of its contributors may be used
17 to endorse or promote products derived from this software without specific
18 prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
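33 /* Smoke: initialisation, per-frame update and drawing routines for the SmokeV particle state (plain functions; no class is defined in the visible part of this file). */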
/* Not referenced in the visible part of this file. */
#define MAXANGLES 16384
/* Intermediate value for a particle's `dead` flag -- presumably "fading out,
 * draw once more"; TODO confirm against the drawing code. */
#define NOT_QUITE_DEAD 3

/* Plain float constant.  The former trailing ';' was part of the expansion,
 * which made the macro unusable inside any expression (e.g. `x * intensity`). */
#define intensity 75000.0f
/*
 * InitSmoke: reset the emitter bookkeeping of a SmokeV.
 *
 * s: smoke state to initialise; its fields are written through this pointer.
 *
 * Visible effects: nextSubParticle is cleared, lastParticleTime is set to
 * 0.25f and old[i] is filled with RandFlt(-100.0, 100.0).
 * NOTE(review): the embedded line numbers show that lines are missing from
 * this listing (among them whatever declares and iterates `i`); confirm any
 * further field resets against the complete source.
 */
42 void InitSmoke(SmokeV *s)
/* next sub-particle slot to be filled is slot 0 */
46 s->nextSubParticle = 0;
47 s->lastParticleTime = 0.25f;
/* presumably a random value in [-100, 100] per component -- RandFlt is defined elsewhere */
51 s->old[i] = RandFlt(-100.0, 100.0);
/*
 * UpdateSmoke_ScalarBase: per-frame smoke update using plain scalar floats.
 *
 * global: not referenced in the visible lines of this function.
 * flurry: supplies the star position, spark positions and colours,
 *         numStreams, fTime, fDeltaTime, dframe and drag.
 * s:      smoke state; new sub-particles are written into s->p and every
 *         existing one is advanced.
 *
 * Two phases are visible:
 *   1. Emission - when enough time has passed since s->lastParticleTime, one
 *      sub-particle per stream is created at the star position and given a
 *      velocity directed at that stream's spark.
 *   2. Integration - each sub-particle is accelerated toward every spark,
 *      damped by flurry->drag, flagged dead if it becomes too fast, and then
 *      moved by delta * fDeltaTime.
 *
 * streamSpeed, incohesion, colorIncoherence, gravity and streamBias are names
 * declared outside the visible lines.
 * NOTE(review): the embedded line numbers show gaps, so braces, several
 * declarations and some statements are not visible in this listing.
 */
55 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter origin: the current star position */
58 float sx = flurry->star->position[0];
59 float sy = flurry->star->position[1];
60 float sz = flurry->star->position[2];
62 double frameRateModifier;
68 /* emit new puffs once at least 1/121 (in fTime units) has elapsed since the last emission */
69 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
70 float dx,dy,dz,deltax,deltay,deltaz;
/* one new sub-particle per stream; deltax/y/z are assigned in lines not visible here */
82 for(i=0;i<flurry->numStreams;i++) {
83 float streamSpeedCoherenceFactor;
85 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
86 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
87 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* the new particle starts at the star; old and current position coincide */
88 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
89 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
90 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
91 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
92 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
93 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* speed multiplier clamped to be non-negative; RandBell is defined elsewhere */
94 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle */
95 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
96 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
97 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
98 rsquared = (dx*dx+dy*dy+dz*dz);
99 f = streamSpeed * streamSpeedCoherenceFactor;
/* f / |d|: multiplying the offset by this yields a vector of length f.
 * NOTE(review): no check for rsquared == 0 is visible before the divide. */
101 mag = f / (float) sqrt(rsquared);
/* subtracting the scaled offset pushes the velocity toward the spark */
103 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
104 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
105 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour: the spark's colour, each channel scaled by (1 + RandBell(...)) */
106 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
107 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
108 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
109 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* birth time, alive, and a random starting animation frame in 0..63 */
110 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
111 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
112 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the write cursor; after four sub-particles the sub-index wraps */
113 s->nextSubParticle++;
114 if (s->nextSubParticle==4) {
116 s->nextSubParticle=0;
/* end of the particle array reached (the accompanying reset of nextParticle is not visible) */
118 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
120 s->nextSubParticle = 0;
/* the emission timestamp is refreshed on two visible paths; the branch structure between them is not visible */
124 s->lastParticleTime = flurry->fTime;
127 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible) */
132 s->old[i] = flurry->star->position[i];
/* frames per unit of fTime so far, and the factor that normalises the pull to 42.5 of them.
 * NOTE(review): divides by flurry->fTime with no visible zero check. */
135 frameRate = ((double) flurry->dframe)/(flurry->fTime);
136 frameRateModifier = 42.5f / frameRate;
/* integrate every particle group; k selects the sub-particle (its loop is not visible) */
138 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* dead sub-particles: body not visible, presumably skipped */
148 if (s->p[i].dead.i[k]) {
152 deltax = s->p[i].delta[0].f[k];
153 deltay = s->p[i].delta[1].f[k];
154 deltaz = s->p[i].delta[2].f[k];
/* accumulate the pull of every spark on this sub-particle */
156 for(j=0;j<flurry->numStreams;j++) {
157 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
158 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
159 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
160 rsquared = (dx*dx+dy*dy+dz*dz);
/* inverse-square strength, scaled for the frame rate */
162 f = (gravity/rsquared) * frameRateModifier;
/* the stream whose index equals (flat particle index mod numStreams) pulls harder */
164 if ((((i*4)+k) % flurry->numStreams) == j) {
165 f *= 1.0f + streamBias;
168 mag = f / (float) sqrt(rsquared);
170 deltax -= (dx * mag);
171 deltay -= (dy * mag);
172 deltaz -= (dz * mag);
175 /* slow this particle down by flurry->drag */
176 deltax *= flurry->drag;
177 deltay *= flurry->drag;
178 deltaz *= flurry->drag;
/* squared speed of at least 25,000,000 (speed >= 5000): mark the sub-particle dead */
180 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
181 s->p[i].dead.i[k] = 1;
185 /* update the position */
186 s->p[i].delta[0].f[k] = deltax;
187 s->p[i].delta[1].f[k] = deltay;
188 s->p[i].delta[2].f[k] = deltaz;
/* j is reused as the axis index here; the loop header is not visible */
190 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
191 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_ScalarFrsqrte: scalar smoke update in which the divides and
 * sqrt() calls of the base variant are replaced by the `frsqrte`
 * (reciprocal square-root estimate) and `fres` (reciprocal estimate)
 * instructions issued through inline asm.  Those are PowerPC mnemonics, so
 * this variant can only be assembled for that architecture.
 *
 * global: not referenced in the visible lines of this function.
 * flurry: supplies the star position, spark positions and colours,
 *         numStreams, fTime, fDeltaTime, dframe and drag.
 * s:      smoke state; new sub-particles are written into s->p and every
 *         existing one is advanced.
 *
 * The visible statements follow the same emission / integration sequence as
 * UpdateSmoke_ScalarBase.  Because estimates are used, results are
 * approximations of what the exact divide/sqrt form would give.
 * NOTE(review): the embedded line numbers show gaps, so braces, several
 * declarations and some statements are not visible in this listing.
 */
199 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter origin: the current star position */
202 float sx = flurry->star->position[0];
203 float sy = flurry->star->position[1];
204 float sz = flurry->star->position[2];
206 double frameRateModifier;
212 /* emit new puffs once at least 1/121 (in fTime units) has elapsed since the last emission */
213 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
214 float dx,dy,dz,deltax,deltay,deltaz;
/* one new sub-particle per stream; deltax/y/z are assigned in lines not visible here */
226 for(i=0;i<flurry->numStreams;i++) {
227 float streamSpeedCoherenceFactor;
229 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
230 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
231 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* the new particle starts at the star; old and current position coincide */
232 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
233 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
234 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
235 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
236 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
237 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* speed multiplier clamped to be non-negative; RandBell is defined elsewhere */
238 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle */
239 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
240 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
241 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
242 rsquared = (dx*dx+dy*dy+dz*dz);
243 f = streamSpeed * streamSpeedCoherenceFactor;
245 /* mag = f / (float) sqrt(rsquared); */
247 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
/* mag receives an estimate of 1/sqrt(rsquared).
 * NOTE(review): the multiplication of mag by f that the commented-out formula
 * implies is not visible here (a line is missing after the asm) -- confirm. */
248 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* subtracting the scaled offset pushes the velocity toward the spark */
251 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
252 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
253 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour: the spark's colour, each channel scaled by (1 + RandBell(...)) */
254 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
255 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
256 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
257 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* birth time, alive, and a random starting animation frame in 0..63 */
258 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
259 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
260 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the write cursor; after four sub-particles the sub-index wraps */
261 s->nextSubParticle++;
262 if (s->nextSubParticle==4) {
264 s->nextSubParticle=0;
/* end of the particle array reached (the accompanying reset of nextParticle is not visible) */
266 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
268 s->nextSubParticle = 0;
/* the emission timestamp is refreshed on two visible paths; the branch structure between them is not visible */
272 s->lastParticleTime = flurry->fTime;
275 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible) */
280 s->old[i] = flurry->star->position[i];
/* frames per unit of fTime so far, and the factor that normalises the pull to 42.5 of them.
 * NOTE(review): divides by flurry->fTime with no visible zero check. */
283 frameRate = ((double) flurry->dframe)/(flurry->fTime);
284 frameRateModifier = 42.5f / frameRate;
/* integrate every particle group; k selects the sub-particle (its loop is not visible) */
286 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* dead sub-particles: body not visible, presumably skipped */
296 if (s->p[i].dead.i[k]) {
300 deltax = s->p[i].delta[0].f[k];
301 deltay = s->p[i].delta[1].f[k];
302 deltaz = s->p[i].delta[2].f[k];
/* accumulate the pull of every spark on this sub-particle */
304 for(j=0;j<flurry->numStreams;j++) {
305 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
306 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
307 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
308 rsquared = (dx*dx+dy*dy+dz*dz);
/* f = estimate of 1/rsquared, then scaled: the estimate form of gravity/rsquared * frameRateModifier */
310 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
311 f *= gravity*frameRateModifier;
/* the stream whose index equals (flat particle index mod numStreams) pulls harder */
313 if((((i*4)+k) % flurry->numStreams) == j) {
314 f *= 1.0f + streamBias;
317 /* mag = f / (float) sqrt(rsquared); */
319 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
/* NOTE(review): as above, the scaling of mag by f is not visible between the
 * asm and the first use of mag -- confirm against the complete source. */
320 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
323 deltax -= (dx * mag);
324 deltay -= (dy * mag);
325 deltaz -= (dz * mag);
328 /* slow this particle down by flurry->drag */
329 deltax *= flurry->drag;
330 deltay *= flurry->drag;
331 deltaz *= flurry->drag;
/* squared speed of at least 25,000,000 (speed >= 5000): mark the sub-particle dead */
333 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
334 s->p[i].dead.i[k] = 1;
338 /* update the position */
339 s->p[i].delta[0].f[k] = deltax;
340 s->p[i].delta[1].f[k] = deltay;
341 s->p[i].delta[2].f[k] = deltaz;
/* j is reused as the axis index here; the loop header is not visible */
343 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
344 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_VectorBase: smoke update in which the integration phase
 * handles the four sub-particles of a group at once using AltiVec
 * `vector float` operations (vec_madd, vec_re, vec_rsqrte, ...).
 *
 * global: not referenced in the visible lines of this function.
 * flurry: supplies the star position, spark positions and colours,
 *         numStreams, fTime, fDeltaTime, dframe and drag.
 * s:      smoke state; new sub-particles are written into s->p and every
 *         group is advanced.
 *
 * The emission phase is still scalar (it uses the `frsqrte` inline asm).
 * The integration phase mirrors the scalar variants lane by lane; vec_re and
 * vec_rsqrte are estimate operations, so results are approximate.
 * NOTE(review): the embedded line numbers show gaps; the declarations of
 * dragV, mod and mag, all braces and some statements are not visible here.
 */
354 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter origin: the current star position */
357 float sx = flurry->star->position[0];
358 float sy = flurry->star->position[1];
359 float sz = flurry->star->position[2];
361 floatToVector frameRateModifier;
362 floatToVector gravityV;
364 floatToVector deltaTimeV;
/* squared-speed limit above which a sub-particle is flagged dead */
365 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
366 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
367 const vector float biasConst = (vector float)(streamBias);
/* broadcast pattern used below: store a scalar in lane 0, then splat lane 0 to all four lanes */
369 gravityV.f[0] = gravity;
370 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
372 dragV.f[0] = flurry->drag;
373 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
375 deltaTimeV.f[0] = flurry->fDeltaTime;
376 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
381 /* emit new puffs once at least 1/121 (in fTime units) has elapsed since the last emission */
382 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
383 float dx,dy,dz,deltax,deltay,deltaz;
/* one new sub-particle per stream; deltax/y/z are assigned in lines not visible here */
395 for(i=0;i<flurry->numStreams;i++) {
396 float streamSpeedCoherenceFactor;
398 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
399 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
400 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* the new particle starts at the star; old and current position coincide */
401 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
402 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
403 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
404 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
405 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
406 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* speed multiplier clamped to be non-negative; RandBell is defined elsewhere */
407 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle */
408 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
409 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
410 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
411 rsquared = (dx*dx+dy*dy+dz*dz);
412 f = streamSpeed * streamSpeedCoherenceFactor;
/* mag receives an estimate of 1/sqrt(rsquared).
 * NOTE(review): any scaling of mag by f is not visible (a line is missing after the asm). */
414 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
417 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
418 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
419 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour: the spark's colour, each channel scaled by (1 + RandBell(...)) */
420 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
421 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
422 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
423 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* birth time, alive, and a random starting animation frame in 0..63 */
424 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
425 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
426 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the write cursor; after four sub-particles the sub-index wraps */
427 s->nextSubParticle++;
428 if (s->nextSubParticle==4) {
430 s->nextSubParticle=0;
/* end of the particle array reached (the accompanying reset of nextParticle is not visible) */
432 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
434 s->nextSubParticle = 0;
/* the emission timestamp is refreshed on two visible paths; the branch structure between them is not visible */
438 s->lastParticleTime = flurry->fTime;
441 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible) */
446 s->old[i] = flurry->star->position[i];
/* NOTE(review): divides by flurry->fTime with no visible zero check. */
449 frameRate = ((double) flurry->dframe)/(flurry->fTime);
450 frameRateModifier.f[0] = 42.5f / frameRate;
451 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
/* fold gravity in once: from here on frameRateModifier.v holds (42.5 / frameRate) * gravity in every lane */
453 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* integrate one group of four sub-particles per iteration */
455 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
456 /* floatToVector f; */
457 vector float deltax, deltay, deltaz;
458 vector float distTemp;
459 vector unsigned int deadTemp;
460 /* floatToVector infopos0, infopos1, infopos2; */
462 vector unsigned int jVec;
/* data-stream touch (prefetch hint) for the group four entries ahead */
465 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* true when all four dead flags are non-zero; body not visible, presumably the group is skipped */
467 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
471 deltax = s->p[i].delta[0].v;
472 deltay = s->p[i].delta[1].v;
473 deltaz = s->p[i].delta[2].v;
/* mod.i[n] = index of the stream that gets the bias for lane n.
 * Note: '+' binds tighter than '<<', so this parses as i << (2 + 0), i.e. i * 4. */
475 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
/* each following lane takes the next stream index; the wrap-around branch bodies are not visible */
476 if(mod.i[0]+1 == flurry->numStreams) {
479 mod.i[1] = mod.i[0]+1;
481 if(mod.i[1]+1 == flurry->numStreams) {
484 mod.i[2] = mod.i[1]+1;
486 if(mod.i[2]+1 == flurry->numStreams) {
489 mod.i[3] = mod.i[2]+1;
/* x ^ x: zero the vector that counts the current stream index */
492 jVec = vec_xor(jVec, jVec);
494 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* accumulate the pull of every spark on the four sub-particles */
495 for(j=0; j<flurry->numStreams;j++) {
496 vector float ip0, ip1 = (vector float)(0.0), ip2;
497 vector float dx, dy, dz;
498 vector float rsquared, f;
499 vector float one_over_rsquared;
500 vector float biasTemp;
502 vector bool int biasOr;
/* unaligned load of the spark position: aligned load, optional second load, then permute */
504 ip0 = vec_ld(0, flurry->spark[j]->position);
/* low address bits >= 8: the second aligned vector is loaded as well */
505 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
506 ip1 = vec_ld(16, flurry->spark[j]->position);
509 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* broadcast y, z and x of the spark to all lanes (x last, because ip0 is the source) */
510 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
511 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
512 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
514 dx = vec_sub(s->p[i].position[0].v, ip0);
515 dy = vec_sub(s->p[i].position[1].v, ip1);
516 dz = vec_sub(s->p[i].position[2].v, ip2);
/* rsquared = dx*dx + dy*dy + dz*dz via three multiply-adds */
518 rsquared = vec_madd(dx, dx, zero);
519 rsquared = vec_madd(dy, dy, rsquared);
520 rsquared = vec_madd(dz, dz, rsquared);
/* lanes whose biased stream is j get (1 + streamBias), the others 1 */
522 biasOr = vec_cmpeq(jVec, mod.v);
523 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier / rsquared (reciprocal is an estimate) */
525 f = vec_madd(biasTemp, frameRateModifier.v, zero);
526 one_over_rsquared = vec_re(rsquared);
527 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f / sqrt(rsquared), using the reciprocal square-root estimate */
529 mag = vec_rsqrte(rsquared);
530 mag = vec_madd(mag, f, zero);
/* vec_nmsub(a, b, c) = c - a*b, i.e. delta -= d * mag */
532 deltax = vec_nmsub(dx, mag, deltax);
533 deltay = vec_nmsub(dy, mag, deltay);
534 deltaz = vec_nmsub(dz, mag, deltaz);
536 jVec = vec_add(jVec, (vector unsigned int)(1));
539 /* slow this particle down by flurry->drag */
540 deltax = vec_madd(deltax, dragV.v, zero);
541 deltay = vec_madd(deltay, dragV.v, zero);
542 deltaz = vec_madd(deltaz, dragV.v, zero);
/* squared speed per lane */
544 distTemp = vec_madd(deltax, deltax, zero);
545 distTemp = vec_madd(deltay, deltay, distTemp);
546 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* lanes at or above the limit: reduce the all-ones compare mask to 1 and OR it into the dead flags */
548 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
549 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
550 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* all four lanes dead now; body not visible, presumably the position update is skipped */
551 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
555 /* update the position */
556 s->p[i].delta[0].v = deltax;
557 s->p[i].delta[1].v = deltay;
558 s->p[i].delta[2].v = deltaz;
/* j is reused as the axis index here; the loop header is not visible.  position += delta * fDeltaTime */
560 s->p[i].oldposition[j].v = s->p[i].position[j].v;
561 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * UpdateSmoke_VectorUnrolled: AltiVec smoke update whose per-spark loop is
 * unrolled to process four sparks (suffixes A..D) per iteration, followed by
 * a one-spark-at-a-time loop for the remaining numStreams % 4 sparks.
 *
 * info: not referenced in the visible lines of this function.
 * s:    smoke state; new sub-particles are written into s->p and every group
 *       is advanced.
 *
 * NOTE(review): the body reads `flurry->...` throughout, but `flurry` is not
 * a parameter of this function and no declaration of it is visible.  Unless
 * it is declared in a missing line or at file scope, this function cannot
 * compile as written -- confirm, and compare with the three-parameter
 * signature used by the other UpdateSmoke_* variants.
 * NOTE(review): the embedded line numbers show gaps; the declarations of
 * dragV, mod, mag and magA..magD, all braces and some statements are not
 * visible in this listing.
 */
566 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
/* emitter origin: the current star position */
569 float sx = flurry->star->position[0];
570 float sy = flurry->star->position[1];
571 float sz = flurry->star->position[2];
573 floatToVector frameRateModifier;
574 floatToVector gravityV;
576 floatToVector deltaTimeV;
/* squared-speed limit above which a sub-particle is flagged dead */
577 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
578 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
579 const vector float biasConst = (vector float)(streamBias);
/* broadcast pattern used below: store a scalar in lane 0, then splat lane 0 to all four lanes */
581 gravityV.f[0] = gravity;
582 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
584 dragV.f[0] = flurry->drag;
585 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
587 deltaTimeV.f[0] = flurry->fDeltaTime;
588 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
593 /* emit new puffs once at least 1/121 (in fTime units) has elapsed since the last emission */
594 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
595 float dx,dy,dz,deltax,deltay,deltaz;
/* one new sub-particle per stream; deltax/y/z are assigned in lines not visible here */
607 for(i=0;i<flurry->numStreams;i++) {
608 float streamSpeedCoherenceFactor;
610 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
611 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
612 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* the new particle starts at the star; old and current position coincide */
613 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
614 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
615 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
616 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
617 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
618 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* speed multiplier clamped to be non-negative; RandBell is defined elsewhere */
619 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle */
620 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
621 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
622 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
623 rsquared = (dx*dx+dy*dy+dz*dz);
624 f = streamSpeed * streamSpeedCoherenceFactor;
/* mag receives an estimate of 1/sqrt(rsquared).
 * NOTE(review): any scaling of mag by f is not visible (a line is missing after the asm). */
626 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
629 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
630 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
631 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour: the spark's colour, each channel scaled by (1 + RandBell(...)) */
632 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
633 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
634 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
635 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* birth time, alive, and a random starting animation frame in 0..63 */
636 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
637 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
638 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the write cursor; after four sub-particles the sub-index wraps */
639 s->nextSubParticle++;
640 if (s->nextSubParticle==4) {
642 s->nextSubParticle=0;
/* end of the particle array reached (the accompanying reset of nextParticle is not visible) */
644 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
646 s->nextSubParticle = 0;
/* the emission timestamp is refreshed on two visible paths; the branch structure between them is not visible */
650 s->lastParticleTime = flurry->fTime;
653 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible) */
658 s->old[i] = flurry->star->position[i];
/* NOTE(review): divides by flurry->fTime with no visible zero check. */
661 frameRate = ((double) flurry->dframe)/(flurry->fTime);
662 frameRateModifier.f[0] = 42.5f / frameRate;
663 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
/* fold gravity in once: from here on frameRateModifier.v holds (42.5 / frameRate) * gravity in every lane */
665 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* integrate one group of four sub-particles per iteration */
667 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
668 /* floatToVector f; */
669 vector float deltax, deltay, deltaz;
670 vector float distTemp;
671 vector unsigned int deadTemp;
672 /* floatToVector infopos0, infopos1, infopos2; */
674 vector unsigned int jVec;
/* integer 1 and float 1.0 in every lane (vec_ctf with scale 0 converts without scaling) */
675 vector unsigned int intOne = vec_splat_u32(1);
676 vector float floatOne = vec_ctf(intOne, 0);
/* data-stream touch (prefetch hint) for the group four entries ahead */
679 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* true when all four dead flags are non-zero; body not visible, presumably the group is skipped */
681 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
685 deltax = s->p[i].delta[0].v;
686 deltay = s->p[i].delta[1].v;
687 deltaz = s->p[i].delta[2].v;
/* mod.i[n] = index of the stream that gets the bias for lane n.
 * Note: '+' binds tighter than '<<', so this parses as i << (2 + 0), i.e. i * 4. */
689 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
/* each following lane takes the next stream index; the wrap-around branch bodies are not visible */
690 if(mod.i[0]+1 == flurry->numStreams) {
693 mod.i[1] = mod.i[0]+1;
695 if(mod.i[1]+1 == flurry->numStreams) {
698 mod.i[2] = mod.i[1]+1;
700 if(mod.i[2]+1 == flurry->numStreams) {
703 mod.i[3] = mod.i[2]+1;
/* x ^ x: zero the vector that counts the current stream index */
706 jVec = vec_xor(jVec, jVec);
708 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* unrolled loop: sparks j, j+1, j+2, j+3 are handled as A, B, C, D */
709 for(j=0; j + 3 < flurry->numStreams;j+=4)
711 vector float dxa, dya, dza;
712 vector float dxb, dyb, dzb;
713 vector float dxc, dyc, dzc;
714 vector float dxd, dyd, dzd;
715 vector float ip0a, ip1a;
716 vector float ip0b, ip1b;
717 vector float ip0c, ip1c;
718 vector float ip0d, ip1d;
719 vector float rsquaredA;
720 vector float rsquaredB;
721 vector float rsquaredC;
722 vector float rsquaredD;
723 vector float fA, fB, fC, fD;
724 vector float biasTempA;
725 vector float biasTempB;
726 vector float biasTempC;
727 vector float biasTempD;
733 vector float one_over_rsquaredA;
734 vector float one_over_rsquaredB;
735 vector float one_over_rsquaredC;
736 vector float one_over_rsquaredD;
737 vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
/* unaligned load of four spark positions: two aligned loads each, merged by vec_perm below */
740 ip0a = vec_ld(0, flurry->spark[j]->position);
741 ip0b = vec_ld(0, flurry->spark[j+1]->position);
742 ip0c = vec_ld(0, flurry->spark[j+2]->position);
743 ip0d = vec_ld(0, flurry->spark[j+3]->position);
/* NOTE(review): the second load here is unconditional at byte offset 12,
 * whereas the remainder loop below loads at offset 16 only when the low
 * address bits are >= 8 -- confirm both cover an unaligned 3-float position. */
744 ip1a = vec_ld( 12, flurry->spark[j]->position );
745 ip1b = vec_ld( 12, flurry->spark[j+1]->position );
746 ip1c = vec_ld( 12, flurry->spark[j+2]->position );
747 ip1d = vec_ld( 12, flurry->spark[j+3]->position );
750 ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
751 ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
752 ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
753 ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
/* x offsets: broadcast each spark's x, then subtract from the particles' x */
755 dxa = vec_splat( ip0a, 0 );
756 dxb = vec_splat( ip0b, 0 );
757 dxc = vec_splat( ip0c, 0 );
758 dxd = vec_splat( ip0d, 0 );
759 dxa = vec_sub( s->p[i].position[0].v, dxa );
760 dxb = vec_sub( s->p[i].position[0].v, dxb );
761 dxc = vec_sub( s->p[i].position[0].v, dxc );
762 dxd = vec_sub( s->p[i].position[0].v, dxd );
/* y offsets */
764 dya = vec_splat( ip0a, 1 );
765 dyb = vec_splat( ip0b, 1 );
766 dyc = vec_splat( ip0c, 1 );
767 dyd = vec_splat( ip0d, 1 );
768 dya = vec_sub( s->p[i].position[1].v, dya );
769 dyb = vec_sub( s->p[i].position[1].v, dyb );
770 dyc = vec_sub( s->p[i].position[1].v, dyc );
771 dyd = vec_sub( s->p[i].position[1].v, dyd );
/* z offsets */
773 dza = vec_splat( ip0a, 2 );
774 dzb = vec_splat( ip0b, 2 );
775 dzc = vec_splat( ip0c, 2 );
776 dzd = vec_splat( ip0d, 2 );
777 dza = vec_sub( s->p[i].position[2].v, dza );
778 dzb = vec_sub( s->p[i].position[2].v, dzb );
779 dzc = vec_sub( s->p[i].position[2].v, dzc );
780 dzd = vec_sub( s->p[i].position[2].v, dzd );
/* rsquared = dx*dx + dy*dy + dz*dz for each of the four sparks */
782 rsquaredA = vec_madd( dxa, dxa, zero );
783 rsquaredB = vec_madd( dxb, dxb, zero );
784 rsquaredC = vec_madd( dxc, dxc, zero );
785 rsquaredD = vec_madd( dxd, dxd, zero );
787 rsquaredA = vec_madd( dya, dya, rsquaredA );
788 rsquaredB = vec_madd( dyb, dyb, rsquaredB );
789 rsquaredC = vec_madd( dyc, dyc, rsquaredC );
790 rsquaredD = vec_madd( dyd, dyd, rsquaredD );
792 rsquaredA = vec_madd( dza, dza, rsquaredA );
793 rsquaredB = vec_madd( dzb, dzb, rsquaredB );
794 rsquaredC = vec_madd( dzc, dzc, rsquaredC );
795 rsquaredD = vec_madd( dzd, dzd, rsquaredD );
/* bias masks for streams j..j+3; jVec ends up advanced by four */
797 biasOrA = vec_cmpeq( jVec, mod.v );
798 jVec = vec_add(jVec, intOne);
799 biasOrB = vec_cmpeq( jVec, mod.v );
800 jVec = vec_add(jVec, intOne);
801 biasOrC = vec_cmpeq( jVec, mod.v );
802 jVec = vec_add(jVec, intOne);
803 biasOrD = vec_cmpeq( jVec, mod.v );
804 jVec = vec_add(jVec, intOne);
/* lanes whose biased stream matches get (1 + streamBias), the others 1 */
806 biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
807 biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
808 biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
809 biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
/* f = bias * gravity * frameRateModifier / rsquared (reciprocal is an estimate) */
811 fA = vec_madd( biasTempA, frameRateModifier.v, zero);
812 fB = vec_madd( biasTempB, frameRateModifier.v, zero);
813 fC = vec_madd( biasTempC, frameRateModifier.v, zero);
814 fD = vec_madd( biasTempD, frameRateModifier.v, zero);
815 one_over_rsquaredA = vec_re( rsquaredA );
816 one_over_rsquaredB = vec_re( rsquaredB );
817 one_over_rsquaredC = vec_re( rsquaredC );
818 one_over_rsquaredD = vec_re( rsquaredD );
819 fA = vec_madd( fA, one_over_rsquaredA, zero);
820 fB = vec_madd( fB, one_over_rsquaredB, zero);
821 fC = vec_madd( fC, one_over_rsquaredC, zero);
822 fD = vec_madd( fD, one_over_rsquaredD, zero);
/* mag = f / sqrt(rsquared), using the reciprocal square-root estimate */
823 magA = vec_rsqrte( rsquaredA );
824 magB = vec_rsqrte( rsquaredB );
825 magC = vec_rsqrte( rsquaredC );
826 magD = vec_rsqrte( rsquaredD );
827 magA = vec_madd( magA, fA, zero );
828 magB = vec_madd( magB, fB, zero );
829 magC = vec_madd( magC, fC, zero );
830 magD = vec_madd( magD, fD, zero );
/* vec_nmsub(a, b, c) = c - a*b: apply the four pulls one after another */
831 deltax = vec_nmsub( dxa, magA, deltax );
832 deltay = vec_nmsub( dya, magA, deltay );
833 deltaz = vec_nmsub( dza, magA, deltaz );
835 deltax = vec_nmsub( dxb, magB, deltax );
836 deltay = vec_nmsub( dyb, magB, deltay );
837 deltaz = vec_nmsub( dzb, magB, deltaz );
839 deltax = vec_nmsub( dxc, magC, deltax );
840 deltay = vec_nmsub( dyc, magC, deltay );
841 deltaz = vec_nmsub( dzc, magC, deltaz );
843 deltax = vec_nmsub( dxd, magD, deltax );
844 deltay = vec_nmsub( dyd, magD, deltay );
845 deltaz = vec_nmsub( dzd, magD, deltaz );
/* remainder loop: continues from the j left by the unrolled loop, one spark per iteration */
849 for(;j<flurry->numStreams;j++) {
850 vector float ip0, ip1 = (vector float)(0.0), ip2;
851 vector float dx, dy, dz;
852 vector float rsquared, f;
853 vector float one_over_rsquared;
854 vector float biasTemp;
856 vector bool int biasOr;
/* unaligned load of the spark position: aligned load, optional second load, then permute */
858 ip0 = vec_ld(0, flurry->spark[j]->position);
859 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
860 ip1 = vec_ld(16, flurry->spark[j]->position);
863 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* broadcast y, z and x of the spark to all lanes (x last, because ip0 is the source) */
864 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
865 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
866 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
868 dx = vec_sub(s->p[i].position[0].v, ip0);
869 dy = vec_sub(s->p[i].position[1].v, ip1);
870 dz = vec_sub(s->p[i].position[2].v, ip2);
872 rsquared = vec_madd(dx, dx, zero);
873 rsquared = vec_madd(dy, dy, rsquared);
874 rsquared = vec_madd(dz, dz, rsquared);
876 biasOr = vec_cmpeq(jVec, mod.v);
877 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
879 f = vec_madd(biasTemp, frameRateModifier.v, zero);
880 one_over_rsquared = vec_re(rsquared);
881 f = vec_madd(f, one_over_rsquared, zero);
883 mag = vec_rsqrte(rsquared);
884 mag = vec_madd(mag, f, zero);
886 deltax = vec_nmsub(dx, mag, deltax);
887 deltay = vec_nmsub(dy, mag, deltay);
888 deltaz = vec_nmsub(dz, mag, deltaz);
890 jVec = vec_add(jVec, (vector unsigned int)(1));
893 /* slow this particle down by flurry->drag */
894 deltax = vec_madd(deltax, dragV.v, zero);
895 deltay = vec_madd(deltay, dragV.v, zero);
896 deltaz = vec_madd(deltaz, dragV.v, zero);
/* squared speed per lane */
898 distTemp = vec_madd(deltax, deltax, zero);
899 distTemp = vec_madd(deltay, deltay, distTemp);
900 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* lanes at or above the limit: reduce the all-ones compare mask to 1 and OR it into the dead flags */
902 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
903 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
904 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* all four lanes dead now; body not visible, presumably the position update is skipped */
905 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
909 /* update the position */
910 s->p[i].delta[0].v = deltax;
911 s->p[i].delta[1].v = deltay;
912 s->p[i].delta[2].v = deltaz;
/* j is reused as the axis index here; the loop header is not visible.  position += delta * fDeltaTime */
914 s->p[i].oldposition[j].v = s->p[i].position[j].v;
915 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * DrawSmoke_Scalar: turn the live smoke sub-particles into textured,
 * coloured quads and submit them with glDrawArrays (scalar code path).
 *
 * global:     supplies the viewport size (sys_glWidth, sys_glHeight).
 * flurry:     supplies fTime and streamExpansion.
 * s:          particle state; also owns the seraphimColors,
 *             seraphimVertices and seraphimTextures arrays filled here.
 * brightness: not referenced in the visible lines of this function.
 *
 * Side effects visible here: each drawn sub-particle's animFrame is
 * advanced (wrapping at 64), a sub-particle's dead flag may be set to 1, and
 * the seraphim* arrays are overwritten before being handed to OpenGL.
 *
 * NOTE(review): the embedded line numbers show gaps, so braces, the local
 * declarations, the `continue`-style skips and any preprocessor conditionals
 * are not visible in this listing.
 */
922 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* widths are authored for a 1024-pixel-wide view and scaled to the actual width */
932 float screenRatio = global->sys_glWidth / 1024.0f;
/* screen centre, used as the projection origin */
933 float hslash2 = global->sys_glHeight * 0.5f;
934 float wslash2 = global->sys_glWidth * 0.5f;
/* width reached after 2.5 time units of expansion; particles at or beyond it are retired below */
937 width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
939 for (i=0;i<NUMSMOKEPARTICLES/4;i++)
941 for (k=0; k<4; k++) {
/* dead sub-particles: body not visible, presumably skipped */
945 if (s->p[i].dead.i[k]) {
/* current width grows linearly with the particle's age (fTime - birth time) */
948 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
949 if (thisWidth >= width)
951 s->p[i].dead.i[k] = 1;
/* perspective projection; note that both x and y are scaled by sys_glWidth */
954 z = s->p[i].position[2].f[k];
955 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
956 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
957 oldz = s->p[i].oldposition[2].f[k];
/* more than 50 pixels off screen, or closer than z = 25: body not visible, presumably skipped */
958 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
/* on-screen width at the current depth, at least 1 */
963 w = MAX_(1.0f,thisWidth/z);
/* project the previous position too; the quad spans old -> current */
965 float oldx = s->p[i].oldposition[0].f[k];
966 float oldy = s->p[i].oldposition[1].f[k];
967 float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
968 float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
969 float dx = (sx-oldscreenx);
970 float dy = (sy-oldscreeny);
/* screen-space length of the movement; FastDistance2D is defined elsewhere */
972 float d = FastDistance2D(dx, dy);
/* on-screen width at the previous depth, at least 1 */
983 ow = MAX_(1.0f,thisWidth/oldz);
/* advance the animation frame, wrapping 63 -> 0 */
1005 s->p[i].animFrame.i[k]++;
1006 if (s->p[i].animFrame.i[k] >= 64)
1008 s->p[i].animFrame.i[k] = 0;
/* Texture cell origin: the frame number 0..63 is split into its low three
 * bits (u) and its high three bits (v), each scaled by 1/8 -- i.e. an 8x8
 * grid of cells (layout inferred from these two lines).
 * Fix: this is a bitwise '&'.  The earlier logical '&&' evaluated to 0 or 1,
 * so u0 could only ever be 0 or 0.125 and most columns were never used. */
1011 u0 = (s->p[i].animFrame.i[k]&7) * 0.125f;
1012 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
/* colour multiplier: falls as the particle widens toward `width` */
1017 cm = (1.375f - thisWidth/width);
/* a sub-particle flagged NOT_QUITE_DEAD (value 3, previously a bare literal) becomes fully dead (1) */
1018 if (s->p[i].dead.i[k] == NOT_QUITE_DEAD)
1021 s->p[i].dead.i[k] = 1;
/* particle colour scaled by cm */
1025 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1026 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1027 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1028 cmv.f[3] = s->p[i].color[3].f[k]*cm;
/* The colour is written once per quad corner.  Two forms are visible: whole
 * vector stores, and a per-component loop; presumably they are alternatives
 * selected by preprocessor conditionals that are not visible here. */
1031 /* MDT we can't use vectors in the Scalar routine */
1032 s->seraphimColors[sci++].v = cmv.v;
1033 s->seraphimColors[sci++].v = cmv.v;
1034 s->seraphimColors[sci++].v = cmv.v;
1035 s->seraphimColors[sci++].v = cmv.v;
1039 for (jj = 0; jj < 4; jj++) {
1040 for (ii = 0; ii < 4; ii++) {
1041 s->seraphimColors[sci].f[ii] = cmv.f[ii];
/* texture coordinates of the four corners: (u0,v0) (u0,v1) (u1,v1) (u1,v0); u1/v1 are set in lines not visible */
1048 s->seraphimTextures[sti++] = u0;
1049 s->seraphimTextures[sti++] = v0;
1050 s->seraphimTextures[sti++] = u0;
1051 s->seraphimTextures[sti++] = v1;
1053 s->seraphimTextures[sti++] = u1;
1054 s->seraphimTextures[sti++] = v1;
1055 s->seraphimTextures[sti++] = u1;
1056 s->seraphimTextures[sti++] = v0;
/* two corners around the current screen position (two x,y pairs per entry);
 * dxm, dym, dxs, dys are computed in lines not visible */
1058 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1059 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1060 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1061 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
/* two corners around the previous screen position */
1064 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1065 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1066 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1067 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
/* submit: 4 colour floats, 2 position floats and 2 texture floats per vertex, si quads of 4 vertices each */
1073 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1074 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1075 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1076 glDrawArrays(GL_QUADS,0,si*4);
/*
 * DrawSmoke_Vector: AltiVec implementation of the smoke draw pass.
 *
 * Walks s->p[] in groups of four particles (one particle per vector lane),
 * projects the current and previous position of each particle to screen
 * space, builds one textured quad per particle that survives culling, and
 * submits the accumulated arrays with glDrawArrays(GL_QUADS, ...).
 *
 *   global     - supplies sys_glWidth / sys_glHeight used for projection.
 *   flurry     - supplies fTime and streamExpansion used for the quad width.
 *   s          - particle storage plus the seraphimColors / seraphimVertices /
 *                seraphimTextures output arrays; dead and animFrame are
 *                updated in place.
 *   brightness - scale factor applied to each particle's colour multiplier.
 */
1081 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1083 const vector float zero = (vector float)(0.0);
1088 floatToVector width;
1090 floatToVector u0,v0,u1,v1;
1091 vector float one_over_z;
1094 float screenRatio = global->sys_glWidth / 1024.0f;
1095 float hslash2 = global->sys_glHeight * 0.5f;
1096 float wslash2 = global->sys_glWidth * 0.5f;
1098 floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1099 floatToVector glWidthV;
1101 vector float cmv[4];
1102 vector float svec[4], ovec[4];
1103 vector float oldscreenx, oldscreeny;
1105 vector float frameAnd7;
1106 vector float frameShift3;
1107 vector float one_over_width;
1108 vector float dx, dy;
1110 vector unsigned int vSi = vec_splat_u32(0);
1111 const vector float eighth = (vector float)(0.125);
1112 float glWidth50 = global->sys_glWidth + 50.0f;
1113 float glHeight50 = global->sys_glHeight + 50.0f;
1114 vector float vGLWidth50, vGLHeight50;
1115 unsigned int blitBool;
/* Data-stream prefetch hint for the start of the particle array. */
1117 vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
/*
 * Broadcast the scalars glWidth50 / glHeight50 to all four lanes: vec_lde
 * loads the single element, and the splatted vec_lvsl permute replicates
 * that element across the vector.
 */
1120 vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1121 vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1122 permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1123 permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1124 vGLWidth50 = vec_lde( 0, &glWidth50 );
1125 vGLHeight50 = vec_lde( 0, &glHeight50 );
1126 vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1127 vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
/*
 * Each remaining per-frame scalar is written to lane 0 of a floatToVector
 * and then splatted so every lane holds the same value.
 * width is the quad width at which a particle is retired (see gtMask).
 */
1130 width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1131 width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1133 briteV.f[0] = brightness;
1134 briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1136 fTimeV.f[0] = (float) flurry->fTime;
1137 fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1139 expansionV.f[0] = flurry->streamExpansion;
1140 expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1142 screenRatioV.f[0] = screenRatio;
1143 screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1145 hslash2V.f[0] = hslash2;
1146 hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1148 wslash2V.f[0] = wslash2;
1149 wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1151 streamSizeV.f[0] = streamSize;
1152 streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1154 glWidthV.f[0] = global->sys_glWidth;
1155 glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
/* One iteration handles four particles, one per vector lane. */
1157 for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1158 vector float thisWidth;
1160 vector float oldx, oldy, one_over_oldz;
1161 vector float xabs, yabs, mn;
1163 vector float one_over_d;
1164 vector bool int dnz;
/* Prefetch hint for a particle group further along the array. */
1167 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
/* Nothing to do when all four lanes are already marked dead (== 1). */
1169 if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1171 blitBool = 0; /* keep track of particles that actually need to be drawn */
/* thisWidth = ((fTime - time) * streamExpansion + streamSize) * screenRatio */
1173 thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1174 thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1175 thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
/* vec_re yields a reciprocal estimate, so 1/z here is approximate. */
1177 z.v = s->p[i].position[2].v;
1178 one_over_z = vec_re(z.v);
/* Screen position: sx = x * glWidth / z + wslash2, sy = y * glWidth / z + hslash2. */
1180 sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1181 sx = vec_madd(sx, one_over_z, wslash2V.v);
1182 sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1183 sy = vec_madd(sy, one_over_z, hslash2V.v);
1185 oldz = s->p[i].oldposition[2].v;
/* Width at the current position, clamped to at least 1.0. */
1187 w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
/* Same projection applied to the previous position. */
1189 oldx = s->p[i].oldposition[0].v;
1190 oldy = s->p[i].oldposition[1].v;
1191 one_over_oldz = vec_re(oldz);
1192 oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1193 oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1194 oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1195 oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1196 dx = vec_sub(sx,oldscreenx);
1197 dy = vec_sub(sy,oldscreeny);
/*
 * d = xabs + yabs - 0.6875 * min(xabs, yabs).
 * NOTE(review): xabs / yabs are assigned outside the lines visible here;
 * presumably |dx| and |dy|, making d an approximate 2D distance - confirm.
 */
1201 mn = vec_min(xabs,yabs);
1202 d = vec_add(xabs,yabs);
1203 d = vec_madd(mn, (vector float)(-0.6875), d);
/* Width at the old position, clamped to at least 1.0. */
1205 ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
/* sm = w / d and os = ow / d, forced to zero in lanes where d is not > 0. */
1206 one_over_d = vec_re(d);
1207 dnz = vec_cmpgt(d, zero);
1208 sm = vec_madd(w, one_over_d, zero);
1209 sm = vec_and(sm, dnz);
1210 os = vec_madd(ow, one_over_d, zero);
1211 os = vec_and(os, dnz);
/*
 * Build the cull mask: a lane is all-ones when its particle must not be
 * drawn (already dead, grown to the maximum width, more than 50 units
 * outside the viewport in x or y, or z / oldz below 25).
 */
1214 intToVector tempMask;
1215 vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1216 vector bool int gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1217 vector bool int glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1218 vector bool int glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1219 vector bool int test50x = vec_cmplt( sx, (vector float) (-50.0) );
1220 vector bool int test50y = vec_cmplt( sy, (vector float) (-50.0) );
1221 vector bool int testz = vec_cmplt( z.v, (vector float) (25.0) );
1222 vector bool int testoldz = vec_cmplt( oldz, (vector float) (25.0) );
1223 mask = vec_or( mask, gtMask );
/* Only "already dead" and "too wide" are persisted: dead becomes 1 there, 0 elsewhere. */
1224 s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1225 mask = vec_or( mask, glWidth50Test );
1226 mask = vec_or( mask, glHeight50Test );
1227 mask = vec_or( mask, test50x );
1228 mask = vec_or( mask, test50y );
1229 mask = vec_or( mask, testz );
1230 mask = vec_or( mask, testoldz );
1231 tempMask.v = (vector unsigned int)mask;
/*
 * ~mask is -1 in drawn lanes, so subtracting it adds 1 to animFrame for
 * drawn particles only; the AND with 63 wraps the frame index to 0..63.
 */
1233 s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1234 s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
/* Texture cell in an 8x8 grid: column = frame & 7, row = frame >> 3, each cell 1/8 wide. */
1236 frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1237 u0.v = vec_madd(frameAnd7, eighth, zero);
1239 frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
/* v0 must come from the row index (frame >> 3); using frameAnd7 here made v0 == u0. */
1240 v0.v = vec_madd(frameShift3, eighth, zero);
1242 u1.v = vec_add(u0.v, eighth);
1243 v1.v = vec_add(v0.v, eighth);
/*
 * Colour multiplier: drawn lanes get 1.375 - thisWidth / width (vec_nmsub),
 * culled lanes keep whatever cm.v held; the result is scaled by brightness.
 */
1245 one_over_width = vec_re(width.v);
1246 cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1247 cm.v = vec_madd(cm.v, briteV.v, zero);
/* Per-lane count of drawn particles (adds 1 where mask is clear). */
1249 vSi = vec_sub( vSi, vec_nor( mask, mask ) );
/*
 * blitBool: bit k is set when lane k is to be drawn. The (1,2,4,8) lane
 * weights are kept where mask is clear and then summed across all lanes.
 */
1251 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1252 vector unsigned int temp = (vector unsigned int)mask;
1253 temp = vec_andc( blitMask, temp );
1254 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1255 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1256 vec_ste( temp, 0, &blitBool );
/*
 * Interleave texture coordinates per particle: resultNa/b hold
 * (u0,v0,u0,v1) and result3/4 hold (u1,v1,u1,v0) for the matching lane.
 */
1261 vector float temp1, temp2, temp3, temp4;
1262 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1264 temp1 = vec_mergeh( u0.v, u0.v );
1265 temp2 = vec_mergel( u0.v, u0.v );
1266 temp3 = vec_mergeh( v0.v, v1.v );
1267 temp4 = vec_mergel( v0.v, v1.v );
1269 result1a = vec_mergeh( temp1, temp3 );
1270 result1b = vec_mergel( temp1, temp3 );
1271 result2a = vec_mergeh( temp2, temp4 );
1272 result2b = vec_mergel( temp2, temp4 );
1274 temp1 = vec_mergeh( u1.v, u1.v );
1275 temp2 = vec_mergel( u1.v, u1.v );
1276 temp3 = vec_mergeh( v1.v, v0.v );
1277 temp4 = vec_mergel( v1.v, v0.v );
1279 result3a = vec_mergeh( temp1, temp3 );
1280 result3b = vec_mergel( temp1, temp3 );
1281 result4a = vec_mergeh( temp2, temp4 );
1282 result4b = vec_mergel( temp2, temp4 );
/* Each store pair writes the eight texture floats of one particle at sti. */
1286 vec_st( result1a, 0, &s->seraphimTextures[sti] );
1287 vec_st( result3a, 16, &s->seraphimTextures[sti]);
1292 vec_st( result1b, 0, &s->seraphimTextures[sti]);
1293 vec_st( result3b, 16, &s->seraphimTextures[sti]);
1298 vec_st( result2a, 0, &s->seraphimTextures[sti]);
1299 vec_st( result4a, 16, &s->seraphimTextures[sti]);
1304 vec_st( result2b, 0, &s->seraphimTextures[sti]);
1305 vec_st( result4b, 16, &s->seraphimTextures[sti]);
/* Scale the four colour channels by cm ... */
1311 cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1312 cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1313 cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1314 cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
/* ... then transpose 4x4 so cmv[k] holds the four channels of lane k. */
1316 vector float vI0, vI1, vI2, vI3;
1318 vI0 = vec_mergeh ( cmv[0], cmv[2] );
1319 vI1 = vec_mergeh ( cmv[1], cmv[3] );
1320 vI2 = vec_mergel ( cmv[0], cmv[2] );
1321 vI3 = vec_mergel ( cmv[1], cmv[3] );
1323 cmv[0] = vec_mergeh ( vI0, vI1 );
1324 cmv[1] = vec_mergel ( vI0, vI1 );
1325 cmv[2] = vec_mergeh ( vI2, vI3 );
1326 cmv[3] = vec_mergel ( vI2, vI3 );
1329 vec_dst( cmv, 0x0D0100D0, 1 );
1332 vector float sxd, syd;
1333 vector float sxdm, sxdp, sydm, sydp;
1334 vector float oxd, oyd;
1335 vector float oxdm, oxdp, oydm, oydp;
1336 vector float vI0, vI1, vI2, vI3;
1337 vector float dxs, dys;
1338 vector float dxos, dyos;
1339 vector float dxm, dym;
/* Quad corner offsets: scale the (dx, dy) delta by sm, os and m = 1 + sm. */
1342 m = vec_add((vector float)(1.0), sm);
1344 dxs = vec_madd(dx, sm, zero);
1345 dys = vec_madd(dy, sm, zero);
1346 dxos = vec_madd(dx, os, zero);
1347 dyos = vec_madd(dy, os, zero);
1348 dxm = vec_madd(dx, m, zero);
1349 dym = vec_madd(dy, m, zero);
1351 sxd = vec_add(sx, dxm);
1352 sxdm = vec_sub(sxd, dys);
1353 sxdp = vec_add(sxd, dys);
1355 syd = vec_add(sy, dym);
1356 sydm = vec_sub(syd, dxs);
1357 sydp = vec_add(syd, dxs);
1359 oxd = vec_sub(oldscreenx, dxm);
1360 oxdm = vec_sub(oxd, dyos);
1361 oxdp = vec_add(oxd, dyos);
1363 oyd = vec_sub(oldscreeny, dym);
1364 oydm = vec_sub(oyd, dxos);
1365 oydp = vec_add(oyd, dxos);
/* svec[k] = (sx+dxm-dys, sy+dym+dxs, sx+dxm+dys, sy+dym-dxs) for lane k. */
1367 vI0 = vec_mergeh ( sxdm, sxdp );
1368 vI1 = vec_mergeh ( sydp, sydm );
1369 vI2 = vec_mergel ( sxdm, sxdp );
1370 vI3 = vec_mergel ( sydp, sydm );
1372 svec[0] = vec_mergeh ( vI0, vI1 );
1373 svec[1] = vec_mergel ( vI0, vI1 );
1374 svec[2] = vec_mergeh ( vI2, vI3 );
1375 svec[3] = vec_mergel ( vI2, vI3 );
/* ovec[k] = (ox-dxm+dyos, oy-dym-dxos, ox-dxm-dyos, oy-dym+dxos) for lane k. */
1377 vI0 = vec_mergeh ( oxdp, oxdm );
1378 vI1 = vec_mergeh ( oydm, oydp );
1379 vI2 = vec_mergel ( oxdp, oxdm );
1380 vI3 = vec_mergel ( oydm, oydp );
1382 ovec[0] = vec_mergeh ( vI0, vI1 );
1383 ovec[1] = vec_mergel ( vI0, vI1 );
1384 ovec[2] = vec_mergeh ( vI2, vI3 );
1385 ovec[3] = vec_mergel ( vI2, vI3 );
/* Byte offsets into the colour (sci) and vertex (svi) output arrays. */
1389 int offset0 = (sci + 0) * sizeof( vector float );
1390 int offset1 = (sci + 1) * sizeof( vector float );
1391 int offset2 = (sci + 2) * sizeof( vector float );
1392 int offset3 = (sci + 3) * sizeof( vector float );
1393 int offset4 = (svi + 0) * sizeof( vector float );
1394 int offset5 = (svi + 1) * sizeof( vector float );
1395 vector float *colors = (vector float *)s->seraphimColors;
1396 vector float *vertices = (vector float *)s->seraphimVertices;
/*
 * Emit one quad per lane whose blitBool bit is set: the same colour for
 * all four corners, then the two "current" and two "old" corner vertices.
 */
1397 for (kk=0; kk<4; kk++) {
1398 if (blitBool>>kk & 1) {
1399 vector float vcmv = cmv[kk];
1400 vector float vsvec = svec[kk];
1401 vector float vovec = ovec[kk];
1403 vec_st( vcmv, offset0, colors );
1404 vec_st( vcmv, offset1, colors );
1405 vec_st( vcmv, offset2, colors );
1406 vec_st( vcmv, offset3, colors );
1407 vec_st( vsvec, offset4, vertices );
1408 vec_st( vovec, offset5, vertices );
/* Sum the four per-lane draw counters into every lane and store the total in si. */
1417 vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1418 vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1419 vec_ste( (vector signed int) vSi, 0, &si );
/* Submit si quads (four vertices each) from the filled client arrays. */
1421 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1422 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1423 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1424 glDrawArrays(GL_QUADS,0,si*4);