3 Copyright (c) 2002, Calum Robinson
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 * Neither the name of the author nor the names of its contributors may be used
17 to endorse or promote products derived from this software without specific
18 prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 /* Smoke.cpp: implementation of the Smoke class. */
/*
 * File-local constants.
 *
 * MAXANGLES and NOT_QUITE_DEAD are plain integer constants.  Neither name is
 * referenced in the part of the file reviewed here; DrawSmoke_Scalar compares
 * a particle's dead flag against the literal 3, presumably this same state
 * (TODO confirm).
 *
 * intensity is a float constant.  Its definition must not end in ';': with a
 * trailing semicolon the macro only works as the last token of a statement
 * and fails to compile inside any larger expression such as `intensity * x`.
 */
#define MAXANGLES 16384
#define NOT_QUITE_DEAD 3

#define intensity 75000.0f
/*
 * InitSmoke: put a smoke system's emitter state back to its starting values.
 *
 * s - smoke state to initialise (written in place).
 *
 * The statements visible here clear the sub-particle cursor, set the
 * "last emission" timestamp to 0.25f and randomise s->old[].
 * NOTE(review): the declaration of `i` and the loop that drives it are not
 * visible in this listing -- confirm the bounds against the size of s->old.
 */
46 void InitSmoke(SmokeV *s)
50 s->nextSubParticle = 0;
51 s->lastParticleTime = 0.25f;
/* previous emitter position starts random; RandFlt(-100.0, 100.0) presumably returns a value in that range */
55 s->old[i] = RandFlt(-100.0, 100.0);
/*
 * UpdateSmoke_ScalarBase: plain-C update step for the smoke particles.
 *
 * global - not referenced in the lines visible here.
 * flurry - supplies the emitter (star), the attractors
 *          (spark[0..numStreams-1]), timing (fTime, fDeltaTime, dframe)
 *          and drag.
 * s      - particle storage; a particle is addressed as
 *          p[group].field.f[lane], four lanes per group (the lane cursor
 *          nextSubParticle wraps at 4).
 *
 * Two phases are visible:
 *   1. emission - once enough time has passed since s->lastParticleTime, one
 *      new particle per stream is written at the star position;
 *   2. integration - each particle has its velocity adjusted along the line
 *      to every spark, is slowed by drag, is flagged dead when too fast, and
 *      is then moved by velocity * fDeltaTime.
 *
 * NOTE(review): several declarations and statements used below (i, j, k,
 * frameRate, f, mag, rsquared, the initial deltax/deltay/deltaz, the
 * increment of s->nextParticle, closing braces, ...) are not visible in this
 * listing.
 */
59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter position: new particles are created at the star */
62 float sx = flurry->star->position[0];
63 float sy = flurry->star->position[1];
64 float sz = flurry->star->position[2];
66 double frameRateModifier;
72 /* release 12 puffs every frame */
/* emission is gated on elapsed time: at least 1/121 (in fTime units) since the last emission */
73 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
74 float dx,dy,dz,deltax,deltay,deltaz;
/* one new particle per stream */
86 for(i=0;i<flurry->numStreams;i++) {
87 float streamSpeedCoherenceFactor;
/* every new particle starts with the same base velocity, located at the star */
89 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
90 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
91 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
92 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
93 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
94 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
95 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
96 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
97 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* per-particle speed jitter around 1.0, clamped at 0 via MAX_ */
98 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle and its squared length */
99 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
100 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
101 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
102 rsquared = (dx*dx+dy*dy+dz*dz);
103 f = streamSpeed * streamSpeedCoherenceFactor;
/* scale so that (dx,dy,dz)*mag has length f; NOTE(review): no guard for rsquared == 0 is visible */
105 mag = f / (float) sqrt(rsquared);
/* subtracting the offset steers the initial velocity towards spark i when f is positive */
107 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
108 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
109 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour is taken from spark i with a random factor per channel; the fourth channel is based on 0.85 */
110 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
111 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
112 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
113 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* stamp the birth time, mark the lane alive, start on a random animation frame in 0..63 */
114 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
115 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
116 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the lane cursor; after four lanes it wraps back to lane 0 */
117 s->nextSubParticle++;
118 if (s->nextSubParticle==4) {
120 s->nextSubParticle=0;
/* group cursor ran past the last group; the reset of s->nextParticle itself is presumably on a line not visible here */
122 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
124 s->nextSubParticle = 0;
/* lastParticleTime is refreshed on both visible paths (presumably the emit branch and its alternative) */
128 s->lastParticleTime = flurry->fTime;
131 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible here) */
136 s->old[i] = flurry->star->position[i];
/* dframe / fTime, presumably frames per unit time; NOTE(review): no guard for fTime == 0 is visible */
139 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* forces below are scaled by 42.5 / frameRate */
140 frameRateModifier = 42.5f / frameRate;
/* walk every group of four particles */
142 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* dead lanes are tested here; the branch body is not visible (presumably they are skipped) */
152 if (s->p[i].dead.i[k]) {
/* work on a local copy of the velocity */
156 deltax = s->p[i].delta[0].f[k];
157 deltay = s->p[i].delta[1].f[k];
158 deltaz = s->p[i].delta[2].f[k];
/* accumulate the influence of every spark */
160 for(j=0;j<flurry->numStreams;j++) {
161 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
162 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
163 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
164 rsquared = (dx*dx+dy*dy+dz*dz);
/* inverse-square falloff, scaled for frame rate */
166 f = (gravity/rsquared) * frameRateModifier;
/* the spark whose index equals (particle index modulo numStreams) gets an extra streamBias */
168 if ((((i*4)+k) % flurry->numStreams) == j) {
169 f *= 1.0f + streamBias;
/* dividing by the distance once more normalises (dx,dy,dz); NOTE(review): no guard for rsquared == 0 is visible */
172 mag = f / (float) sqrt(rsquared);
174 deltax -= (dx * mag);
175 deltay -= (dy * mag);
176 deltaz -= (dz * mag);
179 /* slow this particle down by flurry->drag */
180 deltax *= flurry->drag;
181 deltay *= flurry->drag;
182 deltaz *= flurry->drag;
/* squared speed of 25000000 or more (speed >= 5000) flags the particle dead */
184 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
185 s->p[i].dead.i[k] = 1;
189 /* update the position */
190 s->p[i].delta[0].f[k] = deltax;
191 s->p[i].delta[1].f[k] = deltay;
192 s->p[i].delta[2].f[k] = deltaz;
/* per component j: keep the previous position, then advance by velocity * fDeltaTime (the loop over j is not visible here) */
194 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
195 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_ScalarFrsqrte: scalar update step that uses PowerPC estimate
 * instructions (frsqrte, fres) through inline asm in place of the divide and
 * sqrt() call.
 *
 * global - not referenced in the lines visible here.
 * flurry - supplies the emitter (star), the attractors
 *          (spark[0..numStreams-1]), timing (fTime, fDeltaTime, dframe)
 *          and drag.
 * s      - particle storage; a particle is addressed as
 *          p[group].field.f[lane], four lanes per group.
 *
 * The visible structure is: emit one particle per stream when enough time
 * has passed, then for every particle adjust its velocity along the line to
 * each spark, apply drag, flag it dead when too fast, and move it.
 *
 * NOTE(review): in two places below both the exact form and the estimate
 * form assign the same variable back to back.  They are presumably
 * alternatives selected by preprocessor conditionals that are not visible in
 * this listing -- confirm before relying on either.  Other declarations and
 * statements (i, j, k, frameRate, f, mag, rsquared, closing braces, ...) are
 * likewise not visible.
 */
204 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter position: new particles are created at the star */
207 float sx = flurry->star->position[0];
208 float sy = flurry->star->position[1];
209 float sz = flurry->star->position[2];
211 double frameRateModifier;
217 /* release 12 puffs every frame */
/* emission is gated on elapsed time: at least 1/121 (in fTime units) since the last emission */
218 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
219 float dx,dy,dz,deltax,deltay,deltaz;
/* one new particle per stream */
231 for(i=0;i<flurry->numStreams;i++) {
232 float streamSpeedCoherenceFactor;
/* every new particle starts with the same base velocity, located at the star */
234 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
235 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
236 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
237 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
238 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
239 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
240 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
241 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
242 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* per-particle speed jitter around 1.0, clamped at 0 via MAX_ */
243 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle and its squared length */
244 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
245 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
246 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
247 rsquared = (dx*dx+dy*dy+dz*dz);
248 f = streamSpeed * streamSpeedCoherenceFactor;
/* exact form (f / sqrt) followed by the frsqrte form, which writes an estimate of 1/sqrt(rsquared) to mag; see the NOTE(review) in the header about the missing conditionals */
250 mag = f / (float) sqrt(rsquared);
252 reciprocal square-root estimate replaced above divide and call to system sqrt()
254 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* subtracting the offset steers the initial velocity towards spark i when mag is positive */
258 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
259 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
260 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour is taken from spark i with a random factor per channel; the fourth channel is based on 0.85 */
261 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
262 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
263 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
264 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* stamp the birth time, mark the lane alive, start on a random animation frame in 0..63 */
265 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
266 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
267 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the lane cursor; after four lanes it wraps back to lane 0 */
268 s->nextSubParticle++;
269 if (s->nextSubParticle==4) {
271 s->nextSubParticle=0;
/* group cursor ran past the last group; the reset of s->nextParticle itself is presumably on a line not visible here */
273 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
275 s->nextSubParticle = 0;
/* lastParticleTime is refreshed on both visible paths (presumably the emit branch and its alternative) */
279 s->lastParticleTime = flurry->fTime;
282 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible here) */
287 s->old[i] = flurry->star->position[i];
/* dframe / fTime, presumably frames per unit time; NOTE(review): no guard for fTime == 0 is visible */
290 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* forces below are scaled by 42.5 / frameRate */
291 frameRateModifier = 42.5f / frameRate;
/* walk every group of four particles */
293 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* dead lanes are tested here; the branch body is not visible (presumably they are skipped) */
303 if (s->p[i].dead.i[k]) {
/* work on a local copy of the velocity */
307 deltax = s->p[i].delta[0].f[k];
308 deltay = s->p[i].delta[1].f[k];
309 deltaz = s->p[i].delta[2].f[k];
/* accumulate the influence of every spark */
311 for(j=0;j<flurry->numStreams;j++) {
312 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
313 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
314 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
315 rsquared = (dx*dx+dy*dy+dz*dz);
/* two forms of f = gravity * frameRateModifier / rsquared: fres estimate times the product, then the exact divide; see the NOTE(review) in the header about the missing conditionals */
318 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
319 f *= gravity*frameRateModifier;
321 f = ( gravity * frameRateModifier ) / rsquared;
/* the spark whose index equals (particle index modulo numStreams) gets an extra streamBias */
323 if((((i*4)+k) % flurry->numStreams) == j) {
324 f *= 1.0f + streamBias;
/* dividing by the distance once more normalises (dx,dy,dz); NOTE(review): no guard for rsquared == 0 is visible */
327 mag = f / (float) sqrt(rsquared);
329 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
331 deltax -= (dx * mag);
332 deltay -= (dy * mag);
333 deltaz -= (dz * mag);
336 /* slow this particle down by flurry->drag */
337 deltax *= flurry->drag;
338 deltay *= flurry->drag;
339 deltaz *= flurry->drag;
/* squared speed of 25000000 or more (speed >= 5000) flags the particle dead */
341 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
342 s->p[i].dead.i[k] = 1;
346 /* update the position */
347 s->p[i].delta[0].f[k] = deltax;
348 s->p[i].delta[1].f[k] = deltay;
349 s->p[i].delta[2].f[k] = deltaz;
/* per component j: keep the previous position, then advance by velocity * fDeltaTime (the loop over j is not visible here) */
351 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
352 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_VectorBase: AltiVec update step.  Emission is done one lane at
 * a time with scalar code; integration handles the four lanes of a particle
 * group with one vector operation each.
 *
 * global - not referenced in the lines visible here.
 * flurry - supplies the emitter (star), the attractors
 *          (spark[0..numStreams-1]), timing (fTime, fDeltaTime, dframe)
 *          and drag.
 * s      - particle storage; p[group].field.v is the four-lane vector view
 *          and p[group].field.f[lane] / .i[lane] the per-lane view of the
 *          same data.
 *
 * NOTE(review): declarations for dragV, mod, mag, i, j, frameRate, f,
 * rsquared and several statements and closing braces are not visible in this
 * listing.
 */
362 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* emitter position: new particles are created at the star */
365 float sx = flurry->star->position[0];
366 float sy = flurry->star->position[1];
367 float sz = flurry->star->position[2];
369 floatToVector frameRateModifier;
370 floatToVector gravityV;
372 floatToVector deltaTimeV;
/* kill threshold on squared speed, zero addend for vec_madd, and streamBias replicated to all lanes */
373 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
374 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
375 const vector float biasConst = (vector float)(streamBias);
/* scalar-to-vector broadcast idiom: store into element 0, then splat element 0 to all four lanes */
377 gravityV.f[0] = gravity;
378 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
380 dragV.f[0] = flurry->drag;
381 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
383 deltaTimeV.f[0] = flurry->fDeltaTime;
384 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
389 /* release 12 puffs every frame */
/* emission is gated on elapsed time: at least 1/121 (in fTime units) since the last emission */
390 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
391 float dx,dy,dz,deltax,deltay,deltaz;
/* one new particle per stream, written lane by lane */
403 for(i=0;i<flurry->numStreams;i++) {
404 float streamSpeedCoherenceFactor;
/* every new particle starts with the same base velocity, located at the star */
406 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
407 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
408 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
409 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
410 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
411 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
412 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
413 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
414 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* per-particle speed jitter around 1.0, clamped at 0 via MAX_ */
415 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle and its squared length */
416 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
417 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
418 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
419 rsquared = (dx*dx+dy*dy+dz*dz);
420 f = streamSpeed * streamSpeedCoherenceFactor;
/* NOTE(review): mag is assigned by the sqrt form and then by the frsqrte asm; presumably alternatives under preprocessor conditionals not visible here -- confirm */
422 mag = f / (float) sqrt(rsquared);
424 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* subtracting the offset steers the initial velocity towards spark i when mag is positive */
428 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
429 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
430 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour is taken from spark i with a random factor per channel; the fourth channel is based on 0.85 */
431 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
432 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
433 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
434 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* stamp the birth time, mark the lane alive, start on a random animation frame in 0..63 */
435 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
436 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
437 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the lane cursor; after four lanes it wraps back to lane 0 */
438 s->nextSubParticle++;
439 if (s->nextSubParticle==4) {
441 s->nextSubParticle=0;
/* group cursor ran past the last group; the reset of s->nextParticle itself is presumably on a line not visible here */
443 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
445 s->nextSubParticle = 0;
/* lastParticleTime is refreshed on both visible paths (presumably the emit branch and its alternative) */
449 s->lastParticleTime = flurry->fTime;
452 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible here) */
457 s->old[i] = flurry->star->position[i];
/* dframe / fTime, presumably frames per unit time; NOTE(review): no guard for fTime == 0 is visible */
460 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* broadcast 42.5 / frameRate, then fold gravity in so every lane holds gravity * 42.5 / frameRate */
461 frameRateModifier.f[0] = 42.5f / frameRate;
462 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
464 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* walk every group of four particles */
466 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
467 /* floatToVector f; */
468 vector float deltax, deltay, deltaz;
469 vector float distTemp;
470 vector unsigned int deadTemp;
471 /* floatToVector infopos0, infopos1, infopos2; */
473 vector unsigned int jVec;
/* data-stream prefetch hint starting at the group four ahead of the current one */
476 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* true only when all four lanes are flagged dead; the branch body is not visible (presumably the group is skipped) */
478 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
482 deltax = s->p[i].delta[0].v;
483 deltay = s->p[i].delta[1].v;
484 deltaz = s->p[i].delta[2].v;
/*
 * mod holds, per lane, the spark index that gets the stream bias.
 * `i<<2 + 0` parses as i << (2 + 0) because + binds tighter than <<, so
 * lane 0 is (i*4) % numStreams.  Each following lane is the previous one
 * plus 1 unless that would reach numStreams; the wrap branches are not
 * visible here (presumably they store 0).
 */
486 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
487 if(mod.i[0]+1 == flurry->numStreams) {
490 mod.i[1] = mod.i[0]+1;
492 if(mod.i[1]+1 == flurry->numStreams) {
495 mod.i[2] = mod.i[1]+1;
497 if(mod.i[2]+1 == flurry->numStreams) {
500 mod.i[3] = mod.i[2]+1;
/* x ^ x is zero whatever jVec held: the vector spark counter starts at 0 in every lane */
503 jVec = vec_xor(jVec, jVec);
/* prefetch hint for the spark data */
505 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* accumulate the influence of every spark on the four lanes */
506 for(j=0; j<flurry->numStreams;j++) {
507 vector float ip0, ip1 = (vector float)(0.0), ip2;
508 vector float dx, dy, dz;
509 vector float rsquared, f;
510 vector float one_over_rsquared;
511 vector float biasTemp;
513 vector bool int biasOr;
/*
 * Unaligned load of the spark position.  vec_ld fetches the aligned
 * 16-byte block containing the address; when the address is 8 or more
 * bytes into that block the three components spill into the next block,
 * so that one is fetched too.  vec_perm with the vec_lvsl mask then
 * shifts the components into elements 0..2.
 */
515 ip0 = vec_ld(0, flurry->spark[j]->position);
516 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
517 ip1 = vec_ld(16, flurry->spark[j]->position);
520 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* broadcast each component; ip0 is splatted last because the other two read from it */
521 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
522 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
523 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
/* offset from spark j to each of the four particles */
525 dx = vec_sub(s->p[i].position[0].v, ip0);
526 dy = vec_sub(s->p[i].position[1].v, ip1);
527 dz = vec_sub(s->p[i].position[2].v, ip2);
/* squared distance per lane */
529 rsquared = vec_madd(dx, dx, zero);
530 rsquared = vec_madd(dy, dy, rsquared);
531 rsquared = vec_madd(dz, dz, rsquared);
/* lanes whose biased spark is j get 1 + streamBias, all other lanes get 1 */
533 biasOr = vec_cmpeq(jVec, mod.v);
534 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier * estimate of 1/rsquared */
536 f = vec_madd(biasTemp, frameRateModifier.v, zero);
537 one_over_rsquared = vec_re(rsquared);
538 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f * estimate of 1/sqrt(rsquared); both vec_re and vec_rsqrte are estimates, not exact results */
540 mag = vec_rsqrte(rsquared);
541 mag = vec_madd(mag, f, zero);
/* vec_nmsub(a, b, c) is c - a*b: velocity -= offset * mag */
543 deltax = vec_nmsub(dx, mag, deltax);
544 deltay = vec_nmsub(dy, mag, deltay);
545 deltaz = vec_nmsub(dz, mag, deltaz);
/* keep the vector spark counter in step with j */
547 jVec = vec_add(jVec, (vector unsigned int)(1));
550 /* slow this particle down by flurry->drag */
551 deltax = vec_madd(deltax, dragV.v, zero);
552 deltay = vec_madd(deltay, dragV.v, zero);
553 deltaz = vec_madd(deltaz, dragV.v, zero);
/* squared speed per lane */
555 distTemp = vec_madd(deltax, deltax, zero);
556 distTemp = vec_madd(deltay, deltay, distTemp);
557 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* lanes at or above the threshold get 1 OR-ed into their dead flag; already-set flags are kept */
559 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
560 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
561 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* all four lanes dead after this step; the branch body is not visible (presumably the position update is skipped) */
562 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
566 /* update the position */
567 s->p[i].delta[0].v = deltax;
568 s->p[i].delta[1].v = deltay;
569 s->p[i].delta[2].v = deltaz;
/* per component j: keep the previous position, then position += velocity * fDeltaTime (the loop over j is not visible here) */
571 s->p[i].oldposition[j].v = s->p[i].position[j].v;
572 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * UpdateSmoke_VectorUnrolled: AltiVec update step whose spark loop is
 * unrolled four times (sparks j..j+3 per iteration), followed by a
 * one-spark-at-a-time loop for the remainder.
 *
 * info - not referenced in the lines visible here.
 * s    - particle storage; p[group].field.v is the four-lane vector view and
 *        p[group].field.f[lane] / .i[lane] the per-lane view.
 *
 * NOTE(review): the body dereferences `flurry`, which is not a parameter of
 * this signature (the sibling update functions take
 * `flurry_info_t *flurry`).  Unless a file-scope `flurry` exists outside the
 * reviewed lines this cannot compile -- confirm whether the signature is
 * stale.
 * NOTE(review): declarations for dragV, mod, mag, magA..magD, i, j,
 * frameRate, f, rsquared and several statements and closing braces are not
 * visible in this listing.
 */
577 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
/* emitter position: new particles are created at the star */
580 float sx = flurry->star->position[0];
581 float sy = flurry->star->position[1];
582 float sz = flurry->star->position[2];
584 floatToVector frameRateModifier;
585 floatToVector gravityV;
587 floatToVector deltaTimeV;
/* kill threshold on squared speed, zero addend for vec_madd, and streamBias replicated to all lanes */
588 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
589 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
590 const vector float biasConst = (vector float)(streamBias);
/* scalar-to-vector broadcast idiom: store into element 0, then splat element 0 to all four lanes */
592 gravityV.f[0] = gravity;
593 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
595 dragV.f[0] = flurry->drag;
596 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
598 deltaTimeV.f[0] = flurry->fDeltaTime;
599 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
604 /* release 12 puffs every frame */
/* emission is gated on elapsed time: at least 1/121 (in fTime units) since the last emission */
605 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
606 float dx,dy,dz,deltax,deltay,deltaz;
/* one new particle per stream, written lane by lane */
618 for(i=0;i<flurry->numStreams;i++) {
619 float streamSpeedCoherenceFactor;
/* every new particle starts with the same base velocity, located at the star */
621 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
622 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
623 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
624 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
625 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
626 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
627 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
628 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
629 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* per-particle speed jitter around 1.0, clamped at 0 via MAX_ */
630 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* offset from spark i to the new particle and its squared length */
631 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
632 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
633 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
634 rsquared = (dx*dx+dy*dy+dz*dz);
635 f = streamSpeed * streamSpeedCoherenceFactor;
/* NOTE(review): mag is assigned by the sqrt form and then by the frsqrte asm; presumably alternatives under preprocessor conditionals not visible here -- confirm */
637 mag = f / (float) sqrt(rsquared);
639 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* subtracting the offset steers the initial velocity towards spark i when mag is positive */
643 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
644 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
645 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* colour is taken from spark i with a random factor per channel; the fourth channel is based on 0.85 */
646 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
647 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
648 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
649 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* stamp the birth time, mark the lane alive, start on a random animation frame in 0..63 */
650 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
651 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
652 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* advance the lane cursor; after four lanes it wraps back to lane 0 */
653 s->nextSubParticle++;
654 if (s->nextSubParticle==4) {
656 s->nextSubParticle=0;
/* group cursor ran past the last group; the reset of s->nextParticle itself is presumably on a line not visible here */
658 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
660 s->nextSubParticle = 0;
/* lastParticleTime is refreshed on both visible paths (presumably the emit branch and its alternative) */
664 s->lastParticleTime = flurry->fTime;
667 s->lastParticleTime = flurry->fTime;
/* remember the star position in s->old (the loop over i is not visible here) */
672 s->old[i] = flurry->star->position[i];
/* dframe / fTime, presumably frames per unit time; NOTE(review): no guard for fTime == 0 is visible */
675 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* broadcast 42.5 / frameRate, then fold gravity in so every lane holds gravity * 42.5 / frameRate */
676 frameRateModifier.f[0] = 42.5f / frameRate;
677 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
679 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* walk every group of four particles */
681 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
682 /* floatToVector f; */
683 vector float deltax, deltay, deltaz;
684 vector float distTemp;
685 vector unsigned int deadTemp;
686 /* floatToVector infopos0, infopos1, infopos2; */
688 vector unsigned int jVec;
/* integer 1 and float 1.0 in every lane, built in registers (vec_ctf with scale 0 converts 1 to 1.0) */
689 vector unsigned int intOne = vec_splat_u32(1);
690 vector float floatOne = vec_ctf(intOne, 0);
/* data-stream prefetch hint starting at the group four ahead of the current one */
693 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* true only when all four lanes are flagged dead; the branch body is not visible (presumably the group is skipped) */
695 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
699 deltax = s->p[i].delta[0].v;
700 deltay = s->p[i].delta[1].v;
701 deltaz = s->p[i].delta[2].v;
/*
 * mod holds, per lane, the spark index that gets the stream bias.
 * `i<<2 + 0` parses as i << (2 + 0) because + binds tighter than <<, so
 * lane 0 is (i*4) % numStreams.  Each following lane is the previous one
 * plus 1 unless that would reach numStreams; the wrap branches are not
 * visible here (presumably they store 0).
 */
703 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
704 if(mod.i[0]+1 == flurry->numStreams) {
707 mod.i[1] = mod.i[0]+1;
709 if(mod.i[1]+1 == flurry->numStreams) {
712 mod.i[2] = mod.i[1]+1;
714 if(mod.i[2]+1 == flurry->numStreams) {
717 mod.i[3] = mod.i[2]+1;
/* x ^ x is zero whatever jVec held: the vector spark counter starts at 0 in every lane */
720 jVec = vec_xor(jVec, jVec);
/* prefetch hint for the spark data */
722 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* unrolled loop: sparks j, j+1, j+2, j+3 (suffixes a/A..d/D) are processed together while four remain */
723 for(j=0; j + 3 < flurry->numStreams;j+=4)
725 vector float dxa, dya, dza;
726 vector float dxb, dyb, dzb;
727 vector float dxc, dyc, dzc;
728 vector float dxd, dyd, dzd;
729 vector float ip0a, ip1a;
730 vector float ip0b, ip1b;
731 vector float ip0c, ip1c;
732 vector float ip0d, ip1d;
733 vector float rsquaredA;
734 vector float rsquaredB;
735 vector float rsquaredC;
736 vector float rsquaredD;
737 vector float fA, fB, fC, fD;
738 vector float biasTempA;
739 vector float biasTempB;
740 vector float biasTempC;
741 vector float biasTempD;
747 vector float one_over_rsquaredA;
748 vector float one_over_rsquaredB;
749 vector float one_over_rsquaredC;
750 vector float one_over_rsquaredD;
751 vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
/*
 * Unaligned load of four spark positions.  Unlike the remainder loop
 * further down, the second aligned block is fetched unconditionally, at
 * byte offset 12 (the block containing address + 12); vec_perm with the
 * vec_lvsl mask then shifts the components into elements 0..2.
 */
754 ip0a = vec_ld(0, flurry->spark[j]->position);
755 ip0b = vec_ld(0, flurry->spark[j+1]->position);
756 ip0c = vec_ld(0, flurry->spark[j+2]->position);
757 ip0d = vec_ld(0, flurry->spark[j+3]->position);
758 ip1a = vec_ld( 12, flurry->spark[j]->position );
759 ip1b = vec_ld( 12, flurry->spark[j+1]->position );
760 ip1c = vec_ld( 12, flurry->spark[j+2]->position );
761 ip1d = vec_ld( 12, flurry->spark[j+3]->position );
764 ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
765 ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
766 ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
767 ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
/* component 0: broadcast the spark value, then subtract it from the particle positions */
769 dxa = vec_splat( ip0a, 0 );
770 dxb = vec_splat( ip0b, 0 );
771 dxc = vec_splat( ip0c, 0 );
772 dxd = vec_splat( ip0d, 0 );
773 dxa = vec_sub( s->p[i].position[0].v, dxa );
774 dxb = vec_sub( s->p[i].position[0].v, dxb );
775 dxc = vec_sub( s->p[i].position[0].v, dxc );
776 dxd = vec_sub( s->p[i].position[0].v, dxd );
/* component 1 */
778 dya = vec_splat( ip0a, 1 );
779 dyb = vec_splat( ip0b, 1 );
780 dyc = vec_splat( ip0c, 1 );
781 dyd = vec_splat( ip0d, 1 );
782 dya = vec_sub( s->p[i].position[1].v, dya );
783 dyb = vec_sub( s->p[i].position[1].v, dyb );
784 dyc = vec_sub( s->p[i].position[1].v, dyc );
785 dyd = vec_sub( s->p[i].position[1].v, dyd );
/* component 2 */
787 dza = vec_splat( ip0a, 2 );
788 dzb = vec_splat( ip0b, 2 );
789 dzc = vec_splat( ip0c, 2 );
790 dzd = vec_splat( ip0d, 2 );
791 dza = vec_sub( s->p[i].position[2].v, dza );
792 dzb = vec_sub( s->p[i].position[2].v, dzb );
793 dzc = vec_sub( s->p[i].position[2].v, dzc );
794 dzd = vec_sub( s->p[i].position[2].v, dzd );
/* squared distance per lane, built up one component at a time */
796 rsquaredA = vec_madd( dxa, dxa, zero );
797 rsquaredB = vec_madd( dxb, dxb, zero );
798 rsquaredC = vec_madd( dxc, dxc, zero );
799 rsquaredD = vec_madd( dxd, dxd, zero );
801 rsquaredA = vec_madd( dya, dya, rsquaredA );
802 rsquaredB = vec_madd( dyb, dyb, rsquaredB );
803 rsquaredC = vec_madd( dyc, dyc, rsquaredC );
804 rsquaredD = vec_madd( dyd, dyd, rsquaredD );
806 rsquaredA = vec_madd( dza, dza, rsquaredA );
807 rsquaredB = vec_madd( dzb, dzb, rsquaredB );
808 rsquaredC = vec_madd( dzc, dzc, rsquaredC );
809 rsquaredD = vec_madd( dzd, dzd, rsquaredD );
/* bias masks for sparks j..j+3; jVec is bumped after each compare, so it ends at j+4 for the next iteration */
811 biasOrA = vec_cmpeq( jVec, mod.v );
812 jVec = vec_add(jVec, intOne);
813 biasOrB = vec_cmpeq( jVec, mod.v );
814 jVec = vec_add(jVec, intOne);
815 biasOrC = vec_cmpeq( jVec, mod.v );
816 jVec = vec_add(jVec, intOne);
817 biasOrD = vec_cmpeq( jVec, mod.v );
818 jVec = vec_add(jVec, intOne);
/* lanes whose biased spark matches get 1 + streamBias, all other lanes get 1 */
820 biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
821 biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
822 biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
823 biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
/* f = bias * gravity * frameRateModifier * estimate of 1/rsquared */
825 fA = vec_madd( biasTempA, frameRateModifier.v, zero);
826 fB = vec_madd( biasTempB, frameRateModifier.v, zero);
827 fC = vec_madd( biasTempC, frameRateModifier.v, zero);
828 fD = vec_madd( biasTempD, frameRateModifier.v, zero);
829 one_over_rsquaredA = vec_re( rsquaredA );
830 one_over_rsquaredB = vec_re( rsquaredB );
831 one_over_rsquaredC = vec_re( rsquaredC );
832 one_over_rsquaredD = vec_re( rsquaredD );
833 fA = vec_madd( fA, one_over_rsquaredA, zero);
834 fB = vec_madd( fB, one_over_rsquaredB, zero);
835 fC = vec_madd( fC, one_over_rsquaredC, zero);
836 fD = vec_madd( fD, one_over_rsquaredD, zero);
/* mag = f * estimate of 1/sqrt(rsquared) */
837 magA = vec_rsqrte( rsquaredA );
838 magB = vec_rsqrte( rsquaredB );
839 magC = vec_rsqrte( rsquaredC );
840 magD = vec_rsqrte( rsquaredD );
841 magA = vec_madd( magA, fA, zero );
842 magB = vec_madd( magB, fB, zero );
843 magC = vec_madd( magC, fC, zero );
844 magD = vec_madd( magD, fD, zero );
/* vec_nmsub(a, b, c) is c - a*b: the four sparks are applied to the velocity in order A, B, C, D */
845 deltax = vec_nmsub( dxa, magA, deltax );
846 deltay = vec_nmsub( dya, magA, deltay );
847 deltaz = vec_nmsub( dza, magA, deltaz );
849 deltax = vec_nmsub( dxb, magB, deltax );
850 deltay = vec_nmsub( dyb, magB, deltay );
851 deltaz = vec_nmsub( dzb, magB, deltaz );
853 deltax = vec_nmsub( dxc, magC, deltax );
854 deltay = vec_nmsub( dyc, magC, deltay );
855 deltaz = vec_nmsub( dzc, magC, deltaz );
857 deltax = vec_nmsub( dxd, magD, deltax );
858 deltay = vec_nmsub( dyd, magD, deltay );
859 deltaz = vec_nmsub( dzd, magD, deltaz );
/* remainder loop: j continues from where the unrolled loop stopped and handles the last 0..3 sparks one at a time */
863 for(;j<flurry->numStreams;j++) {
864 vector float ip0, ip1 = (vector float)(0.0), ip2;
865 vector float dx, dy, dz;
866 vector float rsquared, f;
867 vector float one_over_rsquared;
868 vector float biasTemp;
870 vector bool int biasOr;
/* unaligned load: the second aligned block is fetched only when the address is 8 or more bytes into the first one */
872 ip0 = vec_ld(0, flurry->spark[j]->position);
873 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
874 ip1 = vec_ld(16, flurry->spark[j]->position);
877 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* broadcast each component; ip0 is splatted last because the other two read from it */
878 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
879 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
880 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
/* offset from spark j to each of the four particles */
882 dx = vec_sub(s->p[i].position[0].v, ip0);
883 dy = vec_sub(s->p[i].position[1].v, ip1);
884 dz = vec_sub(s->p[i].position[2].v, ip2);
/* squared distance per lane */
886 rsquared = vec_madd(dx, dx, zero);
887 rsquared = vec_madd(dy, dy, rsquared);
888 rsquared = vec_madd(dz, dz, rsquared);
/* lanes whose biased spark is j get 1 + streamBias, all other lanes get 1 */
890 biasOr = vec_cmpeq(jVec, mod.v);
891 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier * estimate of 1/rsquared */
893 f = vec_madd(biasTemp, frameRateModifier.v, zero);
894 one_over_rsquared = vec_re(rsquared);
895 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f * estimate of 1/sqrt(rsquared) */
897 mag = vec_rsqrte(rsquared);
898 mag = vec_madd(mag, f, zero);
/* velocity -= offset * mag */
900 deltax = vec_nmsub(dx, mag, deltax);
901 deltay = vec_nmsub(dy, mag, deltay);
902 deltaz = vec_nmsub(dz, mag, deltaz);
/* keep the vector spark counter in step with j */
904 jVec = vec_add(jVec, (vector unsigned int)(1));
907 /* slow this particle down by flurry->drag */
908 deltax = vec_madd(deltax, dragV.v, zero);
909 deltay = vec_madd(deltay, dragV.v, zero);
910 deltaz = vec_madd(deltaz, dragV.v, zero);
/* squared speed per lane */
912 distTemp = vec_madd(deltax, deltax, zero);
913 distTemp = vec_madd(deltay, deltay, distTemp);
914 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* lanes at or above the threshold get 1 OR-ed into their dead flag; already-set flags are kept */
916 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
917 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
918 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* all four lanes dead after this step; the branch body is not visible (presumably the position update is skipped) */
919 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
923 /* update the position */
924 s->p[i].delta[0].v = deltax;
925 s->p[i].delta[1].v = deltay;
926 s->p[i].delta[2].v = deltaz;
/* per component j: keep the previous position, then position += velocity * fDeltaTime (the loop over j is not visible here) */
928 s->p[i].oldposition[j].v = s->p[i].position[j].v;
929 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * DrawSmoke_Scalar: scalar draw routine.  For each particle it projects the
 * current and previous positions to the screen, fills s->seraphimColors,
 * s->seraphimTextures and s->seraphimVertices with one textured quad, and
 * finally submits the arrays with glDrawArrays(GL_QUADS, ...).
 *
 * global     - screen size (sys_glWidth, sys_glHeight).
 * flurry     - current time (fTime) and streamExpansion.
 * s          - particle storage and the output vertex/colour/texture arrays;
 *              dead flags and animFrame are also updated here.
 * brightness - not referenced in the lines visible here.
 *
 * NOTE(review): many declarations and statements (i, k, si, sci, sti, svi,
 * width, thisWidth, w, ow, cm, cmv, u1, v1, dxm, dym, dxs, dys, dxos, dyos,
 * sm, branch bodies, closing braces, ...) are not visible in this listing.
 */
937 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* sizes are scaled relative to a 1024-wide screen; hslash2/wslash2 are the screen centre */
947 float screenRatio = global->sys_glWidth / 1024.0f;
948 float hslash2 = global->sys_glHeight * 0.5f;
949 float wslash2 = global->sys_glWidth * 0.5f;
/* largest allowed width: the value thisWidth takes when a particle's age (fTime - time) is 2.5 */
952 width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
/* every group, every lane */
954 for (i=0;i<NUMSMOKEPARTICLES/4;i++)
956 for (k=0; k<4; k++) {
/* dead lanes are tested here; the branch body is not visible (presumably they are skipped) */
960 if (s->p[i].dead.i[k]) {
/* width grows linearly with age; reaching the maximum marks the particle dead */
963 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
964 if (thisWidth >= width)
966 s->p[i].dead.i[k] = 1;
/* perspective projection to screen coordinates; note both axes are scaled by sys_glWidth */
969 z = s->p[i].position[2].f[k];
970 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
971 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
972 oldz = s->p[i].oldposition[2].f[k];
/* off-screen by more than 50 units, or current/previous depth below 25; the branch body is not visible (presumably the particle is skipped) */
973 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
/* projected width at the current depth, never below 1 */
978 w = MAX_(1.0f,thisWidth/z);
/* previous position projected the same way, and the screen-space step between the two */
980 float oldx = s->p[i].oldposition[0].f[k];
981 float oldy = s->p[i].oldposition[1].f[k];
982 float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
983 float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
984 float dx = (sx-oldscreenx);
985 float dy = (sy-oldscreeny);
987 float d = FastDistance2D(dx, dy);
/* projected width at the previous depth, never below 1 */
998 ow = MAX_(1.0f,thisWidth/oldz);
1011 float m = 1.0f + sm;
/* step the animation frame, wrapping 63 -> 0 */
1020 s->p[i].animFrame.i[k]++;
1021 if (s->p[i].animFrame.i[k] >= 64)
1023 s->p[i].animFrame.i[k] = 0;
/* low 3 bits and high 3 bits of the frame select u and v in steps of 0.125 (presumably an 8x8 grid of frames in the texture) */
1026 u0 = (s->p[i].animFrame.i[k]& 7) * 0.125f;
1027 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
/* colour multiplier falls as the particle widens towards the maximum width */
1030 cm = (1.375f - thisWidth/width);
/* 3 is the value of NOT_QUITE_DEAD defined in this file; such a lane becomes fully dead (1) here */
1031 if (s->p[i].dead.i[k] == 3)
1034 s->p[i].dead.i[k] = 1;
/* particle colour scaled by cm, all four channels */
1038 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1039 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1040 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1041 cmv.f[3] = s->p[i].color[3].f[k]*cm;
/* NOTE(review): a vector-assignment fill and a scalar-loop fill of the same four colour entries follow back to back; presumably alternatives under preprocessor conditionals not visible here -- confirm */
1044 /* MDT we can't use vectors in the Scalar routine */
1045 s->seraphimColors[sci++].v = cmv.v;
1046 s->seraphimColors[sci++].v = cmv.v;
1047 s->seraphimColors[sci++].v = cmv.v;
1048 s->seraphimColors[sci++].v = cmv.v;
1052 for (jj = 0; jj < 4; jj++) {
1053 for (ii = 0; ii < 4; ii++) {
1054 s->seraphimColors[sci].f[ii] = cmv.f[ii];
/* texture coordinates for the four corners: (u0,v0), (u0,v1), (u1,v1), (u1,v0) */
1061 s->seraphimTextures[sti++] = u0;
1062 s->seraphimTextures[sti++] = v0;
1063 s->seraphimTextures[sti++] = u0;
1064 s->seraphimTextures[sti++] = v1;
1066 s->seraphimTextures[sti++] = u1;
1067 s->seraphimTextures[sti++] = v1;
1068 s->seraphimTextures[sti++] = u1;
1069 s->seraphimTextures[sti++] = v0;
/* each seraphimVertices entry holds two 2-D points (x, y, x, y): first the two corners around the current screen position */
1071 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1072 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1073 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1074 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
/* then the two corners around the previous screen position */
1077 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1078 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1079 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1080 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
/* hand the three arrays to OpenGL and draw si*4 vertices as quads (si is presumably the number of quads emitted above) */
1086 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1087 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1088 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1089 glDrawArrays(GL_QUADS,0,si*4);
/*
 * DrawSmoke_Vector: AltiVec implementation of the smoke draw pass.
 *
 * Walks s->p[] in groups of four particles (NUMSMOKEPARTICLES/4 groups),
 * projects each particle's current and previous position to screen space,
 * and fills s->seraphimColors, s->seraphimTextures and s->seraphimVertices
 * with one quad per particle that passes the visibility tests.  The arrays
 * are then submitted with a single glDrawArrays(GL_QUADS, ...) call.
 *
 *   global     - supplies sys_glWidth / sys_glHeight.
 *   flurry     - supplies fTime and streamExpansion.
 *   s          - particle state (p[]) and the output arrays; dead and
 *                animFrame of each processed group are updated in place.
 *   brightness - scalar multiplied into every particle's colour multiplier.
 */
1095 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1097 const vector float zero = (vector float)(0.0);
1102 floatToVector width;
1104 floatToVector u0,v0,u1,v1;
1105 vector float one_over_z;
1108 float screenRatio = global->sys_glWidth / 1024.0f;
1109 float hslash2 = global->sys_glHeight * 0.5f;
1110 float wslash2 = global->sys_glWidth * 0.5f;
1112 floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1113 floatToVector glWidthV;
1115 vector float cmv[4];
1116 vector float svec[4], ovec[4];
1117 vector float oldscreenx, oldscreeny;
1119 vector float frameAnd7;
1120 vector float frameShift3;
1121 vector float one_over_width;
1122 vector float dx, dy;
1124 vector unsigned int vSi = vec_splat_u32(0);
1125 const vector float eighth = (vector float)(0.125);
1126 float glWidth50 = global->sys_glWidth + 50.0f;
1127 float glHeight50 = global->sys_glHeight + 50.0f;
1128 vector float vGLWidth50, vGLHeight50;
1129 unsigned int blitBool;
/* Prefetch hint (data stream touch) for the start of the particle array. */
1131 vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
/*
 * Broadcast glWidth50 / glHeight50 to all four lanes.  vec_lde loads the
 * single float into the lane selected by its address; the permute vector
 * derived from vec_lvsl of that same address replicates that element.
 */
1134 vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1135 vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1136 permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1137 permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1138 vGLWidth50 = vec_lde( 0, &glWidth50 );
1139 vGLHeight50 = vec_lde( 0, &glHeight50 );
1140 vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1141 vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
/* Broadcast the scalar parameters: write lane 0, then splat lane 0. */
1144 width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1145 width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1147 briteV.f[0] = brightness;
1148 briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1150 fTimeV.f[0] = (float) flurry->fTime;
1151 fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1153 expansionV.f[0] = flurry->streamExpansion;
1154 expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1156 screenRatioV.f[0] = screenRatio;
1157 screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1159 hslash2V.f[0] = hslash2;
1160 hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1162 wslash2V.f[0] = wslash2;
1163 wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1165 streamSizeV.f[0] = streamSize;
1166 streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1168 glWidthV.f[0] = global->sys_glWidth;
1169 glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
/* One iteration handles four particles (one lane each). */
1171 for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1172 vector float thisWidth;
1174 vector float oldx, oldy, one_over_oldz;
1175 vector float xabs, yabs, mn;
1177 vector float one_over_d;
1178 vector bool int dnz;
/* Prefetch hint for a later particle group. */
1181 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
/* Nothing to draw when all four lanes are already dead (dead == 1). */
1183 if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1185 blitBool = 0; /* keep track of particles that actually need to be drawn */
/* thisWidth = ((fTime - time) * streamExpansion + streamSize) * screenRatio */
1187 thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1188 thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1189 thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
/*
 * Project the current position: sx = x * glWidth / z + wslash2,
 * sy = y * glWidth / z + hslash2.  vec_re is a reciprocal estimate.
 */
1191 z.v = s->p[i].position[2].v;
1192 one_over_z = vec_re(z.v);
1194 sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1195 sx = vec_madd(sx, one_over_z, wslash2V.v);
1196 sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1197 sy = vec_madd(sy, one_over_z, hslash2V.v);
1199 oldz = s->p[i].oldposition[2].v;
/* Half-width at the current depth, never below 1. */
1201 w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
/* Same projection for the previous position. */
1203 oldx = s->p[i].oldposition[0].v;
1204 oldy = s->p[i].oldposition[1].v;
1205 one_over_oldz = vec_re(oldz);
1206 oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1207 oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1208 oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1209 oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1210 dx = vec_sub(sx,oldscreenx);
1211 dy = vec_sub(sy,oldscreeny);
/* d = (xabs + yabs) - 0.6875 * min(xabs, yabs) */
1215 mn = vec_min(xabs,yabs);
1216 d = vec_add(xabs,yabs);
1217 d = vec_madd(mn, (vector float)(-0.6875), d);
/*
 * sm = w / d and os = ow / d, forced to 0 in lanes where d is not > 0
 * (dnz is all-ones only where d > 0).
 */
1219 ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
1220 one_over_d = vec_re(d);
1221 dnz = vec_cmpgt(d, zero);
1222 sm = vec_madd(w, one_over_d, zero);
1223 sm = vec_and(sm, dnz);
1224 os = vec_madd(ow, one_over_d, zero);
1225 os = vec_and(os, dnz);
/*
 * Build the "do not draw" mask.  The dead flag is refreshed from the first
 * two tests only (already dead, or thisWidth >= width); the remaining tests
 * (outside the screen by more than 50, or z / oldz below 25) suppress
 * drawing for this pass without marking the particle dead.
 */
1228 intToVector tempMask;
1229 vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1230 vector bool int gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1231 vector bool int glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1232 vector bool int glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1233 vector bool int test50x = vec_cmplt( sx, (vector float) (-50.0) );
1234 vector bool int test50y = vec_cmplt( sy, (vector float) (-50.0) );
1235 vector bool int testz = vec_cmplt( z.v, (vector float) (25.0) );
1236 vector bool int testoldz = vec_cmplt( oldz, (vector float) (25.0) );
1237 mask = vec_or( mask, gtMask );
1238 s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1239 mask = vec_or( mask, glWidth50Test );
1240 mask = vec_or( mask, glHeight50Test );
1241 mask = vec_or( mask, test50x );
1242 mask = vec_or( mask, test50y );
1243 mask = vec_or( mask, testz );
1244 mask = vec_or( mask, testoldz );
1245 tempMask.v = (vector unsigned int)mask;
/*
 * Advance animFrame by one in drawn lanes (~mask is -1 there, so the
 * subtraction adds 1) and wrap it into 0..63.
 */
1247 s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1248 s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
/* Texture column: u0 = (frame & 7) * 0.125 */
1250 frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1251 u0.v = vec_madd(frameAnd7, eighth, zero);
/*
 * Texture row: v0 = (frame >> 3) * 0.125, the same formula the scalar
 * routine uses.  This must use frameShift3, not frameAnd7, otherwise the
 * row would simply repeat the column index.
 */
1253 frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1254 v0.v = vec_madd(frameShift3, eighth, zero);
/* Opposite corner of the texture cell, one eighth further along. */
1256 u1.v = vec_add(u0.v, eighth);
1257 v1.v = vec_add(v0.v, eighth);
/*
 * Colour multiplier: cm = 1.375 - thisWidth / width in drawn lanes
 * (vec_nmsub(a, b, c) yields c - a*b); masked lanes keep the previous cm.
 * The result is then scaled by brightness.
 */
1259 one_over_width = vec_re(width.v);
1260 cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1261 cm.v = vec_madd(cm.v, briteV.v, zero);
/* Per-lane count of drawn particles; summed across lanes after the loop. */
1263 vSi = vec_sub( vSi, vec_nor( mask, mask ) );
/*
 * Pack the drawn lanes into blitBool: lane k contributes bit (1 << k) when
 * it is not masked; the two shifted adds sum the four lanes.
 */
1265 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1266 vector unsigned int temp = (vector unsigned int)mask;
1267 temp = vec_andc( blitMask, temp );
1268 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1269 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1270 vec_ste( temp, 0, &blitBool );
/*
 * Interleave the per-lane texture coordinates so that each particle k gets
 * (u0,v0,u0,v1) in a resultNa/b vector and (u1,v1,u1,v0) in its partner.
 */
1275 vector float temp1, temp2, temp3, temp4;
1276 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1278 temp1 = vec_mergeh( u0.v, u0.v );
1279 temp2 = vec_mergel( u0.v, u0.v );
1280 temp3 = vec_mergeh( v0.v, v1.v );
1281 temp4 = vec_mergel( v0.v, v1.v );
1283 result1a = vec_mergeh( temp1, temp3 );
1284 result1b = vec_mergel( temp1, temp3 );
1285 result2a = vec_mergeh( temp2, temp4 );
1286 result2b = vec_mergel( temp2, temp4 );
1288 temp1 = vec_mergeh( u1.v, u1.v );
1289 temp2 = vec_mergel( u1.v, u1.v );
1290 temp3 = vec_mergeh( v1.v, v0.v );
1291 temp4 = vec_mergel( v1.v, v0.v );
1293 result3a = vec_mergeh( temp1, temp3 );
1294 result3b = vec_mergel( temp1, temp3 );
1295 result4a = vec_mergeh( temp2, temp4 );
1296 result4b = vec_mergel( temp2, temp4 );
/* Store the texture coordinates for each of the four lanes at index sti. */
1300 vec_st( result1a, 0, &s->seraphimTextures[sti] );
1301 vec_st( result3a, 16, &s->seraphimTextures[sti]);
1306 vec_st( result1b, 0, &s->seraphimTextures[sti]);
1307 vec_st( result3b, 16, &s->seraphimTextures[sti]);
1312 vec_st( result2a, 0, &s->seraphimTextures[sti]);
1313 vec_st( result4a, 16, &s->seraphimTextures[sti]);
1318 vec_st( result2b, 0, &s->seraphimTextures[sti]);
1319 vec_st( result4b, 16, &s->seraphimTextures[sti]);
/* Scale the four colour channels by cm ... */
1325 cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1326 cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1327 cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1328 cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
/* ... then transpose so cmv[k] holds the four channels of particle k. */
1330 vector float vI0, vI1, vI2, vI3;
1332 vI0 = vec_mergeh ( cmv[0], cmv[2] );
1333 vI1 = vec_mergeh ( cmv[1], cmv[3] );
1334 vI2 = vec_mergel ( cmv[0], cmv[2] );
1335 vI3 = vec_mergel ( cmv[1], cmv[3] );
1337 cmv[0] = vec_mergeh ( vI0, vI1 );
1338 cmv[1] = vec_mergel ( vI0, vI1 );
1339 cmv[2] = vec_mergeh ( vI2, vI3 );
1340 cmv[3] = vec_mergel ( vI2, vI3 );
1343 vec_dst( cmv, 0x0D0100D0, 1 );
/*
 * Quad corners.  The current end is offset along the motion direction by
 * (dxm, dym) and sideways by (dys, dxs); the old end is offset backwards
 * by (dxm, dym) and sideways by (dyos, dxos).
 */
1346 vector float sxd, syd;
1347 vector float sxdm, sxdp, sydm, sydp;
1348 vector float oxd, oyd;
1349 vector float oxdm, oxdp, oydm, oydp;
1350 vector float vI0, vI1, vI2, vI3;
1351 vector float dxs, dys;
1352 vector float dxos, dyos;
1353 vector float dxm, dym;
1356 m = vec_add((vector float)(1.0), sm);
1358 dxs = vec_madd(dx, sm, zero);
1359 dys = vec_madd(dy, sm, zero);
1360 dxos = vec_madd(dx, os, zero);
1361 dyos = vec_madd(dy, os, zero);
1362 dxm = vec_madd(dx, m, zero);
1363 dym = vec_madd(dy, m, zero);
1365 sxd = vec_add(sx, dxm);
1366 sxdm = vec_sub(sxd, dys);
1367 sxdp = vec_add(sxd, dys);
1369 syd = vec_add(sy, dym);
1370 sydm = vec_sub(syd, dxs);
1371 sydp = vec_add(syd, dxs);
1373 oxd = vec_sub(oldscreenx, dxm);
1374 oxdm = vec_sub(oxd, dyos);
1375 oxdp = vec_add(oxd, dyos);
1377 oyd = vec_sub(oldscreeny, dym);
1378 oydm = vec_sub(oyd, dxos);
1379 oydp = vec_add(oyd, dxos);
/* Transpose: svec[k] = two current-end corners of particle k as x,y pairs. */
1381 vI0 = vec_mergeh ( sxdm, sxdp );
1382 vI1 = vec_mergeh ( sydp, sydm );
1383 vI2 = vec_mergel ( sxdm, sxdp );
1384 vI3 = vec_mergel ( sydp, sydm );
1386 svec[0] = vec_mergeh ( vI0, vI1 );
1387 svec[1] = vec_mergel ( vI0, vI1 );
1388 svec[2] = vec_mergeh ( vI2, vI3 );
1389 svec[3] = vec_mergel ( vI2, vI3 );
/* Transpose: ovec[k] = two old-end corners of particle k as x,y pairs. */
1391 vI0 = vec_mergeh ( oxdp, oxdm );
1392 vI1 = vec_mergeh ( oydm, oydp );
1393 vI2 = vec_mergel ( oxdp, oxdm );
1394 vI3 = vec_mergel ( oydm, oydp );
1396 ovec[0] = vec_mergeh ( vI0, vI1 );
1397 ovec[1] = vec_mergel ( vI0, vI1 );
1398 ovec[2] = vec_mergeh ( vI2, vI3 );
1399 ovec[3] = vec_mergel ( vI2, vI3 );
/*
 * Emit output for each lane whose bit is set in blitBool: four copies of
 * the colour (one per quad corner) and the two vertex vectors.
 */
1403 int offset0 = (sci + 0) * sizeof( vector float );
1404 int offset1 = (sci + 1) * sizeof( vector float );
1405 int offset2 = (sci + 2) * sizeof( vector float );
1406 int offset3 = (sci + 3) * sizeof( vector float );
1407 int offset4 = (svi + 0) * sizeof( vector float );
1408 int offset5 = (svi + 1) * sizeof( vector float );
1409 vector float *colors = (vector float *)s->seraphimColors;
1410 vector float *vertices = (vector float *)s->seraphimVertices;
1411 for (kk=0; kk<4; kk++) {
1412 if (blitBool>>kk & 1) {
1413 vector float vcmv = cmv[kk];
1414 vector float vsvec = svec[kk];
1415 vector float vovec = ovec[kk];
1417 vec_st( vcmv, offset0, colors );
1418 vec_st( vcmv, offset1, colors );
1419 vec_st( vcmv, offset2, colors );
1420 vec_st( vcmv, offset3, colors );
1421 vec_st( vsvec, offset4, vertices );
1422 vec_st( vovec, offset5, vertices );
/* Sum the four per-lane counters into si, the number of quads to draw. */
1431 vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1432 vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1433 vec_ste( (vector signed int) vSi, 0, &si );
/* Submit all quads built above in one call (4 vertices per quad). */
1435 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1436 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1437 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1438 glDrawArrays(GL_QUADS,0,si*4);