3 Copyright (c) 2002, Calum Robinson
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 * Neither the name of the author nor the names of its contributors may be used
17 to endorse or promote products derived from this software without specific
18 prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 /* Smoke.cpp: implementation of the Smoke class. */
/* File-local tuning constants. */
41 #define MAXANGLES 16384
/* Value 3 is the "dying" state that DrawSmoke_Scalar compares s->p[i].dead.i[k] against. */
42 #define NOT_QUITE_DEAD 3
/*
 * No trailing semicolon here: a ';' inside the replacement text would be pasted
 * into every expansion and break any use of the macro inside an expression
 * (e.g. `x * intensity` or `f(intensity)`).
 */
44 #define intensity 75000.0f
/*
 * InitSmoke: reset the emitter bookkeeping of a smoke system.
 *
 * s - smoke state to initialise in place.
 *
 * On the visible lines this clears nextSubParticle, sets lastParticleTime to
 * 0.25f and seeds s->old[i] with RandFlt(-100.0, 100.0).
 * NOTE(review): the loop that drives `i`, and any other initialisation, sits on
 * lines that are missing from this listing (see the gaps in the embedded
 * line numbers).
 */
46 void InitSmoke(SmokeV *s)
50 s->nextSubParticle = 0;
51 s->lastParticleTime = 0.25f;
/* Random starting value for each s->old[] entry. */
55 s->old[i] = RandFlt(-100.0, 100.0);
/*
 * UpdateSmoke_ScalarBase: plain-C smoke update. Emits new puffs at the star
 * position and then advances every live sub-particle.
 *
 * global - not referenced on the lines visible in this listing.
 * flurry - supplies the star, the sparks (indexed 0..numStreams-1), fTime,
 *          fDeltaTime, dframe and drag.
 * s      - smoke state updated in place (p[], nextParticle, nextSubParticle,
 *          lastParticleTime, old[]).
 *
 * NOTE(review): this listing has gaps (see the jumps in the embedded line
 * numbers); local declarations, closing braces and some statements are not
 * shown, so comments below only describe what the visible lines do.
 */
59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* New puffs are born at the star's current position. */
62 float sx = flurry->star->position[0];
63 float sy = flurry->star->position[1];
64 float sz = flurry->star->position[2];
66 double frameRateModifier;
72 /* release 12 puffs every frame */
/* Emission is gated on time: at least 1/121 of an fTime unit must have passed since lastParticleTime. */
73 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
74 float dx,dy,dz,deltax,deltay,deltaz;
/* One new sub-particle per stream, written into slot nextSubParticle of p[nextParticle]. */
86 for(i=0;i<flurry->numStreams;i++) {
87 float streamSpeedCoherenceFactor;
/* Initial velocity is deltax/deltay/deltaz, which are assigned on lines not shown here. */
89 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
90 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
91 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
92 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
93 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
94 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
95 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
96 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
97 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-puff speed factor, clamped so it never goes negative. */
98 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
99 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
100 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
101 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
102 rsquared = (dx*dx+dy*dy+dz*dz);
103 f = streamSpeed * streamSpeedCoherenceFactor;
/* Dividing by the distance normalises (dx,dy,dz), so the subtraction below adds a velocity of magnitude f aimed at the spark. */
105 mag = f / (float) sqrt(rsquared);
107 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
108 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
109 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Channels 0..2 copy the spark colour, each with its own RandBell scale; channel 3 starts from 0.85f. */
110 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
111 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
112 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
113 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Birth time, alive flag, and a starting animation frame in 0..63. */
114 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
115 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
116 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Slots 0..3 of a group are filled in turn. NOTE(review): the statements that advance/reset nextParticle fall in the listing gaps. */
117 s->nextSubParticle++;
118 if (s->nextSubParticle==4) {
120 s->nextSubParticle=0;
122 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
124 s->nextSubParticle = 0;
/* NOTE(review): lastParticleTime is stamped twice; presumably the two assignments belong to different branches whose braces are not shown. */
128 s->lastParticleTime = flurry->fTime;
131 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (the loop over i is not shown). */
136 s->old[i] = flurry->star->position[i];
/* frameRate = dframe / fTime; forces below are scaled by 42.5f / frameRate. */
139 frameRate = ((double) flurry->dframe)/(flurry->fTime);
140 frameRateModifier = 42.5f / frameRate;
/* i walks groups of four sub-particles; k selects the slot inside the group (its loop header is not visible). */
142 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead slots are tested here; the skip statement itself is on a missing line. */
152 if (s->p[i].dead.i[k]) {
156 deltax = s->p[i].delta[0].f[k];
157 deltay = s->p[i].delta[1].f[k];
158 deltaz = s->p[i].delta[2].f[k];
/* Accumulate the pull of every spark on this sub-particle. */
160 for(j=0;j<flurry->numStreams;j++) {
161 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
162 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
163 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
164 rsquared = (dx*dx+dy*dy+dz*dz);
/* Inverse-square strength, scaled for the frame rate. */
166 f = (gravity/rsquared) * frameRateModifier;
/* The spark whose index equals (i*4+k) % numStreams pulls harder by a factor (1 + streamBias). */
168 if ((((i*4)+k) % flurry->numStreams) == j) {
169 f *= 1.0f + streamBias;
/* Normalise the direction by the distance, then apply the pull towards the spark. */
172 mag = f / (float) sqrt(rsquared);
174 deltax -= (dx * mag);
175 deltay -= (dy * mag);
176 deltaz -= (dz * mag);
179 /* slow this particle down by flurry->drag */
180 deltax *= flurry->drag;
181 deltay *= flurry->drag;
182 deltaz *= flurry->drag;
/* A squared speed of 25000000.0f or more marks the sub-particle dead. */
184 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
185 s->p[i].dead.i[k] = 1;
189 /* update the position */
190 s->p[i].delta[0].f[k] = deltax;
191 s->p[i].delta[1].f[k] = deltay;
192 s->p[i].delta[2].f[k] = deltaz;
/* j is reused here, presumably as a component index (loop header not shown): save the old position, then integrate by fDeltaTime. */
194 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
195 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_ScalarFrsqrte: scalar smoke update that follows the same visible
 * steps as UpdateSmoke_ScalarBase but also contains inline-asm estimate
 * instructions (frsqrte, fres) next to the plain sqrt()/divide forms.
 *
 * global - not referenced on the lines visible in this listing.
 * flurry - supplies the star, the sparks (indexed 0..numStreams-1), fTime,
 *          fDeltaTime, dframe and drag.
 * s      - smoke state updated in place.
 *
 * NOTE(review): both the exact and the estimate forms appear back to back;
 * presumably conditional-compilation lines that choose between them are among
 * the lines missing from this listing. Confirm before relying on either path.
 */
204 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* New puffs are born at the star's current position. */
207 float sx = flurry->star->position[0];
208 float sy = flurry->star->position[1];
209 float sz = flurry->star->position[2];
211 double frameRateModifier;
217 /* release 12 puffs every frame */
/* Emission is gated on time: at least 1/121 of an fTime unit since lastParticleTime. */
218 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
219 float dx,dy,dz,deltax,deltay,deltaz;
/* One new sub-particle per stream, written into slot nextSubParticle of p[nextParticle]. */
231 for(i=0;i<flurry->numStreams;i++) {
232 float streamSpeedCoherenceFactor;
/* Initial velocity comes from deltax/deltay/deltaz, assigned on lines not shown. */
234 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
235 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
236 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
237 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
238 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
239 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
240 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
241 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
242 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-puff speed factor, clamped at zero. */
243 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
244 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
245 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
246 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
247 rsquared = (dx*dx+dy*dy+dz*dz);
248 f = streamSpeed * streamSpeedCoherenceFactor;
250 mag = f / (float) sqrt(rsquared);
252 reciprocal square-root estimate replaced above divide and call to system sqrt()
254 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
258 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
259 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
260 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Channels 0..2 copy the spark colour with independent RandBell scales; channel 3 starts from 0.85f. */
261 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
262 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
263 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
264 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Birth time, alive flag, and a starting animation frame in 0..63. */
265 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
266 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
267 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Slots 0..3 of a group are filled in turn; the nextParticle advance/reset lines are in the listing gaps. */
268 s->nextSubParticle++;
269 if (s->nextSubParticle==4) {
271 s->nextSubParticle=0;
273 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
275 s->nextSubParticle = 0;
/* NOTE(review): two stamps of lastParticleTime; presumably on different branches whose braces are not shown. */
279 s->lastParticleTime = flurry->fTime;
282 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (the loop over i is not shown). */
287 s->old[i] = flurry->star->position[i];
/* frameRate = dframe / fTime; forces below are scaled by 42.5f / frameRate. */
290 frameRate = ((double) flurry->dframe)/(flurry->fTime);
291 frameRateModifier = 42.5f / frameRate;
/* i walks groups of four sub-particles; k selects the slot inside the group (its loop header is not visible). */
293 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead slots are tested here; the skip statement itself is on a missing line. */
303 if (s->p[i].dead.i[k]) {
307 deltax = s->p[i].delta[0].f[k];
308 deltay = s->p[i].delta[1].f[k];
309 deltaz = s->p[i].delta[2].f[k];
/* Accumulate the pull of every spark on this sub-particle. */
311 for(j=0;j<flurry->numStreams;j++) {
312 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
313 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
314 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
315 rsquared = (dx*dx+dy*dy+dz*dz);
/* Two ways of forming gravity*frameRateModifier/rsquared follow: an `fres` asm estimate (presumably 1/rsquared, confirm against the PowerPC ISA) and an exact divide. */
318 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
319 f *= gravity*frameRateModifier;
321 f = ( gravity * frameRateModifier ) / rsquared;
/* The spark whose index equals (i*4+k) % numStreams pulls harder by a factor (1 + streamBias). */
323 if((((i*4)+k) % flurry->numStreams) == j) {
324 f *= 1.0f + streamBias;
327 mag = f / (float) sqrt(rsquared);
329 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
331 deltax -= (dx * mag);
332 deltay -= (dy * mag);
333 deltaz -= (dz * mag);
336 /* slow this particle down by flurry->drag */
337 deltax *= flurry->drag;
338 deltay *= flurry->drag;
339 deltaz *= flurry->drag;
/* A squared speed of 25000000.0f or more marks the sub-particle dead. */
341 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
342 s->p[i].dead.i[k] = 1;
346 /* update the position */
347 s->p[i].delta[0].f[k] = deltax;
348 s->p[i].delta[1].f[k] = deltay;
349 s->p[i].delta[2].f[k] = deltaz;
/* j is reused here, presumably as a component index (loop header not shown): save the old position, then integrate by fDeltaTime. */
351 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
352 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_VectorBase: AltiVec smoke update. Puff emission uses the same
 * scalar code as the other variants; the per-particle physics then handles
 * the four sub-particles of s->p[i] at once, one per vector lane.
 *
 * global - not referenced on the lines visible in this listing.
 * flurry - supplies the star, the sparks (indexed 0..numStreams-1), fTime,
 *          fDeltaTime, dframe and drag.
 * s      - smoke state updated in place.
 *
 * NOTE(review): this listing has gaps; the declarations of dragV, mod and mag,
 * closing braces and some statements are not shown.
 */
362 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
365 float sx = flurry->star->position[0];
366 float sy = flurry->star->position[1];
367 float sz = flurry->star->position[2];
369 floatToVector frameRateModifier;
370 floatToVector gravityV;
372 floatToVector deltaTimeV;
/* Squared-speed threshold above which a sub-particle is marked dead (same value as the scalar variants). */
373 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
374 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
375 const vector float biasConst = (vector float)(streamBias);
/* Broadcast each scalar to all four lanes: write lane 0 through the union, then splat lane 0. */
377 gravityV.f[0] = gravity;
378 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
380 dragV.f[0] = flurry->drag;
381 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
383 deltaTimeV.f[0] = flurry->fDeltaTime;
384 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
389 /* release 12 puffs every frame */
/* Emission is gated on time: at least 1/121 of an fTime unit since lastParticleTime. */
390 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
391 float dx,dy,dz,deltax,deltay,deltaz;
/* One new sub-particle per stream, written into slot nextSubParticle of p[nextParticle]. */
403 for(i=0;i<flurry->numStreams;i++) {
404 float streamSpeedCoherenceFactor;
/* Initial velocity comes from deltax/deltay/deltaz, assigned on lines not shown. */
406 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
407 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
408 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
409 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
410 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
411 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
412 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
413 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
414 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-puff speed factor, clamped at zero. */
415 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
416 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
417 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
418 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
419 rsquared = (dx*dx+dy*dy+dz*dz);
420 f = streamSpeed * streamSpeedCoherenceFactor;
/* NOTE(review): exact and estimate forms of mag both appear; presumably selected by preprocessor lines not shown. */
422 mag = f / (float) sqrt(rsquared);
424 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
428 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
429 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
430 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Channels 0..2 copy the spark colour with independent RandBell scales; channel 3 starts from 0.85f. */
431 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
432 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
433 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
434 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
435 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
436 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
437 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Slots 0..3 of a group are filled in turn; the nextParticle advance/reset lines are in the listing gaps. */
438 s->nextSubParticle++;
439 if (s->nextSubParticle==4) {
441 s->nextSubParticle=0;
443 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
445 s->nextSubParticle = 0;
/* NOTE(review): two stamps of lastParticleTime; presumably on different branches whose braces are not shown. */
449 s->lastParticleTime = flurry->fTime;
452 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (the loop over i is not shown). */
457 s->old[i] = flurry->star->position[i];
/* frameRateModifier ends up holding gravity * 42.5f / frameRate in every lane. */
460 frameRate = ((double) flurry->dframe)/(flurry->fTime);
461 frameRateModifier.f[0] = 42.5f / frameRate;
462 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
464 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* Each iteration handles the four sub-particles stored in s->p[i]. */
466 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
467 /* floatToVector f; */
468 vector float deltax, deltay, deltaz;
469 vector float distTemp;
470 vector unsigned int deadTemp;
471 /* floatToVector infopos0, infopos1, infopos2; */
473 vector unsigned int jVec;
/* Data-stream prefetch hint for the group four entries ahead; the control word is taken as-is. */
476 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True only when all four dead flags are non-zero; the skip statement itself is on a missing line. */
478 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
482 deltax = s->p[i].delta[0].v;
483 deltay = s->p[i].delta[1].v;
484 deltaz = s->p[i].delta[2].v;
/*
 * mod.i[n] is the index of the stream that gets the streamBias boost for lane n.
 * `+` binds tighter than `<<`, so (i<<2 + 0) parses as i << (2 + 0), i.e. i*4.
 * The following lanes count up from mod.i[0]; the wrap-around assignments for
 * the `== numStreams` cases are on lines not shown.
 */
486 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
487 if(mod.i[0]+1 == flurry->numStreams) {
490 mod.i[1] = mod.i[0]+1;
492 if(mod.i[1]+1 == flurry->numStreams) {
495 mod.i[2] = mod.i[1]+1;
497 if(mod.i[2]+1 == flurry->numStreams) {
500 mod.i[3] = mod.i[2]+1;
/* x ^ x zeroes jVec; it then counts the stream index in every lane. */
503 jVec = vec_xor(jVec, jVec);
505 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
506 for(j=0; j<flurry->numStreams;j++) {
507 vector float ip0, ip1 = (vector float)(0.0), ip2;
508 vector float dx, dy, dz;
509 vector float rsquared, f;
510 vector float one_over_rsquared;
511 vector float biasTemp;
513 vector bool int biasOr;
/* Load the spark position, which may be unaligned: a second quadword is read only when the address offset within 16 bytes is >= 8, then vec_perm with the vec_lvsl mask shifts the data into place. */
515 ip0 = vec_ld(0, flurry->spark[j]->position);
516 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
517 ip1 = vec_ld(16, flurry->spark[j]->position);
520 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast the spark's x, y and z to all lanes (ip0 is overwritten last because the other two read from it). */
521 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
522 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
523 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
525 dx = vec_sub(s->p[i].position[0].v, ip0);
526 dy = vec_sub(s->p[i].position[1].v, ip1);
527 dz = vec_sub(s->p[i].position[2].v, ip2);
/* rsquared = dx*dx + dy*dy + dz*dz per lane. */
529 rsquared = vec_madd(dx, dx, zero);
530 rsquared = vec_madd(dy, dy, rsquared);
531 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose biased stream is j get (1 + streamBias); all other lanes get 1. */
533 biasOr = vec_cmpeq(jVec, mod.v);
534 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier * estimate(1 / rsquared). */
536 f = vec_madd(biasTemp, frameRateModifier.v, zero);
537 one_over_rsquared = vec_re(rsquared);
538 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f * estimate(1 / sqrt(rsquared)). */
540 mag = vec_rsqrte(rsquared);
541 mag = vec_madd(mag, f, zero);
/* Vector form of the scalar `delta -= d * mag` (vec_nmsub(a,b,c) computes c - a*b per the AltiVec reference). */
543 deltax = vec_nmsub(dx, mag, deltax);
544 deltay = vec_nmsub(dy, mag, deltay);
545 deltaz = vec_nmsub(dz, mag, deltaz);
547 jVec = vec_add(jVec, (vector unsigned int)(1));
550 /* slow this particle down by flurry->drag */
551 deltax = vec_madd(deltax, dragV.v, zero);
552 deltay = vec_madd(deltay, dragV.v, zero);
553 deltaz = vec_madd(deltaz, dragV.v, zero);
/* distTemp = squared speed per lane. */
555 distTemp = vec_madd(deltax, deltax, zero);
556 distTemp = vec_madd(deltay, deltay, distTemp);
557 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* Reduce the comparison mask to 1 in each too-fast lane and OR it into the dead flags. */
559 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
560 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
561 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* Re-test whether the whole group is now dead; the statement it guards is on a missing line. */
562 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
566 /* update the position */
567 s->p[i].delta[0].v = deltax;
568 s->p[i].delta[1].v = deltay;
569 s->p[i].delta[2].v = deltaz;
/* j is reused here, presumably as a component index (loop header not shown): save the old position, then position += delta * fDeltaTime. */
571 s->p[i].oldposition[j].v = s->p[i].position[j].v;
572 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * UpdateSmoke_VectorUnrolled: AltiVec smoke update whose spark loop is
 * unrolled to handle four sparks (suffixes A/B/C/D) per iteration, followed by
 * a one-at-a-time loop for the remaining sparks.
 *
 * info - not referenced on the lines visible in this listing.
 * s    - smoke state updated in place.
 *
 * NOTE(review): the body reads `flurry->...` throughout, but `flurry` is not a
 * parameter here (the other Update variants take global, flurry, s). Unless
 * `flurry` is supplied by a line missing from this listing, this function
 * cannot compile as written. Confirm the intended signature.
 * NOTE(review): this listing has gaps; declarations of dragV, mod, mag and
 * magA..magD, closing braces and some statements are not shown.
 */
577 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
580 float sx = flurry->star->position[0];
581 float sy = flurry->star->position[1];
582 float sz = flurry->star->position[2];
584 floatToVector frameRateModifier;
585 floatToVector gravityV;
587 floatToVector deltaTimeV;
/* Squared-speed threshold above which a sub-particle is marked dead. */
588 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
589 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
590 const vector float biasConst = (vector float)(streamBias);
/* Broadcast each scalar to all four lanes: write lane 0 through the union, then splat lane 0. */
592 gravityV.f[0] = gravity;
593 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
595 dragV.f[0] = flurry->drag;
596 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
598 deltaTimeV.f[0] = flurry->fDeltaTime;
599 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
604 /* release 12 puffs every frame */
/* Emission is gated on time: at least 1/121 of an fTime unit since lastParticleTime. */
605 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
606 float dx,dy,dz,deltax,deltay,deltaz;
/* One new sub-particle per stream, written into slot nextSubParticle of p[nextParticle]. */
618 for(i=0;i<flurry->numStreams;i++) {
619 float streamSpeedCoherenceFactor;
/* Initial velocity comes from deltax/deltay/deltaz, assigned on lines not shown. */
621 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
622 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
623 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
624 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
625 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
626 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
627 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
628 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
629 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random per-puff speed factor, clamped at zero. */
630 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
631 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
632 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
633 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
634 rsquared = (dx*dx+dy*dy+dz*dz);
635 f = streamSpeed * streamSpeedCoherenceFactor;
/* NOTE(review): exact and estimate forms of mag both appear; presumably selected by preprocessor lines not shown. */
637 mag = f / (float) sqrt(rsquared);
639 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
643 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
644 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
645 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Channels 0..2 copy the spark colour with independent RandBell scales; channel 3 starts from 0.85f. */
646 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
647 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
648 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
649 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
650 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
651 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
652 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Slots 0..3 of a group are filled in turn; the nextParticle advance/reset lines are in the listing gaps. */
653 s->nextSubParticle++;
654 if (s->nextSubParticle==4) {
656 s->nextSubParticle=0;
658 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
660 s->nextSubParticle = 0;
/* NOTE(review): two stamps of lastParticleTime; presumably on different branches whose braces are not shown. */
664 s->lastParticleTime = flurry->fTime;
667 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old[] (the loop over i is not shown). */
672 s->old[i] = flurry->star->position[i];
/* frameRateModifier ends up holding gravity * 42.5f / frameRate in every lane. */
675 frameRate = ((double) flurry->dframe)/(flurry->fTime);
676 frameRateModifier.f[0] = 42.5f / frameRate;
677 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
679 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* Each iteration handles the four sub-particles stored in s->p[i]. */
681 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
682 /* floatToVector f; */
683 vector float deltax, deltay, deltaz;
684 vector float distTemp;
685 vector unsigned int deadTemp;
686 /* floatToVector infopos0, infopos1, infopos2; */
688 vector unsigned int jVec;
/* Integer 1 and float 1.0 in every lane, built without loading a constant from memory. */
689 vector unsigned int intOne = vec_splat_u32(1);
690 vector float floatOne = vec_ctf(intOne, 0);
/* Data-stream prefetch hint for the group four entries ahead; the control word is taken as-is. */
693 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True only when all four dead flags are non-zero; the skip statement itself is on a missing line. */
695 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
699 deltax = s->p[i].delta[0].v;
700 deltay = s->p[i].delta[1].v;
701 deltaz = s->p[i].delta[2].v;
/*
 * mod.i[n] is the index of the stream that gets the streamBias boost for lane n.
 * `+` binds tighter than `<<`, so (i<<2 + 0) parses as i << (2 + 0), i.e. i*4.
 * The wrap-around assignments for the `== numStreams` cases are on lines not shown.
 */
703 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
704 if(mod.i[0]+1 == flurry->numStreams) {
707 mod.i[1] = mod.i[0]+1;
709 if(mod.i[1]+1 == flurry->numStreams) {
712 mod.i[2] = mod.i[1]+1;
714 if(mod.i[2]+1 == flurry->numStreams) {
717 mod.i[3] = mod.i[2]+1;
/* x ^ x zeroes jVec; it then counts the stream index in every lane. */
720 jVec = vec_xor(jVec, jVec);
722 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* Unrolled loop: sparks j, j+1, j+2, j+3 are processed together as A, B, C, D. */
723 for(j=0; j + 3 < flurry->numStreams;j+=4)
725 vector float dxa, dya, dza;
726 vector float dxb, dyb, dzb;
727 vector float dxc, dyc, dzc;
728 vector float dxd, dyd, dzd;
729 vector float ip0a, ip1a;
730 vector float ip0b, ip1b;
731 vector float ip0c, ip1c;
732 vector float ip0d, ip1d;
733 vector float rsquaredA;
734 vector float rsquaredB;
735 vector float rsquaredC;
736 vector float rsquaredD;
737 vector float fA, fB, fC, fD;
738 vector float biasTempA;
739 vector float biasTempB;
740 vector float biasTempC;
741 vector float biasTempD;
747 vector float one_over_rsquaredA;
748 vector float one_over_rsquaredB;
749 vector float one_over_rsquaredC;
750 vector float one_over_rsquaredD;
751 vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
/*
 * Possibly-unaligned load of each spark position: two quadword loads merged
 * with vec_perm using the vec_lvsl mask. Unlike the remainder loop below,
 * the second load is unconditional and uses offset 12 rather than 16.
 */
754 ip0a = vec_ld(0, flurry->spark[j]->position);
755 ip0b = vec_ld(0, flurry->spark[j+1]->position);
756 ip0c = vec_ld(0, flurry->spark[j+2]->position);
757 ip0d = vec_ld(0, flurry->spark[j+3]->position);
758 ip1a = vec_ld( 12, flurry->spark[j]->position );
759 ip1b = vec_ld( 12, flurry->spark[j+1]->position );
760 ip1c = vec_ld( 12, flurry->spark[j+2]->position );
761 ip1d = vec_ld( 12, flurry->spark[j+3]->position );
764 ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
765 ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
766 ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
767 ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
/* dx* = particle x - spark x (spark component broadcast to all lanes first). */
769 dxa = vec_splat( ip0a, 0 );
770 dxb = vec_splat( ip0b, 0 );
771 dxc = vec_splat( ip0c, 0 );
772 dxd = vec_splat( ip0d, 0 );
773 dxa = vec_sub( s->p[i].position[0].v, dxa );
774 dxb = vec_sub( s->p[i].position[0].v, dxb );
775 dxc = vec_sub( s->p[i].position[0].v, dxc );
776 dxd = vec_sub( s->p[i].position[0].v, dxd );
/* Same for y. */
778 dya = vec_splat( ip0a, 1 );
779 dyb = vec_splat( ip0b, 1 );
780 dyc = vec_splat( ip0c, 1 );
781 dyd = vec_splat( ip0d, 1 );
782 dya = vec_sub( s->p[i].position[1].v, dya );
783 dyb = vec_sub( s->p[i].position[1].v, dyb );
784 dyc = vec_sub( s->p[i].position[1].v, dyc );
785 dyd = vec_sub( s->p[i].position[1].v, dyd );
/* Same for z. */
787 dza = vec_splat( ip0a, 2 );
788 dzb = vec_splat( ip0b, 2 );
789 dzc = vec_splat( ip0c, 2 );
790 dzd = vec_splat( ip0d, 2 );
791 dza = vec_sub( s->p[i].position[2].v, dza );
792 dzb = vec_sub( s->p[i].position[2].v, dzb );
793 dzc = vec_sub( s->p[i].position[2].v, dzc );
794 dzd = vec_sub( s->p[i].position[2].v, dzd );
/* rsquared* = dx*dx + dy*dy + dz*dz per lane, accumulated in three multiply-add steps. */
796 rsquaredA = vec_madd( dxa, dxa, zero );
797 rsquaredB = vec_madd( dxb, dxb, zero );
798 rsquaredC = vec_madd( dxc, dxc, zero );
799 rsquaredD = vec_madd( dxd, dxd, zero );
801 rsquaredA = vec_madd( dya, dya, rsquaredA );
802 rsquaredB = vec_madd( dyb, dyb, rsquaredB );
803 rsquaredC = vec_madd( dyc, dyc, rsquaredC );
804 rsquaredD = vec_madd( dyd, dyd, rsquaredD );
806 rsquaredA = vec_madd( dza, dza, rsquaredA );
807 rsquaredB = vec_madd( dzb, dzb, rsquaredB );
808 rsquaredC = vec_madd( dzc, dzc, rsquaredC );
809 rsquaredD = vec_madd( dzd, dzd, rsquaredD );
/* Compare the running stream counter with mod for each of the four sparks, advancing jVec by one after every compare. */
811 biasOrA = vec_cmpeq( jVec, mod.v );
812 jVec = vec_add(jVec, intOne);
813 biasOrB = vec_cmpeq( jVec, mod.v );
814 jVec = vec_add(jVec, intOne);
815 biasOrC = vec_cmpeq( jVec, mod.v );
816 jVec = vec_add(jVec, intOne);
817 biasOrD = vec_cmpeq( jVec, mod.v );
818 jVec = vec_add(jVec, intOne);
/* Lanes whose biased stream matches get (1 + streamBias); all other lanes get 1. */
820 biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
821 biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
822 biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
823 biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
/* f* = bias * gravity * frameRateModifier * estimate(1 / rsquared). */
825 fA = vec_madd( biasTempA, frameRateModifier.v, zero);
826 fB = vec_madd( biasTempB, frameRateModifier.v, zero);
827 fC = vec_madd( biasTempC, frameRateModifier.v, zero);
828 fD = vec_madd( biasTempD, frameRateModifier.v, zero);
829 one_over_rsquaredA = vec_re( rsquaredA );
830 one_over_rsquaredB = vec_re( rsquaredB );
831 one_over_rsquaredC = vec_re( rsquaredC );
832 one_over_rsquaredD = vec_re( rsquaredD );
833 fA = vec_madd( fA, one_over_rsquaredA, zero);
834 fB = vec_madd( fB, one_over_rsquaredB, zero);
835 fC = vec_madd( fC, one_over_rsquaredC, zero);
836 fD = vec_madd( fD, one_over_rsquaredD, zero);
/* mag* = f* * estimate(1 / sqrt(rsquared)). */
837 magA = vec_rsqrte( rsquaredA );
838 magB = vec_rsqrte( rsquaredB );
839 magC = vec_rsqrte( rsquaredC );
840 magD = vec_rsqrte( rsquaredD );
841 magA = vec_madd( magA, fA, zero );
842 magB = vec_madd( magB, fB, zero );
843 magC = vec_madd( magC, fC, zero );
844 magD = vec_madd( magD, fD, zero );
/* Apply the four pulls in turn: delta = delta - d * mag (vec_nmsub(a,b,c) computes c - a*b per the AltiVec reference). */
845 deltax = vec_nmsub( dxa, magA, deltax );
846 deltay = vec_nmsub( dya, magA, deltay );
847 deltaz = vec_nmsub( dza, magA, deltaz );
849 deltax = vec_nmsub( dxb, magB, deltax );
850 deltay = vec_nmsub( dyb, magB, deltay );
851 deltaz = vec_nmsub( dzb, magB, deltaz );
853 deltax = vec_nmsub( dxc, magC, deltax );
854 deltay = vec_nmsub( dyc, magC, deltay );
855 deltaz = vec_nmsub( dzc, magC, deltaz );
857 deltax = vec_nmsub( dxd, magD, deltax );
858 deltay = vec_nmsub( dyd, magD, deltay );
859 deltaz = vec_nmsub( dzd, magD, deltaz );
/* Remainder loop: handles the sparks left over when numStreams is not a multiple of four, one at a time. */
863 for(;j<flurry->numStreams;j++) {
864 vector float ip0, ip1 = (vector float)(0.0), ip2;
865 vector float dx, dy, dz;
866 vector float rsquared, f;
867 vector float one_over_rsquared;
868 vector float biasTemp;
870 vector bool int biasOr;
/* Possibly-unaligned load: the second quadword is read only when the address offset within 16 bytes is >= 8. */
872 ip0 = vec_ld(0, flurry->spark[j]->position);
873 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
874 ip1 = vec_ld(16, flurry->spark[j]->position);
877 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast the spark's x, y and z to all lanes (ip0 is overwritten last because the other two read from it). */
878 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
879 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
880 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
882 dx = vec_sub(s->p[i].position[0].v, ip0);
883 dy = vec_sub(s->p[i].position[1].v, ip1);
884 dz = vec_sub(s->p[i].position[2].v, ip2);
886 rsquared = vec_madd(dx, dx, zero);
887 rsquared = vec_madd(dy, dy, rsquared);
888 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose biased stream is j get (1 + streamBias); all other lanes get 1. */
890 biasOr = vec_cmpeq(jVec, mod.v);
891 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
893 f = vec_madd(biasTemp, frameRateModifier.v, zero);
894 one_over_rsquared = vec_re(rsquared);
895 f = vec_madd(f, one_over_rsquared, zero);
897 mag = vec_rsqrte(rsquared);
898 mag = vec_madd(mag, f, zero);
900 deltax = vec_nmsub(dx, mag, deltax);
901 deltay = vec_nmsub(dy, mag, deltay);
902 deltaz = vec_nmsub(dz, mag, deltaz);
904 jVec = vec_add(jVec, (vector unsigned int)(1));
907 /* slow this particle down by flurry->drag */
908 deltax = vec_madd(deltax, dragV.v, zero);
909 deltay = vec_madd(deltay, dragV.v, zero);
910 deltaz = vec_madd(deltaz, dragV.v, zero);
/* distTemp = squared speed per lane. */
912 distTemp = vec_madd(deltax, deltax, zero);
913 distTemp = vec_madd(deltay, deltay, distTemp);
914 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* Reduce the comparison mask to 1 in each too-fast lane and OR it into the dead flags. */
916 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
917 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
918 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* Re-test whether the whole group is now dead; the statement it guards is on a missing line. */
919 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
923 /* update the position */
924 s->p[i].delta[0].v = deltax;
925 s->p[i].delta[1].v = deltay;
926 s->p[i].delta[2].v = deltaz;
/* j is reused here, presumably as a component index (loop header not shown): save the old position, then position += delta * fDeltaTime. */
928 s->p[i].oldposition[j].v = s->p[i].position[j].v;
929 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * DrawSmoke_Scalar: project every live sub-particle to screen space, fill the
 * seraphimColors / seraphimTextures / seraphimVertices arrays with one quad
 * per visible puff, and submit them with a single glDrawArrays(GL_QUADS) call.
 *
 * global     - supplies the viewport size (sys_glWidth, sys_glHeight).
 * flurry     - supplies fTime and streamExpansion.
 * s          - smoke state; dead flags and animFrame are updated in place and
 *              the seraphim* arrays are overwritten.
 * brightness - not referenced on the lines visible in this listing.
 *
 * NOTE(review): this listing has gaps (see the jumps in the embedded line
 * numbers); declarations, closing braces and some statements are not shown.
 */
937 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* Widths are authored for a 1024-wide viewport and scaled to the actual one. */
947 float screenRatio = global->sys_glWidth / 1024.0f;
948 float hslash2 = global->sys_glHeight * 0.5f;
949 float wslash2 = global->sys_glWidth * 0.5f;
/* Width at which a puff is retired: the width reached after 2.5 fTime units of expansion. */
952 width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
954 for (i=0;i<NUMSMOKEPARTICLES/4;i++)
956 for (k=0; k<4; k++) {
/* Dead slots are tested here; the skip statement itself is on a missing line. */
960 if (s->p[i].dead.i[k]) {
/* A puff grows linearly with its age (fTime - birth time). */
963 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
964 if (thisWidth >= width)
966 s->p[i].dead.i[k] = 1;
/* Perspective projection; note both x and y are scaled by sys_glWidth. */
969 z = s->p[i].position[2].f[k];
970 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
971 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
972 oldz = s->p[i].oldposition[2].f[k];
/* Cull puffs more than 50 units outside the viewport or with a current/previous depth below 25. */
973 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
/* On-screen width shrinks with depth but never drops below 1. */
978 w = MAX_(1.0f,thisWidth/z);
/* Project the previous position the same way; the quad spans from the old to the new screen point. */
980 float oldx = s->p[i].oldposition[0].f[k];
981 float oldy = s->p[i].oldposition[1].f[k];
982 float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
983 float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
984 float dx = (sx-oldscreenx);
985 float dy = (sy-oldscreeny);
987 float d = FastDistance2D(dx, dy);
998 ow = MAX_(1.0f,thisWidth/oldz);
1011 float m = 1.0f + sm;
/* Advance the animation frame, wrapping within 0..63. */
1020 s->p[i].animFrame.i[k]++;
1021 if (s->p[i].animFrame.i[k] >= 64)
1023 s->p[i].animFrame.i[k] = 0;
/*
 * The 64 frames are addressed as an 8x8 grid with 0.125 per cell: column =
 * frame & 7, row = frame >> 3. This must be a bitwise AND; the previous
 * logical `&&7` evaluated to 0 or 1, so only the first two columns were
 * ever selected.
 */
1026 u0 = (s->p[i].animFrame.i[k]&7) * 0.125f;
1027 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
/* Colour multiplier falls as the puff approaches its retirement width. */
1032 cm = (1.375f - thisWidth/width);
/* 3 matches NOT_QUITE_DEAD defined in this file: such a puff is moved to dead (1) here. */
1033 if (s->p[i].dead.i[k] == 3)
1036 s->p[i].dead.i[k] = 1;
1040 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1041 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1042 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1043 cmv.f[3] = s->p[i].color[3].f[k]*cm;
/* NOTE(review): a vector-copy form and a per-float loop form of the colour fill both appear below; presumably selected by preprocessor lines not shown. */
1046 /* MDT we can't use vectors in the Scalar routine */
1047 s->seraphimColors[sci++].v = cmv.v;
1048 s->seraphimColors[sci++].v = cmv.v;
1049 s->seraphimColors[sci++].v = cmv.v;
1050 s->seraphimColors[sci++].v = cmv.v;
1054 for (jj = 0; jj < 4; jj++) {
1055 for (ii = 0; ii < 4; ii++) {
1056 s->seraphimColors[sci].f[ii] = cmv.f[ii];
/* Texture coordinates for the quad's four corners: (u0,v0), (u0,v1), (u1,v1), (u1,v0). */
1063 s->seraphimTextures[sti++] = u0;
1064 s->seraphimTextures[sti++] = v0;
1065 s->seraphimTextures[sti++] = u0;
1066 s->seraphimTextures[sti++] = v1;
1068 s->seraphimTextures[sti++] = u1;
1069 s->seraphimTextures[sti++] = v1;
1070 s->seraphimTextures[sti++] = u1;
1071 s->seraphimTextures[sti++] = v0;
/* Two 2-D vertices are packed per seraphimVertices entry: first the pair at the new position... */
1073 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1074 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1075 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1076 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
/* ...then the pair at the old position. */
1079 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1080 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1081 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1082 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
/* Submit everything in one call: si quads, four vertices each. */
1088 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1089 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1090 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1091 glDrawArrays(GL_QUADS,0,si*4);
/*
 * DrawSmoke_Vector: AltiVec version of the smoke renderer.
 *
 * Walks s->p[] four particles at a time (one vector lane per particle),
 * projects current and previous positions to screen space, builds one
 * textured quad per visible particle in s->seraphimColors,
 * s->seraphimTextures and s->seraphimVertices, and submits the result
 * with glDrawArrays(GL_QUADS).
 *
 *   global     - read for sys_glWidth / sys_glHeight.
 *   flurry     - read for fTime and streamExpansion.
 *   s          - particle storage (s->p) and the output arrays; the
 *                dead and animFrame fields of each particle are updated.
 *   brightness - multiplied into every particle's colour factor.
 *
 * NOTE(review): some declarations, braces and statements of this routine
 * are not present in this listing; the comments below describe only the
 * statements that are visible.
 */
1097 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
1099 const vector float zero = (vector float)(0.0);
1104 floatToVector width;
1106 floatToVector u0,v0,u1,v1;
1107 vector float one_over_z;
1110 float screenRatio = global->sys_glWidth / 1024.0f;
1111 float hslash2 = global->sys_glHeight * 0.5f;
1112 float wslash2 = global->sys_glWidth * 0.5f;
1114 floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1115 floatToVector glWidthV;
1117 vector float cmv[4];
1118 vector float svec[4], ovec[4];
1119 vector float oldscreenx, oldscreeny;
1121 vector float frameAnd7;
1122 vector float frameShift3;
1123 vector float one_over_width;
1124 vector float dx, dy;
1126 vector unsigned int vSi = vec_splat_u32(0);
1127 const vector float eighth = (vector float)(0.125);
1128 float glWidth50 = global->sys_glWidth + 50.0f;
1129 float glHeight50 = global->sys_glHeight + 50.0f;
1130 vector float vGLWidth50, vGLHeight50;
1131 unsigned int blitBool;
/* Data-stream prefetch hint for the start of the particle array. */
1133 vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
/*
 * Broadcast the two scalar clip limits into all four lanes.  vec_lde
 * places the float in whichever lane matches its address, so a permute
 * derived from vec_lvsl is used to replicate that lane everywhere.
 */
1136 vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1137 vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1138 permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1139 permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1140 vGLWidth50 = vec_lde( 0, &glWidth50 );
1141 vGLHeight50 = vec_lde( 0, &glHeight50 );
1142 vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1143 vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
/*
 * Broadcast each loop-invariant scalar: write it to lane 0 through the
 * float view of the union, then splat lane 0 across the vector.
 */
1146 width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1147 width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1149 briteV.f[0] = brightness;
1150 briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1152 fTimeV.f[0] = (float) flurry->fTime;
1153 fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1155 expansionV.f[0] = flurry->streamExpansion;
1156 expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1158 screenRatioV.f[0] = screenRatio;
1159 screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1161 hslash2V.f[0] = hslash2;
1162 hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1164 wslash2V.f[0] = wslash2;
1165 wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1167 streamSizeV.f[0] = streamSize;
1168 streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1170 glWidthV.f[0] = global->sys_glWidth;
1171 glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
/* Each s->p[i] packs four particles, one per vector lane. */
1173 for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1174 vector float thisWidth;
1176 vector float oldx, oldy, one_over_oldz;
1177 vector float xabs, yabs, mn;
1179 vector float one_over_d;
1180 vector bool int dnz;
/* Prefetch hint a few groups ahead of the one being processed. */
1183 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
/* Nothing to do when all four particles in this group are dead. */
1185 if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1187 blitBool = 0; /* keep track of particles that actually need to be drawn */
/* thisWidth = ((fTime - time) * streamExpansion + streamSize) * screenRatio */
1189 thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1190 thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1191 thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
/* vec_re yields a reciprocal estimate, used in place of a divide by z. */
1193 z.v = s->p[i].position[2].v;
1194 one_over_z = vec_re(z.v);
/* Project current position: sx = x*glWidth/z + w/2, sy = y*glWidth/z + h/2. */
1196 sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1197 sx = vec_madd(sx, one_over_z, wslash2V.v);
1198 sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1199 sy = vec_madd(sy, one_over_z, hslash2V.v);
1201 oldz = s->p[i].oldposition[2].v;
/* Half-width at the current end of the streak, never below one pixel. */
1203 w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
/* Same projection for the previous position. */
1205 oldx = s->p[i].oldposition[0].v;
1206 oldy = s->p[i].oldposition[1].v;
1207 one_over_oldz = vec_re(oldz);
1208 oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1209 oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1210 oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1211 oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1212 dx = vec_sub(sx,oldscreenx);
1213 dy = vec_sub(sy,oldscreeny);
/*
 * d = xabs + yabs - 0.6875 * min(xabs, yabs).
 * NOTE(review): the assignments to xabs / yabs are not in this listing;
 * presumably |dx| and |dy|, making d an approximate 2D distance - confirm.
 */
1217 mn = vec_min(xabs,yabs);
1218 d = vec_add(xabs,yabs);
1219 d = vec_madd(mn, (vector float)(-0.6875), d);
/* Half-width at the old end of the streak, never below one pixel. */
1221 ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
/*
 * sm = w/d and os = ow/d, forced to zero in lanes where d is not > 0 so
 * the reciprocal of a zero distance is never used.
 */
1222 one_over_d = vec_re(d);
1223 dnz = vec_cmpgt(d, zero);
1224 sm = vec_madd(w, one_over_d, zero);
1225 sm = vec_and(sm, dnz);
1226 os = vec_madd(ow, one_over_d, zero);
1227 os = vec_and(os, dnz);
/*
 * Build the per-lane skip mask (all ones = do not draw this particle):
 * already dead, grown to full width, more than 50 px outside the
 * viewport on any side, or z / oldz below 25.
 */
1230 intToVector tempMask;
1231 vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1232 vector bool int gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1233 vector bool int glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1234 vector bool int glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1235 vector bool int test50x = vec_cmplt( sx, (vector float) (-50.0) );
1236 vector bool int test50y = vec_cmplt( sy, (vector float) (-50.0) );
1237 vector bool int testz = vec_cmplt( z.v, (vector float) (25.0) );
1238 vector bool int testoldz = vec_cmplt( oldz, (vector float) (25.0) );
1239 mask = vec_or( mask, gtMask );
/*
 * Persist the dead flag now, before the visibility tests are merged in:
 * only "was dead" or "reached full width" marks a particle dead;
 * off-screen / too-near particles are merely skipped this frame.
 */
1240 s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1241 mask = vec_or( mask, glWidth50Test );
1242 mask = vec_or( mask, glHeight50Test );
1243 mask = vec_or( mask, test50x );
1244 mask = vec_or( mask, test50y );
1245 mask = vec_or( mask, testz );
1246 mask = vec_or( mask, testoldz );
1247 tempMask.v = (vector unsigned int)mask;
/*
 * ~mask is -1 in lanes that will be drawn, so subtracting it advances
 * animFrame by one there; the AND with 63 wraps the frame to 0..63.
 */
1249 s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1250 s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
/* Texture cell column: u0 = (frame & 7) / 8. */
1252 frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1253 u0.v = vec_madd(frameAnd7, eighth, zero);
/*
 * Texture cell row: v0 = (frame >> 3) / 8.  This previously reused
 * frameAnd7, which made v0 equal to u0 and left frameShift3 unused.
 */
1255 frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1256 v0.v = vec_madd(frameShift3, eighth, zero);
/* Opposite corner of the cell is one eighth further in each direction. */
1258 u1.v = vec_add(u0.v, eighth);
1259 v1.v = vec_add(v0.v, eighth);
/*
 * Colour factor: cm = 1.375 - thisWidth/width in drawn lanes (vec_nmsub
 * computes c - a*b); masked lanes keep whatever cm already held.  The
 * result is then scaled by brightness.
 */
1261 one_over_width = vec_re(width.v);
1262 cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1263 cm.v = vec_madd(cm.v, briteV.v, zero);
/* Per-lane running count of drawn particles (again subtracting -1). */
1265 vSi = vec_sub( vSi, vec_nor( mask, mask ) );
/*
 * Collapse the mask into a 4-bit scalar: bit k of blitBool is set when
 * lane k must be drawn.  The two rotate-and-add steps leave the sum in
 * every lane, so vec_ste can store any element.
 */
1267 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1268 vector unsigned int temp = (vector unsigned int)mask;
1269 temp = vec_andc( blitMask, temp );
1270 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1271 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1272 vec_ste( temp, 0, &blitBool );
/*
 * Interleave the texture coordinates so that, for particle k,
 *   result{1a,1b,2a,2b}[k] = (u0, v0, u0, v1)
 *   result{3a,3b,4a,4b}[k] = (u1, v1, u1, v0)
 * i.e. the four corners of the quad in drawing order.
 */
1277 vector float temp1, temp2, temp3, temp4;
1278 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1280 temp1 = vec_mergeh( u0.v, u0.v );
1281 temp2 = vec_mergel( u0.v, u0.v );
1282 temp3 = vec_mergeh( v0.v, v1.v );
1283 temp4 = vec_mergel( v0.v, v1.v );
1285 result1a = vec_mergeh( temp1, temp3 );
1286 result1b = vec_mergel( temp1, temp3 );
1287 result2a = vec_mergeh( temp2, temp4 );
1288 result2b = vec_mergel( temp2, temp4 );
1290 temp1 = vec_mergeh( u1.v, u1.v );
1291 temp2 = vec_mergel( u1.v, u1.v );
1292 temp3 = vec_mergeh( v1.v, v0.v );
1293 temp4 = vec_mergel( v1.v, v0.v );
1295 result3a = vec_mergeh( temp1, temp3 );
1296 result3b = vec_mergel( temp1, temp3 );
1297 result4a = vec_mergeh( temp2, temp4 );
1298 result4b = vec_mergel( temp2, temp4 );
/*
 * One pair of 16-byte stores (eight floats) per particle.
 * NOTE(review): the per-particle guards and the advance of sti between
 * pairs are not in this listing - confirm against the full file.
 */
1302 vec_st( result1a, 0, &s->seraphimTextures[sti] );
1303 vec_st( result3a, 16, &s->seraphimTextures[sti]);
1308 vec_st( result1b, 0, &s->seraphimTextures[sti]);
1309 vec_st( result3b, 16, &s->seraphimTextures[sti]);
1314 vec_st( result2a, 0, &s->seraphimTextures[sti]);
1315 vec_st( result4a, 16, &s->seraphimTextures[sti]);
1320 vec_st( result2b, 0, &s->seraphimTextures[sti]);
1321 vec_st( result4b, 16, &s->seraphimTextures[sti]);
/* Scale each of the four colour channels by the per-particle factor. */
1327 cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1328 cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1329 cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1330 cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
/*
 * 4x4 transpose: on entry cmv[c] holds channel c for four particles, on
 * exit cmv[k] holds the four channels of particle k.
 */
1332 vector float vI0, vI1, vI2, vI3;
1334 vI0 = vec_mergeh ( cmv[0], cmv[2] );
1335 vI1 = vec_mergeh ( cmv[1], cmv[3] );
1336 vI2 = vec_mergel ( cmv[0], cmv[2] );
1337 vI3 = vec_mergel ( cmv[1], cmv[3] );
1339 cmv[0] = vec_mergeh ( vI0, vI1 );
1340 cmv[1] = vec_mergel ( vI0, vI1 );
1341 cmv[2] = vec_mergeh ( vI2, vI3 );
1342 cmv[3] = vec_mergel ( vI2, vI3 );
/* Data-stream prefetch hint on the transposed colours. */
1345 vec_dst( cmv, 0x0D0100D0, 1 );
1348 vector float sxd, syd;
1349 vector float sxdm, sxdp, sydm, sydp;
1350 vector float oxd, oyd;
1351 vector float oxdm, oxdp, oydm, oydp;
1352 vector float vI0, vI1, vI2, vI3;
1353 vector float dxs, dys;
1354 vector float dxos, dyos;
1355 vector float dxm, dym;
/*
 * Quad geometry: (dx,dy)*m extends the streak along its direction,
 * (dx,dy)*sm and (dx,dy)*os give the sideways offsets at the current and
 * old ends respectively.
 */
1358 m = vec_add((vector float)(1.0), sm);
1360 dxs = vec_madd(dx, sm, zero);
1361 dys = vec_madd(dy, sm, zero);
1362 dxos = vec_madd(dx, os, zero);
1363 dyos = vec_madd(dy, os, zero);
1364 dxm = vec_madd(dx, m, zero);
1365 dym = vec_madd(dy, m, zero);
1367 sxd = vec_add(sx, dxm);
1368 sxdm = vec_sub(sxd, dys);
1369 sxdp = vec_add(sxd, dys);
1371 syd = vec_add(sy, dym);
1372 sydm = vec_sub(syd, dxs);
1373 sydp = vec_add(syd, dxs);
1375 oxd = vec_sub(oldscreenx, dxm);
1376 oxdm = vec_sub(oxd, dyos);
1377 oxdp = vec_add(oxd, dyos);
1379 oyd = vec_sub(oldscreeny, dym);
1380 oydm = vec_sub(oyd, dxos);
1381 oydp = vec_add(oyd, dxos);
/* Transpose so svec[k] = (sxdm, sydp, sxdp, sydm) of particle k. */
1383 vI0 = vec_mergeh ( sxdm, sxdp );
1384 vI1 = vec_mergeh ( sydp, sydm );
1385 vI2 = vec_mergel ( sxdm, sxdp );
1386 vI3 = vec_mergel ( sydp, sydm );
1388 svec[0] = vec_mergeh ( vI0, vI1 );
1389 svec[1] = vec_mergel ( vI0, vI1 );
1390 svec[2] = vec_mergeh ( vI2, vI3 );
1391 svec[3] = vec_mergel ( vI2, vI3 );
/* Transpose so ovec[k] = (oxdp, oydm, oxdm, oydp) of particle k. */
1393 vI0 = vec_mergeh ( oxdp, oxdm );
1394 vI1 = vec_mergeh ( oydm, oydp );
1395 vI2 = vec_mergel ( oxdp, oxdm );
1396 vI3 = vec_mergel ( oydm, oydp );
1398 ovec[0] = vec_mergeh ( vI0, vI1 );
1399 ovec[1] = vec_mergel ( vI0, vI1 );
1400 ovec[2] = vec_mergeh ( vI2, vI3 );
1401 ovec[3] = vec_mergel ( vI2, vI3 );
/*
 * Byte offsets into the output arrays for the next quad: four colour
 * vectors (one per corner) and two vertex vectors (two x,y pairs each).
 */
1405 int offset0 = (sci + 0) * sizeof( vector float );
1406 int offset1 = (sci + 1) * sizeof( vector float );
1407 int offset2 = (sci + 2) * sizeof( vector float );
1408 int offset3 = (sci + 3) * sizeof( vector float );
1409 int offset4 = (svi + 0) * sizeof( vector float );
1410 int offset5 = (svi + 1) * sizeof( vector float );
1411 vector float *colors = (vector float *)s->seraphimColors;
1412 vector float *vertices = (vector float *)s->seraphimVertices;
/*
 * Emit a quad for every lane whose bit is set in blitBool.
 * NOTE(review): the statements that advance the output position after
 * each emitted quad are not in this listing - confirm against the file.
 */
1413 for (kk=0; kk<4; kk++) {
1414 if (blitBool>>kk & 1) {
1415 vector float vcmv = cmv[kk];
1416 vector float vsvec = svec[kk];
1417 vector float vovec = ovec[kk];
1419 vec_st( vcmv, offset0, colors );
1420 vec_st( vcmv, offset1, colors );
1421 vec_st( vcmv, offset2, colors );
1422 vec_st( vcmv, offset3, colors );
1423 vec_st( vsvec, offset4, vertices );
1424 vec_st( vovec, offset5, vertices );
/* Horizontal sum of the per-lane counters gives the total quad count. */
1433 vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1434 vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1435 vec_ste( (vector signed int) vSi, 0, &si );
/* Submit si quads (four vertices each) from the three client arrays. */
1437 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1438 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1439 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1440 glDrawArrays(GL_QUADS,0,si*4);