3 Copyright (c) 2002, Calum Robinson
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice, this
10 list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 this list of conditions and the following disclaimer in the documentation
14 and/or other materials provided with the distribution.
16 * Neither the name of the author nor the names of its contributors may be used
17 to endorse or promote products derived from this software without specific
18 prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
24 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 /* Smoke.cpp: implementation of the Smoke class. */
/* Preprocessor constants.  None of these three names is referenced in the
 * portion of the file visible in this listing. */
41 #define MAXANGLES 16384
/* NOTE(review): DrawSmoke_Scalar compares dead.i[k] against the literal 3;
 * presumably that is this state -- confirm. */
42 #define NOT_QUITE_DEAD 3
/* NOTE(review): the trailing ';' is part of the macro expansion, so
 * `intensity` cannot be used inside a larger expression or an argument
 * list -- confirm whether the semicolon is intentional. */
44 #define intensity 75000.0f;
/*
 * InitSmoke: put a SmokeV into its starting state.
 *
 * s: smoke state to initialise; written in place.
 *
 * The visible lines zero the sub-particle cursor, set lastParticleTime to
 * 0.25 and assign RandFlt(-100.0, 100.0) to s->old[i].
 * NOTE(review): the declaration of `i`, the loop that drives it and the
 * function braces are not visible in this listing -- confirm against the
 * full source.
 */
46 void InitSmoke(SmokeV *s)
50 s->nextSubParticle = 0;
51 s->lastParticleTime = 0.25f;
55 s->old[i] = RandFlt(-100.0, 100.0);
/*
 * UpdateSmoke_ScalarBase: advance the smoke particles by one step using
 * plain scalar floating-point code.
 *
 * global: not referenced in the lines visible here.
 * flurry: supplies the star and spark positions, spark colours, numStreams,
 *         fTime, dframe, drag and fDeltaTime read below.
 * s:      particle pool, updated in place.  Particles are stored four to a
 *         slot and addressed as s->p[slot].<field>.f[sub] (or .i[sub]).
 *
 * Two phases are visible: (1) once enough time has elapsed, one new
 * particle per stream is written at the star position; (2) every slot is
 * visited and each particle is pulled toward every spark, damped by drag,
 * flagged dead if it moves too fast, and moved.
 *
 * NOTE(review): this listing omits lines (braces, several declarations and
 * some statements); the comments describe only what is visible.
 */
59 void UpdateSmoke_ScalarBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* Emission point: the star's current position. */
62 float sx = flurry->star->position[0];
63 float sy = flurry->star->position[1];
64 float sz = flurry->star->position[2];
66 double frameRateModifier;
72 /* release 12 puffs every frame */
/* Emit only when at least 1/121 (in the units of fTime) has elapsed since
 * the last emission. */
73 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
74 float dx,dy,dz,deltax,deltay,deltaz;
/* One new particle per stream; stream i is driven by flurry->spark[i]. */
86 for(i=0;i<flurry->numStreams;i++) {
87 float streamSpeedCoherenceFactor;
/* Initial velocity.  NOTE(review): the lines that compute deltax/deltay/
 * deltaz are not visible in this listing. */
89 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
90 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
91 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
92 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
93 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
94 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
95 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
96 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
97 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random speed factor around 1.0, clamped so it is never negative. */
98 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
99 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
100 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
101 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
102 rsquared = (dx*dx+dy*dy+dz*dz);
103 f = streamSpeed * streamSpeedCoherenceFactor;
/* mag = f / |d|, so (d * mag) has length f.
 * NOTE(review): no guard against rsquared == 0 is visible. */
105 mag = f / (float) sqrt(rsquared);
/* Subtracting d * mag adds a velocity of magnitude f directed at the spark. */
107 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
108 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
109 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with a random multiplicative jitter;
 * component 3 starts from 0.85 with half the jitter width. */
110 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
111 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
112 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
113 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a starting frame in 0..63. */
114 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
115 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
116 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Advance the write cursor: four sub-particles per slot, and the slot
 * index is checked against NUMSMOKEPARTICLES/4.
 * NOTE(review): the statements that advance/reset nextParticle are not
 * visible in this listing. */
117 s->nextSubParticle++;
118 if (s->nextSubParticle==4) {
120 s->nextSubParticle=0;
122 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
124 s->nextSubParticle = 0;
/* NOTE(review): lastParticleTime is assigned twice in the visible lines;
 * presumably the second assignment sits in a branch whose opening line is
 * not shown -- confirm. */
128 s->lastParticleTime = flurry->fTime;
131 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old (loop over i not visible). */
136 s->old[i] = flurry->star->position[i];
/* Frame rate taken as dframe / fTime; forces below are scaled by
 * 42.5 / frameRate.
 * NOTE(review): fTime == 0 would make this non-finite -- confirm that the
 * caller rules it out. */
139 frameRate = ((double) flurry->dframe)/(flurry->fTime);
140 frameRateModifier = 42.5f / frameRate;
/* Visit every slot; k (loop not visible) selects the sub-particle. */
142 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead-flag test; the body is not visible (presumably skips the particle). */
152 if (s->p[i].dead.i[k]) {
156 deltax = s->p[i].delta[0].f[k];
157 deltay = s->p[i].delta[1].f[k];
158 deltaz = s->p[i].delta[2].f[k];
/* Accumulate the pull of every spark on this particle. */
160 for(j=0;j<flurry->numStreams;j++) {
161 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
162 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
163 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
164 rsquared = (dx*dx+dy*dy+dz*dz);
/* Inverse-square strength, scaled by the frame-rate modifier. */
166 f = (gravity/rsquared) * frameRateModifier;
/* The spark whose index equals (particle index mod numStreams) pulls
 * harder, by a factor of (1 + streamBias). */
168 if ((((i*4)+k) % flurry->numStreams) == j) {
169 f *= 1.0f + streamBias;
/* mag = f / |d|, so (d * mag) has length f and points away from the spark. */
171 mag = f / (float) sqrt(rsquared);
173 deltax -= (dx * mag);
174 deltay -= (dy * mag);
175 deltaz -= (dz * mag);
178 /* slow this particle down by flurry->drag */
179 deltax *= flurry->drag;
180 deltay *= flurry->drag;
181 deltaz *= flurry->drag;
/* Flag the particle dead once its squared speed reaches 25000000
 * (a speed of 5000). */
183 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
184 s->p[i].dead.i[k] = 1;
188 /* update the position */
189 s->p[i].delta[0].f[k] = deltax;
190 s->p[i].delta[1].f[k] = deltay;
191 s->p[i].delta[2].f[k] = deltaz;
/* Save the old position, then step by velocity * fDeltaTime.
 * NOTE(review): j is presumably an axis index here, driven by a loop that
 * is not visible in this listing. */
193 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
194 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_ScalarFrsqrte: scalar smoke update that follows the same
 * visible steps as UpdateSmoke_ScalarBase, but also contains inline asm
 * using the PowerPC estimate instructions `frsqrte` (reciprocal square
 * root) and `fres` (reciprocal).
 *
 * global: not referenced in the lines visible here.
 * flurry: supplies the star and spark positions, spark colours, numStreams,
 *         fTime, dframe, drag and fDeltaTime read below.
 * s:      particle pool, updated in place; four particles per slot,
 *         addressed as s->p[slot].<field>.f[sub] (or .i[sub]).
 *
 * NOTE(review): this listing omits lines.  Where an asm statement and an
 * exact C expression assign the same variable back to back, they are
 * presumably alternatives selected by preprocessor conditionals that are
 * not shown -- confirm against the full source.
 */
203 void UpdateSmoke_ScalarFrsqrte(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* Emission point: the star's current position. */
206 float sx = flurry->star->position[0];
207 float sy = flurry->star->position[1];
208 float sz = flurry->star->position[2];
210 double frameRateModifier;
216 /* release 12 puffs every frame */
/* Emit only when at least 1/121 (in the units of fTime) has elapsed since
 * the last emission. */
217 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
218 float dx,dy,dz,deltax,deltay,deltaz;
/* One new particle per stream; stream i is driven by flurry->spark[i]. */
230 for(i=0;i<flurry->numStreams;i++) {
231 float streamSpeedCoherenceFactor;
/* Initial velocity.  NOTE(review): the lines that compute deltax/deltay/
 * deltaz are not visible in this listing. */
233 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
234 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
235 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
236 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
237 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
238 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
239 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
240 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
241 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random speed factor around 1.0, clamped so it is never negative. */
242 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
243 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
244 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
245 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
246 rsquared = (dx*dx+dy*dy+dz*dz);
247 f = streamSpeed * streamSpeedCoherenceFactor;
/* Exact form: mag = f / |d|. */
249 mag = f / (float) sqrt(rsquared);
251 reciprocal square-root estimate replaced above divide and call to system sqrt()
/* Estimate form: mag receives an estimate of 1/sqrt(rsquared).
 * NOTE(review): the multiplication by f that would make this equivalent to
 * the exact form is not visible in this listing. */
253 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* Subtracting d * mag adds a velocity directed at the spark. */
257 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
258 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
259 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with a random multiplicative jitter;
 * component 3 starts from 0.85 with half the jitter width. */
260 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
261 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
262 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
263 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a starting frame in 0..63. */
264 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
265 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
266 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Advance the write cursor: four sub-particles per slot, and the slot
 * index is checked against NUMSMOKEPARTICLES/4.
 * NOTE(review): the statements that advance/reset nextParticle are not
 * visible in this listing. */
267 s->nextSubParticle++;
268 if (s->nextSubParticle==4) {
270 s->nextSubParticle=0;
272 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
274 s->nextSubParticle = 0;
/* NOTE(review): assigned twice in the visible lines; presumably the second
 * assignment sits in a branch whose opening line is not shown. */
278 s->lastParticleTime = flurry->fTime;
281 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old (loop over i not visible). */
286 s->old[i] = flurry->star->position[i];
/* Frame rate taken as dframe / fTime; forces below are scaled by
 * 42.5 / frameRate.
 * NOTE(review): fTime == 0 would make this non-finite. */
289 frameRate = ((double) flurry->dframe)/(flurry->fTime);
290 frameRateModifier = 42.5f / frameRate;
/* Visit every slot; k (loop not visible) selects the sub-particle. */
292 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
/* Dead-flag test; the body is not visible (presumably skips the particle). */
302 if (s->p[i].dead.i[k]) {
306 deltax = s->p[i].delta[0].f[k];
307 deltay = s->p[i].delta[1].f[k];
308 deltaz = s->p[i].delta[2].f[k];
/* Accumulate the pull of every spark on this particle. */
310 for(j=0;j<flurry->numStreams;j++) {
311 dx = s->p[i].position[0].f[k] - flurry->spark[j]->position[0];
312 dy = s->p[i].position[1].f[k] - flurry->spark[j]->position[1];
313 dz = s->p[i].position[2].f[k] - flurry->spark[j]->position[2];
314 rsquared = (dx*dx+dy*dy+dz*dz);
/* Estimate form: f ~= 1/rsquared, then scaled by gravity*frameRateModifier. */
317 asm("fres %0, %1" : "=f" (f) : "f" (rsquared));
318 f *= gravity*frameRateModifier;
/* Exact form of the same quantity. */
320 f = ( gravity * frameRateModifier ) / rsquared;
/* The spark whose index equals (particle index mod numStreams) pulls
 * harder, by a factor of (1 + streamBias). */
322 if((((i*4)+k) % flurry->numStreams) == j) {
323 f *= 1.0f + streamBias;
/* mag = f / |d|, so (d * mag) has length f. */
326 mag = f / (float) sqrt(rsquared);
328 /* reciprocal square-root estimate replaced above divide and call to system sqrt() */
330 deltax -= (dx * mag);
331 deltay -= (dy * mag);
332 deltaz -= (dz * mag);
335 /* slow this particle down by flurry->drag */
336 deltax *= flurry->drag;
337 deltay *= flurry->drag;
338 deltaz *= flurry->drag;
/* Flag the particle dead once its squared speed reaches 25000000
 * (a speed of 5000). */
340 if((deltax*deltax+deltay*deltay+deltaz*deltaz) >= 25000000.0f) {
341 s->p[i].dead.i[k] = 1;
345 /* update the position */
346 s->p[i].delta[0].f[k] = deltax;
347 s->p[i].delta[1].f[k] = deltay;
348 s->p[i].delta[2].f[k] = deltaz;
/* Save the old position, then step by velocity * fDeltaTime.
 * NOTE(review): j is presumably an axis index here, driven by a loop that
 * is not visible in this listing. */
350 s->p[i].oldposition[j].f[k] = s->p[i].position[j].f[k];
351 s->p[i].position[j].f[k] += (s->p[i].delta[j].f[k])*flurry->fDeltaTime;
/*
 * UpdateSmoke_VectorBase: smoke update using AltiVec vector operations.
 * Emission is done with scalar code; the per-slot force/drag/move phase
 * handles the four particles of a slot as one vector (the .v member).
 *
 * global: not referenced in the lines visible here.
 * flurry: supplies the star and spark positions, spark colours, numStreams,
 *         fTime, dframe, drag and fDeltaTime read below.
 * s:      particle pool, updated in place.
 *
 * NOTE(review): this listing omits lines (braces, the declarations of
 * dragV, mod and mag, and some statements); the comments describe only
 * what is visible.
 */
361 void UpdateSmoke_VectorBase(global_info_t *global, flurry_info_t *flurry, SmokeV *s)
/* Emission point: the star's current position. */
364 float sx = flurry->star->position[0];
365 float sy = flurry->star->position[1];
366 float sz = flurry->star->position[2];
368 floatToVector frameRateModifier;
369 floatToVector gravityV;
371 floatToVector deltaTimeV;
/* Squared-speed kill threshold, a zero vector used as the addend of
 * vec_madd, and streamBias replicated to all four lanes. */
372 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
373 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
374 const vector float biasConst = (vector float)(streamBias);
/* Broadcast each scalar to all four lanes: store it in element 0, then
 * vec_splat element 0 across the vector. */
376 gravityV.f[0] = gravity;
377 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
379 dragV.f[0] = flurry->drag;
380 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
382 deltaTimeV.f[0] = flurry->fDeltaTime;
383 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
388 /* release 12 puffs every frame */
/* Emit only when at least 1/121 (in the units of fTime) has elapsed since
 * the last emission. */
389 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
390 float dx,dy,dz,deltax,deltay,deltaz;
/* One new particle per stream; stream i is driven by flurry->spark[i]. */
402 for(i=0;i<flurry->numStreams;i++) {
403 float streamSpeedCoherenceFactor;
/* Initial velocity.  NOTE(review): the lines that compute deltax/deltay/
 * deltaz are not visible in this listing. */
405 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
406 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
407 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
408 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
409 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
410 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
411 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
412 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
413 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random speed factor around 1.0, clamped so it is never negative. */
414 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
415 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
416 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
417 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
418 rsquared = (dx*dx+dy*dy+dz*dz);
419 f = streamSpeed * streamSpeedCoherenceFactor;
/* Exact form (f / |d|) followed by the PowerPC estimate of
 * 1/sqrt(rsquared).
 * NOTE(review): both assign mag; presumably they are alternatives under
 * preprocessor conditionals not shown, and the multiply by f for the
 * estimate form is not visible. */
421 mag = f / (float) sqrt(rsquared);
423 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* Subtracting d * mag adds a velocity directed at the spark. */
427 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
428 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
429 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with a random multiplicative jitter;
 * component 3 starts from 0.85 with half the jitter width. */
430 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
431 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
432 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
433 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a starting frame in 0..63. */
434 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
435 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
436 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Advance the write cursor: four sub-particles per slot, and the slot
 * index is checked against NUMSMOKEPARTICLES/4.
 * NOTE(review): the statements that advance/reset nextParticle are not
 * visible in this listing. */
437 s->nextSubParticle++;
438 if (s->nextSubParticle==4) {
440 s->nextSubParticle=0;
442 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
444 s->nextSubParticle = 0;
/* NOTE(review): assigned twice in the visible lines; presumably the second
 * assignment sits in a branch whose opening line is not shown. */
448 s->lastParticleTime = flurry->fTime;
451 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old (loop over i not visible). */
456 s->old[i] = flurry->star->position[i];
/* Frame rate taken as dframe / fTime.
 * NOTE(review): fTime == 0 would make this non-finite. */
459 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* Broadcast 42.5/frameRate, then fold gravity in: from here on
 * frameRateModifier.v holds gravity * (42.5 / frameRate) in every lane. */
460 frameRateModifier.f[0] = 42.5f / frameRate;
461 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
463 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* Visit every slot; each iteration handles the slot's four particles. */
465 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
466 /* floatToVector f; */
467 vector float deltax, deltay, deltaz;
468 vector float distTemp;
469 vector unsigned int deadTemp;
470 /* floatToVector infopos0, infopos1, infopos2; */
472 vector unsigned int jVec;
/* Data-stream prefetch hint starting at slot i+4.  Index i+4 exceeds the
 * loop bound on the last four iterations; it is only used as the vec_dst
 * address here. */
475 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True when all four dead flags are non-zero; the body is not visible
 * (presumably skips the slot). */
477 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
481 deltax = s->p[i].delta[0].v;
482 deltay = s->p[i].delta[1].v;
483 deltaz = s->p[i].delta[2].v;
/* mod.i[n] is the stream index paired with lane n of this slot.
 * NOTE(review): `i<<2 + 0` parses as `i << (2 + 0)`; the value equals i*4
 * only because the addend is 0.  The bodies of the three tests below are
 * not visible (presumably they wrap the index to 0 at numStreams). */
485 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
486 if(mod.i[0]+1 == flurry->numStreams) {
489 mod.i[1] = mod.i[0]+1;
491 if(mod.i[1]+1 == flurry->numStreams) {
494 mod.i[2] = mod.i[1]+1;
496 if(mod.i[2]+1 == flurry->numStreams) {
499 mod.i[3] = mod.i[2]+1;
/* x ^ x: sets jVec to zero in every lane; it tracks j as a vector. */
502 jVec = vec_xor(jVec, jVec);
/* Prefetch hint for the spark positions. */
504 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* Accumulate the pull of every spark on the four particles. */
505 for(j=0; j<flurry->numStreams;j++) {
506 vector float ip0, ip1 = (vector float)(0.0), ip2;
507 vector float dx, dy, dz;
508 vector float rsquared, f;
509 vector float one_over_rsquared;
510 vector float biasTemp;
512 vector bool int biasOr;
/* Load the spark position, which may not be 16-byte aligned: take the
 * vector containing its first byte, take the following vector as well when
 * the low four address bits are >= 8, then realign with vec_perm/vec_lvsl.
 * NOTE(review): the pointer is cast to int for the alignment test; only
 * the low four bits are used. */
514 ip0 = vec_ld(0, flurry->spark[j]->position);
515 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
516 ip1 = vec_ld(16, flurry->spark[j]->position);
519 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast element 1 into ip1, element 2 into ip2, element 0 into ip0
 * (ip0 last, because it is the source of all three). */
520 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
521 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
522 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
/* Per-lane offset from the spark to each particle. */
524 dx = vec_sub(s->p[i].position[0].v, ip0);
525 dy = vec_sub(s->p[i].position[1].v, ip1);
526 dz = vec_sub(s->p[i].position[2].v, ip2);
/* rsquared = dx*dx + dy*dy + dz*dz via chained multiply-adds. */
528 rsquared = vec_madd(dx, dx, zero);
529 rsquared = vec_madd(dy, dy, rsquared);
530 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose stream index equals j get (1 + streamBias); the others 1. */
532 biasOr = vec_cmpeq(jVec, mod.v);
533 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier * estimate(1 / rsquared). */
535 f = vec_madd(biasTemp, frameRateModifier.v, zero);
536 one_over_rsquared = vec_re(rsquared);
537 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f * estimate(1 / sqrt(rsquared)). */
539 mag = vec_rsqrte(rsquared);
540 mag = vec_madd(mag, f, zero);
/* vec_nmsub(a, b, c) computes c - a*b: delta -= d * mag. */
542 deltax = vec_nmsub(dx, mag, deltax);
543 deltay = vec_nmsub(dy, mag, deltay);
544 deltaz = vec_nmsub(dz, mag, deltaz);
/* Keep jVec equal to the next j. */
546 jVec = vec_add(jVec, (vector unsigned int)(1));
549 /* slow this particle down by flurry->drag */
550 deltax = vec_madd(deltax, dragV.v, zero);
551 deltay = vec_madd(deltay, dragV.v, zero);
552 deltaz = vec_madd(deltaz, dragV.v, zero);
/* Squared speed per lane. */
554 distTemp = vec_madd(deltax, deltax, zero);
555 distTemp = vec_madd(deltay, deltay, distTemp);
556 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* Lanes at or above the threshold yield 1 (compare mask AND 1), which is
 * OR-ed into the existing dead flags. */
558 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
559 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
560 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* All four now dead; the body is not visible (presumably skips the move). */
561 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
565 /* update the position */
566 s->p[i].delta[0].v = deltax;
567 s->p[i].delta[1].v = deltay;
568 s->p[i].delta[2].v = deltaz;
/* Save the old position, then position += delta * fDeltaTime.
 * NOTE(review): j is presumably an axis index here, driven by a loop that
 * is not visible in this listing. */
570 s->p[i].oldposition[j].v = s->p[i].position[j].v;
571 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * UpdateSmoke_VectorUnrolled: AltiVec smoke update whose inner loop over
 * the sparks handles four sparks per iteration, followed by a one-at-a-time
 * loop for the remaining sparks.
 *
 * info: not referenced in the lines visible here.
 * s:    particle pool, updated in place.
 *
 * NOTE(review): the body reads `flurry->...` throughout, but `flurry` is
 * not a parameter of this function (the signature differs from the other
 * UpdateSmoke_* variants in this file).  Unless a declaration exists in
 * lines not shown, this does not compile -- confirm.
 * NOTE(review): this listing omits lines (braces, the declarations of
 * dragV, mod, mag and magA..magD, and some statements); the comments
 * describe only what is visible.
 */
576 void UpdateSmoke_VectorUnrolled(global_info_t *info, SmokeV *s)
/* Emission point: the star's current position. */
579 float sx = flurry->star->position[0];
580 float sy = flurry->star->position[1];
581 float sz = flurry->star->position[2];
583 floatToVector frameRateModifier;
584 floatToVector gravityV;
586 floatToVector deltaTimeV;
/* Squared-speed kill threshold, a zero vector used as the addend of
 * vec_madd, and streamBias replicated to all four lanes. */
587 const vector float deadConst = (vector float) (25000000.0,25000000.0,25000000.0,25000000.0);
588 const vector float zero = (vector float)(0.0, 0.0, 0.0, 0.0);
589 const vector float biasConst = (vector float)(streamBias);
/* Broadcast each scalar to all four lanes: store it in element 0, then
 * vec_splat element 0 across the vector. */
591 gravityV.f[0] = gravity;
592 gravityV.v = (vector float) vec_splat((vector unsigned int)gravityV.v, 0);
594 dragV.f[0] = flurry->drag;
595 dragV.v = (vector float) vec_splat((vector unsigned int)dragV.v, 0);
597 deltaTimeV.f[0] = flurry->fDeltaTime;
598 deltaTimeV.v = (vector float) vec_splat((vector unsigned int)deltaTimeV.v, 0);
603 /* release 12 puffs every frame */
/* Emit only when at least 1/121 (in the units of fTime) has elapsed since
 * the last emission. */
604 if(flurry->fTime - s->lastParticleTime >= 1.0f / 121.0f) {
605 float dx,dy,dz,deltax,deltay,deltaz;
/* One new particle per stream; stream i is driven by flurry->spark[i]. */
617 for(i=0;i<flurry->numStreams;i++) {
618 float streamSpeedCoherenceFactor;
/* Initial velocity.  NOTE(review): the lines that compute deltax/deltay/
 * deltaz are not visible in this listing. */
620 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] = deltax;
621 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] = deltay;
622 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] = deltaz;
/* Current and previous position both start at the star. */
623 s->p[s->nextParticle].position[0].f[s->nextSubParticle] = sx;
624 s->p[s->nextParticle].position[1].f[s->nextSubParticle] = sy;
625 s->p[s->nextParticle].position[2].f[s->nextSubParticle] = sz;
626 s->p[s->nextParticle].oldposition[0].f[s->nextSubParticle] = sx;
627 s->p[s->nextParticle].oldposition[1].f[s->nextSubParticle] = sy;
628 s->p[s->nextParticle].oldposition[2].f[s->nextSubParticle] = sz;
/* Random speed factor around 1.0, clamped so it is never negative. */
629 streamSpeedCoherenceFactor = MAX_(0.0f,1.0f + RandBell(0.25f*incohesion));
/* (dx,dy,dz) points from spark i to the new particle. */
630 dx = s->p[s->nextParticle].position[0].f[s->nextSubParticle] - flurry->spark[i]->position[0];
631 dy = s->p[s->nextParticle].position[1].f[s->nextSubParticle] - flurry->spark[i]->position[1];
632 dz = s->p[s->nextParticle].position[2].f[s->nextSubParticle] - flurry->spark[i]->position[2];
633 rsquared = (dx*dx+dy*dy+dz*dz);
634 f = streamSpeed * streamSpeedCoherenceFactor;
/* Exact form (f / |d|) followed by the PowerPC estimate of
 * 1/sqrt(rsquared).
 * NOTE(review): both assign mag; presumably they are alternatives under
 * preprocessor conditionals not shown, and the multiply by f for the
 * estimate form is not visible. */
636 mag = f / (float) sqrt(rsquared);
638 asm("frsqrte %0, %1" : "=f" (mag) : "f" (rsquared));
/* Subtracting d * mag adds a velocity directed at the spark. */
642 s->p[s->nextParticle].delta[0].f[s->nextSubParticle] -= (dx * mag);
643 s->p[s->nextParticle].delta[1].f[s->nextSubParticle] -= (dy * mag);
644 s->p[s->nextParticle].delta[2].f[s->nextSubParticle] -= (dz * mag);
/* Colour is the spark's colour with a random multiplicative jitter;
 * component 3 starts from 0.85 with half the jitter width. */
645 s->p[s->nextParticle].color[0].f[s->nextSubParticle] = flurry->spark[i]->color[0] * (1.0f + RandBell(colorIncoherence));
646 s->p[s->nextParticle].color[1].f[s->nextSubParticle] = flurry->spark[i]->color[1] * (1.0f + RandBell(colorIncoherence));
647 s->p[s->nextParticle].color[2].f[s->nextSubParticle] = flurry->spark[i]->color[2] * (1.0f + RandBell(colorIncoherence));
648 s->p[s->nextParticle].color[3].f[s->nextSubParticle] = 0.85f * (1.0f + RandBell(0.5f*colorIncoherence));
/* Record the birth time, mark alive, and pick a starting frame in 0..63. */
649 s->p[s->nextParticle].time.f[s->nextSubParticle] = flurry->fTime;
650 s->p[s->nextParticle].dead.i[s->nextSubParticle] = 0;
651 s->p[s->nextParticle].animFrame.i[s->nextSubParticle] = random()&63;
/* Advance the write cursor: four sub-particles per slot, and the slot
 * index is checked against NUMSMOKEPARTICLES/4.
 * NOTE(review): the statements that advance/reset nextParticle are not
 * visible in this listing. */
652 s->nextSubParticle++;
653 if (s->nextSubParticle==4) {
655 s->nextSubParticle=0;
657 if (s->nextParticle >= NUMSMOKEPARTICLES/4) {
659 s->nextSubParticle = 0;
/* NOTE(review): assigned twice in the visible lines; presumably the second
 * assignment sits in a branch whose opening line is not shown. */
663 s->lastParticleTime = flurry->fTime;
666 s->lastParticleTime = flurry->fTime;
/* Remember the star position in s->old (loop over i not visible). */
671 s->old[i] = flurry->star->position[i];
/* Frame rate taken as dframe / fTime.
 * NOTE(review): fTime == 0 would make this non-finite. */
674 frameRate = ((double) flurry->dframe)/(flurry->fTime);
/* Broadcast 42.5/frameRate, then fold gravity in: from here on
 * frameRateModifier.v holds gravity * (42.5 / frameRate) in every lane. */
675 frameRateModifier.f[0] = 42.5f / frameRate;
676 frameRateModifier.v = (vector float) vec_splat((vector unsigned int)frameRateModifier.v, 0);
678 frameRateModifier.v = vec_madd(frameRateModifier.v, gravityV.v, zero);
/* Visit every slot; each iteration handles the slot's four particles. */
680 for(i=0;i<NUMSMOKEPARTICLES/4;i++) {
681 /* floatToVector f; */
682 vector float deltax, deltay, deltaz;
683 vector float distTemp;
684 vector unsigned int deadTemp;
685 /* floatToVector infopos0, infopos1, infopos2; */
687 vector unsigned int jVec;
/* Integer 1 in every lane, and its conversion to float 1.0 in every lane. */
688 vector unsigned int intOne = vec_splat_u32(1);
689 vector float floatOne = vec_ctf(intOne, 0);
/* Data-stream prefetch hint starting at slot i+4.  Index i+4 exceeds the
 * loop bound on the last four iterations; it is only used as the vec_dst
 * address here. */
692 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 3);
/* True when all four dead flags are non-zero; the body is not visible
 * (presumably skips the slot). */
694 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
698 deltax = s->p[i].delta[0].v;
699 deltay = s->p[i].delta[1].v;
700 deltaz = s->p[i].delta[2].v;
/* mod.i[n] is the stream index paired with lane n of this slot.
 * NOTE(review): `i<<2 + 0` parses as `i << (2 + 0)`; the value equals i*4
 * only because the addend is 0.  The bodies of the three tests below are
 * not visible (presumably they wrap the index to 0 at numStreams). */
702 mod.i[0] = (i<<2 + 0) % flurry->numStreams;
703 if(mod.i[0]+1 == flurry->numStreams) {
706 mod.i[1] = mod.i[0]+1;
708 if(mod.i[1]+1 == flurry->numStreams) {
711 mod.i[2] = mod.i[1]+1;
713 if(mod.i[2]+1 == flurry->numStreams) {
716 mod.i[3] = mod.i[2]+1;
/* x ^ x: sets jVec to zero in every lane; it tracks j as a vector. */
719 jVec = vec_xor(jVec, jVec);
/* Prefetch hint for the spark positions. */
721 vec_dst( &flurry->spark[0]->position[0], 0x16020160, 3 );
/* Unrolled part: sparks j, j+1, j+2, j+3 (suffixes a, b, c, d / A, B, C, D)
 * are processed together while at least four sparks remain. */
722 for(j=0; j + 3 < flurry->numStreams;j+=4)
724 vector float dxa, dya, dza;
725 vector float dxb, dyb, dzb;
726 vector float dxc, dyc, dzc;
727 vector float dxd, dyd, dzd;
728 vector float ip0a, ip1a;
729 vector float ip0b, ip1b;
730 vector float ip0c, ip1c;
731 vector float ip0d, ip1d;
732 vector float rsquaredA;
733 vector float rsquaredB;
734 vector float rsquaredC;
735 vector float rsquaredD;
736 vector float fA, fB, fC, fD;
737 vector float biasTempA;
738 vector float biasTempB;
739 vector float biasTempC;
740 vector float biasTempD;
746 vector float one_over_rsquaredA;
747 vector float one_over_rsquaredB;
748 vector float one_over_rsquaredC;
749 vector float one_over_rsquaredD;
750 vector bool int biasOrA,biasOrB,biasOrC,biasOrD;
/* Load each spark position as two vectors (byte offsets 0 and 12) so that
 * an unaligned position can be reassembled below.
 * NOTE(review): the second load here is unconditional and uses offset 12,
 * whereas the remainder loop further down uses a conditional load at
 * offset 16 -- confirm the difference is intended. */
753 ip0a = vec_ld(0, flurry->spark[j]->position);
754 ip0b = vec_ld(0, flurry->spark[j+1]->position);
755 ip0c = vec_ld(0, flurry->spark[j+2]->position);
756 ip0d = vec_ld(0, flurry->spark[j+3]->position);
757 ip1a = vec_ld( 12, flurry->spark[j]->position );
758 ip1b = vec_ld( 12, flurry->spark[j+1]->position );
759 ip1c = vec_ld( 12, flurry->spark[j+2]->position );
760 ip1d = vec_ld( 12, flurry->spark[j+3]->position );
/* Realign each position with vec_perm and the vec_lvsl permute mask. */
763 ip0a = vec_perm(ip0a, ip1a, vec_lvsl(0, flurry->spark[j]->position));
764 ip0b = vec_perm(ip0b, ip1b, vec_lvsl(0, flurry->spark[j+1]->position));
765 ip0c = vec_perm(ip0c, ip1c, vec_lvsl(0, flurry->spark[j+2]->position));
766 ip0d = vec_perm(ip0d, ip1d, vec_lvsl(0, flurry->spark[j+3]->position));
/* dx* = particle x - spark x (element 0 broadcast, then subtracted). */
768 dxa = vec_splat( ip0a, 0 );
769 dxb = vec_splat( ip0b, 0 );
770 dxc = vec_splat( ip0c, 0 );
771 dxd = vec_splat( ip0d, 0 );
772 dxa = vec_sub( s->p[i].position[0].v, dxa );
773 dxb = vec_sub( s->p[i].position[0].v, dxb );
774 dxc = vec_sub( s->p[i].position[0].v, dxc );
775 dxd = vec_sub( s->p[i].position[0].v, dxd );
/* dy* = particle y - spark y (element 1). */
777 dya = vec_splat( ip0a, 1 );
778 dyb = vec_splat( ip0b, 1 );
779 dyc = vec_splat( ip0c, 1 );
780 dyd = vec_splat( ip0d, 1 );
781 dya = vec_sub( s->p[i].position[1].v, dya );
782 dyb = vec_sub( s->p[i].position[1].v, dyb );
783 dyc = vec_sub( s->p[i].position[1].v, dyc );
784 dyd = vec_sub( s->p[i].position[1].v, dyd );
/* dz* = particle z - spark z (element 2). */
786 dza = vec_splat( ip0a, 2 );
787 dzb = vec_splat( ip0b, 2 );
788 dzc = vec_splat( ip0c, 2 );
789 dzd = vec_splat( ip0d, 2 );
790 dza = vec_sub( s->p[i].position[2].v, dza );
791 dzb = vec_sub( s->p[i].position[2].v, dzb );
792 dzc = vec_sub( s->p[i].position[2].v, dzc );
793 dzd = vec_sub( s->p[i].position[2].v, dzd );
/* rsquared* = dx*dx + dy*dy + dz*dz via chained multiply-adds. */
795 rsquaredA = vec_madd( dxa, dxa, zero );
796 rsquaredB = vec_madd( dxb, dxb, zero );
797 rsquaredC = vec_madd( dxc, dxc, zero );
798 rsquaredD = vec_madd( dxd, dxd, zero );
800 rsquaredA = vec_madd( dya, dya, rsquaredA );
801 rsquaredB = vec_madd( dyb, dyb, rsquaredB );
802 rsquaredC = vec_madd( dyc, dyc, rsquaredC );
803 rsquaredD = vec_madd( dyd, dyd, rsquaredD );
805 rsquaredA = vec_madd( dza, dza, rsquaredA );
806 rsquaredB = vec_madd( dzb, dzb, rsquaredB );
807 rsquaredC = vec_madd( dzc, dzc, rsquaredC );
808 rsquaredD = vec_madd( dzd, dzd, rsquaredD );
/* Bias masks for four consecutive spark indices; jVec is incremented after
 * each compare so it ends up equal to the next group's first j. */
810 biasOrA = vec_cmpeq( jVec, mod.v );
811 jVec = vec_add(jVec, intOne);
812 biasOrB = vec_cmpeq( jVec, mod.v );
813 jVec = vec_add(jVec, intOne);
814 biasOrC = vec_cmpeq( jVec, mod.v );
815 jVec = vec_add(jVec, intOne);
816 biasOrD = vec_cmpeq( jVec, mod.v );
817 jVec = vec_add(jVec, intOne);
/* Matching lanes get (1 + streamBias); the others get 1. */
819 biasTempA = vec_add( vec_and( biasOrA, biasConst), floatOne);
820 biasTempB = vec_add( vec_and( biasOrB, biasConst), floatOne);
821 biasTempC = vec_add( vec_and( biasOrC, biasConst), floatOne);
822 biasTempD = vec_add( vec_and( biasOrD, biasConst), floatOne);
/* f* = bias * gravity * frameRateModifier * estimate(1 / rsquared). */
824 fA = vec_madd( biasTempA, frameRateModifier.v, zero);
825 fB = vec_madd( biasTempB, frameRateModifier.v, zero);
826 fC = vec_madd( biasTempC, frameRateModifier.v, zero);
827 fD = vec_madd( biasTempD, frameRateModifier.v, zero);
828 one_over_rsquaredA = vec_re( rsquaredA );
829 one_over_rsquaredB = vec_re( rsquaredB );
830 one_over_rsquaredC = vec_re( rsquaredC );
831 one_over_rsquaredD = vec_re( rsquaredD );
832 fA = vec_madd( fA, one_over_rsquaredA, zero);
833 fB = vec_madd( fB, one_over_rsquaredB, zero);
834 fC = vec_madd( fC, one_over_rsquaredC, zero);
835 fD = vec_madd( fD, one_over_rsquaredD, zero);
/* mag* = f* * estimate(1 / sqrt(rsquared)). */
836 magA = vec_rsqrte( rsquaredA );
837 magB = vec_rsqrte( rsquaredB );
838 magC = vec_rsqrte( rsquaredC );
839 magD = vec_rsqrte( rsquaredD );
840 magA = vec_madd( magA, fA, zero );
841 magB = vec_madd( magB, fB, zero );
842 magC = vec_madd( magC, fC, zero );
843 magD = vec_madd( magD, fD, zero );
/* vec_nmsub(a, b, c) computes c - a*b: subtract each spark's d * mag from
 * the running velocity, one spark after another. */
844 deltax = vec_nmsub( dxa, magA, deltax );
845 deltay = vec_nmsub( dya, magA, deltay );
846 deltaz = vec_nmsub( dza, magA, deltaz );
848 deltax = vec_nmsub( dxb, magB, deltax );
849 deltay = vec_nmsub( dyb, magB, deltay );
850 deltaz = vec_nmsub( dzb, magB, deltaz );
852 deltax = vec_nmsub( dxc, magC, deltax );
853 deltay = vec_nmsub( dyc, magC, deltay );
854 deltaz = vec_nmsub( dzc, magC, deltaz );
856 deltax = vec_nmsub( dxd, magD, deltax );
857 deltay = vec_nmsub( dyd, magD, deltay );
858 deltaz = vec_nmsub( dzd, magD, deltaz );
/* Remainder: the sparks left over by the unrolled loop, one at a time,
 * continuing from the current j. */
862 for(;j<flurry->numStreams;j++) {
863 vector float ip0, ip1 = (vector float)(0.0), ip2;
864 vector float dx, dy, dz;
865 vector float rsquared, f;
866 vector float one_over_rsquared;
867 vector float biasTemp;
869 vector bool int biasOr;
/* Load the possibly unaligned spark position: first vector, plus the
 * following one when the low four address bits are >= 8, then realign.
 * NOTE(review): the pointer is cast to int for the alignment test; only
 * the low four bits are used. */
871 ip0 = vec_ld(0, flurry->spark[j]->position);
872 if(((int)(flurry->spark[j]->position) & 0xF)>=8) {
873 ip1 = vec_ld(16, flurry->spark[j]->position);
876 ip0 = vec_perm(ip0, ip1, vec_lvsl(0, flurry->spark[j]->position));
/* Broadcast element 1 into ip1, element 2 into ip2, element 0 into ip0
 * (ip0 last, because it is the source of all three). */
877 ip1 = (vector float) vec_splat((vector unsigned int)ip0, 1);
878 ip2 = (vector float) vec_splat((vector unsigned int)ip0, 2);
879 ip0 = (vector float) vec_splat((vector unsigned int)ip0, 0);
/* Per-lane offset from the spark to each particle. */
881 dx = vec_sub(s->p[i].position[0].v, ip0);
882 dy = vec_sub(s->p[i].position[1].v, ip1);
883 dz = vec_sub(s->p[i].position[2].v, ip2);
/* rsquared = dx*dx + dy*dy + dz*dz. */
885 rsquared = vec_madd(dx, dx, zero);
886 rsquared = vec_madd(dy, dy, rsquared);
887 rsquared = vec_madd(dz, dz, rsquared);
/* Lanes whose stream index equals j get (1 + streamBias); the others 1. */
889 biasOr = vec_cmpeq(jVec, mod.v);
890 biasTemp = vec_add(vec_and(biasOr, biasConst), (vector float)(1.0));
/* f = bias * gravity * frameRateModifier * estimate(1 / rsquared). */
892 f = vec_madd(biasTemp, frameRateModifier.v, zero);
893 one_over_rsquared = vec_re(rsquared);
894 f = vec_madd(f, one_over_rsquared, zero);
/* mag = f * estimate(1 / sqrt(rsquared)). */
896 mag = vec_rsqrte(rsquared);
897 mag = vec_madd(mag, f, zero);
/* delta -= d * mag. */
899 deltax = vec_nmsub(dx, mag, deltax);
900 deltay = vec_nmsub(dy, mag, deltay);
901 deltaz = vec_nmsub(dz, mag, deltaz);
/* Keep jVec equal to the next j. */
903 jVec = vec_add(jVec, (vector unsigned int)(1));
906 /* slow this particle down by flurry->drag */
907 deltax = vec_madd(deltax, dragV.v, zero);
908 deltay = vec_madd(deltay, dragV.v, zero);
909 deltaz = vec_madd(deltaz, dragV.v, zero);
/* Squared speed per lane. */
911 distTemp = vec_madd(deltax, deltax, zero);
912 distTemp = vec_madd(deltay, deltay, distTemp);
913 distTemp = vec_madd(deltaz, deltaz, distTemp);
/* Lanes at or above the threshold yield 1 (compare mask AND 1), which is
 * OR-ed into the existing dead flags. */
915 deadTemp = (vector unsigned int) vec_cmpge(distTemp, deadConst);
916 deadTemp = vec_and((vector unsigned int)vec_splat_u32(1), deadTemp);
917 s->p[i].dead.v = vec_or(s->p[i].dead.v, deadTemp);
/* All four now dead; the body is not visible (presumably skips the move). */
918 if (vec_all_ne(s->p[i].dead.v, (vector unsigned int)(0))) {
922 /* update the position */
923 s->p[i].delta[0].v = deltax;
924 s->p[i].delta[1].v = deltay;
925 s->p[i].delta[2].v = deltaz;
/* Save the old position, then position += delta * fDeltaTime.
 * NOTE(review): j is presumably an axis index here, driven by a loop that
 * is not visible in this listing. */
927 s->p[i].oldposition[j].v = s->p[i].position[j].v;
928 s->p[i].position[j].v = vec_madd(s->p[i].delta[j].v, deltaTimeV.v, s->p[i].position[j].v);
/*
 * DrawSmoke_Scalar: fill the colour, texture-coordinate and vertex arrays
 * held in `s` from the smoke particles, then submit them to OpenGL as
 * quads with glDrawArrays.
 *
 * global:     supplies sys_glWidth and sys_glHeight.
 * flurry:     supplies fTime and streamExpansion.
 * s:          particle pool.  The dead flags and animFrame counters are
 *             modified, and the seraphimColors / seraphimTextures /
 *             seraphimVertices arrays are overwritten.
 * brightness: not referenced in the lines visible here.
 *
 * NOTE(review): this listing omits lines (braces, most local declarations,
 * the index increments and several computations such as dxm, dys, u1, v1);
 * the comments describe only what is visible.
 */
936 void DrawSmoke_Scalar(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* Scale relative to a 1024-wide viewport, and the half extents that are
 * added to the projected coordinates below. */
946 float screenRatio = global->sys_glWidth / 1024.0f;
947 float hslash2 = global->sys_glHeight * 0.5f;
948 float wslash2 = global->sys_glWidth * 0.5f;
/* Width limit: a particle whose own width reaches this is flagged dead. */
951 width = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
/* Visit every slot and each of its four sub-particles. */
953 for (i=0;i<NUMSMOKEPARTICLES/4;i++)
955 for (k=0; k<4; k++) {
/* Dead-flag test; the body is not visible (presumably skips the particle). */
959 if (s->p[i].dead.i[k]) {
/* The particle's width grows with its age (fTime minus its birth time). */
962 thisWidth = (streamSize + (flurry->fTime - s->p[i].time.f[k])*flurry->streamExpansion) * screenRatio;
963 if (thisWidth >= width)
965 s->p[i].dead.i[k] = 1;
/* Perspective projection: divide by z and add the half extents.  Both sx
 * and sy are scaled by sys_glWidth. */
968 z = s->p[i].position[2].f[k];
969 sx = s->p[i].position[0].f[k] * global->sys_glWidth / z + wslash2;
970 sy = s->p[i].position[1].f[k] * global->sys_glWidth / z + hslash2;
971 oldz = s->p[i].oldposition[2].f[k];
/* Test for more than 50 units outside the viewport, or current/previous z
 * below 25; the body is not visible (presumably rejects the particle). */
972 if (sx > global->sys_glWidth+50.0f || sx < -50.0f || sy > global->sys_glHeight+50.0f || sy < -50.0f || z < 25.0f || oldz < 25.0f)
/* On-screen width at the current depth, never below 1. */
977 w = MAX_(1.0f,thisWidth/z);
/* Project the previous position the same way; (dx,dy) is the screen-space
 * movement since the previous position. */
979 float oldx = s->p[i].oldposition[0].f[k];
980 float oldy = s->p[i].oldposition[1].f[k];
981 float oldscreenx = (oldx * global->sys_glWidth / oldz) + wslash2;
982 float oldscreeny = (oldy * global->sys_glWidth / oldz) + hslash2;
983 float dx = (sx-oldscreenx);
984 float dy = (sy-oldscreeny);
/* FastDistance2D is defined elsewhere; presumably the length of (dx,dy). */
986 float d = FastDistance2D(dx, dy);
/* On-screen width at the previous depth, never below 1. */
997 ow = MAX_(1.0f,thisWidth/oldz);
1010 float m = 1.0f + sm;
/* Advance the animation frame, wrapping from 63 back to 0. */
1019 s->p[i].animFrame.i[k]++;
1020 if (s->p[i].animFrame.i[k] >= 64)
1022 s->p[i].animFrame.i[k] = 0;
/* Frame n maps to cell (n & 7, n >> 3) of an 8x8 grid, each cell 0.125
 * wide in texture coordinates. */
1025 u0 = (s->p[i].animFrame.i[k]& 7) * 0.125f;
1026 v0 = (s->p[i].animFrame.i[k]>>3) * 0.125f;
/* Colour multiplier: decreases as the particle's width nears the limit. */
1029 cm = (1.375f - thisWidth/width);
/* NOTE(review): literal 3 here; NOT_QUITE_DEAD is defined as 3 at the top
 * of the file -- presumably the same state.  Such a particle is flagged
 * fully dead (1) here. */
1030 if (s->p[i].dead.i[k] == 3)
1033 s->p[i].dead.i[k] = 1;
/* Particle colour (4 components) scaled by cm. */
1037 cmv.f[0] = s->p[i].color[0].f[k]*cm;
1038 cmv.f[1] = s->p[i].color[1].f[k]*cm;
1039 cmv.f[2] = s->p[i].color[2].f[k]*cm;
1040 cmv.f[3] = s->p[i].color[3].f[k]*cm;
/* The same colour is stored for all four vertices of the quad.
 * NOTE(review): a vector-copy form and a scalar-loop form both appear
 * below; presumably only one is compiled (guards not visible). */
1043 /* MDT we can't use vectors in the Scalar routine */
1044 s->seraphimColors[sci++].v = cmv.v;
1045 s->seraphimColors[sci++].v = cmv.v;
1046 s->seraphimColors[sci++].v = cmv.v;
1047 s->seraphimColors[sci++].v = cmv.v;
1051 for (jj = 0; jj < 4; jj++) {
1052 for (ii = 0; ii < 4; ii++) {
1053 s->seraphimColors[sci].f[ii] = cmv.f[ii];
/* Texture coordinates for the four corners, in order:
 * (u0,v0), (u0,v1), (u1,v1), (u1,v0). */
1060 s->seraphimTextures[sti++] = u0;
1061 s->seraphimTextures[sti++] = v0;
1062 s->seraphimTextures[sti++] = u0;
1063 s->seraphimTextures[sti++] = v1;
1065 s->seraphimTextures[sti++] = u1;
1066 s->seraphimTextures[sti++] = v1;
1067 s->seraphimTextures[sti++] = u1;
1068 s->seraphimTextures[sti++] = v0;
/* Two (x,y) vertices per element: first the pair around the current
 * screen position... */
1070 s->seraphimVertices[svi].f[0] = sx+dxm-dys;
1071 s->seraphimVertices[svi].f[1] = sy+dym+dxs;
1072 s->seraphimVertices[svi].f[2] = sx+dxm+dys;
1073 s->seraphimVertices[svi].f[3] = sy+dym-dxs;
/* ...then the pair around the previous screen position.  (The svi
 * increments are not visible in this listing.) */
1076 s->seraphimVertices[svi].f[0] = oldscreenx-dxm+dyos;
1077 s->seraphimVertices[svi].f[1] = oldscreeny-dym-dxos;
1078 s->seraphimVertices[svi].f[2] = oldscreenx-dxm-dyos;
1079 s->seraphimVertices[svi].f[3] = oldscreeny-dym+dxos;
/* Point OpenGL at the three arrays (4 floats per colour, 2 per vertex and
 * per texture coordinate) and draw si*4 vertices as quads.  si is
 * presumably the number of quads emitted above (its increment is not
 * visible). */
1085 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1086 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1087 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1088 glDrawArrays(GL_QUADS,0,si*4);
/*
 * DrawSmoke_Vector: AltiVec version of the smoke draw routine.
 *
 * Walks s->p[] in groups of four particles (one particle per vector lane),
 * projects the current and previous positions to screen space, and for every
 * lane that passes the visibility tests writes one textured quad into
 * s->seraphimColors / s->seraphimVertices / s->seraphimTextures.  The
 * accumulated quads are submitted with a single glDrawArrays(GL_QUADS) call.
 *
 *   global     - supplies sys_glWidth / sys_glHeight.
 *   flurry     - supplies fTime and streamExpansion.
 *   s          - particle store (s->p[]) and the output arrays; the dead and
 *                animFrame fields of the particles are updated here.
 *   brightness - scale factor applied to the per-particle colour multiplier.
 *
 * streamSize is read below but is defined outside this function.
 */
1094 void DrawSmoke_Vector(global_info_t *global, flurry_info_t *flurry, SmokeV *s, float brightness)
/* zero doubles as the addend that turns vec_madd into a plain multiply. */
1096 const vector float zero = (vector float)(0.0);
1101 floatToVector width;
1103 floatToVector u0,v0,u1,v1;
1104 vector float one_over_z;
1107 float screenRatio = global->sys_glWidth / 1024.0f;
1108 float hslash2 = global->sys_glHeight * 0.5f;
1109 float wslash2 = global->sys_glWidth * 0.5f;
1111 floatToVector briteV, fTimeV, expansionV, screenRatioV, hslash2V, wslash2V, streamSizeV;
1112 floatToVector glWidthV;
1114 vector float cmv[4];
1115 vector float svec[4], ovec[4];
1116 vector float oldscreenx, oldscreeny;
1118 vector float frameAnd7;
1119 vector float frameShift3;
1120 vector float one_over_width;
1121 vector float dx, dy;
/* vSi counts, per lane, how many particles were actually emitted. */
1123 vector unsigned int vSi = vec_splat_u32(0);
1124 const vector float eighth = (vector float)(0.125);
/* Upper cull bounds: viewport size plus a margin of 50 (lower bound is -50). */
1125 float glWidth50 = global->sys_glWidth + 50.0f;
1126 float glHeight50 = global->sys_glHeight + 50.0f;
1127 vector float vGLWidth50, vGLHeight50;
1128 unsigned int blitBool;
/* Data-stream prefetch hint starting at the first particle group. */
1130 vec_dst((int *)(&(s->p[0])), 0x00020200, 2);
/*
 * Broadcast the two scalar bounds to all four lanes.  vec_lde drops the
 * float into whichever lane its address selects; the permute vector built
 * from vec_lvsl (splatted as a 32-bit element) then copies that lane into
 * every element, independent of the scalar's alignment.
 */
1133 vector unsigned char permute1 = vec_lvsl( 0, &glWidth50 );
1134 vector unsigned char permute2 = vec_lvsl( 0, &glHeight50 );
1135 permute1 = (vector unsigned char) vec_splat( (vector unsigned int) permute1, 0 );
1136 permute2 = (vector unsigned char) vec_splat( (vector unsigned int) permute2, 0 );
1137 vGLWidth50 = vec_lde( 0, &glWidth50 );
1138 vGLHeight50 = vec_lde( 0, &glHeight50 );
1139 vGLWidth50 = vec_perm( vGLWidth50, vGLWidth50, permute1 );
1140 vGLHeight50 = vec_perm( vGLHeight50, vGLHeight50, permute2 );
/*
 * Each pair below writes a scalar into lane 0 of a floatToVector union and
 * splats it across the vector.  width is the limit a particle's width is
 * compared against further down; lanes that reach it are flagged dead.
 */
1143 width.f[0] = (streamSize+2.5f*flurry->streamExpansion) * screenRatio;
1144 width.v = (vector float) vec_splat((vector unsigned int)width.v, 0);
1146 briteV.f[0] = brightness;
1147 briteV.v = (vector float) vec_splat((vector unsigned int)briteV.v, 0);
1149 fTimeV.f[0] = (float) flurry->fTime;
1150 fTimeV.v = (vector float) vec_splat((vector unsigned int)fTimeV.v, 0);
1152 expansionV.f[0] = flurry->streamExpansion;
1153 expansionV.v = (vector float) vec_splat((vector unsigned int)expansionV.v, 0);
1155 screenRatioV.f[0] = screenRatio;
1156 screenRatioV.v = (vector float) vec_splat((vector unsigned int)screenRatioV.v, 0);
1158 hslash2V.f[0] = hslash2;
1159 hslash2V.v = (vector float) vec_splat((vector unsigned int)hslash2V.v, 0);
1161 wslash2V.f[0] = wslash2;
1162 wslash2V.v = (vector float) vec_splat((vector unsigned int)wslash2V.v, 0);
1164 streamSizeV.f[0] = streamSize;
1165 streamSizeV.v = (vector float) vec_splat((vector unsigned int)streamSizeV.v, 0);
1167 glWidthV.f[0] = global->sys_glWidth;
1168 glWidthV.v = (vector float) vec_splat((vector unsigned int)glWidthV.v, 0);
/* One iteration handles the four particles packed into s->p[i]. */
1170 for (i=0;i<NUMSMOKEPARTICLES/4;i++) {
1171 vector float thisWidth;
1173 vector float oldx, oldy, one_over_oldz;
1174 vector float xabs, yabs, mn;
1176 vector float one_over_d;
1177 vector bool int dnz;
/* Prefetch hint for a group a few iterations ahead. */
1180 vec_dst((int *)(&(s->p[i+4])), 0x00020200, 2);
/* Nothing to do when all four lanes carry dead == 1. */
1182 if (vec_all_eq(s->p[i].dead.v, (vector unsigned int)(1))) continue;
1184 blitBool = 0; /* keep track of particles that actually need to be drawn */
/* thisWidth = ((fTime - time) * streamExpansion + streamSize) * screenRatio */
1186 thisWidth = vec_sub(fTimeV.v, s->p[i].time.v);
1187 thisWidth = vec_madd(thisWidth, expansionV.v, streamSizeV.v);
1188 thisWidth = vec_madd(thisWidth, screenRatioV.v, zero);
/* vec_re is a reciprocal estimate, so 1/z here is approximate. */
1190 z.v = s->p[i].position[2].v;
1191 one_over_z = vec_re(z.v);
/*
 * Project the current position: sx = x * glWidth / z + wslash2,
 * sy = y * glWidth / z + hslash2 (glWidth is used for both axes, as in the
 * scalar routine).
 */
1193 sx = vec_madd(s->p[i].position[0].v, glWidthV.v, zero);
1194 sx = vec_madd(sx, one_over_z, wslash2V.v);
1195 sy = vec_madd(s->p[i].position[1].v, glWidthV.v, zero);
1196 sy = vec_madd(sy, one_over_z, hslash2V.v);
1198 oldz = s->p[i].oldposition[2].v;
/* w = max(1, thisWidth / z) */
1200 w = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_z, zero));
/* Same projection for the previous position. */
1202 oldx = s->p[i].oldposition[0].v;
1203 oldy = s->p[i].oldposition[1].v;
1204 one_over_oldz = vec_re(oldz);
1205 oldscreenx = vec_madd(oldx, glWidthV.v, zero);
1206 oldscreenx = vec_madd(oldscreenx, one_over_oldz, wslash2V.v);
1207 oldscreeny = vec_madd(oldy, glWidthV.v, zero);
1208 oldscreeny = vec_madd(oldscreeny, one_over_oldz, hslash2V.v);
1209 dx = vec_sub(sx,oldscreenx);
1210 dy = vec_sub(sy,oldscreeny);
/*
 * d = xabs + yabs - 0.6875 * min(xabs, yabs).
 * NOTE(review): xabs / yabs are presumably |dx| and |dy|, making this the
 * vector counterpart of the scalar routine's FastDistance2D(dx, dy) -
 * confirm against the statements that assign them.
 */
1214 mn = vec_min(xabs,yabs);
1215 d = vec_add(xabs,yabs);
1216 d = vec_madd(mn, (vector float)(-0.6875), d);
/* ow = max(1, thisWidth / oldz) */
1218 ow = vec_max((vector float)(1.0), vec_madd(thisWidth, one_over_oldz, zero));
/*
 * sm = w / d and os = ow / d.  Lanes where d is not > 0 are forced to zero
 * by the dnz mask so the reciprocal of zero never reaches the geometry.
 */
1219 one_over_d = vec_re(d);
1220 dnz = vec_cmpgt(d, zero);
1221 sm = vec_madd(w, one_over_d, zero);
1222 sm = vec_and(sm, dnz);
1223 os = vec_madd(ow, one_over_d, zero);
1224 os = vec_and(os, dnz);
/*
 * Build the per-lane skip mask (all ones = do not draw).  The dead flag is
 * rewritten from "already dead OR too wide" only; the off-screen and
 * near-z tests are OR-ed in afterwards, so they suppress drawing without
 * marking the particle dead.
 */
1227 intToVector tempMask;
1228 vector bool int mask = vec_cmpeq( s->p[i].dead.v, vec_splat_u32(1) ); /* -1 where true */
1229 vector bool int gtMask = vec_cmpge( thisWidth, width.v ); /* -1 where true */
1230 vector bool int glWidth50Test = vec_cmpgt( sx, (vector float)(vGLWidth50) ); /* -1 where true */
1231 vector bool int glHeight50Test = vec_cmpgt( sy, (vector float)(vGLHeight50) ); /* -1 where true */
1232 vector bool int test50x = vec_cmplt( sx, (vector float) (-50.0) );
1233 vector bool int test50y = vec_cmplt( sy, (vector float) (-50.0) );
1234 vector bool int testz = vec_cmplt( z.v, (vector float) (25.0) );
1235 vector bool int testoldz = vec_cmplt( oldz, (vector float) (25.0) );
1236 mask = vec_or( mask, gtMask );
1237 s->p[i].dead.v = vec_and( mask, vec_splat_u32( 1 ) );
1238 mask = vec_or( mask, glWidth50Test );
1239 mask = vec_or( mask, glHeight50Test );
1240 mask = vec_or( mask, test50x );
1241 mask = vec_or( mask, test50y );
1242 mask = vec_or( mask, testz );
1243 mask = vec_or( mask, testoldz );
1244 tempMask.v = (vector unsigned int)mask;
/*
 * ~mask is -1 on drawn lanes, so subtracting it advances animFrame by one
 * there and leaves skipped lanes untouched; the AND wraps it to 0..63.
 */
1246 s->p[i].animFrame.v = vec_sub( s->p[i].animFrame.v, vec_nor( mask, mask ) );
1247 s->p[i].animFrame.v = vec_and( s->p[i].animFrame.v, (vector unsigned int)(63) );
/* u0 = (frame & 7) / 8 */
1249 frameAnd7 = vec_ctf(vec_and(s->p[i].animFrame.v, (vector unsigned int)(7)),0);
1250 u0.v = vec_madd(frameAnd7, eighth, zero);
/*
 * v0 = (frame >> 3) / 8.  This has to use frameShift3, as the scalar routine
 * does; feeding frameAnd7 here made v0 identical to u0 and left frameShift3
 * unused.
 */
1252 frameShift3 = vec_ctf(vec_sr(s->p[i].animFrame.v, (vector unsigned int)(3)),0);
1253 v0.v = vec_madd(frameShift3, eighth, zero);
/* Opposite corner of the 0.125 x 0.125 texture cell. */
1255 u1.v = vec_add(u0.v, eighth);
1256 v1.v = vec_add(v0.v, eighth);
/*
 * cm = 1.375 - thisWidth / width on drawn lanes (vec_nmsub(a,b,c) yields
 * c - a*b); skipped lanes keep whatever cm held before.  The result is then
 * scaled by brightness.
 */
1258 one_over_width = vec_re(width.v);
1259 cm.v = vec_sel( vec_nmsub(thisWidth, one_over_width, (vector float)(1.375)), cm.v, mask );
1260 cm.v = vec_madd(cm.v, briteV.v, zero);
/* Count the drawn lanes (again via subtracting -1). */
1262 vSi = vec_sub( vSi, vec_nor( mask, mask ) );
/*
 * blitBool gets bit k set when lane k is drawn: keep the lane's bit where
 * the mask is clear, then sum the four lanes with two rotate-and-add steps.
 * After the sum every lane holds the same total, so it does not matter
 * which element vec_ste picks.
 */
1264 vector unsigned int blitMask = (vector unsigned int) (1, 2, 4, 8);
1265 vector unsigned int temp = (vector unsigned int)mask;
1266 temp = vec_andc( blitMask, temp );
1267 temp = vec_add( temp, vec_sld( temp, temp, 8 ) );
1268 temp = vec_add( temp, vec_sld( temp, temp, 4 ) );
1269 vec_ste( temp, 0, &blitBool );
/*
 * Interleave the texture coordinates per particle.  For lane k the "a/b"
 * results of the first group hold (u0, v0, u0, v1) and those of the second
 * group hold (u1, v1, u1, v0), i.e. the four corners of the quad in the
 * same order the scalar routine writes them.
 */
1274 vector float temp1, temp2, temp3, temp4;
1275 vector float result1a, result1b, result2a, result2b, result3a, result3b, result4a, result4b;
1277 temp1 = vec_mergeh( u0.v, u0.v );
1278 temp2 = vec_mergel( u0.v, u0.v );
1279 temp3 = vec_mergeh( v0.v, v1.v );
1280 temp4 = vec_mergel( v0.v, v1.v );
1282 result1a = vec_mergeh( temp1, temp3 );
1283 result1b = vec_mergel( temp1, temp3 );
1284 result2a = vec_mergeh( temp2, temp4 );
1285 result2b = vec_mergel( temp2, temp4 );
1287 temp1 = vec_mergeh( u1.v, u1.v );
1288 temp2 = vec_mergel( u1.v, u1.v );
1289 temp3 = vec_mergeh( v1.v, v0.v );
1290 temp4 = vec_mergel( v1.v, v0.v );
1292 result3a = vec_mergeh( temp1, temp3 );
1293 result3b = vec_mergel( temp1, temp3 );
1294 result4a = vec_mergeh( temp2, temp4 );
1295 result4b = vec_mergel( temp2, temp4 );
/*
 * Two 16-byte stores (eight floats) per particle, in lane order 0..3.
 * vec_st ignores the low four address bits, so the destination must be
 * 16-byte aligned.
 * NOTE(review): sti is expected to advance between these pairs, and each
 * pair is presumably guarded by the matching blitBool bit - confirm against
 * the surrounding statements.
 */
1299 vec_st( result1a, 0, &s->seraphimTextures[sti] );
1300 vec_st( result3a, 16, &s->seraphimTextures[sti]);
1305 vec_st( result1b, 0, &s->seraphimTextures[sti]);
1306 vec_st( result3b, 16, &s->seraphimTextures[sti]);
1311 vec_st( result2a, 0, &s->seraphimTextures[sti]);
1312 vec_st( result4a, 16, &s->seraphimTextures[sti]);
1317 vec_st( result2b, 0, &s->seraphimTextures[sti]);
1318 vec_st( result4b, 16, &s->seraphimTextures[sti]);
/* Scale each colour component (one vector per component) by cm. */
1324 cmv[0] = vec_madd(s->p[i].color[0].v, cm.v, zero);
1325 cmv[1] = vec_madd(s->p[i].color[1].v, cm.v, zero);
1326 cmv[2] = vec_madd(s->p[i].color[2].v, cm.v, zero);
1327 cmv[3] = vec_madd(s->p[i].color[3].v, cm.v, zero);
/*
 * 4x4 transpose: afterwards cmv[k] holds the four colour components of
 * lane k instead of one component for all four lanes.
 */
1329 vector float vI0, vI1, vI2, vI3;
1331 vI0 = vec_mergeh ( cmv[0], cmv[2] );
1332 vI1 = vec_mergeh ( cmv[1], cmv[3] );
1333 vI2 = vec_mergel ( cmv[0], cmv[2] );
1334 vI3 = vec_mergel ( cmv[1], cmv[3] );
1336 cmv[0] = vec_mergeh ( vI0, vI1 );
1337 cmv[1] = vec_mergel ( vI0, vI1 );
1338 cmv[2] = vec_mergeh ( vI2, vI3 );
1339 cmv[3] = vec_mergel ( vI2, vI3 );
/* Prefetch hint on the transposed colours. */
1342 vec_dst( cmv, 0x0D0100D0, 1 );
1345 vector float sxd, syd;
1346 vector float sxdm, sxdp, sydm, sydp;
1347 vector float oxd, oyd;
1348 vector float oxdm, oxdp, oydm, oydp;
1349 vector float vI0, vI1, vI2, vI3;
1350 vector float dxs, dys;
1351 vector float dxos, dyos;
1352 vector float dxm, dym;
/* m = 1 + sm */
1355 m = vec_add((vector float)(1.0), sm);
/* Offsets along (dx, dy) scaled by sm, os and m. */
1357 dxs = vec_madd(dx, sm, zero);
1358 dys = vec_madd(dy, sm, zero);
1359 dxos = vec_madd(dx, os, zero);
1360 dyos = vec_madd(dy, os, zero);
1361 dxm = vec_madd(dx, m, zero);
1362 dym = vec_madd(dy, m, zero);
/* Corners at the current position: (sx + dxm -/+ dys, sy + dym +/- dxs). */
1364 sxd = vec_add(sx, dxm);
1365 sxdm = vec_sub(sxd, dys);
1366 sxdp = vec_add(sxd, dys);
1368 syd = vec_add(sy, dym);
1369 sydm = vec_sub(syd, dxs);
1370 sydp = vec_add(syd, dxs);
/* Corners at the previous position, offset the opposite way by (dxm, dym). */
1372 oxd = vec_sub(oldscreenx, dxm);
1373 oxdm = vec_sub(oxd, dyos);
1374 oxdp = vec_add(oxd, dyos);
1376 oyd = vec_sub(oldscreeny, dym);
1377 oydm = vec_sub(oyd, dxos);
1378 oydp = vec_add(oyd, dxos);
/* Transpose so that svec[k] = (sxdm, sydp, sxdp, sydm) of lane k. */
1380 vI0 = vec_mergeh ( sxdm, sxdp );
1381 vI1 = vec_mergeh ( sydp, sydm );
1382 vI2 = vec_mergel ( sxdm, sxdp );
1383 vI3 = vec_mergel ( sydp, sydm );
1385 svec[0] = vec_mergeh ( vI0, vI1 );
1386 svec[1] = vec_mergel ( vI0, vI1 );
1387 svec[2] = vec_mergeh ( vI2, vI3 );
1388 svec[3] = vec_mergel ( vI2, vI3 );
/* Likewise ovec[k] = (oxdp, oydm, oxdm, oydp) of lane k. */
1390 vI0 = vec_mergeh ( oxdp, oxdm );
1391 vI1 = vec_mergeh ( oydm, oydp );
1392 vI2 = vec_mergel ( oxdp, oxdm );
1393 vI3 = vec_mergel ( oydm, oydp );
1395 ovec[0] = vec_mergeh ( vI0, vI1 );
1396 ovec[1] = vec_mergel ( vI0, vI1 );
1397 ovec[2] = vec_mergeh ( vI2, vI3 );
1398 ovec[3] = vec_mergel ( vI2, vI3 );
/*
 * Byte offsets of the next free slots: four colour vectors (one per quad
 * corner) starting at sci and two vertex vectors (four x,y pairs) starting
 * at svi.
 */
1402 int offset0 = (sci + 0) * sizeof( vector float );
1403 int offset1 = (sci + 1) * sizeof( vector float );
1404 int offset2 = (sci + 2) * sizeof( vector float );
1405 int offset3 = (sci + 3) * sizeof( vector float );
1406 int offset4 = (svi + 0) * sizeof( vector float );
1407 int offset5 = (svi + 1) * sizeof( vector float );
1408 vector float *colors = (vector float *)s->seraphimColors;
1409 vector float *vertices = (vector float *)s->seraphimVertices;
/*
 * Emit colours and vertices only for lanes whose blitBool bit is set.
 * NOTE(review): the offsets (and sci / svi) presumably advance after each
 * emitted lane - confirm against the surrounding statements.
 */
1410 for (kk=0; kk<4; kk++) {
1411 if (blitBool>>kk & 1) {
1412 vector float vcmv = cmv[kk];
1413 vector float vsvec = svec[kk];
1414 vector float vovec = ovec[kk];
1416 vec_st( vcmv, offset0, colors );
1417 vec_st( vcmv, offset1, colors );
1418 vec_st( vcmv, offset2, colors );
1419 vec_st( vcmv, offset3, colors );
1420 vec_st( vsvec, offset4, vertices );
1421 vec_st( vovec, offset5, vertices );
/* Sum the four per-lane counters into si, the number of quads to draw. */
1430 vSi = vec_add( vSi, vec_sld( vSi, vSi, 8 ) );
1431 vSi = vec_add( vSi, vec_sld( vSi, vSi, 4 ) );
1432 vec_ste( (vector signed int) vSi, 0, &si );
/* Submit everything in one call: four vertices per quad. */
1434 glColorPointer(4,GL_FLOAT,0,s->seraphimColors);
1435 glVertexPointer(2,GL_FLOAT,0,s->seraphimVertices);
1436 glTexCoordPointer(2,GL_FLOAT,0,s->seraphimTextures);
1437 glDrawArrays(GL_QUADS,0,si*4);