- unsigned int n,q;
- Visual *vi;
- Colormap cmap;
- Bool writable;
- XWindowAttributes xwa;
- GC gc;
- XGCValues gcv;
- firepix *fpix, *ffpix;
- fireshell *fshell, *ffshell;
- glow_on = get_boolean_resource("glow" , "Boolean");
- flash_on = get_boolean_resource("flash" , "Boolean");
- shoot = get_boolean_resource("shoot" , "Boolean");
- verbose = get_boolean_resource("verbose" , "Boolean");
- rndlife = get_integer_resource("maxlife" , "Integer");
- delay = get_integer_resource("delay" , "Integer");
- minlife = rndlife/4;
- if(rndlife<1000) flash_fade=0.98;
- if(rndlife<500) flash_fade=0.97;
- if(verbose){
- printf("Fireworkx %s - pyrotechnics simulation program \n", FWXVERSION);
- printf("Copyright (c) 1999-2005 Rony B Chandran <ronybc@asia.com> \n\n");
- printf("url: http://www.ronybc.8k.com \n\n");}
-
- XGetWindowAttributes(display,win,&xwa);
- depth = xwa.depth;
- vi = xwa.visual;
- cmap = xwa.colormap;
- bigendian = (ImageByteOrder(display) == MSBFirst);
-
- if(depth==8){
- if(verbose){
- printf("Pseudocolor color: use '-noflash' for better results.\n");}
- colors = (XColor *) calloc(sizeof(XColor),ncolors+1);
- writable = False;
- make_smooth_colormap(display, vi, cmap, colors, &ncolors,
- False, &writable, True);
- }
- gc = XCreateGC(display, win, 0, &gcv);
-
- resize(display,win); /* initialize palakas */
-
- ffpix = malloc(sizeof(firepix) * PIXCOUNT * SHELLCOUNT);
- ffshell = malloc(sizeof(fireshell) * SHELLCOUNT);
- fshell = ffshell;
- fpix = ffpix;
- for (n=0;n<SHELLCOUNT;n++){
- fshell->fpix = fpix;
- recycle (fshell,rnd(width),rnd(height));
- fshell++;
- fpix += PIXCOUNT; }
-
- while(1) {
- for(q=FTWEAK;q;q--){
- fshell=ffshell;
- for(n=SHELLCOUNT;n;n--){
- if (!explode(fshell)){
- recycle(fshell,rnd(width),rnd(height)); }
- fshell++; }}
-#if HAVE_X86_MMX
- if(glow_on) mmx_glow(palaka1,width,height,8,palaka2);
+ fs->vshift_phase = fs->vshift_phase + 0.008;
+ fs->v = fabs(sin(fs->vshift_phase));
+ fs_roll_rgb(fs);
+}
+
+/*
+ * Advance one fireshell by a single step and plot it into st->palaka1
+ * (4 bytes per pixel; sparks are written in B, G, R byte order).
+ *
+ * While fs->mortar_fired is set the shell only climbs: cy is decremented,
+ * the flash level is derived from the remaining distance to explode_y and
+ * one horizontally jittered trail pixel is drawn.  In the step where cy
+ * reaches explode_y the flag is cleared, the colours are mixed, the light
+ * map is rendered and the sparks are simulated in the same call.
+ *
+ * Returns 1 while the mortar is still rising; otherwise returns the
+ * shell's life counter after decrementing it.
+ */
+static int explode(struct state *st, fireshell *fs)
+{
+    unsigned char *canvas = st->palaka1;
+    unsigned int   rows   = st->height;
+    unsigned int   cols   = st->width;
+    firepix       *sparks = fs->fpix;
+    unsigned char *px;
+    unsigned char  red, green, blue;
+    unsigned int   i;
+    float          drag;
+
+    if (fs->mortar_fired)
+    {
+        --fs->cy;
+        if (fs->cy != fs->explode_y)
+        {
+            /* Still rising: set all three flash channels to the same level
+               and leave a trail pixel near (cx, cy). */
+            fs->flash_b = 50 + (fs->cy - fs->explode_y) * 10;
+            fs->flash_g = fs->flash_b;
+            fs->flash_r = fs->flash_g;
+            px = canvas + (fs->cy * cols + fs->cx + rnd(5) - 2) * 4;
+            px[0] = (rnd(32) + 128);
+            px[1] = (rnd(32) + 128);
+            px[2] = (rnd(32) + 128);
+            return 1;
+        }
+
+        /* Reached the burst height: switch to the exploded state. */
+        fs->mortar_fired = 0;
+        mix_colors(fs);
+        render_light_map(st, fs);
+    }
+
+    /* Per-shell colour effects. */
+    if ((fs->bicolor + 1) % 50 == 0)
+    {
+        rotate_hue(fs, 180);
+    }
+    if (fs->bicolor)
+    {
+        --fs->bicolor;
+    }
+    if (fs->hshift)
+    {
+        rotate_hue(fs, rnd(8));
+    }
+    if (fs->vshift)
+    {
+        wave_value(fs);
+    }
+
+    /* Fade each flash channel while it is still above 1.0. */
+    if (fs->flash_r > 1.0)
+    {
+        fs->flash_r *= st->flash_fade;
+    }
+    if (fs->flash_g > 1.0)
+    {
+        fs->flash_g *= st->flash_fade;
+    }
+    if (fs->flash_b > 1.0)
+    {
+        fs->flash_b *= st->flash_fade;
+    }
+
+    /* Sample drag and colour only after the effects above have run. */
+    drag  = fs->air_drag;
+    red   = fs->r;
+    green = fs->g;
+    blue  = fs->b;
+
+    for (i = 0; i < PIXCOUNT; i++)
+    {
+        firepix *fp = &sparks[i];
+
+        if (fp->burn)
+        {
+            --fp->burn;
+
+            /* Update velocity first, then move by the new velocity.
+               Shells with fs->flies set get a larger random jitter. */
+            if (fs->flies)
+            {
+                fp->xv = fp->xv * drag + frand(0.1) - 0.05;
+                fp->x += fp->xv;
+                fp->yv = fp->yv * drag + frand(0.1) - 0.05 + G_ACCELERATION;
+                fp->y += fp->yv;
+            }
+            else
+            {
+                fp->xv = fp->xv * drag + frand(0.01) - 0.005;
+                fp->x += fp->xv;
+                fp->yv = fp->yv * drag + frand(0.005) - 0.0025 + G_ACCELERATION;
+                fp->y += fp->yv;
+            }
+
+            /* Below the bottom edge: bounce when rnd(5) yields 3,
+               otherwise the spark goes out. */
+            if (fp->y > rows)
+            {
+                if (rnd(5) == 3)
+                {
+                    fp->yv *= -0.24;
+                    fp->y = rows;
+                }
+                else
+                {
+                    fp->burn = 0;
+                }
+            }
+
+            /* Plot only sparks that lie strictly inside the image. */
+            if (fp->x > 0 && fp->x < cols && fp->y > 0 && fp->y < rows)
+            {
+                px = canvas + ((int)fp->y * cols + (int)fp->x) * 4;
+                px[0] = blue;
+                px[1] = green;
+                px[2] = red;
+            }
+        }
+    }
+
+    --fs->life;
+    return fs->life;
+}
+
+#ifdef __SSE2__
+
+/* SSE2 optimized versions of glow_blur() and chromo_2x2_light() */
+
+static void glow_blur(struct state *st) /* SSE2: weighted 3x3 blur of palaka1, written back in place; a brighter copy of the same sum goes to palaka2. */
+{
+ unsigned int n, nn; /* n: byte offset of the current 16-byte chunk; nn: number of bytes to process */
+ unsigned char *ps = st->palaka1; /* current row; the blurred result is also stored back here */
+ unsigned char *pd = st->palaka2; /* receives the sum >> 3 variant */
+ unsigned char *pa = st->palaka1 - (st->width * 4); /* width*4 bytes before ps: the row above, given the 4-byte pixel stride used here */
+ unsigned char *pb = st->palaka1 + (st->width * 4); /* width*4 bytes after ps: the row below */
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4; /* xmm0: zero; xmm1/xmm2: low/high 16-bit accumulators; xmm3/xmm4: scratch */
+ /* NOTE(review): pa starts before palaka1, and the n+4 / n+8 / pb accesses run past nn bytes; presumably the buffers are padded on both sides -- confirm at the allocation site. */
+ xmm0 = _mm_setzero_si128(); /* zero vector used to widen bytes to 16-bit lanes */
+ nn = st->width * st->height * 4;
+ for (n = 0; n < nn; n+=16) /* 16 bytes per iteration */
+ {
+ _mm_prefetch((const void *)&ps[n+16],_MM_HINT_T0); /* prefetch the next chunk of each of the three rows */
+ _mm_prefetch((const void *)&pa[n+16],_MM_HINT_T0);
+ _mm_prefetch((const void *)&pb[n+16],_MM_HINT_T0);
+ /* current row: bytes at n (weight 1), n+4 (weight 8), n+8 (weight 1) */
+ xmm1 = _mm_load_si128((const __m128i*)&ps[n]); /* aligned load: assumes palaka1 is 16-byte aligned -- TODO confirm */
+ xmm2 = xmm1;
+ xmm1 = _mm_unpacklo_epi8(xmm1,xmm0); /* low 8 bytes -> 16-bit lanes */
+ xmm2 = _mm_unpackhi_epi8(xmm2,xmm0); /* high 8 bytes -> 16-bit lanes */
+ xmm3 = _mm_loadu_si128((const __m128i*)&ps[n+4]); /* the bytes being blurred (same offset as the stores below) */
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm3 = _mm_slli_epi16(xmm3,3); /* centre weight: multiply by 8 */
+ xmm4 = _mm_slli_epi16(xmm4,3);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ xmm3 = _mm_loadu_si128((const __m128i*)&ps[n+8]);
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ /* row above: bytes at n, n+4, n+8, each with weight 1 */
+ xmm3 = _mm_load_si128((const __m128i*)&pa[n]); /* aligned load: additionally needs width*4 to be a multiple of 16 -- TODO confirm */
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ xmm3 = _mm_loadu_si128((const __m128i*)&pa[n+4]);
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ xmm3 = _mm_loadu_si128((const __m128i*)&pa[n+8]);
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ /* row below: bytes at n, n+4, n+8, each with weight 1 */
+ xmm3 = _mm_load_si128((const __m128i*)&pb[n]); /* aligned load: same alignment assumption as pa[n] */
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ xmm3 = _mm_loadu_si128((const __m128i*)&pb[n+4]);
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ xmm3 = _mm_loadu_si128((const __m128i*)&pb[n+8]);
+ xmm4 = xmm3;
+ xmm3 = _mm_unpacklo_epi8(xmm3,xmm0);
+ xmm4 = _mm_unpackhi_epi8(xmm4,xmm0);
+ xmm1 = _mm_add_epi16(xmm1,xmm3);
+ xmm2 = _mm_add_epi16(xmm2,xmm4);
+ /* the nine terms carry a total weight of 16 (8 + 8 * 1) */
+ xmm3 = xmm1; /* keep a copy of the sums for the second output */
+ xmm4 = xmm2;
+ xmm1 = _mm_srli_epi16(xmm1,4); /* sum / 16: the weighted average */
+ xmm2 = _mm_srli_epi16(xmm2,4);
+ xmm3 = _mm_srli_epi16(xmm3,3); /* sum / 8: twice the average, may exceed 255 */
+ xmm4 = _mm_srli_epi16(xmm4,3);
+ xmm1 = _mm_packus_epi16(xmm1,xmm2); /* narrow back to bytes with unsigned saturation */
+ xmm3 = _mm_packus_epi16(xmm3,xmm4); /* saturation clamps the doubled values at 255 */
+
+ _mm_storeu_si128((__m128i*)&ps[n+4], xmm1); /* in place: these bytes are read again by later iterations */
+ _mm_storeu_si128((__m128i*)&pd[n+4], xmm3);
+ }
+}
+
+/*
+ * SSE2 version: add the coloured flash light of four fireshells to the
+ * image in st->palaka2 (4 bytes per pixel, B, G, R, unused).
+ *
+ * st->light_map is consumed sequentially.  Each light cell is four floats,
+ * one weight per shell (fs[0]..fs[3]); the cell's light is the weighted sum
+ * of the shells' flash_b/g/r values.  One cell is applied to a 2x2 pixel
+ * square: the packed light is duplicated for two adjacent pixels, the same
+ * values are added to the row at mem and the row at mem + nl, and the outer
+ * loop then skips the second row.  Each inner iteration handles two cells
+ * (8 floats, 4 pixels wide).
+ *
+ * The additions are done in 16-bit lanes and packed back with unsigned
+ * saturation, so bright pixels clamp at 255.
+ *
+ * NOTE(review): the aligned load/store on mem assumes palaka2 is 16-byte
+ * aligned and that width is a multiple of 4; the code also assumes
+ * fireshell_array holds at least four shells -- confirm against the
+ * allocation code.
+ */
+static void chromo_2x2_light(struct state *st)
+{
+    __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
+    __m128i xmi4, xmi5, xmi6, xmi7;
+
+    unsigned int x, y, v = 0;
+    unsigned int nl = st->width * 4;            /* bytes per image row */
+    unsigned char *mem = st->palaka2;
+    fireshell *fs = st->fireshell_array;
+
+    /* Flash colour of each shell as (b, g, r, 0), matching pixel byte order. */
+    xmm0 = _mm_setr_ps(fs[0].flash_b, fs[0].flash_g, fs[0].flash_r, 0.0);
+    xmm1 = _mm_setr_ps(fs[1].flash_b, fs[1].flash_g, fs[1].flash_r, 0.0);
+    xmm2 = _mm_setr_ps(fs[2].flash_b, fs[2].flash_g, fs[2].flash_r, 0.0);
+    xmm3 = _mm_setr_ps(fs[3].flash_b, fs[3].flash_g, fs[3].flash_r, 0.0);
+
+    for (y = st->height/2; y; y--, mem += nl)
+    {
+        for (x = st->width/4; x; x--, v += 8, mem += 16)
+        {
+            /* Light of the first cell: sum of shell colour * weight. */
+            xmm4 = _mm_set1_ps(st->light_map[v+0]);
+            xmm5 = xmm0;
+            xmm5 = _mm_mul_ps(xmm5,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+1]);
+            xmm4 = _mm_mul_ps(xmm4,xmm1);
+            xmm5 = _mm_add_ps(xmm5,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+2]);
+            xmm4 = _mm_mul_ps(xmm4,xmm2);
+            xmm5 = _mm_add_ps(xmm5,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+3]);
+            xmm4 = _mm_mul_ps(xmm4,xmm3);
+            xmm5 = _mm_add_ps(xmm5,xmm4);
+
+            /* Light of the second cell. */
+            xmm4 = _mm_set1_ps(st->light_map[v+4]);
+            xmm6 = xmm0;
+            xmm6 = _mm_mul_ps(xmm6,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+5]);
+            xmm4 = _mm_mul_ps(xmm4,xmm1);
+            xmm6 = _mm_add_ps(xmm6,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+6]);
+            xmm4 = _mm_mul_ps(xmm4,xmm2);
+            xmm6 = _mm_add_ps(xmm6,xmm4);
+            xmm4 = _mm_set1_ps(st->light_map[v+7]);
+            xmm4 = _mm_mul_ps(xmm4,xmm3);
+            xmm6 = _mm_add_ps(xmm6,xmm4);
+
+            /* Convert to 16-bit lanes; packing a register with itself
+               repeats the (b, g, r, 0) light for two adjacent pixels. */
+            xmi6 = _mm_cvtps_epi32(xmm5);
+            xmi7 = _mm_cvtps_epi32(xmm6);
+            xmi6 = _mm_packs_epi32(xmi6,xmi6);
+            xmi7 = _mm_packs_epi32(xmi7,xmi7);
+
+            /* Upper row of the 2x2 squares.  Interleaving xmi4 with itself
+               and shifting right by 8 zero-extends each byte to 16 bits.
+               The low half previously interleaved with xmi5, which is not
+               yet assigned on the first pass; its bytes were shifted out,
+               so the result is unchanged. */
+            xmi4 = _mm_load_si128((const __m128i*) mem);
+            xmi5 = _mm_unpacklo_epi8(xmi4,xmi4);
+            xmi5 = _mm_srli_epi16(xmi5,8);
+            xmi4 = _mm_unpackhi_epi8(xmi4,xmi4);
+            xmi4 = _mm_srli_epi16(xmi4,8);
+            xmi5 = _mm_add_epi16(xmi5,xmi6);
+            xmi4 = _mm_add_epi16(xmi4,xmi7);
+            xmi5 = _mm_packus_epi16(xmi5,xmi4);
+            _mm_store_si128((__m128i*) mem, xmi5);
+
+            /* Lower row of the same squares, using the same light values. */
+            xmi4 = _mm_load_si128((const __m128i*) &mem[nl]);
+            xmi5 = _mm_unpacklo_epi8(xmi4,xmi4);
+            xmi5 = _mm_srli_epi16(xmi5,8);
+            xmi4 = _mm_unpackhi_epi8(xmi4,xmi4);
+            xmi4 = _mm_srli_epi16(xmi4,8);
+            xmi5 = _mm_add_epi16(xmi5,xmi6);
+            xmi4 = _mm_add_epi16(xmi4,xmi7);
+            xmi5 = _mm_packus_epi16(xmi5,xmi4);
+            _mm_store_si128((__m128i*) &mem[nl], xmi5);
+        }
+    }
+}
+