+
+#ifdef useAccumulator
+ if (useAccumulator) {
+ if (A->pool.count) {
+ threadpool_destroy (&A->pool);
+ A->pool.count = 0;
+ }
+
+ free (A->threads);
+ A->threads = NULL;
+
+ if (A->accImage) {
+ destroy_xshm_image (display, A->accImage, &A->shmInfo);
+ A->accImage = NULL;
+ }
+
+ free (A->palette);
+ A->palette = NULL;
+
+ if (A->cols) {
+ if (A->visualClass != TrueColor && A->numCols > 2)
+ XFreeColors (display, MI_COLORMAP(mi), A->cols, A->numCols, 0);
+ free (A->cols);
+ A->cols = NULL;
+ }
+ }
+#endif
+}
+
+/* NRAND() is also in use; making three PRNGs in total here. */
+
+/* ISO/IEC 9899 suggests this one. Doesn't require 64-bit math. */
+#define GOODRND(seed) ((seed) = (((seed) * 1103515245 + 12345) & 0x7fffffff))
+#define GOODRND_BITS 31
+
+/* Extremely cheap entropy: this is often a single LEA instruction. */
+#define CHEAPRND(seed) ((seed) = (seed) * 5)
+
+static void
+init_draw (const ATTRACTOR *A, PRM *x, PRM *y,
+ PRM *xmin, PRM *ymin, PRM *xmax, PRM *ymax, unsigned long *rnd)
+{
+ int n;
+ PRM xo, yo;
+
+ *xmin = UNIT;
+ *ymin = UNIT;
+ *xmax = -UNIT;
+ *ymax = -UNIT;
+
+ *x = *y = DBL_To_PRM(.0);
+ for (n = SKIP_FIRST; n; --n) {
+ (*A->Iterate) (A, *x, *y, &xo, &yo);
+
+#ifdef AUTO_ZOOM
+ if (xo > *xmax)
+ *xmax = xo;
+ if (xo < *xmin)
+ *xmin = xo;
+ if (yo > *ymax)
+ *ymax = yo;
+ if (yo < *ymin)
+ *ymin = yo;
+#endif
+
+ /* Can't use NRAND(), because that modifies global state in a
+ * thread-unsafe way.
+ */
+ *x = xo + (GOODRND(*rnd) >> (GOODRND_BITS - 3)) - 4;
+ *y = yo + (GOODRND(*rnd) >> (GOODRND_BITS - 3)) - 4;
+ }
+}
+
+static void
+recalc_scale (const ATTRACTOR *A, PRM xmin, PRM ymin, PRM xmax, PRM ymax,
+ DBL *Lx, DBL *Ly, PRM *mx, PRM *my)
+{
+#ifndef AUTO_ZOOM
+ xmin = -UNIT;
+ ymin = -UNIT;
+ xmax = UNIT;
+ ymax = UNIT;
+#endif
+
+ *Lx = zoom * (DBL) A->Width / (xmax - xmin);
+ *Ly = -zoom * (DBL) A->Height / (ymax - ymin);
+ *mx = A->Width/2 - (xmax + xmin) * *Lx / 2;
+ *my = A->Height/2 - (ymax + ymin) * *Ly / 2;
+}
+
+#ifdef useAccumulator
+
+static void
+thread_destroy (void *Self_Raw)
+{
+ THREAD *T = (THREAD *)Self_Raw;
+
+ aligned_free (T->accMap[0]);
+ (void) free((void *) T->accMap);
+ aligned_free (T->bloomRows);
+ aligned_free (T->colorRow);
+ aligned_free (T->motionBlur);
+}
+
+static int
+thread_create (void *Self_Raw, struct threadpool *pool, unsigned id)
+{
+ THREAD *T = (THREAD *)Self_Raw;
+ int i;
+ const ATTRACTOR *A = GET_PARENT_OBJ(ATTRACTOR, pool, pool);
+
+ memset (T, 0, sizeof(*T));
+
+ T->Attractor = A;
+ A->threads[id] = T;
+
+ T->Rnd = random();
+
+ /* The gap between y0 and y1 is to preheat the box blur. */
+ T->y1 = A->Height * id / pool->count;
+ T->y2 = A->Height * (id + 1) / pool->count;
+ T->y0 = T->y1 < pointSize ? 0 : T->y1 - pointSize;
+
+ T->accMap = (PIXEL0**)calloc(A->Height,sizeof(PIXEL0*));
+ if (!T->accMap) {
+ thread_destroy (T);
+ return ENOMEM;
+ }
+ T->accMap[0] = NULL;
+ if (aligned_malloc ((void **)&T->accMap[0], __BIGGEST_ALIGNMENT__,
+ A->alignedWidth * A->Height * sizeof(PIXEL0))) {
+ thread_destroy (T);
+ return ENOMEM;
+ }
+ for (i=0;i<A->Height;i++)
+ T->accMap[i] = T->accMap[0] + A->alignedWidth * i;
+
+ if (aligned_malloc ((void **)&T->bloomRows, __BIGGEST_ALIGNMENT__,
+ A->alignedWidth * (pointSize + 2) * sizeof(*T->bloomRows))) {
+ thread_destroy (T);
+ return ENOMEM;
+ }
+ if (aligned_malloc ((void **)&T->colorRow, __BIGGEST_ALIGNMENT__,
+ A->alignedWidth * sizeof(*T->colorRow))) {
+ thread_destroy (T);
+ return ENOMEM;
+ }
+ if (A->blurFac) {
+ if (aligned_malloc ((void **)&T->motionBlur, __BIGGEST_ALIGNMENT__,
+ A->alignedWidth * (T->y2 - T->y1) * sizeof(*T->motionBlur))) {
+ thread_destroy (T);
+ return ENOMEM;
+ }
+
+ memset (T->motionBlur, 0, A->alignedWidth * (T->y2 - T->y1) * sizeof(*T->motionBlur));
+ }
+
+ return 0;
+}
+
+static void
+points_thread (void *Self_Raw)
+{
+ /* Restricts viewable area to 2^(32-L_Bits). */
+ const unsigned L_Bits = 19; /* Max. image size: 8192x8192. */
+
+ THREAD *T = (THREAD *)Self_Raw;
+ const ATTRACTOR *A = T->Attractor;
+ int n;
+ PRM x, y, xo, yo;
+ DBL Lx, Ly;
+ PRM iLx, iLy, cx, cy;
+ void (*Iterate) (const ATTRACTOR *, PRM, PRM, PRM *, PRM *);
+ unsigned Rnd;
+ PRM xmax, xmin, ymax, ymin;
+
+ Iterate = A->Iterate;
+
+ if (useAccumulator) {
+ memset (T->accMap[0], 0, sizeof(PIXEL0) * A->alignedWidth * A->Height);
+ }
+
+ /* Using CHEAPRND() by itself occasionally gets stuck at 0 mod 8, so seed it
+ * from GOODRND().
+ */
+ init_draw (A, &x, &y, &xmin, &ymin, &xmax, &ymax, &T->Rnd);
+ recalc_scale (A, xmin, ymin, xmax, ymax, &Lx, &Ly, &cx, &cy);
+
+ Rnd = GOODRND(T->Rnd);
+
+ iLx = Lx * (1 << L_Bits);
+ iLy = Ly * (1 << L_Bits);
+ if (!iLx) /* Can happen with small windows. */
+ iLx = 1;
+ if (!iLy)
+ iLy = 1;
+
+ for (n = T->Attractor->Max_Pt / A->pool.count; n; --n) {
+ unsigned mx,my;
+ (*Iterate) (T->Attractor, x, y, &xo, &yo);
+ mx = ((iLx * x) >> L_Bits) + cx;
+ my = ((iLy * y) >> L_Bits) + cy;
+ /* Fun trick: making m[x|y] unsigned means we can skip mx<0 && my<0. */
+ if (mx<A->Width && my<A->Height)
+ T->accMap[my][mx]++;
+
+ #ifdef AUTO_ZOOM
+ if (xo > xmax)
+ xmax = xo;
+ if (xo < xmin)
+ xmin = xo;
+ if (yo > ymax)
+ ymax = yo;
+ if (yo < ymin)
+ ymin = yo;
+
+ if (!(n & 0xfff)) {
+ recalc_scale (A, xmin, ymin, xmax, ymax, &Lx, &Ly, &cx, &cy);
+ }
+ #endif
+
+ /* Skimp on the randomness. */
+ x = xo + (CHEAPRND(Rnd) >> (sizeof(Rnd) * 8 - 3)) - 4;
+ y = yo + (CHEAPRND(Rnd) >> (sizeof(Rnd) * 8 - 3)) - 4;
+ }
+}
+
+static void
+rasterize_thread (void *Self_Raw)
+{
+ THREAD *T = (THREAD *)Self_Raw;
+ const ATTRACTOR *A = T->Attractor;
+ unsigned i, j, k;
+ PRM xmax = 0, xmin = A->Width, ymax = 0, ymin = A->Height;
+ unsigned long colorScale =
+ (double)A->Width * A->Height
+ * (1 << COLOR_BITS) * brightness
+ * A->colorFac
+ * (zoom * zoom) / (0.9 * 0.9)
+ / 640.0 / 480.0
+ / (pointSize * pointSize)
+ * 800000.0
+ / (float)A->Max_Pt
+ * (float)A->numCols/256;
+ #ifdef VARY_SPEED_TO_AVOID_BOREDOM
+ unsigned pixelCount = 0;
+ #endif
+ PIXEL0 *motionBlurRow = T->motionBlur;
+ void *outRow = (char *)A->accImage->data
+ + A->accImage->bytes_per_line * T->y1;
+
+ /* Clang needs these for loop-vectorizing; A->Width doesn't work. */
+ unsigned w = A->Width, aw = A->alignedWidth;
+
+ if (A->numCols == 2) /* Brighter for monochrome. */
+ colorScale *= 4;
+
+ /* bloomRows: row ring buffer, bloom accumulator, in that order. */
+ memset (T->bloomRows, 0, (pointSize + 1) * aw * sizeof(*T->bloomRows));
+
+ /* This code is highly amenable to auto-vectorization; on GCC use -O3 to get
+ * that. On x86-32, also add -msse2.
+ */
+ for (j=T->y0;j<T->y2;j++) {
+ Bool has_col = False;
+ int accum = 0;
+
+ PIXEL0 *bloomRow =
+ ALIGN_HINT(T->bloomRows + A->alignedWidth * (j % pointSize));
+ PIXEL0 *accumRow =
+ ALIGN_HINT(T->bloomRows + A->alignedWidth * pointSize);
+ PIXEL1 *colRow = T->colorRow;
+
+ /* Moderately fast bloom. */
+
+ for (i=0;i<aw;i++) {
+ accumRow[i] -= bloomRow[i];
+ bloomRow[i] = 0;
+ }
+
+ for (k=0;k<A->pool.count;k++) {
+ const PIXEL0 *inRow = ALIGN_HINT(A->threads[k]->accMap[j]);
+ for (i=0;i<aw;i++) {
+ /* Lots of last-level cache misses. Such is life. */
+ bloomRow[i] += inRow[i];
+ }
+ }
+
+ /* Hardware prefetching works better going forwards than going backwards.
+ * Since this blurs/blooms/convolves in-place, it expands points to the
+ * right instead of to the left.
+ */
+ for (i=0;i<pointSize-1;i++)
+ accum += bloomRow[i];
+
+ for (i=0;i<aw;i++) {
+ PIXEL0 oldBloom = bloomRow[i];
+
+ /* alignedWidth has extra padding for this one line. */
+ accum += bloomRow[i+pointSize-1];
+
+ bloomRow[i] = accum;
+ accumRow[i] += accum;
+ accum -= oldBloom;
+ }
+
+ if (j>=T->y1) {
+ PIXEL0 *accumRow1 = A->blurFac ? motionBlurRow : accumRow;
+ PIXEL0 blurFac = A->blurFac;
+
+ if (blurFac) {
+ /* TODO: Do I vectorize OK? */
+ if (blurFac == 0x8000) {
+ /* Optimization for the default. */
+ for (i=0;i<aw;i++)
+ motionBlurRow[i] = (motionBlurRow[i] >> 1) + accumRow[i];
+ } else {
+ for (i=0;i<aw;i++)
+ motionBlurRow[i] = (PIXEL0)(((PIXEL0X)motionBlurRow[i] * blurFac) >> 16) + accumRow[i];
+ }
+
+ motionBlurRow += aw;
+ }
+
+ for (i=0;i<aw;i++) {
+ unsigned col = (accumRow1[i] * colorScale) >> COLOR_BITS;
+ if (col>A->numCols-1) {
+ col = A->numCols-1;
+ }
+ #ifdef VARY_SPEED_TO_AVOID_BOREDOM
+ if (col>0) {
+ if (col<A->numCols-1) /* we don't count maxxed out pixels */
+ pixelCount++;
+ if (i > xmax)
+ xmax = i;
+ if (i < xmin)
+ xmin = i;
+ has_col = True;
+ }
+ #endif
+ colRow[i] = A->cols[col];
+ }
+
+ #ifdef VARY_SPEED_TO_AVOID_BOREDOM
+ if (has_col) {
+ if (j > ymax)
+ ymax = j;
+ if (j < ymin)
+ ymin = j;
+ }
+ #endif
+
+#if 0
+ for (i=0;i<aw;i++) {
+ if (MI_NPIXELS(mi) < 2)
+ XSetForeground(display, gc, MI_WHITE_PIXEL(mi));
+ else
+ /*XSetForeground(display, gc, MI_PIXEL(mi, A->Col % MI_NPIXELS(mi)));*/
+ XSetForeground(display, gc, cols[col].pixel);
+
+ if (A->dbuf != None) {
+ XSetForeground(display, A->dbuf_gc, cols[col].pixel);
+ XDrawPoint(display, A->dbuf, A->dbuf_gc, i, j);
+ } else {
+ XSetForeground(display, gc, cols[col].pixel);
+ XDrawPoint(display, window, gc, i, j);
+ }
+ }
+#endif
+
+ if (A->accImage->bits_per_pixel==32 &&
+ A->accImage->byte_order==LOCAL_BYTE_ORDER) {
+ for (i=0;i<aw;i++)
+ ((uint32_t *)outRow)[i] = colRow[i];
+ } else if (A->accImage->bits_per_pixel==16 &&
+ A->accImage->byte_order==LOCAL_BYTE_ORDER) {
+ for (i=0;i<aw;i++)
+ ((uint16_t *)outRow)[i] = colRow[i];
+ } else if (A->accImage->bits_per_pixel==8) {
+ for (i=0;i<aw;i++)
+ ((uint8_t *)outRow)[i] = colRow[i];
+ } else {
+ for (i=0;i<w;i++)
+ XPutPixel (A->accImage, i, j, colRow[i]);
+ }
+
+ outRow = (char *)outRow + A->accImage->bytes_per_line;
+ }
+ }
+
+ /*
+ uint64_t dt = 0;
+ uint64_t t0 = tick();
+ dt += tick() - t0;
+
+ printf("B %g\t(%d,%d)\t%dx%d\t%d\n", 1000.0 * dt / frq(),
+ xmin, ymin, xmax - xmin, ymax - ymin, pixelCount);
+ */
+
+ T->xmax = xmax;
+ T->xmin = xmin;
+ T->ymax = ymax;
+ T->ymin = ymin;
+ T->pixelCount = pixelCount;
+}
+
+static void
+ramp_color (const XColor *color_in, XColor *color_out, unsigned i, unsigned n)
+{
+ float li;
+ #define MINBLUE 1
+ #define FULLBLUE 128
+ #define LOW_COLOR(c) ((c)*li/FULLBLUE)
+ #define HIGH_COLOR(c) ((65535-(c))*(li-FULLBLUE)/(256-FULLBLUE)+(c))
+ li = MINBLUE
+ + (255.0-MINBLUE) * log(1.0 + ACC_GAMMA*(float)i/n)
+ / log(1.0 + ACC_GAMMA);
+ if (li<FULLBLUE) {
+ color_out->red = LOW_COLOR(color_in->red);
+ color_out->green = LOW_COLOR(color_in->green);
+ color_out->blue = LOW_COLOR(color_in->blue);
+ } else {
+ color_out->red = HIGH_COLOR(color_in->red);
+ color_out->green = HIGH_COLOR(color_in->green);
+ color_out->blue = HIGH_COLOR(color_in->blue);
+ }
+}
+
+#endif
+
+static void
+draw_points (Display *display, Drawable d, GC gc, const void *Buf,
+ unsigned Count)
+{
+ if (pointSize == 1)
+ XDrawPoints(display, d, gc, (XPoint *)Buf, Count, CoordModeOrigin);
+ else
+ XFillRectangles(display, d, gc, (XRectangle *)Buf, Count);