+/*
+ * FAST_SQRT_EXTRA_BITS controls the shift count used by fast_log2() and
+ * fast_inv_log2(): both shift by (23 - FAST_SQRT_EXTRA_BITS), so a larger
+ * value keeps more low-order bits of the float bit pattern.
+ *
+ * Observed results for each setting:
+ * = 3: Smallest useful value.
+ * = 5/6: A little bit of banding; the wave_height table is on par with the
+ * regular sqrt() code.
+ * = 7: No apparent difference from the original at radius = 800.
+ * = 8: One more just to be comfortable.
+ */
+
+# define FAST_SQRT_EXTRA_BITS 8
+
+/*
+ * Overlays a 32-bit unsigned integer and a float so that one member can be
+ * stored and the other read back; fast_log2() and fast_inv_log2() use it to
+ * reach the bit pattern of a float.
+ * NOTE(review): assumes float is 32 bits wide -- confirm for target platforms.
+ */
+union int_float
+{
+ uint32_t i; /* Integer view of the same storage. */
+ float f; /* Floating-point view of the same storage. */
+};
+
+/*
+ * Approximate, scaled base-2 logarithm of x, computed from the bit pattern of
+ * (float)x.
+ *
+ * 0x3f800000 is subtracted from the bit pattern (the encoding of 1.0f,
+ * assuming IEEE 754 single precision) and the difference is shifted right by
+ * (23 - FAST_SQRT_EXTRA_BITS). One is added to the shifted value, so the
+ * result is nonzero for every nonzero x; x == 0 returns 0.
+ */
+static unsigned fast_log2(unsigned x)
+{
+  union int_float bits;
+  unsigned scaled;
+
+  if(x == 0)
+    return 0;
+
+  bits.f = x;
+  scaled = (bits.i - 0x3f800000) >> (23 - FAST_SQRT_EXTRA_BITS);
+  return scaled + 1;
+}
+
+/*
+ * Inverse of the fast_log2() encoding: rebuilds a float from a scaled
+ * logarithm x.
+ *
+ * (x - 1) is shifted left by (23 - FAST_SQRT_EXTRA_BITS), added to 0x3f800000
+ * (the encoding of 1.0f, assuming IEEE 754 single precision), and the sum is
+ * reinterpreted as a float. x == 0 returns 0.0f.
+ */
+static float fast_inv_log2(unsigned x)
+{
+  union int_float bits;
+
+  if(x == 0)
+    return 0.0f;
+
+  bits.i = 0x3f800000 + ((x - 1) << (23 - FAST_SQRT_EXTRA_BITS));
+  return bits.f;
+}
+
+#endif
+
+#ifdef USE_FAST_SQRT_BIGTABLE2
+
+/* I eyeballed these figures. They could be improved. - D.O. */
+
+/*
+ * FAST_TABLE() shifts inputs below FAST_SQRT_CUTOFF right by
+ * FAST_SQRT_DISCARD_BITS1, and inputs at or above it right by
+ * FAST_SQRT_DISCARD_BITS2.
+ */
+# define FAST_SQRT_DISCARD_BITS1 4
+/* = 5: Dot in center is almost invisible at radius = 800. */
+/* = 4: Dot in center looks OK at radius = 50. */
+
+/* 156/2740/9029 */
+/* # define FAST_SQRT_DISCARD_BITS2 8 */
+/* # define FAST_SQRT_CUTOFF (64 * 64) */
+
+/* 156/2242/5386 */
+# define FAST_SQRT_DISCARD_BITS2 9
+/*
+ * Parenthesized so the product stays a single operand wherever the macro is
+ * expanded (e.g. next to %, / or a unary operator).
+ */
+# define FAST_SQRT_CUTOFF (128 * 128)
+
+/*
+ * This is a little faster:
+ * 44.5 FPS, 19/5000/17578
+ *
+ * # define FAST_SQRT_DISCARD_BITS1 7
+ * # define FAST_SQRT_DISCARD_BITS2 7
+ * # define FAST_SQRT_CUTOFF 0
+ *
+ * For radius = 800, FAST_SQRT_DISCARD_BITS2 =
+ * = 9/10: Approximately the original table size, some banding near origins.
+ * = 7: wave_height is 20 KB, and just fits inside a 32K L1 cache.
+ * = 6: Nearly indistinguishable from original
+ */
+
+/*
+ FAST_TABLE(x) maps x to a table index at two resolutions: inputs below
+ FAST_SQRT_CUTOFF are shifted right by FAST_SQRT_DISCARD_BITS1, inputs at or
+ above it by FAST_SQRT_DISCARD_BITS2, offset so the two ranges join up.
+
+ FAST_TABLE(x) is equivalent to, but slightly faster than:
+ x < FAST_SQRT_CUTOFF ?
+ (x >> FAST_SQRT_DISCARD_BITS1) :
+ ((x - FAST_SQRT_CUTOFF) >> FAST_SQRT_DISCARD_BITS2) +
+ (FAST_SQRT_CUTOFF >> FAST_SQRT_DISCARD_BITS1);
+
+ The equivalence requires FAST_SQRT_DISCARD_BITS2 >= FAST_SQRT_DISCARD_BITS1
+ and FAST_SQRT_CUTOFF to be a multiple of (1 << FAST_SQRT_DISCARD_BITS1).
+
+ The argument x is expanded more than once, so it must not have side effects.
+*/
+
+#define FAST_TABLE(x) \
+ ((x) < FAST_SQRT_CUTOFF ? \
+ ((x) >> FAST_SQRT_DISCARD_BITS1) : \
+ (((x) + \
+ ((FAST_SQRT_CUTOFF << (FAST_SQRT_DISCARD_BITS2 - \
+ FAST_SQRT_DISCARD_BITS1)) - FAST_SQRT_CUTOFF)) >> \
+ FAST_SQRT_DISCARD_BITS2))
+
+/*
+ * Converts a FAST_TABLE() index back to an input value, returned as a double.
+ *
+ * Indices below (FAST_SQRT_CUTOFF >> FAST_SQRT_DISCARD_BITS1) are shifted left
+ * by FAST_SQRT_DISCARD_BITS1. Larger indices are rebased to that boundary,
+ * shifted left by FAST_SQRT_DISCARD_BITS2, and offset by FAST_SQRT_CUTOFF.
+ */
+static double fast_inv_table(unsigned x)
+{
+  const unsigned split = FAST_SQRT_CUTOFF >> FAST_SQRT_DISCARD_BITS1;
+  unsigned value;
+
+  if(x < split)
+    value = x << FAST_SQRT_DISCARD_BITS1;
+  else
+    value = ((x - split) << FAST_SQRT_DISCARD_BITS2) + FAST_SQRT_CUTOFF;
+
+  return value;
+}
+
+#endif
+
+/*
+ * Releases c->ximage (through destroy_xshm_image() when c->use_shm is set,
+ * otherwise through XDestroyImage()) and also destroys c->row.
+ *
+ * Both pointers are reset to NULL afterwards: create_image() tests
+ * !c->ximage to decide whether it still has to allocate an image, and a
+ * second call to this function must not free anything twice.
+ *
+ * Does nothing unless USE_XIMAGE is defined.
+ */
+static void destroy_image(Display* dpy, struct inter_context* c)
+{
+#ifdef USE_XIMAGE
+  if(c->ximage) {
+# ifdef HAVE_XSHM_EXTENSION
+    if(c->use_shm) {
+      destroy_xshm_image(dpy, c->ximage, &c->shm_info);
+    } else
+# endif
+    {
+      /* Also frees c->ximage->data, which isn't allocated by XCreateImage. */
+      XDestroyImage(c->ximage);
+    }
+    c->ximage = NULL;
+  }
+
+  free(c->row);
+  c->row = NULL;
+#endif
+}
+
+/*
+ * Releases the resources held by the context c: the pixmap and copy GC (when
+ * set), the image and row buffer (via destroy_image()), the palette, the
+ * per-color GCs (only when USE_XIMAGE is not defined), and the wave_height
+ * and source buffers. The context structure itself is not freed here.
+ *
+ * check_no_mem() calls this on allocation failure, so c may be only partly
+ * initialized; c->gcs is therefore checked before it is dereferenced.
+ * NOTE(review): free_colors() still receives c->pal unchecked when
+ * c->colors > 2 -- confirm it tolerates a NULL palette.
+ */
+static void inter_free(Display* dpy, struct inter_context* c)
+{
+#ifndef USE_XIMAGE
+  unsigned i;
+#endif
+
+  if(c->pix_buf)
+    XFreePixmap(dpy, c->pix_buf);
+
+  if(c->copy_gc)
+    XFreeGC(dpy, c->copy_gc);
+
+  destroy_image(dpy, c);
+
+  if(c->colors <= 2)
+    free(c->pal);
+  else
+    free_colors(c->screen, c->cmap, c->pal, c->colors);
+
+#ifndef USE_XIMAGE
+  if(c->gcs) {
+    for(i = 0; i != c->colors; ++i)
+      XFreeGC(dpy, c->gcs[i]);
+    free(c->gcs);
+  }
+#endif
+
+  free(c->wave_height);
+  free(c->source);
+}
+
+/*
+ * Reports an out-of-memory condition on stderr, using the system's message
+ * for ENOMEM, and terminates the process with exit status 1.
+ */
+static void abort_no_mem(void)
+{
+  const char *reason = strerror(ENOMEM);
+
+  fprintf(stderr, "interference: %s\n", reason);
+  exit(1);
+}
+
+/*
+ * Allocation guard: returns normally when ptr is non-NULL. When ptr is NULL,
+ * frees the context via inter_free() and then exits through abort_no_mem().
+ */
+static void check_no_mem(Display* dpy, struct inter_context* c, void* ptr)
+{
+  if(ptr)
+    return;
+
+  inter_free(dpy, c);
+  abort_no_mem();
+}
+
+/*
+ * Allocates c->row and c->ximage for the window described by xgwa.
+ *
+ * A shared-memory image is tried first when c->use_shm is set; if that fails,
+ * c->use_shm is cleared and a plain XImage with a calloc()ed data buffer is
+ * created instead.
+ *
+ * This function does not return on allocation failure: if either c->row or
+ * the image cannot be allocated, check_no_mem() frees the context and exits.
+ *
+ * Does nothing unless USE_XIMAGE is defined.
+ */
+static void create_image(
+  Display* dpy,
+  struct inter_context* c,
+  const XWindowAttributes* xgwa)
+{
+#ifdef USE_XIMAGE
+  c->row = malloc((c->w / c->grid_size) * sizeof(uint32_t));
+  check_no_mem(dpy, c, c->row);
+
+# ifdef HAVE_XSHM_EXTENSION
+  /*
+   * interference used to put one row at a time to the X server. This changes
+   * today.
+   *
+   * XShmPutImage is asynchronous; the contents of the XImage must not be
+   * modified until the server has placed the data on the screen. Waiting for
+   * an XShmCompletionEvent after every line of pixels is a little nutty, so
+   * shared-memory XImages will cover the entire screen, and it only has to be
+   * sent once per frame.
+   *
+   * The non-SHM code, on the other hand is noticeably slower when
+   * gridsize = 1 with one row at a time. If, on the other hand, gridsize >= 2,
+   * there's a slight speed increase with one row at a time.
+   *
+   * This uses a lot more RAM than the single line approach. Users with only
+   * 4 MB of RAM may wish to disable USE_BIG_XIMAGE and specify -no-shm on the
+   * command line. Since this is 2013 and desktop computers are shipping with
+   * 8 GB of RAM, I doubt that this will be a major issue. - D.O.
+   */
+
+  if (c->use_shm)
+    {
+      c->ximage = create_xshm_image(dpy, xgwa->visual, xgwa->depth,
+                                    ZPixmap, 0, &c->shm_info,
+                                    xgwa->width, xgwa->height);
+      if (!c->ximage)
+        c->use_shm = False;
+      /* If create_xshm_image fails, it will not be attempted again. */
+
+      c->shm_can_draw = True;
+    }
+# endif /* HAVE_XSHM_EXTENSION */
+
+  if (!c->ximage)
+    {
+      c->ximage =
+        XCreateImage(dpy, xgwa->visual,
+                     xgwa->depth, ZPixmap, 0, 0, /* depth, fmt, offset, data */
+                     xgwa->width, /* width */
+# ifdef USE_BIG_XIMAGE
+                     xgwa->height, /* height */
+# else
+                     c->grid_size, /* height */
+# endif
+                     8, 0); /* pad, bpl */
+
+      if(c->ximage)
+        {
+          c->ximage->data =
+            calloc(c->ximage->height, c->ximage->bytes_per_line);
+
+          if(!c->ximage->data)
+            {
+              /*
+               * The structure came from XCreateImage, so release it through
+               * Xlib rather than with free(); data is still NULL here.
+               */
+              XDestroyImage(c->ximage);
+              c->ximage = NULL;
+            }
+        }
+    }
+
+  if(!c->ximage)
+    {
+      /* Drop the row buffer so the check below reports the failure. */
+      free(c->row);
+      c->row = NULL;
+    }
+
+  check_no_mem(dpy, c, c->row);
+#endif /* USE_XIMAGE */
+}
+
+/*
+ * Creates the off-screen pixmap c->pix_buf, sized and with the depth given by
+ * xgwa. When HAVE_DOUBLE_BUFFER_EXTENSION is defined and c->back_buf is
+ * already set, no pixmap is created and c->pix_buf is left unchanged.
+ */
+static void create_pix_buf(Display* dpy, Window win, struct inter_context *c,
+                           const XWindowAttributes* xgwa)
+{
+#ifdef HAVE_DOUBLE_BUFFER_EXTENSION
+  const int have_back_buf = c->back_buf != 0;
+#else
+  const int have_back_buf = 0;
+#endif /* HAVE_DOUBLE_BUFFER_EXTENSION */
+
+  if(!have_back_buf)
+    c->pix_buf = XCreatePixmap(dpy, win, xgwa->width, xgwa->height,
+                               xgwa->depth);
+}
+
+/*
+ * Returns the time reported by gettimeofday() as a double: whole seconds plus
+ * the microsecond part scaled by 1.0e-6.
+ *
+ * GETTIMEOFDAY_TWO_ARGS selects whether gettimeofday() is called with a
+ * second (NULL) argument.
+ */
+static double float_time(void)
+{
+  struct timeval now;
+  double micros;
+
+#ifdef GETTIMEOFDAY_TWO_ARGS
+  gettimeofday(&now, NULL);
+#else
+  gettimeofday(&now);
+#endif
+
+  micros = now.tv_usec * 1.0e-6;
+  return micros + now.tv_sec;
+}
+
+static void inter_init(Display* dpy, Window win, struct inter_context* c)