dda_maths.c/.h: implement an integer inverse square root algorithm.

This 1/sqrt(x) implementation works in 12-bit fixed point and is a bit faster than a 32-bit divide (it takes about 11% less time to complete); it could be even faster if only 8 bits were required. Note that precision gets poor for big values of n, which are likely to be needed for small acceleration values.
2013-09-10 23:03:47 +02:00 · 2013-09-10 23:03:47 +02:00 · a80eff84f8
parent da5c29a7dd
commit a80eff84f8
2 changed files with 41 additions and 0 deletions
--- a/dda_maths.c
+++ b/dda_maths.c
@ -178,6 +178,44 @@ uint16_t int_sqrt(uint32_t a) {
  return x;
 }

+/*!
+  integer inverse square root algorithm, 12 bits fixed point
+  \param a find the inverse of the square root of this number
+  \return floor(sqrt((0xFFFF / a) << 8)): about 0x1000 / sqrt(a), never above
+          it; a == 0 is handled like a == 1 and returns 0x0FFF
+  This is a binary search but it uses only the minimum required bits for each step.
+*/
+uint16_t int_inv_sqrt(uint16_t a) {
+  /// 16bits inverse (much faster than doing a full 32bits inverse)
+  /// the 0xFFFFU instead of 0x10000UL hack allows using 16bits and 8bits
+  /// variable for the first 8 steps without overflowing and it seems to
+  /// give better results for the ramping equation too :)
+  uint8_t z = 0, i;
+  uint16_t x, j;
+  /// a == 0 must not reach the divide: division by zero is undefined
+  uint32_t q = ((uint32_t)(0xFFFFU / (a ? a : 1))) << 8;
+
+  /// upper 8 bits: z = floor(sqrt(q >> 8)), z * z always fits in 16 bits
+  for (i = 0x80; i; i >>= 1) {
+    uint16_t y;
+    z |= i;
+    y = (uint16_t)z * z;
+    if (y > (q >> 8))
+      z ^= i;
+  }
+
+  /// lower 4 bits: continue against the full q, x * x needs 32 bits here
+  x = z << 4;
+  for (j = 0x8; j; j >>= 1) {
+    uint32_t y;
+    x |= j;
+    y = (uint32_t)x * x;
+    if (y > q)
+      x ^= j;
+  }
+  return x;
+}
+
 // this is an ultra-crude pseudo-logarithm routine, such that:
 // 2 ^ msbloc(v) >= v
 /*! crude logarithm algorithm
--- a/dda_maths.h
+++ b/dda_maths.h
@ -59,6 +59,9 @@ uint32_t approx_distance_3(uint32_t dx, uint32_t dy, uint32_t dz);
 // integer square root algorithm
 uint16_t int_sqrt(uint32_t a);

+// integer inverse square root, 12 bits fixed point: about 0x1000 / sqrt(a)
+uint16_t int_inv_sqrt(uint16_t a);
+
 // this is an ultra-crude pseudo-logarithm routine, such that:
 // 2 ^ msbloc(v) >= v
 const uint8_t msbloc (uint32_t v);