Faster implementation of integer square root.

Implementation by Roland Brochard <zuzuf86@gmail.com>.

Note: If you wonder how code doing multiplications can be faster than
code doing just shifts and increments: I've measured it. One million
square roots in 30 seconds with the new code instead of 220 seconds
with the old code on a Gen7 20 MHz. That's just 30 microseconds or
600 CPU cycles per root.

Code used for the measurement (by a stopwatch) in mendel.c:
...
*include "dda_maths.h"
*include "delay.h"
int main (void)
{
  uint32_t i, j;

  serial_init();
  sei();
  serial_writestr_P(PSTR("start\n"));
  for (i = 0; i < 1000000; i++) {
    j = int_sqrt(i);
  }
  serial_writestr_P(PSTR("done\n"));
  delay_ms(20);
  cli();

  init();
...

--Traumflug
This commit is contained in:
Roland Brochard 2013-09-10 22:44:37 +02:00 committed by Markus Hitter
parent 13ec2d7521
commit af12c7a68a
1 changed files with 36 additions and 16 deletions

View File

@ -135,27 +135,47 @@ uint32_t approx_distance_3(uint32_t dx, uint32_t dy, uint32_t dz) {
\param a find square root of this number
\return sqrt(a - 1) < returnvalue <= sqrt(a)
see http://www.embedded-systems.com/98/9802fe2.htm
This is a binary search but it uses only the minimum required bits for
each step.
*/
// courtesy of http://www.embedded-systems.com/98/9802fe2.htm
uint16_t int_sqrt(uint32_t a) {
uint32_t rem = 0;
uint32_t root = 0;
uint16_t b = a >> 16;
uint8_t c = b >> 8;
uint16_t x = 0;
uint8_t z = 0;
uint16_t i;
uint8_t j;
for (i = 0; i < 16; i++) {
root <<= 1;
rem = ((rem << 2) + (a >> 30));
a <<= 2;
root++;
if (root <= rem) {
rem -= root;
root++;
}
else
root--;
for (j = 0x8; j; j >>= 1) {
uint8_t y2;
z |= j;
y2 = z * z;
if (y2 > c)
z ^= j;
}
return (uint16_t) ((root >> 1) & 0xFFFFL);
x = z << 4;
for(i = 0x8; i; i >>= 1) {
uint16_t y2;
x |= i;
y2 = x * x;
if (y2 > b)
x ^= i;
}
x <<= 8;
for(i = 0x80; i; i >>= 1) {
uint32_t y2;
x |= i;
y2 = (uint32_t)x * x;
if (y2 > a)
x ^= i;
}
return x;
}
// this is an ultra-crude pseudo-logarithm routine, such that: