Faster implementation of integer square root.
Implementation by Roland Brochard <zuzuf86@gmail.com>.
Note: If you wonder how code doing multiplications can be faster than
code doing just shifts and increments: I've measured it. One million
square roots in 30 seconds with the new code instead of 220 seconds
with the old code on a Gen7 20 MHz. That's just 30 microseconds or
600 CPU cycles per root.
Code used for the measurement (timed with a stopwatch), placed in mendel.c:
...
#include "dda_maths.h"
#include "delay.h"
int main (void)
{
uint32_t i, j;
serial_init();
sei();
serial_writestr_P(PSTR("start\n"));
for (i = 0; i < 1000000; i++) {
j = int_sqrt(i);
}
serial_writestr_P(PSTR("done\n"));
delay_ms(20);
cli();
init();
...
--Traumflug
This commit is contained in:
parent
13ec2d7521
commit
af12c7a68a
52
dda_maths.c
52
dda_maths.c
|
|
@@ -135,27 +135,47 @@ uint32_t approx_distance_3(uint32_t dx, uint32_t dy, uint32_t dz) {
|
|||
\param a find square root of this number
|
||||
\return sqrt(a) - 1 < returnvalue <= sqrt(a)
|
||||
|
||||
see http://www.embedded-systems.com/98/9802fe2.htm
|
||||
This is a binary search but it uses only the minimum required bits for
|
||||
each step.
|
||||
*/
|
||||
// courtesy of http://www.embedded-systems.com/98/9802fe2.htm
|
||||
uint16_t int_sqrt(uint32_t a) {
|
||||
uint32_t rem = 0;
|
||||
uint32_t root = 0;
|
||||
uint16_t b = a >> 16;
|
||||
uint8_t c = b >> 8;
|
||||
uint16_t x = 0;
|
||||
uint8_t z = 0;
|
||||
uint16_t i;
|
||||
uint8_t j;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
root <<= 1;
|
||||
rem = ((rem << 2) + (a >> 30));
|
||||
a <<= 2;
|
||||
root++;
|
||||
if (root <= rem) {
|
||||
rem -= root;
|
||||
root++;
|
||||
}
|
||||
else
|
||||
root--;
|
||||
for (j = 0x8; j; j >>= 1) {
|
||||
uint8_t y2;
|
||||
|
||||
z |= j;
|
||||
y2 = z * z;
|
||||
if (y2 > c)
|
||||
z ^= j;
|
||||
}
|
||||
return (uint16_t) ((root >> 1) & 0xFFFFL);
|
||||
|
||||
x = z << 4;
|
||||
for(i = 0x8; i; i >>= 1) {
|
||||
uint16_t y2;
|
||||
|
||||
x |= i;
|
||||
y2 = x * x;
|
||||
if (y2 > b)
|
||||
x ^= i;
|
||||
}
|
||||
|
||||
x <<= 8;
|
||||
for(i = 0x80; i; i >>= 1) {
|
||||
uint32_t y2;
|
||||
|
||||
x |= i;
|
||||
y2 = (uint32_t)x * x;
|
||||
if (y2 > a)
|
||||
x ^= i;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
// this is an ultra-crude pseudo-logarithm routine, such that:
|
||||
|
|
|
|||
Loading…
Reference in New Issue