preprocessor_math.h, SQRT(): take a better initial guess.

Now results are apparently accurate across the whole uint32 range.
At least, this test passes with all numbers being exact:

  #include "preprocessor_math.h"
  #include <math.h>
  ... in main() ...
  sersendf_P(PSTR("0:  %lu  %lu\n"), (uint32_t)SQRT(0), (uint32_t)sqrt(0));
  sersendf_P(PSTR("1:  %lu  %lu\n"), (uint32_t)SQRT(1), (uint32_t)sqrt(1));
  sersendf_P(PSTR("2:  %lu  %lu\n"), (uint32_t)SQRT(2), (uint32_t)sqrt(2));
  sersendf_P(PSTR("3:  %lu  %lu\n"), (uint32_t)SQRT(3), (uint32_t)sqrt(3));
  sersendf_P(PSTR("4:  %lu  %lu\n"), (uint32_t)SQRT(4), (uint32_t)sqrt(4));
  sersendf_P(PSTR("5:  %lu  %lu\n"), (uint32_t)SQRT(5), (uint32_t)sqrt(5));
  sersendf_P(PSTR("6:  %lu  %lu\n"), (uint32_t)SQRT(6), (uint32_t)sqrt(6));
  sersendf_P(PSTR("7:  %lu  %lu\n"), (uint32_t)SQRT(7), (uint32_t)sqrt(7));
  sersendf_P(PSTR("8:  %lu  %lu\n"), (uint32_t)SQRT(8), (uint32_t)sqrt(8));
  sersendf_P(PSTR("9:  %lu  %lu\n"), (uint32_t)SQRT(9), (uint32_t)sqrt(9));
  sersendf_P(PSTR("10:  %lu  %lu\n"), (uint32_t)SQRT(10), (uint32_t)sqrt(10));
  sersendf_P(PSTR("20:  %lu  %lu\n"), (uint32_t)SQRT(20), (uint32_t)sqrt(20));
  sersendf_P(PSTR("30:  %lu  %lu\n"), (uint32_t)SQRT(30), (uint32_t)sqrt(30));
  sersendf_P(PSTR("40:  %lu  %lu\n"), (uint32_t)SQRT(40), (uint32_t)sqrt(40));
  sersendf_P(PSTR("50:  %lu  %lu\n"), (uint32_t)SQRT(50), (uint32_t)sqrt(50));
  sersendf_P(PSTR("60:  %lu  %lu\n"), (uint32_t)SQRT(60), (uint32_t)sqrt(60));
  sersendf_P(PSTR("70:  %lu  %lu\n"), (uint32_t)SQRT(70), (uint32_t)sqrt(70));
  sersendf_P(PSTR("80:  %lu  %lu\n"), (uint32_t)SQRT(80), (uint32_t)sqrt(80));
  sersendf_P(PSTR("90:  %lu  %lu\n"), (uint32_t)SQRT(90), (uint32_t)sqrt(90));
  sersendf_P(PSTR("100:  %lu  %lu\n"), (uint32_t)SQRT(100), (uint32_t)sqrt(100));
  sersendf_P(PSTR("200:  %lu  %lu\n"), (uint32_t)SQRT(200), (uint32_t)sqrt(200));
  sersendf_P(PSTR("300:  %lu  %lu\n"), (uint32_t)SQRT(300), (uint32_t)sqrt(300));
  sersendf_P(PSTR("400:  %lu  %lu\n"), (uint32_t)SQRT(400), (uint32_t)sqrt(400));
  sersendf_P(PSTR("500:  %lu  %lu\n"), (uint32_t)SQRT(500), (uint32_t)sqrt(500));
  sersendf_P(PSTR("600:  %lu  %lu\n"), (uint32_t)SQRT(600), (uint32_t)sqrt(600));
  sersendf_P(PSTR("700:  %lu  %lu\n"), (uint32_t)SQRT(700), (uint32_t)sqrt(700));
  sersendf_P(PSTR("800:  %lu  %lu\n"), (uint32_t)SQRT(800), (uint32_t)sqrt(800));
  sersendf_P(PSTR("900:  %lu  %lu\n"), (uint32_t)SQRT(900), (uint32_t)sqrt(900));
  sersendf_P(PSTR("1000:  %lu  %lu\n"), (uint32_t)SQRT(1000), (uint32_t)sqrt(1000));
  sersendf_P(PSTR("2000:  %lu  %lu\n"), (uint32_t)SQRT(2000), (uint32_t)sqrt(2000));
  sersendf_P(PSTR("3000:  %lu  %lu\n"), (uint32_t)SQRT(3000), (uint32_t)sqrt(3000));
  sersendf_P(PSTR("4000:  %lu  %lu\n"), (uint32_t)SQRT(4000), (uint32_t)sqrt(4000));
  sersendf_P(PSTR("5000:  %lu  %lu\n"), (uint32_t)SQRT(5000), (uint32_t)sqrt(5000));
  sersendf_P(PSTR("6000:  %lu  %lu\n"), (uint32_t)SQRT(6000), (uint32_t)sqrt(6000));
  sersendf_P(PSTR("7000:  %lu  %lu\n"), (uint32_t)SQRT(7000), (uint32_t)sqrt(7000));
  sersendf_P(PSTR("8000:  %lu  %lu\n"), (uint32_t)SQRT(8000), (uint32_t)sqrt(8000));
  sersendf_P(PSTR("9000:  %lu  %lu\n"), (uint32_t)SQRT(9000), (uint32_t)sqrt(9000));
  sersendf_P(PSTR("10000:  %lu  %lu\n"), (uint32_t)SQRT(10000), (uint32_t)sqrt(10000));
  sersendf_P(PSTR("20000:  %lu  %lu\n"), (uint32_t)SQRT(20000), (uint32_t)sqrt(20000));
  sersendf_P(PSTR("30000:  %lu  %lu\n"), (uint32_t)SQRT(30000), (uint32_t)sqrt(30000));
  sersendf_P(PSTR("40000:  %lu  %lu\n"), (uint32_t)SQRT(40000), (uint32_t)sqrt(40000));
  sersendf_P(PSTR("50000:  %lu  %lu\n"), (uint32_t)SQRT(50000), (uint32_t)sqrt(50000));
  sersendf_P(PSTR("60000:  %lu  %lu\n"), (uint32_t)SQRT(60000), (uint32_t)sqrt(60000));
  sersendf_P(PSTR("70000:  %lu  %lu\n"), (uint32_t)SQRT(70000), (uint32_t)sqrt(70000));
  sersendf_P(PSTR("80000:  %lu  %lu\n"), (uint32_t)SQRT(80000), (uint32_t)sqrt(80000));
  sersendf_P(PSTR("90000:  %lu  %lu\n"), (uint32_t)SQRT(90000), (uint32_t)sqrt(90000));
  sersendf_P(PSTR("100000:  %lu  %lu\n"), (uint32_t)SQRT(100000), (uint32_t)sqrt(100000));
  sersendf_P(PSTR("200000:  %lu  %lu\n"), (uint32_t)SQRT(200000), (uint32_t)sqrt(200000));
  sersendf_P(PSTR("300000:  %lu  %lu\n"), (uint32_t)SQRT(300000), (uint32_t)sqrt(300000));
  sersendf_P(PSTR("400000:  %lu  %lu\n"), (uint32_t)SQRT(400000), (uint32_t)sqrt(400000));
  sersendf_P(PSTR("500000:  %lu  %lu\n"), (uint32_t)SQRT(500000), (uint32_t)sqrt(500000));
  sersendf_P(PSTR("600000:  %lu  %lu\n"), (uint32_t)SQRT(600000), (uint32_t)sqrt(600000));
  sersendf_P(PSTR("700000:  %lu  %lu\n"), (uint32_t)SQRT(700000), (uint32_t)sqrt(700000));
  sersendf_P(PSTR("800000:  %lu  %lu\n"), (uint32_t)SQRT(800000), (uint32_t)sqrt(800000));
  sersendf_P(PSTR("900000:  %lu  %lu\n"), (uint32_t)SQRT(900000), (uint32_t)sqrt(900000));
  sersendf_P(PSTR("1000000:  %lu  %lu\n"), (uint32_t)SQRT(1000000), (uint32_t)sqrt(1000000));
  sersendf_P(PSTR("2000000:  %lu  %lu\n"), (uint32_t)SQRT(2000000), (uint32_t)sqrt(2000000));
  sersendf_P(PSTR("3000000:  %lu  %lu\n"), (uint32_t)SQRT(3000000), (uint32_t)sqrt(3000000));
  sersendf_P(PSTR("4000000:  %lu  %lu\n"), (uint32_t)SQRT(4000000), (uint32_t)sqrt(4000000));
  sersendf_P(PSTR("5000000:  %lu  %lu\n"), (uint32_t)SQRT(5000000), (uint32_t)sqrt(5000000));
  sersendf_P(PSTR("6000000:  %lu  %lu\n"), (uint32_t)SQRT(6000000), (uint32_t)sqrt(6000000));
  sersendf_P(PSTR("7000000:  %lu  %lu\n"), (uint32_t)SQRT(7000000), (uint32_t)sqrt(7000000));
  sersendf_P(PSTR("8000000:  %lu  %lu\n"), (uint32_t)SQRT(8000000), (uint32_t)sqrt(8000000));
  sersendf_P(PSTR("9000000:  %lu  %lu\n"), (uint32_t)SQRT(9000000), (uint32_t)sqrt(9000000));
  sersendf_P(PSTR("10000000:  %lu  %lu\n"), (uint32_t)SQRT(10000000), (uint32_t)sqrt(10000000));
  sersendf_P(PSTR("20000000:  %lu  %lu\n"), (uint32_t)SQRT(20000000), (uint32_t)sqrt(20000000));
  sersendf_P(PSTR("30000000:  %lu  %lu\n"), (uint32_t)SQRT(30000000), (uint32_t)sqrt(30000000));
  sersendf_P(PSTR("40000000:  %lu  %lu\n"), (uint32_t)SQRT(40000000), (uint32_t)sqrt(40000000));
  sersendf_P(PSTR("50000000:  %lu  %lu\n"), (uint32_t)SQRT(50000000), (uint32_t)sqrt(50000000));
  sersendf_P(PSTR("60000000:  %lu  %lu\n"), (uint32_t)SQRT(60000000), (uint32_t)sqrt(60000000));
  sersendf_P(PSTR("70000000:  %lu  %lu\n"), (uint32_t)SQRT(70000000), (uint32_t)sqrt(70000000));
  sersendf_P(PSTR("80000000:  %lu  %lu\n"), (uint32_t)SQRT(80000000), (uint32_t)sqrt(80000000));
  sersendf_P(PSTR("90000000:  %lu  %lu\n"), (uint32_t)SQRT(90000000), (uint32_t)sqrt(90000000));
  sersendf_P(PSTR("100000000:  %lu  %lu\n"), (uint32_t)SQRT(100000000), (uint32_t)sqrt(100000000));
  sersendf_P(PSTR("200000000:  %lu  %lu\n"), (uint32_t)SQRT(200000000), (uint32_t)sqrt(200000000));
  sersendf_P(PSTR("300000000:  %lu  %lu\n"), (uint32_t)SQRT(300000000), (uint32_t)sqrt(300000000));
  sersendf_P(PSTR("400000000:  %lu  %lu\n"), (uint32_t)SQRT(400000000), (uint32_t)sqrt(400000000));
  sersendf_P(PSTR("500000000:  %lu  %lu\n"), (uint32_t)SQRT(500000000), (uint32_t)sqrt(500000000));
  sersendf_P(PSTR("600000000:  %lu  %lu\n"), (uint32_t)SQRT(600000000), (uint32_t)sqrt(600000000));
  sersendf_P(PSTR("700000000:  %lu  %lu\n"), (uint32_t)SQRT(700000000), (uint32_t)sqrt(700000000));
  sersendf_P(PSTR("800000000:  %lu  %lu\n"), (uint32_t)SQRT(800000000), (uint32_t)sqrt(800000000));
  sersendf_P(PSTR("900000000:  %lu  %lu\n"), (uint32_t)SQRT(900000000), (uint32_t)sqrt(900000000));
  sersendf_P(PSTR("1000000000:  %lu  %lu\n"), (uint32_t)SQRT(1000000000), (uint32_t)sqrt(1000000000));
  sersendf_P(PSTR("2000000000:  %lu  %lu\n"), (uint32_t)SQRT(2000000000), (uint32_t)sqrt(2000000000));
  sersendf_P(PSTR("3000000000:  %lu  %lu\n"), (uint32_t)SQRT(3000000000), (uint32_t)sqrt(3000000000));
  sersendf_P(PSTR("4000000000:  %lu  %lu\n"), (uint32_t)SQRT(4000000000), (uint32_t)sqrt(4000000000));
This commit is contained in:
Markus Hitter 2014-07-06 23:17:06 +02:00
parent 6f83519a1d
commit 2ad7517e27
1 changed files with 10 additions and 12 deletions

View File

@ -18,17 +18,16 @@
equals equals
(uint32_t)(sqrt(i) + .5) (uint32_t)(sqrt(i) + .5)
These two provide identical results up to 1'861'860 (tested at runtime) and These two provide identical results for all tested numbers across the
up to 10'000'000 at compile time. At 90'000'000, deviation is about 5%, uint32 range. This "+ .5" is for rounding and not crucial. Casting to
increasing further at even higher numbers. This "+ .5" is for rounding and other sizes is also possible.
not crucial. Casting to other sizes is also possible.
Can be used for calculations at runtime, too, where it costs 944(!) bytes Can principally be used for calculations at runtime, too, but its compiled
binary size and takes roughly 10'000(!) clock cycles. size is prohibitively large (more than 20kB per instance).
Initial version found on pl.comp.lang.c, posted by Jean-Louis PATANE. Initial version found on pl.comp.lang.c, posted by Jean-Louis PATANE.
*/ */
#define SQR00(x) (((double)(x)) / 2) #define SQR00(x) (((x) > 65535) ? (double)65535 : (double)(x) / 2)
#define SQR01(x) ((SQR00(x) + ((x) / SQR00(x))) / 2) #define SQR01(x) ((SQR00(x) + ((x) / SQR00(x))) / 2)
#define SQR02(x) ((SQR01(x) + ((x) / SQR01(x))) / 2) #define SQR02(x) ((SQR01(x) + ((x) / SQR01(x))) / 2)
#define SQR03(x) ((SQR02(x) + ((x) / SQR02(x))) / 2) #define SQR03(x) ((SQR02(x) + ((x) / SQR02(x))) / 2)
@ -41,11 +40,10 @@
#define SQR10(x) ((SQR09(x) + ((x) / SQR09(x))) / 2) #define SQR10(x) ((SQR09(x) + ((x) / SQR09(x))) / 2)
#define SQR11(x) ((SQR10(x) + ((x) / SQR10(x))) / 2) #define SQR11(x) ((SQR10(x) + ((x) / SQR10(x))) / 2)
#define SQR12(x) ((SQR11(x) + ((x) / SQR11(x))) / 2) #define SQR12(x) ((SQR11(x) + ((x) / SQR11(x))) / 2)
// You can add more of these lines here, each additional line increases // We use 9 iterations, note how SQR10() and up get ignored. You can add more
// accurate range by about factor 2 and costs additional 40 bytes binary size // iterations here, but beware, the length of the preprocessed term
// in the non-constant case. But beware, the length of the preprocessed term // explodes, leading to several seconds compile time above about SQR10().
// explodes, leading to several seconds compile time above about SQR13. #define SQRT(x) ((SQR09(x) + ((x) / SQR09(x))) / 2)
#define SQRT(x) ((SQR12(x) + ((x) / SQR12(x))) / 2)
#endif /* _PREPROCESSOR_MATH_H */ #endif /* _PREPROCESSOR_MATH_H */