From 2ad7517e27efe04ce0cfb034409a6a3b4d322cd7 Mon Sep 17 00:00:00 2001 From: Markus Hitter Date: Sun, 6 Jul 2014 23:17:06 +0200 Subject: [PATCH] preprocessor_math.h, SQRT(): take a better initial guess. Now results are apparently accurate across the whole uint32 range. At least, this test passes with all numbers being exact: #include "preprocessor_math.h" #include ... in main() ... sersendf_P(PSTR("0: %lu %lu\n"), (uint32_t)SQRT(0), (uint32_t)sqrt(0)); sersendf_P(PSTR("1: %lu %lu\n"), (uint32_t)SQRT(1), (uint32_t)sqrt(1)); sersendf_P(PSTR("2: %lu %lu\n"), (uint32_t)SQRT(2), (uint32_t)sqrt(2)); sersendf_P(PSTR("3: %lu %lu\n"), (uint32_t)SQRT(3), (uint32_t)sqrt(3)); sersendf_P(PSTR("4: %lu %lu\n"), (uint32_t)SQRT(4), (uint32_t)sqrt(4)); sersendf_P(PSTR("5: %lu %lu\n"), (uint32_t)SQRT(5), (uint32_t)sqrt(5)); sersendf_P(PSTR("6: %lu %lu\n"), (uint32_t)SQRT(6), (uint32_t)sqrt(6)); sersendf_P(PSTR("7: %lu %lu\n"), (uint32_t)SQRT(7), (uint32_t)sqrt(7)); sersendf_P(PSTR("8: %lu %lu\n"), (uint32_t)SQRT(8), (uint32_t)sqrt(8)); sersendf_P(PSTR("9: %lu %lu\n"), (uint32_t)SQRT(9), (uint32_t)sqrt(9)); sersendf_P(PSTR("10: %lu %lu\n"), (uint32_t)SQRT(10), (uint32_t)sqrt(10)); sersendf_P(PSTR("20: %lu %lu\n"), (uint32_t)SQRT(20), (uint32_t)sqrt(20)); sersendf_P(PSTR("30: %lu %lu\n"), (uint32_t)SQRT(30), (uint32_t)sqrt(30)); sersendf_P(PSTR("40: %lu %lu\n"), (uint32_t)SQRT(40), (uint32_t)sqrt(40)); sersendf_P(PSTR("50: %lu %lu\n"), (uint32_t)SQRT(50), (uint32_t)sqrt(50)); sersendf_P(PSTR("60: %lu %lu\n"), (uint32_t)SQRT(60), (uint32_t)sqrt(60)); sersendf_P(PSTR("70: %lu %lu\n"), (uint32_t)SQRT(70), (uint32_t)sqrt(70)); sersendf_P(PSTR("80: %lu %lu\n"), (uint32_t)SQRT(80), (uint32_t)sqrt(80)); sersendf_P(PSTR("90: %lu %lu\n"), (uint32_t)SQRT(90), (uint32_t)sqrt(90)); sersendf_P(PSTR("100: %lu %lu\n"), (uint32_t)SQRT(100), (uint32_t)sqrt(100)); sersendf_P(PSTR("200: %lu %lu\n"), (uint32_t)SQRT(200), (uint32_t)sqrt(200)); sersendf_P(PSTR("300: %lu %lu\n"), (uint32_t)SQRT(300), (uint32_t)sqrt(300)); sersendf_P(PSTR("400: %lu %lu\n"), (uint32_t)SQRT(400), (uint32_t)sqrt(400)); sersendf_P(PSTR("500: %lu %lu\n"), (uint32_t)SQRT(500), (uint32_t)sqrt(500)); sersendf_P(PSTR("600: %lu %lu\n"), (uint32_t)SQRT(600), (uint32_t)sqrt(600)); sersendf_P(PSTR("700: %lu %lu\n"), (uint32_t)SQRT(700), (uint32_t)sqrt(700)); sersendf_P(PSTR("800: %lu %lu\n"), (uint32_t)SQRT(800), (uint32_t)sqrt(800)); sersendf_P(PSTR("900: %lu %lu\n"), (uint32_t)SQRT(900), (uint32_t)sqrt(900)); sersendf_P(PSTR("1000: %lu %lu\n"), (uint32_t)SQRT(1000), (uint32_t)sqrt(1000)); sersendf_P(PSTR("2000: %lu %lu\n"), (uint32_t)SQRT(2000), (uint32_t)sqrt(2000)); sersendf_P(PSTR("3000: %lu %lu\n"), (uint32_t)SQRT(3000), (uint32_t)sqrt(3000)); sersendf_P(PSTR("4000: %lu %lu\n"), (uint32_t)SQRT(4000), (uint32_t)sqrt(4000)); sersendf_P(PSTR("5000: %lu %lu\n"), (uint32_t)SQRT(5000), (uint32_t)sqrt(5000)); sersendf_P(PSTR("6000: %lu %lu\n"), (uint32_t)SQRT(6000), (uint32_t)sqrt(6000)); sersendf_P(PSTR("7000: %lu %lu\n"), (uint32_t)SQRT(7000), (uint32_t)sqrt(7000)); sersendf_P(PSTR("8000: %lu %lu\n"), (uint32_t)SQRT(8000), (uint32_t)sqrt(8000)); sersendf_P(PSTR("9000: %lu %lu\n"), (uint32_t)SQRT(9000), (uint32_t)sqrt(9000)); sersendf_P(PSTR("10000: %lu %lu\n"), (uint32_t)SQRT(10000), (uint32_t)sqrt(10000)); sersendf_P(PSTR("20000: %lu %lu\n"), (uint32_t)SQRT(20000), (uint32_t)sqrt(20000)); sersendf_P(PSTR("30000: %lu %lu\n"), (uint32_t)SQRT(30000), (uint32_t)sqrt(30000)); sersendf_P(PSTR("40000: %lu %lu\n"), (uint32_t)SQRT(40000), (uint32_t)sqrt(40000)); sersendf_P(PSTR("50000: %lu %lu\n"), (uint32_t)SQRT(50000), (uint32_t)sqrt(50000)); sersendf_P(PSTR("60000: %lu %lu\n"), (uint32_t)SQRT(60000), (uint32_t)sqrt(60000)); sersendf_P(PSTR("70000: %lu %lu\n"), (uint32_t)SQRT(70000), (uint32_t)sqrt(70000)); sersendf_P(PSTR("80000: %lu %lu\n"), (uint32_t)SQRT(80000), (uint32_t)sqrt(80000)); sersendf_P(PSTR("90000: %lu %lu\n"), (uint32_t)SQRT(90000), (uint32_t)sqrt(90000)); sersendf_P(PSTR("100000: %lu %lu\n"), (uint32_t)SQRT(100000), (uint32_t)sqrt(100000)); sersendf_P(PSTR("200000: %lu %lu\n"), (uint32_t)SQRT(200000), (uint32_t)sqrt(200000)); sersendf_P(PSTR("300000: %lu %lu\n"), (uint32_t)SQRT(300000), (uint32_t)sqrt(300000)); sersendf_P(PSTR("400000: %lu %lu\n"), (uint32_t)SQRT(400000), (uint32_t)sqrt(400000)); sersendf_P(PSTR("500000: %lu %lu\n"), (uint32_t)SQRT(500000), (uint32_t)sqrt(500000)); sersendf_P(PSTR("600000: %lu %lu\n"), (uint32_t)SQRT(600000), (uint32_t)sqrt(600000)); sersendf_P(PSTR("700000: %lu %lu\n"), (uint32_t)SQRT(700000), (uint32_t)sqrt(700000)); sersendf_P(PSTR("800000: %lu %lu\n"), (uint32_t)SQRT(800000), (uint32_t)sqrt(800000)); sersendf_P(PSTR("900000: %lu %lu\n"), (uint32_t)SQRT(900000), (uint32_t)sqrt(900000)); sersendf_P(PSTR("1000000: %lu %lu\n"), (uint32_t)SQRT(1000000), (uint32_t)sqrt(1000000)); sersendf_P(PSTR("2000000: %lu %lu\n"), (uint32_t)SQRT(2000000), (uint32_t)sqrt(2000000)); sersendf_P(PSTR("3000000: %lu %lu\n"), (uint32_t)SQRT(3000000), (uint32_t)sqrt(3000000)); sersendf_P(PSTR("4000000: %lu %lu\n"), (uint32_t)SQRT(4000000), (uint32_t)sqrt(4000000)); sersendf_P(PSTR("5000000: %lu %lu\n"), (uint32_t)SQRT(5000000), (uint32_t)sqrt(5000000)); sersendf_P(PSTR("6000000: %lu %lu\n"), (uint32_t)SQRT(6000000), (uint32_t)sqrt(6000000)); sersendf_P(PSTR("7000000: %lu %lu\n"), (uint32_t)SQRT(7000000), (uint32_t)sqrt(7000000)); sersendf_P(PSTR("8000000: %lu %lu\n"), (uint32_t)SQRT(8000000), (uint32_t)sqrt(8000000)); sersendf_P(PSTR("9000000: %lu %lu\n"), (uint32_t)SQRT(9000000), (uint32_t)sqrt(9000000)); sersendf_P(PSTR("10000000: %lu %lu\n"), (uint32_t)SQRT(10000000), (uint32_t)sqrt(10000000)); sersendf_P(PSTR("20000000: %lu %lu\n"), (uint32_t)SQRT(20000000), (uint32_t)sqrt(20000000)); sersendf_P(PSTR("30000000: %lu %lu\n"), (uint32_t)SQRT(30000000), (uint32_t)sqrt(30000000)); sersendf_P(PSTR("40000000: %lu %lu\n"), (uint32_t)SQRT(40000000), (uint32_t)sqrt(40000000)); sersendf_P(PSTR("50000000: %lu %lu\n"), (uint32_t)SQRT(50000000), (uint32_t)sqrt(50000000)); sersendf_P(PSTR("60000000: %lu %lu\n"), (uint32_t)SQRT(60000000), (uint32_t)sqrt(60000000)); sersendf_P(PSTR("70000000: %lu %lu\n"), (uint32_t)SQRT(70000000), (uint32_t)sqrt(70000000)); sersendf_P(PSTR("80000000: %lu %lu\n"), (uint32_t)SQRT(80000000), (uint32_t)sqrt(80000000)); sersendf_P(PSTR("90000000: %lu %lu\n"), (uint32_t)SQRT(90000000), (uint32_t)sqrt(90000000)); sersendf_P(PSTR("100000000: %lu %lu\n"), (uint32_t)SQRT(100000000), (uint32_t)sqrt(100000000)); sersendf_P(PSTR("200000000: %lu %lu\n"), (uint32_t)SQRT(200000000), (uint32_t)sqrt(200000000)); sersendf_P(PSTR("300000000: %lu %lu\n"), (uint32_t)SQRT(300000000), (uint32_t)sqrt(300000000)); sersendf_P(PSTR("400000000: %lu %lu\n"), (uint32_t)SQRT(400000000), (uint32_t)sqrt(400000000)); sersendf_P(PSTR("500000000: %lu %lu\n"), (uint32_t)SQRT(500000000), (uint32_t)sqrt(500000000)); sersendf_P(PSTR("600000000: %lu %lu\n"), (uint32_t)SQRT(600000000), (uint32_t)sqrt(600000000)); sersendf_P(PSTR("700000000: %lu %lu\n"), (uint32_t)SQRT(700000000), (uint32_t)sqrt(700000000)); sersendf_P(PSTR("800000000: %lu %lu\n"), (uint32_t)SQRT(800000000), (uint32_t)sqrt(800000000)); sersendf_P(PSTR("900000000: %lu %lu\n"), (uint32_t)SQRT(900000000), (uint32_t)sqrt(900000000)); sersendf_P(PSTR("1000000000: %lu %lu\n"), (uint32_t)SQRT(1000000000), (uint32_t)sqrt(1000000000)); sersendf_P(PSTR("2000000000: %lu %lu\n"), (uint32_t)SQRT(2000000000), (uint32_t)sqrt(2000000000)); sersendf_P(PSTR("3000000000: %lu %lu\n"), (uint32_t)SQRT(3000000000), (uint32_t)sqrt(3000000000)); sersendf_P(PSTR("4000000000: %lu %lu\n"), (uint32_t)SQRT(4000000000), (uint32_t)sqrt(4000000000)); --- preprocessor_math.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/preprocessor_math.h b/preprocessor_math.h index d4fe60a..f4c8dbb 100644 --- a/preprocessor_math.h +++ b/preprocessor_math.h @@ -18,17 +18,16 @@ equals (uint32_t)(sqrt(i) + .5) - These two provide identical results up to 1'861'860 (tested at runtime) and - up to 10'000'000 at compile time. At 90'000'000, deviation is about 5%, - increasing further at even higher numbers. This "+ .5" is for rounding and - not crucial. Casting to other sizes is also possible. + These two provide identical results for all tested numbers across the + uint32 range. This "+ .5" is for rounding and not crucial. Casting to + other sizes is also possible. - Can be used for calculations at runtime, too, where it costs 944(!) bytes - binary size and takes roughly 10'000(!) clock cycles. + Can principally be used for calculations at runtime, too, but its compiled + size is prohibitively large (more than 20kB per instance). Initial version found on pl.comp.lang.c, posted by Jean-Louis PATANE. */ -#define SQR00(x) (((double)(x)) / 2) +#define SQR00(x) (((x) > 65535) ? (double)65535 : (double)(x) / 2) #define SQR01(x) ((SQR00(x) + ((x) / SQR00(x))) / 2) #define SQR02(x) ((SQR01(x) + ((x) / SQR01(x))) / 2) #define SQR03(x) ((SQR02(x) + ((x) / SQR02(x))) / 2) @@ -41,11 +40,10 @@ #define SQR10(x) ((SQR09(x) + ((x) / SQR09(x))) / 2) #define SQR11(x) ((SQR10(x) + ((x) / SQR10(x))) / 2) #define SQR12(x) ((SQR11(x) + ((x) / SQR11(x))) / 2) -// You can add more of these lines here, each additional line increases -// accurate range by about factor 2 and costs additional 40 bytes binary size -// in the non-constant case. But beware, the length of the preprocessed term -// explodes, leading to several seconds compile time above about SQR13. -#define SQRT(x) ((SQR12(x) + ((x) / SQR12(x))) / 2) +// We use 9 iterations, note how SQR10() and up get ignored. You can add more +// iterations here, but beware, the length of the preprocessed term +// explodes, leading to several seconds compile time above about SQR10(). +#define SQRT(x) ((SQR09(x) + ((x) / SQR09(x))) / 2) #endif /* _PREPROCESSOR_MATH_H */