preprocessor_math.h, SQRT(): take a better initial guess.

Now results are apparently accurate across the whole uint32 range.
At least, this test passes with all numbers being exact:

  #include "preprocessor_math.h"
  #include <math.h>
  ... in main() ...
  sersendf_P(PSTR("0:  %lu  %lu\n"), (uint32_t)SQRT(0), (uint32_t)sqrt(0));
  sersendf_P(PSTR("1:  %lu  %lu\n"), (uint32_t)SQRT(1), (uint32_t)sqrt(1));
  sersendf_P(PSTR("2:  %lu  %lu\n"), (uint32_t)SQRT(2), (uint32_t)sqrt(2));
  sersendf_P(PSTR("3:  %lu  %lu\n"), (uint32_t)SQRT(3), (uint32_t)sqrt(3));
  sersendf_P(PSTR("4:  %lu  %lu\n"), (uint32_t)SQRT(4), (uint32_t)sqrt(4));
  sersendf_P(PSTR("5:  %lu  %lu\n"), (uint32_t)SQRT(5), (uint32_t)sqrt(5));
  sersendf_P(PSTR("6:  %lu  %lu\n"), (uint32_t)SQRT(6), (uint32_t)sqrt(6));
  sersendf_P(PSTR("7:  %lu  %lu\n"), (uint32_t)SQRT(7), (uint32_t)sqrt(7));
  sersendf_P(PSTR("8:  %lu  %lu\n"), (uint32_t)SQRT(8), (uint32_t)sqrt(8));
  sersendf_P(PSTR("9:  %lu  %lu\n"), (uint32_t)SQRT(9), (uint32_t)sqrt(9));
  sersendf_P(PSTR("10:  %lu  %lu\n"), (uint32_t)SQRT(10), (uint32_t)sqrt(10));
  sersendf_P(PSTR("20:  %lu  %lu\n"), (uint32_t)SQRT(20), (uint32_t)sqrt(20));
  sersendf_P(PSTR("30:  %lu  %lu\n"), (uint32_t)SQRT(30), (uint32_t)sqrt(30));
  sersendf_P(PSTR("40:  %lu  %lu\n"), (uint32_t)SQRT(40), (uint32_t)sqrt(40));
  sersendf_P(PSTR("50:  %lu  %lu\n"), (uint32_t)SQRT(50), (uint32_t)sqrt(50));
  sersendf_P(PSTR("60:  %lu  %lu\n"), (uint32_t)SQRT(60), (uint32_t)sqrt(60));
  sersendf_P(PSTR("70:  %lu  %lu\n"), (uint32_t)SQRT(70), (uint32_t)sqrt(70));
  sersendf_P(PSTR("80:  %lu  %lu\n"), (uint32_t)SQRT(80), (uint32_t)sqrt(80));
  sersendf_P(PSTR("90:  %lu  %lu\n"), (uint32_t)SQRT(90), (uint32_t)sqrt(90));
  sersendf_P(PSTR("100:  %lu  %lu\n"), (uint32_t)SQRT(100), (uint32_t)sqrt(100));
  sersendf_P(PSTR("200:  %lu  %lu\n"), (uint32_t)SQRT(200), (uint32_t)sqrt(200));
  sersendf_P(PSTR("300:  %lu  %lu\n"), (uint32_t)SQRT(300), (uint32_t)sqrt(300));
  sersendf_P(PSTR("400:  %lu  %lu\n"), (uint32_t)SQRT(400), (uint32_t)sqrt(400));
  sersendf_P(PSTR("500:  %lu  %lu\n"), (uint32_t)SQRT(500), (uint32_t)sqrt(500));
  sersendf_P(PSTR("600:  %lu  %lu\n"), (uint32_t)SQRT(600), (uint32_t)sqrt(600));
  sersendf_P(PSTR("700:  %lu  %lu\n"), (uint32_t)SQRT(700), (uint32_t)sqrt(700));
  sersendf_P(PSTR("800:  %lu  %lu\n"), (uint32_t)SQRT(800), (uint32_t)sqrt(800));
  sersendf_P(PSTR("900:  %lu  %lu\n"), (uint32_t)SQRT(900), (uint32_t)sqrt(900));
  sersendf_P(PSTR("1000:  %lu  %lu\n"), (uint32_t)SQRT(1000), (uint32_t)sqrt(1000));
  sersendf_P(PSTR("2000:  %lu  %lu\n"), (uint32_t)SQRT(2000), (uint32_t)sqrt(2000));
  sersendf_P(PSTR("3000:  %lu  %lu\n"), (uint32_t)SQRT(3000), (uint32_t)sqrt(3000));
  sersendf_P(PSTR("4000:  %lu  %lu\n"), (uint32_t)SQRT(4000), (uint32_t)sqrt(4000));
  sersendf_P(PSTR("5000:  %lu  %lu\n"), (uint32_t)SQRT(5000), (uint32_t)sqrt(5000));
  sersendf_P(PSTR("6000:  %lu  %lu\n"), (uint32_t)SQRT(6000), (uint32_t)sqrt(6000));
  sersendf_P(PSTR("7000:  %lu  %lu\n"), (uint32_t)SQRT(7000), (uint32_t)sqrt(7000));
  sersendf_P(PSTR("8000:  %lu  %lu\n"), (uint32_t)SQRT(8000), (uint32_t)sqrt(8000));
  sersendf_P(PSTR("9000:  %lu  %lu\n"), (uint32_t)SQRT(9000), (uint32_t)sqrt(9000));
  sersendf_P(PSTR("10000:  %lu  %lu\n"), (uint32_t)SQRT(10000), (uint32_t)sqrt(10000));
  sersendf_P(PSTR("20000:  %lu  %lu\n"), (uint32_t)SQRT(20000), (uint32_t)sqrt(20000));
  sersendf_P(PSTR("30000:  %lu  %lu\n"), (uint32_t)SQRT(30000), (uint32_t)sqrt(30000));
  sersendf_P(PSTR("40000:  %lu  %lu\n"), (uint32_t)SQRT(40000), (uint32_t)sqrt(40000));
  sersendf_P(PSTR("50000:  %lu  %lu\n"), (uint32_t)SQRT(50000), (uint32_t)sqrt(50000));
  sersendf_P(PSTR("60000:  %lu  %lu\n"), (uint32_t)SQRT(60000), (uint32_t)sqrt(60000));
  sersendf_P(PSTR("70000:  %lu  %lu\n"), (uint32_t)SQRT(70000), (uint32_t)sqrt(70000));
  sersendf_P(PSTR("80000:  %lu  %lu\n"), (uint32_t)SQRT(80000), (uint32_t)sqrt(80000));
  sersendf_P(PSTR("90000:  %lu  %lu\n"), (uint32_t)SQRT(90000), (uint32_t)sqrt(90000));
  sersendf_P(PSTR("100000:  %lu  %lu\n"), (uint32_t)SQRT(100000), (uint32_t)sqrt(100000));
  sersendf_P(PSTR("200000:  %lu  %lu\n"), (uint32_t)SQRT(200000), (uint32_t)sqrt(200000));
  sersendf_P(PSTR("300000:  %lu  %lu\n"), (uint32_t)SQRT(300000), (uint32_t)sqrt(300000));
  sersendf_P(PSTR("400000:  %lu  %lu\n"), (uint32_t)SQRT(400000), (uint32_t)sqrt(400000));
  sersendf_P(PSTR("500000:  %lu  %lu\n"), (uint32_t)SQRT(500000), (uint32_t)sqrt(500000));
  sersendf_P(PSTR("600000:  %lu  %lu\n"), (uint32_t)SQRT(600000), (uint32_t)sqrt(600000));
  sersendf_P(PSTR("700000:  %lu  %lu\n"), (uint32_t)SQRT(700000), (uint32_t)sqrt(700000));
  sersendf_P(PSTR("800000:  %lu  %lu\n"), (uint32_t)SQRT(800000), (uint32_t)sqrt(800000));
  sersendf_P(PSTR("900000:  %lu  %lu\n"), (uint32_t)SQRT(900000), (uint32_t)sqrt(900000));
  sersendf_P(PSTR("1000000:  %lu  %lu\n"), (uint32_t)SQRT(1000000), (uint32_t)sqrt(1000000));
  sersendf_P(PSTR("2000000:  %lu  %lu\n"), (uint32_t)SQRT(2000000), (uint32_t)sqrt(2000000));
  sersendf_P(PSTR("3000000:  %lu  %lu\n"), (uint32_t)SQRT(3000000), (uint32_t)sqrt(3000000));
  sersendf_P(PSTR("4000000:  %lu  %lu\n"), (uint32_t)SQRT(4000000), (uint32_t)sqrt(4000000));
  sersendf_P(PSTR("5000000:  %lu  %lu\n"), (uint32_t)SQRT(5000000), (uint32_t)sqrt(5000000));
  sersendf_P(PSTR("6000000:  %lu  %lu\n"), (uint32_t)SQRT(6000000), (uint32_t)sqrt(6000000));
  sersendf_P(PSTR("7000000:  %lu  %lu\n"), (uint32_t)SQRT(7000000), (uint32_t)sqrt(7000000));
  sersendf_P(PSTR("8000000:  %lu  %lu\n"), (uint32_t)SQRT(8000000), (uint32_t)sqrt(8000000));
  sersendf_P(PSTR("9000000:  %lu  %lu\n"), (uint32_t)SQRT(9000000), (uint32_t)sqrt(9000000));
  sersendf_P(PSTR("10000000:  %lu  %lu\n"), (uint32_t)SQRT(10000000), (uint32_t)sqrt(10000000));
  sersendf_P(PSTR("20000000:  %lu  %lu\n"), (uint32_t)SQRT(20000000), (uint32_t)sqrt(20000000));
  sersendf_P(PSTR("30000000:  %lu  %lu\n"), (uint32_t)SQRT(30000000), (uint32_t)sqrt(30000000));
  sersendf_P(PSTR("40000000:  %lu  %lu\n"), (uint32_t)SQRT(40000000), (uint32_t)sqrt(40000000));
  sersendf_P(PSTR("50000000:  %lu  %lu\n"), (uint32_t)SQRT(50000000), (uint32_t)sqrt(50000000));
  sersendf_P(PSTR("60000000:  %lu  %lu\n"), (uint32_t)SQRT(60000000), (uint32_t)sqrt(60000000));
  sersendf_P(PSTR("70000000:  %lu  %lu\n"), (uint32_t)SQRT(70000000), (uint32_t)sqrt(70000000));
  sersendf_P(PSTR("80000000:  %lu  %lu\n"), (uint32_t)SQRT(80000000), (uint32_t)sqrt(80000000));
  sersendf_P(PSTR("90000000:  %lu  %lu\n"), (uint32_t)SQRT(90000000), (uint32_t)sqrt(90000000));
  sersendf_P(PSTR("100000000:  %lu  %lu\n"), (uint32_t)SQRT(100000000), (uint32_t)sqrt(100000000));
  sersendf_P(PSTR("200000000:  %lu  %lu\n"), (uint32_t)SQRT(200000000), (uint32_t)sqrt(200000000));
  sersendf_P(PSTR("300000000:  %lu  %lu\n"), (uint32_t)SQRT(300000000), (uint32_t)sqrt(300000000));
  sersendf_P(PSTR("400000000:  %lu  %lu\n"), (uint32_t)SQRT(400000000), (uint32_t)sqrt(400000000));
  sersendf_P(PSTR("500000000:  %lu  %lu\n"), (uint32_t)SQRT(500000000), (uint32_t)sqrt(500000000));
  sersendf_P(PSTR("600000000:  %lu  %lu\n"), (uint32_t)SQRT(600000000), (uint32_t)sqrt(600000000));
  sersendf_P(PSTR("700000000:  %lu  %lu\n"), (uint32_t)SQRT(700000000), (uint32_t)sqrt(700000000));
  sersendf_P(PSTR("800000000:  %lu  %lu\n"), (uint32_t)SQRT(800000000), (uint32_t)sqrt(800000000));
  sersendf_P(PSTR("900000000:  %lu  %lu\n"), (uint32_t)SQRT(900000000), (uint32_t)sqrt(900000000));
  sersendf_P(PSTR("1000000000:  %lu  %lu\n"), (uint32_t)SQRT(1000000000), (uint32_t)sqrt(1000000000));
  sersendf_P(PSTR("2000000000:  %lu  %lu\n"), (uint32_t)SQRT(2000000000), (uint32_t)sqrt(2000000000));
  sersendf_P(PSTR("3000000000:  %lu  %lu\n"), (uint32_t)SQRT(3000000000), (uint32_t)sqrt(3000000000));
  sersendf_P(PSTR("4000000000:  %lu  %lu\n"), (uint32_t)SQRT(4000000000), (uint32_t)sqrt(4000000000));

This commit is contained in:

Markus Hitter

2014-07-06 23:17:06 +02:00

parent 6f83519a1d

commit 2ad7517e27

1 changed files with 10 additions and 12 deletions

									
										22

preprocessor_math.h

										View File
									
					@ -18,17 +18,16 @@

					  equals

					  equals

					  (uint32_t)(sqrt(i) + .5)

					  (uint32_t)(sqrt(i) + .5)

					  These two provide identical results up to 1'861'860 (tested at runtime) and

					  These two provide identical results for all tested numbers across the

					  up to 10'000'000 at compile time. At 90'000'000, deviation is about 5%,

					  uint32 range. This "+ .5" is for rounding and not crucial. Casting to

					  increasing further at even higher numbers. This "+ .5" is for rounding and

					  other sizes is also possible.

					  not crucial. Casting to other sizes is also possible.

					  Can be used for calculations at runtime, too, where it costs 944(!) bytes

					  Can principally be used for calculations at runtime, too, but its compiled

					  binary size and takes roughly 10'000(!) clock cycles.

					  size is prohibitively large (more than 20kB per instance).

					  Initial version found on pl.comp.lang.c, posted by Jean-Louis PATANE.

					  Initial version found on pl.comp.lang.c, posted by Jean-Louis PATANE.

					*/

					*/

					#define SQR00(x) (((double)(x)) / 2)

					#define SQR00(x) (((x) > 65535) ? (double)65535 : (double)(x) / 2)

					#define SQR01(x) ((SQR00(x) + ((x) / SQR00(x))) / 2)

					#define SQR01(x) ((SQR00(x) + ((x) / SQR00(x))) / 2)

					#define SQR02(x) ((SQR01(x) + ((x) / SQR01(x))) / 2)

					#define SQR02(x) ((SQR01(x) + ((x) / SQR01(x))) / 2)

					#define SQR03(x) ((SQR02(x) + ((x) / SQR02(x))) / 2)

					#define SQR03(x) ((SQR02(x) + ((x) / SQR02(x))) / 2)

					@ -41,11 +40,10 @@

					#define SQR10(x) ((SQR09(x) + ((x) / SQR09(x))) / 2)

					#define SQR10(x) ((SQR09(x) + ((x) / SQR09(x))) / 2)

					#define SQR11(x) ((SQR10(x) + ((x) / SQR10(x))) / 2)

					#define SQR11(x) ((SQR10(x) + ((x) / SQR10(x))) / 2)

					#define SQR12(x) ((SQR11(x) + ((x) / SQR11(x))) / 2)

					#define SQR12(x) ((SQR11(x) + ((x) / SQR11(x))) / 2)

					// You can add more of these lines here, each additional line increases

					// We use 9 iterations, note how SQR10() and up get ignored. You can add more

					// accurate range by about factor 2 and costs additional 40 bytes binary size

					// iterations here, but beware, the length of the preprocessed term

					// in the non-constant case. But beware, the length of the preprocessed term

					// explodes, leading to several seconds compile time above about SQR10().

					// explodes, leading to several seconds compile time above about SQR13.

					#define SQRT(x) ((SQR09(x) + ((x) / SQR09(x))) / 2)

					#define SQRT(x) ((SQR12(x) + ((x) / SQR12(x))) / 2)

					#endif /* _PREPROCESSOR_MATH_H */

					#endif /* _PREPROCESSOR_MATH_H */

preprocessor_math.h, SQRT(): take a better initial guess.

22 preprocessor_math.h Unescape Escape View File

22

preprocessor_math.h

View File