DDA: don't count individual axis steps.

Using the Bresenham algorithm, it's safe to assume that if the axis with the most steps is done, all other axes are done, too. This way we save a lot of variable loading in dda_step(). We also save the very expensive comparison of all axis counters against zero. Minor drawback: update_current_position() is now even slower. About performance: the slowest step decreased from 719 to 604 clocks, which is quite an improvement. Average step time increased for single-axis movements by 16 clocks and decreased for multi-axis movements. The bottom line is that this should improve real-world performance quite a bit, because a printer's movement speed isn't limited by average timings, but by the time needed for the slowest step. Along the way, binary size dropped by a nice 244 bytes, and RAM usage by an equally nice 16 bytes. ATmega sizes '168 '328(P) '644(P) '1280 Program: 19564 bytes 137% 64% 31% 16% Data: 2175 bytes 213% 107% 54% 27% EEPROM: 32 bytes 4% 2% 2% 1% short-moves.gcode statistics: LED on occurrences: 888. LED on time minimum: 326 clock cycles. LED on time maximum: 595 clock cycles. LED on time average: 333.62 clock cycles. smooth-curves.gcode statistics: LED on occurrences: 23648. LED on time minimum: 318 clock cycles. LED on time maximum: 604 clock cycles. LED on time average: 333.311 clock cycles. triangle-odd.gcode statistics: LED on occurrences: 1636. LED on time minimum: 318 clock cycles. LED on time maximum: 585 clock cycles. LED on time average: 335.233 clock cycles.
2016-11-21 11:56:13 +01:00 · 2016-11-21 11:56:13 +01:00 · 20a0808887
parent 92516b55ea
commit 20a0808887
2 changed files with 44 additions and 50 deletions
--- a/dda.c
+++ b/dda.c
@ -522,12 +522,12 @@ void dda_start(DDA *dda) {
 		// initialise state variable
    move_state.counter[X] = move_state.counter[Y] = move_state.counter[Z] = \
      move_state.counter[E] = -(dda->total_steps >> 1);
-    memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4);
    move_state.endstop_stop = 0;
 		#ifdef ACCELERATION_RAMPING
 			move_state.step_no = 0;
 		#endif
 		#ifdef ACCELERATION_TEMPORAL
+      memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4);
      move_state.time[X] = move_state.time[Y] = \
        move_state.time[Z] = move_state.time[E] = 0UL;
 		#endif
@ -566,38 +566,27 @@ void dda_start(DDA *dda) {
 void dda_step(DDA *dda) {

  #if ! defined ACCELERATION_TEMPORAL
-  if (move_state.steps[X]) {
    move_state.counter[X] -= dda->delta[X];
    if (move_state.counter[X] < 0) {
 			x_step();
-      move_state.steps[X]--;
      move_state.counter[X] += dda->total_steps;
 		}
-	}
-  if (move_state.steps[Y]) {
    move_state.counter[Y] -= dda->delta[Y];
    if (move_state.counter[Y] < 0) {
 			y_step();
-      move_state.steps[Y]--;
      move_state.counter[Y] += dda->total_steps;
 		}
-	}
-  if (move_state.steps[Z]) {
    move_state.counter[Z] -= dda->delta[Z];
    if (move_state.counter[Z] < 0) {
 			z_step();
-      move_state.steps[Z]--;
      move_state.counter[Z] += dda->total_steps;
 		}
-	}
-  if (move_state.steps[E]) {
    move_state.counter[E] -= dda->delta[E];
    if (move_state.counter[E] < 0) {
 			e_step();
-      move_state.steps[E]--;
      move_state.counter[E] += dda->total_steps;
 		}
-	}
+    move_state.step_no++;
  #endif

 	#ifdef ACCELERATION_REPRAP
@ -628,10 +617,6 @@ void dda_step(DDA *dda) {
 		}
 	#endif

-	#ifdef ACCELERATION_RAMPING
-		move_state.step_no++;
-	#endif
-
  #ifdef ACCELERATION_TEMPORAL
    /** How is this ACCELERATION TEMPORAL expected to work?

@ -710,12 +695,14 @@ void dda_step(DDA *dda) {
  //
  // TODO: with ACCELERATION_TEMPORAL this duplicates some code. See where
  //       dda->live is zero'd, about 10 lines above.
-  if ((move_state.steps[X] == 0 && move_state.steps[Y] == 0 &&
+  #if ! defined ACCELERATION_TEMPORAL
+    if (move_state.step_no >= dda->total_steps ||
+        (move_state.endstop_stop && dda->n <= 0))
+  #else
+    if (move_state.steps[X] == 0 && move_state.steps[Y] == 0 &&
        move_state.steps[Z] == 0 && move_state.steps[E] == 0)
-    #ifdef ACCELERATION_RAMPING
-      || (move_state.endstop_stop && dda->n <= 0)
  #endif
-      ) {
+  {
 		dda->live = 0;
    dda->done = 1;
    #ifdef LOOKAHEAD
@ -958,26 +945,36 @@ void update_current_position() {
    ((STEPS_PER_M_E + 500) / 1000)
  };

-	if (queue_empty()) {
+  if (dda->live) {
+    uint32_t axis_steps;
+
    for (i = X; i < AXIS_COUNT; i++) {
-      current_position.axis[i] = startpoint.axis[i];
-    }
-	}
-	else if (dda->live) {
-    for (i = X; i < AXIS_COUNT; i++) {
-      current_position.axis[i] = dda->endpoint.axis[i] -
-          (int32_t)get_direction(dda, i) *
+      #if ! defined ACCELERATION_TEMPORAL
+        axis_steps = muldiv(dda->total_steps - move_state.step_no,
+                            dda->delta[i], dda->total_steps);
+      #else
+        axis_steps = move_state.steps[i];
+      #endif
+      current_position.axis[i] =
+        dda->endpoint.axis[i] - (int32_t)get_direction(dda, i) *
        // Should be: move_state.steps[i] * 1000000 / steps_per_m_P[i])
        // but steps[i] can be like 1000000 already, so we'd overflow.
        // Unfortunately, using muldiv() overwhelms the compiler.
        // Also keep the parens around this term, else results go wrong.
-          ((move_state.steps[i] * 1000) / pgm_read_dword(&steps_per_mm_P[i]));
+        ((axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[i]));
    }

-    if (dda->endpoint.e_relative)
+    if (dda->endpoint.e_relative) {
+      // We support only one extruder, so axis_steps is already valid.
      current_position.axis[E] =
-          (move_state.steps[E] * 1000) / pgm_read_dword(&steps_per_mm_P[E]);
+          (axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[E]);
+    }

 		// current_position.F is updated in dda_start()
 	}
+  else {
+    for (i = X; i < AXIS_COUNT; i++) {
+      current_position.axis[i] = startpoint.axis[i];
+    }
+	}
 }
--- a/dda.h
+++ b/dda.h
@ -56,14 +56,11 @@ typedef struct {
 	// bresenham counters
  axes_int32_t      counter; ///< counter for total_steps vs each axis

-	// step counters
-  axes_uint32_t     steps;   ///< number of steps on each axis
-
-	#ifdef ACCELERATION_RAMPING
+  #if ! defined ACCELERATION_TEMPORAL
 	/// counts actual steps done
 	uint32_t					step_no;
-	#endif
-	#ifdef ACCELERATION_TEMPORAL
+	#else
+    axes_uint32_t   steps;      ///< number of steps on each axis
  axes_uint32_t     time;       ///< time of the last step on each axis
  uint32_t          last_time;  ///< time of the last step of any axis
 	#endif