From 20a0808887074b08c4cff82b1ab2cc4bd8ced6e1 Mon Sep 17 00:00:00 2001 From: Markus Hitter Date: Mon, 21 Nov 2016 11:56:13 +0100 Subject: [PATCH] DDA: don't count individual axis steps. Using the Bresenham algorithm it's safe to assume that if the axis with the most steps is done, all other axes are done, too. This way we save a lot of variable loading in dda_step(). We also save this very expensive comparison of all axis counters against zero. Minor drawback: update_current_position() is now even slower. About performance. The slowest step decreased from 719 to 604 clocks, which is quite an improvement. Average step time increased for single axis movements by 16 clocks and decreased for multi- axis movements. At the bottom line this should improve real-world performance quite a bit, because a printer movement speed isn't limited by average timings, but by the time needed for the slowest step. Along the way, binary size dropped by nice 244 bytes, RAM usage by also nice 16 bytes. ATmega sizes '168 '328(P) '644(P) '1280 Program: 19564 bytes 137% 64% 31% 16% Data: 2175 bytes 213% 107% 54% 27% EEPROM: 32 bytes 4% 2% 2% 1% short-moves.gcode statistics: LED on occurences: 888. LED on time minimum: 326 clock cycles. LED on time maximum: 595 clock cycles. LED on time average: 333.62 clock cycles. smooth-curves.gcode statistics: LED on occurences: 23648. LED on time minimum: 318 clock cycles. LED on time maximum: 604 clock cycles. LED on time average: 333.311 clock cycles. triangle-odd.gcode statistics: LED on occurences: 1636. LED on time minimum: 318 clock cycles. LED on time maximum: 585 clock cycles. LED on time average: 335.233 clock cycles. --- dda.c | 85 ++++++++++++++++++++++++++++------------------------------- dda.h | 9 +++---- 2 files changed, 44 insertions(+), 50 deletions(-) diff --git a/dda.c b/dda.c index a9d3646..7824493 100644 --- a/dda.c +++ b/dda.c @@ -220,7 +220,7 @@ void dda_create(DDA *dda, const TARGET *target) { // Apply extrusion multiplier. if (target->e_multiplier != 256) { - steps[E] *= target->e_multiplier; + steps[E] *= target->e_multiplier; steps[E] += 128; steps[E] /= 256; } @@ -522,12 +522,12 @@ void dda_start(DDA *dda) { // initialise state variable move_state.counter[X] = move_state.counter[Y] = move_state.counter[Z] = \ move_state.counter[E] = -(dda->total_steps >> 1); - memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4); move_state.endstop_stop = 0; #ifdef ACCELERATION_RAMPING move_state.step_no = 0; #endif #ifdef ACCELERATION_TEMPORAL + memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4); move_state.time[X] = move_state.time[Y] = \ move_state.time[Z] = move_state.time[E] = 0UL; #endif @@ -565,40 +565,29 @@ void dda_start(DDA *dda) { */ void dda_step(DDA *dda) { -#if ! defined ACCELERATION_TEMPORAL - if (move_state.steps[X]) { + #if ! defined ACCELERATION_TEMPORAL move_state.counter[X] -= dda->delta[X]; if (move_state.counter[X] < 0) { x_step(); - move_state.steps[X]--; move_state.counter[X] += dda->total_steps; } - } - if (move_state.steps[Y]) { move_state.counter[Y] -= dda->delta[Y]; if (move_state.counter[Y] < 0) { y_step(); - move_state.steps[Y]--; move_state.counter[Y] += dda->total_steps; } - } - if (move_state.steps[Z]) { move_state.counter[Z] -= dda->delta[Z]; if (move_state.counter[Z] < 0) { z_step(); - move_state.steps[Z]--; move_state.counter[Z] += dda->total_steps; } - } - if (move_state.steps[E]) { move_state.counter[E] -= dda->delta[E]; if (move_state.counter[E] < 0) { e_step(); - move_state.steps[E]--; move_state.counter[E] += dda->total_steps; } - } -#endif + move_state.step_no++; + #endif #ifdef ACCELERATION_REPRAP // linear acceleration magic, courtesy of http://www.embedded.com/design/mcus-processors-and-socs/4006438/Generate-stepper-motor-speed-profiles-in-real-time @@ -628,10 +617,6 @@ void dda_step(DDA *dda) { } #endif - #ifdef ACCELERATION_RAMPING - move_state.step_no++; - #endif - #ifdef ACCELERATION_TEMPORAL /** How is this ACCELERATION TEMPORAL expected to work? @@ -710,12 +695,14 @@ void dda_step(DDA *dda) { // // TODO: with ACCELERATION_TEMPORAL this duplicates some code. See where // dda->live is zero'd, about 10 lines above. - if ((move_state.steps[X] == 0 && move_state.steps[Y] == 0 && - move_state.steps[Z] == 0 && move_state.steps[E] == 0) - #ifdef ACCELERATION_RAMPING - || (move_state.endstop_stop && dda->n <= 0) - #endif - ) { + #if ! defined ACCELERATION_TEMPORAL + if (move_state.step_no >= dda->total_steps || + (move_state.endstop_stop && dda->n <= 0)) + #else + if (move_state.steps[X] == 0 && move_state.steps[Y] == 0 && + move_state.steps[Z] == 0 && move_state.steps[E] == 0) + #endif + { dda->live = 0; dda->done = 1; #ifdef LOOKAHEAD @@ -958,26 +945,36 @@ void update_current_position() { ((STEPS_PER_M_E + 500) / 1000) }; - if (queue_empty()) { + if (dda->live) { + uint32_t axis_steps; + + for (i = X; i < AXIS_COUNT; i++) { + #if ! defined ACCELERATION_TEMPORAL + axis_steps = muldiv(dda->total_steps - move_state.step_no, + dda->delta[i], dda->total_steps); + #else + axis_steps = move_state.steps[i]; + #endif + current_position.axis[i] = + dda->endpoint.axis[i] - (int32_t)get_direction(dda, i) * + // Should be: move_state.steps[i] * 1000000 / steps_per_m_P[i]) + // but steps[i] can be like 1000000 already, so we'd overflow. + // Unfortunately, using muldiv() overwhelms the compiler. + // Also keep the parens around this term, else results go wrong. + ((axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[i])); + } + + if (dda->endpoint.e_relative) { + // We support only one extruder, so axis_steps is already valid. + current_position.axis[E] = + (axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[E]); + } + + // current_position.F is updated in dda_start() + } + else { for (i = X; i < AXIS_COUNT; i++) { current_position.axis[i] = startpoint.axis[i]; } } - else if (dda->live) { - for (i = X; i < AXIS_COUNT; i++) { - current_position.axis[i] = dda->endpoint.axis[i] - - (int32_t)get_direction(dda, i) * - // Should be: move_state.steps[i] * 1000000 / steps_per_m_P[i]) - // but steps[i] can be like 1000000 already, so we'd overflow. - // Unfortunately, using muldiv() overwhelms the compiler. - // Also keep the parens around this term, else results go wrong. - ((move_state.steps[i] * 1000) / pgm_read_dword(&steps_per_mm_P[i])); - } - - if (dda->endpoint.e_relative) - current_position.axis[E] = - (move_state.steps[E] * 1000) / pgm_read_dword(&steps_per_mm_P[E]); - - // current_position.F is updated in dda_start() - } } diff --git a/dda.h b/dda.h index 18f2fe8..0aa853f 100644 --- a/dda.h +++ b/dda.h @@ -56,14 +56,11 @@ typedef struct { // bresenham counters axes_int32_t counter; ///< counter for total_steps vs each axis - // step counters - axes_uint32_t steps; ///< number of steps on each axis - - #ifdef ACCELERATION_RAMPING + #if ! defined ACCELERATION_TEMPORAL /// counts actual steps done uint32_t step_no; - #endif - #ifdef ACCELERATION_TEMPORAL + #else + axes_uint32_t steps; ///< number of steps on each axis axes_uint32_t time; ///< time of the last step on each axis uint32_t last_time; ///< time of the last step of any axis #endif