DDA: don't count individual axis steps.

Using the Bresenham algorithm it's safe to assume that if the axis
with the most steps is done, all other axes are done, too.

This way we save a lot of variable loading in dda_step(). We also
save this very expensive comparison of all axis counters against
zero. Minor drawback: update_current_position() is now even slower.

About performance. The slowest step decreased from 719 to 604
clocks, which is quite an improvement. Average step time increased
for single axis movements by 16 clocks and decreased for multi-
axis movements. At the bottom line this should improve real-world
performance quite a bit, because a printer movement speed isn't
limited by average timings, but by the time needed for the slowest
step.

Along the way, binary size dropped by nice 244 bytes, RAM usage by
also nice 16 bytes.

  ATmega sizes               '168   '328(P)   '644(P)     '1280
  Program:  19564 bytes      137%       64%       31%       16%
     Data:   2175 bytes      213%      107%       54%       27%
   EEPROM:     32 bytes        4%        2%        2%        1%

  short-moves.gcode statistics:
  LED on occurences: 888.
  LED on time minimum: 326 clock cycles.
  LED on time maximum: 595 clock cycles.
  LED on time average: 333.62 clock cycles.

  smooth-curves.gcode statistics:
  LED on occurences: 23648.
  LED on time minimum: 318 clock cycles.
  LED on time maximum: 604 clock cycles.
  LED on time average: 333.311 clock cycles.

  triangle-odd.gcode statistics:
  LED on occurences: 1636.
  LED on time minimum: 318 clock cycles.
  LED on time maximum: 585 clock cycles.
  LED on time average: 335.233 clock cycles.
This commit is contained in:
Markus Hitter 2016-11-21 11:56:13 +01:00
parent 92516b55ea
commit 20a0808887
2 changed files with 44 additions and 50 deletions

61
dda.c
View File

@ -522,12 +522,12 @@ void dda_start(DDA *dda) {
// initialise state variable
move_state.counter[X] = move_state.counter[Y] = move_state.counter[Z] = \
move_state.counter[E] = -(dda->total_steps >> 1);
memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4);
move_state.endstop_stop = 0;
#ifdef ACCELERATION_RAMPING
move_state.step_no = 0;
#endif
#ifdef ACCELERATION_TEMPORAL
memcpy(&move_state.steps[X], &dda->delta[X], sizeof(uint32_t) * 4);
move_state.time[X] = move_state.time[Y] = \
move_state.time[Z] = move_state.time[E] = 0UL;
#endif
@ -566,38 +566,27 @@ void dda_start(DDA *dda) {
void dda_step(DDA *dda) {
#if ! defined ACCELERATION_TEMPORAL
if (move_state.steps[X]) {
move_state.counter[X] -= dda->delta[X];
if (move_state.counter[X] < 0) {
x_step();
move_state.steps[X]--;
move_state.counter[X] += dda->total_steps;
}
}
if (move_state.steps[Y]) {
move_state.counter[Y] -= dda->delta[Y];
if (move_state.counter[Y] < 0) {
y_step();
move_state.steps[Y]--;
move_state.counter[Y] += dda->total_steps;
}
}
if (move_state.steps[Z]) {
move_state.counter[Z] -= dda->delta[Z];
if (move_state.counter[Z] < 0) {
z_step();
move_state.steps[Z]--;
move_state.counter[Z] += dda->total_steps;
}
}
if (move_state.steps[E]) {
move_state.counter[E] -= dda->delta[E];
if (move_state.counter[E] < 0) {
e_step();
move_state.steps[E]--;
move_state.counter[E] += dda->total_steps;
}
}
move_state.step_no++;
#endif
#ifdef ACCELERATION_REPRAP
@ -628,10 +617,6 @@ void dda_step(DDA *dda) {
}
#endif
#ifdef ACCELERATION_RAMPING
move_state.step_no++;
#endif
#ifdef ACCELERATION_TEMPORAL
/** How is this ACCELERATION TEMPORAL expected to work?
@ -710,12 +695,14 @@ void dda_step(DDA *dda) {
//
// TODO: with ACCELERATION_TEMPORAL this duplicates some code. See where
// dda->live is zero'd, about 10 lines above.
if ((move_state.steps[X] == 0 && move_state.steps[Y] == 0 &&
#if ! defined ACCELERATION_TEMPORAL
if (move_state.step_no >= dda->total_steps ||
(move_state.endstop_stop && dda->n <= 0))
#else
if (move_state.steps[X] == 0 && move_state.steps[Y] == 0 &&
move_state.steps[Z] == 0 && move_state.steps[E] == 0)
#ifdef ACCELERATION_RAMPING
|| (move_state.endstop_stop && dda->n <= 0)
#endif
) {
{
dda->live = 0;
dda->done = 1;
#ifdef LOOKAHEAD
@ -958,26 +945,36 @@ void update_current_position() {
((STEPS_PER_M_E + 500) / 1000)
};
if (queue_empty()) {
if (dda->live) {
uint32_t axis_steps;
for (i = X; i < AXIS_COUNT; i++) {
current_position.axis[i] = startpoint.axis[i];
}
}
else if (dda->live) {
for (i = X; i < AXIS_COUNT; i++) {
current_position.axis[i] = dda->endpoint.axis[i] -
(int32_t)get_direction(dda, i) *
#if ! defined ACCELERATION_TEMPORAL
axis_steps = muldiv(dda->total_steps - move_state.step_no,
dda->delta[i], dda->total_steps);
#else
axis_steps = move_state.steps[i];
#endif
current_position.axis[i] =
dda->endpoint.axis[i] - (int32_t)get_direction(dda, i) *
// Should be: move_state.steps[i] * 1000000 / steps_per_m_P[i])
// but steps[i] can be like 1000000 already, so we'd overflow.
// Unfortunately, using muldiv() overwhelms the compiler.
// Also keep the parens around this term, else results go wrong.
((move_state.steps[i] * 1000) / pgm_read_dword(&steps_per_mm_P[i]));
((axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[i]));
}
if (dda->endpoint.e_relative)
if (dda->endpoint.e_relative) {
// We support only one extruder, so axis_steps is already valid.
current_position.axis[E] =
(move_state.steps[E] * 1000) / pgm_read_dword(&steps_per_mm_P[E]);
(axis_steps * 1000) / pgm_read_dword(&steps_per_mm_P[E]);
}
// current_position.F is updated in dda_start()
}
else {
for (i = X; i < AXIS_COUNT; i++) {
current_position.axis[i] = startpoint.axis[i];
}
}
}

9
dda.h
View File

@ -56,14 +56,11 @@ typedef struct {
// bresenham counters
axes_int32_t counter; ///< counter for total_steps vs each axis
// step counters
axes_uint32_t steps; ///< number of steps on each axis
#ifdef ACCELERATION_RAMPING
#if ! defined ACCELERATION_TEMPORAL
/// counts actual steps done
uint32_t step_no;
#endif
#ifdef ACCELERATION_TEMPORAL
#else
axes_uint32_t steps; ///< number of steps on each axis
axes_uint32_t time; ///< time of the last step on each axis
uint32_t last_time; ///< time of the last step of any axis
#endif