DDA: don't queue up nullmoves.

Nullmoves are movements which don't actually move a stepper. For
example because it's a velocity change only or the movement is
shorter than a single motor step.

Not queueing them up removes the necessity to check for them,
which reduces code in critical areas. It also removes the
necessity to run dda_start() twice to get past a nullmove.

Best of this is, it also makes lookahead perform better. Before,
a nullmove just changing speed interrupted the lookahead chain,
now it no longer does. See straight-speeds.gcode and
...-Fsep.gcode, which produced different timings before, now
results are identical.

Also update the function description for dda_create().

Performance increase is impressive: another 75 clock cycles off
the slowest step, only 36 bytes binary size increase:

  ATmega sizes               '168   '328(P)   '644(P)     '1280
  Program:  19652 bytes      138%       64%       31%       16%
     Data:   2175 bytes      213%      107%       54%       27%
   EEPROM:     32 bytes        4%        2%        2%        1%

  short-moves.gcode statistics:
  LED on occurences: 888.
  LED on time minimum: 280 clock cycles.
  LED on time maximum: 458 clock cycles.
  LED on time average: 284.653 clock cycles.

  smooth-curves.gcode statistics:
  LED on occurences: 23648.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 501 clock cycles.
  LED on time average: 307.275 clock cycles.

  triangle-odd.gcode statistics:
  LED on occurences: 1636.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 458 clock cycles.
  LED on time average: 297.625 clock cycles.

Performance of straight-speeds{-Fsep}.gcode before:

  straight-speeds.gcode statistics:
  LED on occurences: 32000.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 586 clock cycles.
  LED on time average: 298.75 clock cycles.

  straight-speeds-Fsep.gcode statistics:
  LED on occurences: 32000.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 672 clock cycles.
  LED on time average: 298.79 clock cycles.

Now:

  straight-speeds.gcode statistics:
  LED on occurences: 32000.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 501 clock cycles.
  LED on time average: 298.703 clock cycles.

  straight-speeds-Fsep.gcode statistics:
  LED on occurences: 32000.
  LED on time minimum: 272 clock cycles.
  LED on time maximum: 501 clock cycles.
  LED on time average: 298.703 clock cycles.

There we save even 171 clock cycles :-)
This commit is contained in:
Markus Hitter 2016-11-22 00:49:26 +01:00
parent 766bd52337
commit 6d95620f32
3 changed files with 45 additions and 34 deletions

31
dda.c
View File

@ -116,7 +116,10 @@ void dda_new_startpoint(void) {
startpoint_steps.axis[E] = um_to_steps(startpoint.axis[E], E); startpoint_steps.axis[E] = um_to_steps(startpoint.axis[E], E);
} }
/*! CREATE a dda given current_position and a target, save to passed location so we can write directly into the queue /**
Create a DDA using startpoint, startpoint_steps and a target, save to passed
location so we can write directly into the queue.
\param *dda pointer to a dda_queue entry to overwrite \param *dda pointer to a dda_queue entry to overwrite
\param *target the target position of this move \param *target the target position of this move
@ -126,8 +129,9 @@ void dda_new_startpoint(void) {
It also pre-fills any data that the selected accleration algorithm needs, and can be pre-computed for the whole move. It also pre-fills any data that the selected accleration algorithm needs, and can be pre-computed for the whole move.
This algorithm is probably the main limiting factor to print speed in terms of firmware limitations This algorithm is the main limiting factor when queuing movements and can
* become a limitation to print speed if there are lots of tiny, fast movements.
* Regarding lookahead, we can distinguish everything into these cases: * Regarding lookahead, we can distinguish everything into these cases:
* *
* 1. Standard movement. To be joined with the previous move. * 1. Standard movement. To be joined with the previous move.
@ -278,6 +282,7 @@ void dda_create(DDA *dda, const TARGET *target) {
if (dda->total_steps == 0) { if (dda->total_steps == 0) {
dda->nullmove = 1; dda->nullmove = 1;
startpoint.F = dda->endpoint.F;
} }
else { else {
// get steppers ready to go // get steppers ready to go
@ -467,16 +472,16 @@ void dda_create(DDA *dda, const TARGET *target) {
if (dda->c < c_limit) if (dda->c < c_limit)
dda->c = c_limit; dda->c = c_limit;
#endif #endif
// next dda starts where we finish
memcpy(&startpoint, &dda->endpoint, sizeof(TARGET));
#ifdef LOOKAHEAD
prev_dda = dda;
#endif
} /* ! dda->total_steps == 0 */ } /* ! dda->total_steps == 0 */
if (DEBUG_DDA && (debug_flags & DEBUG_DDA)) if (DEBUG_DDA && (debug_flags & DEBUG_DDA))
serial_writestr_P(PSTR("] }\n")); serial_writestr_P(PSTR("] }\n"));
// next dda starts where we finish
memcpy(&startpoint, &dda->endpoint, sizeof(TARGET));
#ifdef LOOKAHEAD
prev_dda = dda;
#endif
} }
/*! Start a prepared DDA /*! Start a prepared DDA
@ -498,7 +503,6 @@ void dda_start(DDA *dda) {
dda->endpoint.axis[X], dda->endpoint.axis[Y], dda->endpoint.axis[X], dda->endpoint.axis[Y],
dda->endpoint.axis[Z], dda->endpoint.F); dda->endpoint.axis[Z], dda->endpoint.F);
if ( ! dda->nullmove) {
// get ready to go // get ready to go
psu_timeout = 0; psu_timeout = 0;
#ifdef Z_AUTODISABLE #ifdef Z_AUTODISABLE
@ -538,10 +542,6 @@ void dda_start(DDA *dda) {
// set timeout for first step // set timeout for first step
timer_set(dda->c, 0); timer_set(dda->c, 0);
} }
// else just a speed change, keep dda->live = 0
current_position.F = dda->endpoint.F;
}
/** /**
\brief Do per-step movement maintenance. \brief Do per-step movement maintenance.
@ -971,11 +971,12 @@ void update_current_position() {
current_position.axis[E] = axis_um; current_position.axis[E] = axis_um;
} }
// current_position.F is updated in dda_start() current_position.F = dda->endpoint.F;
} }
else { else {
for (i = X; i < AXIS_COUNT; i++) { for (i = X; i < AXIS_COUNT; i++) {
current_position.axis[i] = startpoint.axis[i]; current_position.axis[i] = startpoint.axis[i];
} }
current_position.F = startpoint.F;
} }
} }

View File

@ -56,8 +56,8 @@ void dda_find_crossing_speed(DDA *prev, DDA *current) {
axes_int32_t prevF, currF; axes_int32_t prevF, currF;
enum axis_e i; enum axis_e i;
// Bail out if there's nothing to join (e.g. G1 F1500). // Bail out if there's nothing to join (e.g. first movement after a pause).
if ( ! prev || prev->nullmove) if ( ! prev)
return; return;
// We always look at the smaller of both combined speeds, // We always look at the smaller of both combined speeds,
@ -180,8 +180,8 @@ void dda_join_moves(DDA *prev, DDA *current) {
moveno++; moveno++;
#endif #endif
// Bail out if there's nothing to join (e.g. G1 F1500). // Bail out if there's nothing to join.
if ( ! prev || prev->nullmove || current->crossF == 0) if ( ! prev || current->crossF == 0)
return; return;
// Show the proposed crossing speed - this might get adjusted below. // Show the proposed crossing speed - this might get adjusted below.

View File

@ -63,7 +63,7 @@ DDA *queue_current_movement() {
ATOMIC_START ATOMIC_START
current = &movebuffer[mb_tail]; current = &movebuffer[mb_tail];
if ( ! current->live || current->waitfor_temp || current->nullmove) if ( ! current->live || current->waitfor_temp)
current = NULL; current = NULL;
ATOMIC_END ATOMIC_END
@ -125,23 +125,32 @@ void enqueue_home(TARGET *t, uint8_t endstop_check, uint8_t endstop_stop_cond) {
} }
dda_create(new_movebuffer, t); dda_create(new_movebuffer, t);
// make certain all writes to global memory /**
// are flushed before modifying mb_head. It's pointless to queue up movements which don't actually move the stepper,
MEMORY_BARRIER(); e.g. pure velocity changes or movements shorter than a single motor step.
mb_head = h; That said, accept movements which do move the steppers by forwarding
mb_head. Also kick off movements if it's the first movement after a pause.
*/
if ( ! new_movebuffer->nullmove) {
// make certain all writes to global memory
// are flushed before modifying mb_head.
MEMORY_BARRIER();
uint8_t isdead; mb_head = h;
ATOMIC_START uint8_t isdead;
isdead = (movebuffer[mb_tail].live == 0);
ATOMIC_END
if (isdead) { ATOMIC_START
timer_reset(); isdead = (movebuffer[mb_tail].live == 0);
next_move(); ATOMIC_END
// Compensate for the cli() in timer_set().
sei(); if (isdead) {
timer_reset();
next_move();
// Compensate for the cli() in timer_set().
sei();
}
} }
} }
@ -154,7 +163,8 @@ void enqueue_home(TARGET *t, uint8_t endstop_check, uint8_t endstop_stop_cond) {
/// move buffer was dead in the non-interrupt case (which indicates that the /// move buffer was dead in the non-interrupt case (which indicates that the
/// timer interrupt is disabled). /// timer interrupt is disabled).
void next_move() { void next_move() {
while ((queue_empty() == 0) && (movebuffer[mb_tail].live == 0)) {
if (queue_empty() == 0) {
// Tail must be set before calling timer_set(), as timer_set() reenables // Tail must be set before calling timer_set(), as timer_set() reenables
// the timer interrupt, potentially exposing mb_tail to the timer // the timer interrupt, potentially exposing mb_tail to the timer
// interrupt routine. // interrupt routine.