ARM: get FastIO for writing into place.

Only SET_OUTPUT() and WRITE() for now, reading follows later.

A loop like this:

  SET_OUTPUT(PIO0_1);
  for (;;) {
    WRITE(PIO0_1, 0);
    WRITE(PIO0_1, 1);
  }

toggles a pin at about 5.3 MHz. The low period is 63 ns on the
scope, so 3 clock cycles. With this loop, the binary is 1648
bytes.

Assembly shows four instructions inside the loop, which is about
as good as it can get:

  movs  r2, #0
  str   r2, [r3, #8]
  adds  r2, #2
  str   r2, [r3, #8]

For comparison, using the MBED-provided GPIO routines gives a
toggle frequency of about 300 kHz, with a low period of 72 clock
cycles. Microoptimisation isn't just the last few percent ...

Tested with this code before main():

/**
  Crude busy-wait used only to space out the test pulses.

  Spins for `delay` loop iterations, executing one `nop` per iteration.
  No calibration is visible here, so the wall-clock time per iteration
  depends on the CPU clock and on the code the compiler generates for
  the loop.

  \param delay Number of loop iterations to spin; 0 returns immediately.
               Note the parameter shadows the function's own name inside
               the body.
*/
static void delay(uint32_t delay) {
  while (delay) {
    // Volatile asm statement: presumably here so the compiler cannot
    // discard the otherwise empty loop -- confirm for the toolchain in use.
    __ASM volatile ("nop");
    delay--;
  }
}

... and in main():

  SET_OUTPUT(PIO0_1);
  SET_OUTPUT(PIO0_2);
  SET_OUTPUT(PIO0_3);
  SET_OUTPUT(PIO0_4);
  __ASM (".balign 16");
  while (1) {
    // 1 pulse on pin 1, two pulses on pin 2, ...
    WRITE(PIO0_1, 0);
    WRITE(PIO0_1, 1);
    WRITE(PIO0_2, 0);
    WRITE(PIO0_2, 1);
    WRITE(PIO0_2, 0);
    WRITE(PIO0_2, 1);
    WRITE(PIO0_3, 0);
    WRITE(PIO0_3, 1);
    WRITE(PIO0_3, 0);
    WRITE(PIO0_3, 1);
    WRITE(PIO0_3, 0);
    WRITE(PIO0_3, 1);
    // PIO0_4 needs a pullup 10k to 3.3V
    // to show a visible signal.
    WRITE(PIO0_4, 0);
    delay(10);
    WRITE(PIO0_4, 1);
    delay(10);
    WRITE(PIO0_4, 0);
    delay(10);
    WRITE(PIO0_4, 1);
    delay(10);
    WRITE(PIO0_4, 0);
    delay(10);
    WRITE(PIO0_4, 1);
    delay(10);
    WRITE(PIO0_4, 0);
    delay(10);
    WRITE(PIO0_4, 1);
    delay(1000);
  }

With a 10k pullup, PIO0_4 has a rise time of about 1 microsecond.
This commit is contained in:
Markus Hitter 2015-07-21 23:13:09 +02:00
parent 370bb9f93c
commit 2c90a2dfc7
4 changed files with 141 additions and 5 deletions

View File

@ -69,7 +69,9 @@
#elif defined __ARMEL__
#define DIO0_PIN remove when actually defined.
#if defined (__ARM_LPC1114__)
#include "arduino_lpc1114.h"
#endif
#elif defined SIMULATOR
@ -77,8 +79,10 @@
#endif /* __AVR__, __ARMEL__, SIMULATOR */
#ifndef DIO0_PIN
#error pins for this chip not defined in arduino.h! If you write an appropriate pin definition and have this firmware work on your chip, please tell us via the forum thread
#if ! defined DIO0_PIN && ! defined PIO0_1_PIN
#error Pins for this chip not defined in arduino.h! If you write an \
appropriate pin definition and have this firmware work on your chip, \
please tell us via Github or the forum thread.
#endif
#ifndef BSS

110
arduino_lpc1114.h Normal file
View File

@ -0,0 +1,110 @@
/** \file
\brief MCU pin mappings.
Here we map the pins required by Teacup to the names known by CMSIS.
*/
/** I/O pins.
In MBED, I/O pin handling is rather complicated. Lots of enums, lots of
functions, spread over various files, slow execution (pin toggling about
15 times slower than what we have here).
Accordingly, the strategy for FastIO is derived directly from the data sheet.
There one can see that hardware is very similar to that of AVRs: pins grouped
in ports (here: 12 pins per port) and a number of additional registers with
bits for additional properties: whether input or output, whether a pullup is
attached, and so on. See chapter 12 of the LPC111x User Manual. Unlike
AVRs, many ARMs (including the LPC1114) support bit-banding, which is
interrupt safe and also a few clock cycles faster. See macros in pinio.h.
*/
#include "mbed-LPC11xx.h"
/**
Offsets to the various GPIO registers. See chapter 12.3 in LPC111x User
Manual.
NOTE(review): presumably these are byte offsets relative to the base address
of one GPIO port -- confirm against the manual. The _WRITE() and
_SET_OUTPUT() macros in pinio.h do not use these offsets; they reach the
registers through the port struct members MASKED_ACCESS and DIR instead.
*/
#define IO_MASK_OFFSET 0x0000 // Bit-masked data registers.
#define IO_DATA_OFFSET 0x3FFC // Data register.
#define IO_DIR_OFFSET 0x8000 // Data direction register.
#define IO_IS_OFFSET 0x8004 // Interrupt sense register.
#define IO_IBE_OFFSET 0x8008 // Interrupt both edges register.
#define IO_IEV_OFFSET 0x800C // Interrupt event register.
#define IO_IE_OFFSET 0x8010 // Interrupt mask register.
#define IO_RIS_OFFSET 0x8014 // Raw interrupt status register.
#define IO_MIS_OFFSET 0x8018 // Masked interrupt status register.
#define IO_IC_OFFSET 0x801C // Interrupt clear register.
/**
We define only pins available on the DIP28/TSSOP28 package here, because
the LPC1114FN28/102 is currently the only LPC1114 known to be used in a
RepRap controller.
Every pin gets a pair of macros sharing the pin's name as prefix:
  <name>_PIN   The pin's number within its port.
  <name>_PORT  The GPIO port the pin belongs to (LPC_GPIO0 or LPC_GPIO1).
The FastIO macros in pinio.h build these names by token pasting, so both
macros of a pair have to exist and have to stay preprocessor macros.
arduino.h additionally tests for PIO0_1_PIN to decide whether pins are
defined for this chip at all.
*/
// Reset pin. Don't use.
//#define PIO0_0_PIN 0
//#define PIO0_0_PORT LPC_GPIO0
// Port 0 (LPC_GPIO0).
#define PIO0_1_PIN 1
#define PIO0_1_PORT LPC_GPIO0
#define PIO0_2_PIN 2
#define PIO0_2_PORT LPC_GPIO0
#define PIO0_3_PIN 3
#define PIO0_3_PORT LPC_GPIO0
#define PIO0_4_PIN 4
#define PIO0_4_PORT LPC_GPIO0
#define PIO0_5_PIN 5
#define PIO0_5_PORT LPC_GPIO0
#define PIO0_6_PIN 6
#define PIO0_6_PORT LPC_GPIO0
#define PIO0_7_PIN 7
#define PIO0_7_PORT LPC_GPIO0
#define PIO0_8_PIN 8
#define PIO0_8_PORT LPC_GPIO0
#define PIO0_9_PIN 9
#define PIO0_9_PORT LPC_GPIO0
#define PIO0_10_PIN 10
#define PIO0_10_PORT LPC_GPIO0
#define PIO0_11_PIN 11
#define PIO0_11_PORT LPC_GPIO0
// Port 1 (LPC_GPIO1).
#define PIO1_0_PIN 0
#define PIO1_0_PORT LPC_GPIO1
#define PIO1_1_PIN 1
#define PIO1_1_PORT LPC_GPIO1
#define PIO1_2_PIN 2
#define PIO1_2_PORT LPC_GPIO1
#define PIO1_3_PIN 3
#define PIO1_3_PORT LPC_GPIO1
#define PIO1_4_PIN 4
#define PIO1_4_PORT LPC_GPIO1
#define PIO1_5_PIN 5
#define PIO1_5_PORT LPC_GPIO1
#define PIO1_6_PIN 6
#define PIO1_6_PORT LPC_GPIO1
#define PIO1_7_PIN 7
#define PIO1_7_PORT LPC_GPIO1
#define PIO1_8_PIN 8
#define PIO1_8_PORT LPC_GPIO1
#define PIO1_9_PIN 9
#define PIO1_9_PORT LPC_GPIO1

View File

@ -42,7 +42,9 @@
#include "debug.h"
#include "heater.h"
#include "analog.h"
#endif /* __ARMEL_NOTYET__ */
#include "pinio.h"
#ifndef __ARMEL_NOTYET__
#include "clock.h"
#include "intercom.h"
#include "spi.h"
@ -248,10 +250,10 @@ void init(void) {
// reset watchdog
wd_reset();
#endif /* __ARMEL_NOTYET__ */
// prepare the power supply
power_init();
#endif /* __ARMEL_NOTYET__ */
// say hi to host
serial_writestr_P(PSTR("start\nok\n"));

22
pinio.h
View File

@ -47,6 +47,26 @@
/// Check if pin is an output.
#define _GET_OUTPUT(IO) ((IO ## _DDR & MASK(IO ## _PIN)) != 0)
#elif defined __ARMEL__
/**
The LPC1114 supports bit-banding by mapping the bit mask to the address.
See chapter 12 in the LPC111x User Manual. A read-modify-write cycle like
on AVR costs 5 clock cycles, this implementation works with 3 clock cycles.
*/
/// Drive a pin high (v non-zero) or low (v zero).
/// IO is the pin's name prefix, e.g. PIO0_1; the macro pastes _PORT and
/// _PIN onto it. The store goes to the port's MASKED_ACCESS slot selected
/// by the pin's mask, so no read-modify-write is needed. v is evaluated
/// exactly once.
#define _WRITE(IO, v) \
  do { \
    if (v) { \
      IO ## _PORT->MASKED_ACCESS[MASK(IO ## _PIN)] = MASK(IO ## _PIN); \
    } else { \
      IO ## _PORT->MASKED_ACCESS[MASK(IO ## _PIN)] = 0; \
    } \
  } while (0)
/// Configure a pin as output.
/// IO is the pin's name prefix, e.g. PIO0_1. Reads the port's DIR
/// register, ORs in the pin's mask and writes the result back; the other
/// bits of DIR keep their value.
#define _SET_OUTPUT(IO) \
  do { \
    IO ## _PORT->DIR = IO ## _PORT->DIR | (MASK(IO ## _PIN)); \
  } while (0)
#elif defined SIMULATOR
#include "simulator.h"
@ -57,7 +77,7 @@
void _SET_OUTPUT(pin_t pin);
void _SET_INPUT(pin_t pin);
#endif /* __AVR__, SIMULATOR */
#endif /* __AVR__, __ARMEL__, SIMULATOR */
/**
Why double up on these macros?