4AvrBugTrapper

>trap wrap-arounds
>trap bad interrupts
>trap watchdog resets
>'dump' all sram (r0-RAMEND)
>software uart txmit on any pin
>no stack used
>only 6 registers used
>ascii hex format output, w/cr every 32 bytes
>now an S file, no compiler version 'variations'

sample dumps
application to read dump file

More work to do-
>more testing
>tx pin may be used by alternate pin function, so more work
 needed to take care of this 'problem'
>create app to analyze dump file (determine which irq's enabled,
 show address on stack, etc)






//---------------------------------------------------------------------------------------------------------//
// __ __ ___ ____ ______ //
// / // / / |_ _______/ __ )__ ______ /_ __/________ _____ ____ ___ ____ //
// / // /_/ /| | | / / ___/ __ / / / / __ `// / / ___/ __ `/ __ \/ __ \/ _ \/ ___/ //
// /__ __/ ___ | |/ / / / /_/ / /_/ / /_/ // / / / / /_/ / /_/ / /_/ / __/ / //
// /_/ /_/ |_|___/_/ /_____/\__,_/\__, //_/ /_/ \__,_/ .___/ .___/\___/_/ //
// /____/ /_/ /_/ //
// __ __ //
// / / __ __ ______ ______/ /__ ____ _ //
// / _ \/ // / / __/ // / __/ __/ |/ / ' \ //
// /_.__/\_, / \__/\_,_/_/ \__/|___/_/_/_/ //
// /___/ //
// //
// Copyright 2007 by Curt Van Maanen //
// Version 2007.11.19 //
//---------------------------------------------------------------------------------------------------------//
// 4AvrBugTrapper.S //
//---------------------------------------------------------------------------------------------------------//



/*-----------------------------------------------------------------------------------------------------------
overview of where this code is located-
section .vectors vector table

//this file starts here
section .init0
//this file ends here

section .init2 clear r1, stack init

reset vector-> __init (this file)
missing irq--> __bad_interrupt-> __vector_default (this file)


change the defines as needed, add this file to your project, hook up the TX_PIN defined below to a serial
chip (level converter), and capture 'debug' info on any reset, wrap-around, or bad irq

if no debugging wanted, you can still use this file to 'catch' any wrap-arounds and bad interrupts, which
will then cause a watchdog reset to get the cpu back to a known state

-----------------------------------------------------------------------------------------------------------*/



/*-----------------------------------------------------------------------------------------------------------
___ _ _ ___ _ _ _ ___ ___ ___
|_ _| \| |/ __| | | | | | \| __/ __|
| || .` | (__| |_| |_| | |) | _|\__ \
|___|_|\_|\___|____\___/|___/|___|___/

-----------------------------------------------------------------------------------------------------------*/

#include <avr/io.h>



/*-----------------------------------------------------------------------------------------------------------
___ ___ ___ ___ _ _ ___ ___
| \| __| __|_ _| \| | __/ __|
| |) | _|| _| | || .` | _|\__ \
|___/|___|_| |___|_|\_|___|___/

-----------------------------------------------------------------------------------------------------------*/

#define TX_PIN 1 //the pin number (bit 0-7) of the tx pin within TX_PORT
#define TX_PORT PORTD //the port used for the tx pin (has to be i/o < 0x40)
#define DEBUG_BAUD 38400 //baud rate

#define CLEAR_STACK 1 //0 if stack mark/clear NOT wanted
#define STACK_MARKER 0x55 //hex value to mark stack
#define SAVE_SREG2R2 1 //0 if save sreg to r2 NOT wanted
#define SAVE_PC_FUNC 1 //0 if save_pc function NOT wanted
#define FAKE_FLASHEND 1 //0 if jmp not wanted right before bootloader section

#define DEBUG_MODE 2 //use table below

/* |
\_________________________________
\
|
------------------------------------------------------------
| DEBUG_MODE | RESETS | WDRF | WRAPS | BadIRQS| NUMBER |
|------------------------------------------------------------|
| NEVER | N | N | N | N | 0 |
|------------------------------------------------------------|
| ERRORS_ONLY | N | Y | Y | Y | 1 |
|------------------------------------------------------------|
| ALWAYS | Y | Y | Y | Y | 2 |
------------------------------------------------------------

*/


/*---------------------------------------------------------------------------------------------------------*/

#if defined(MCUCSR)
#define _RST_FLGS MCUCSR
#else
#define _RST_FLGS MCUSR
#endif

#if defined(WDTCSR)
#define _WD_CTRL WDTCSR
#else
#define _WD_CTRL WDTCR
#endif

#if defined(WDTOE)
#define _WD_CE WDTOE
#else
#define _WD_CE WDCE
#endif



/*-----------------------------------------------------------------------------------------------------------
_ ___ __ __ __ __ _ ___ ___ ___ ___
/_\ / __| \/ | | \/ | /_\ / __| _ \/ _ \/ __|
/ _ \\__ \ |\/| | | |\/| |/ _ \ (__| / (_) \__ \
/_/ \_\___/_| |_| |_| |_/_/ \_\___|_|_\\___/|___/

-----------------------------------------------------------------------------------------------------------*/

/*-----------------------------------------------------------------------------------------------------------
asm macros to take care of using the correct instructions for i/o and sram

LOAD/STORE will use the instructions in/out/lds/sts as needed
LOAD_S/STORE_S will always use lds/sts (force lds/sts even when in/out would work)

the BugTrapper will only use the macros below, and not use in/out/lds/sts instructions directly
-----------------------------------------------------------------------------------------------------------*/

/* STORE addr,reg - write register reg to the data-space address addr
   addresses at 0x60 and above are written with sts
   addresses below 0x60 are treated as i/o and written with out, which wants the
   i/o address (data-space address minus 0x20) */
.macro STORE addr,reg
.if \addr >= 0x60
sts \addr,\reg
.else
out \addr - 0x20,\reg
.endif
.endm

/* LOAD reg,addr - read the data-space address addr into register reg
   addresses at 0x60 and above are read with lds
   addresses below 0x60 are treated as i/o and read with in, which wants the
   i/o address (data-space address minus 0x20) */
.macro LOAD reg,addr
.if \addr >= 0x60
lds \reg,\addr
.else
in \reg,\addr - 0x20
.endif
.endm

/* STORE_S addr,reg - write register reg to data-space address addr, always with sts
   (no address check, so sts is used even where out would reach) */
.macro STORE_S addr,reg
sts \addr,\reg
.endm

/* LOAD_S reg,addr - read data-space address addr into register reg, always with lds
   (no address check, so lds is used even where in would reach) */
.macro LOAD_S reg,addr
lds \reg,\addr
.endm

/* CLRBIT reg,bit - clear one bit in an i/o register with cbi
   reg is given as a data-space address (0x20 is subtracted here to get the i/o address)
   there is no fallback for registers cbi cannot reach (cbi only takes i/o addresses 0x00-0x1F),
   so reg has to be a data-space address below 0x40 */
.macro CLRBIT reg,bit
cbi \reg - 0x20,\bit
.endm

/* SETBIT reg,bit - set one bit in an i/o register with sbi
   same addressing rules as CLRBIT above (sbi only takes i/o addresses 0x00-0x1F) */
.macro SETBIT reg,bit
sbi \reg - 0x20,\bit
.endm

/* ADDI reg,num - add the constant num to register reg
   the avr has no add-immediate instruction, so subtract the negated constant with subi
   (reg has to be r16-r31, and the flags are those of the subi, not those of an add)
   num is wrapped in parentheses so an expression argument is negated as a whole
   (without them 'ADDI r24,A+B' would turn into -A+B) */
.macro ADDI reg,num
subi \reg,-(\num)
.endm

/*-----------------------------------------------------------------------------------------------------------
get F_CPU into something we can use here (need to remove the UL at the end of F_CPU passed to assembler)
go through each character in F_CPU, create the same number but without UL
then check if BIT_DELAY will be > 255, if so, generate an error
-----------------------------------------------------------------------------------------------------------*/

/* F_CPU has to come from the build (normally -DF_CPU=...), without it the .irpc below would
   walk over the letters of the bare token F_CPU instead of over a number */
#ifndef F_CPU
#error "F_CPU must be defined (for example -DF_CPU=8000000UL) to calculate BIT_DELAY"
#endif

.set MY_FREQ,0 //init to 0

.irpc param,F_CPU //go through all 'characters' in F_CPU
.ifnc \param,U //if not a 'U'
.ifnc \param,L //and not an 'L'
.ifnc \param,u //and not a 'u' (lower case suffix is valid C too)
.ifnc \param,l //and not an 'l'
.set MY_FREQ,MY_FREQ*10+\param //left shift (decimal),then add the digit
.endif
.endif
.endif
.endif
.endr

/* bit delay count = (bit delay clks wanted - 9) / 3, has to fit the 8 bit loop counter */
.set BIT_DELAY,(((MY_FREQ/DEBUG_BAUD)-9)/3)

.if BIT_DELAY > 255 //too many loops for an 8 bit counter
.error "BIT_DELAY will be greater than 255, you need to increase baud rate (DEBUG_BAUD)"
.endif

/* a BIT_DELAY of 0 would make the dec/brne delay loop run 256 times, and a negative value ends up
   as a large unsigned loop count, so both give a wrong baud rate without any warning
   only checked when the software uart is actually assembled (DEBUG_MODE not 0) */
#if (DEBUG_MODE)
.if BIT_DELAY < 1
.error "BIT_DELAY will be less than 1, you need to decrease baud rate (DEBUG_BAUD)"
.endif
#endif



/*-----------------------------------------------------------------------------------------------------------
___ _ _ ___ _____ __
|_ _| \| |_ _|_ _/ \
_ | || .` || | | || () |
(_)___|_|\_|___| |_| \__/

-----------------------------------------------------------------------------------------------------------*/

.section .init0,"ax",@progbits //.init0 section
.global __init
.global __vector_default


/*-----------------------------------------------------------------------------------------------------------
uint16_t save_pc(void)
call/rcall to this function will put PC of caller address + 1 on the stack
return r25:r24 as 16bit PC address

usage-> this_address = save_pc();
-----------------------------------------------------------------------------------------------------------*/

/* logical && here (was a bitwise &, which made SAVE_PC_FUNC=1 with DEBUG_MODE=2 evaluate to 0,
   so save_pc was never assembled in the 'ALWAYS' debug mode) */
#if (SAVE_PC_FUNC && DEBUG_MODE)
.global save_pc
.func save_pc

/* uint16_t save_pc(void)
   returns in r25:r24 the return address that the caller's call/rcall pushed
   (the address of the instruction after the call), and leaves the stack as it found it
   registers modified- r24,r25
   NOTE(review): only two bytes are popped, so this assumes a 2 byte return address - devices
   that push a 3 byte return address would need a third pop/push, confirm for your target */

save_pc:

pop r25 //get PC high byte (pushed last, so it comes off first)
pop r24 //get PC low byte
push r24 //put low byte back on stack
push r25 //put high byte back on stack
ret //return with PC in r25:r24

.endfunc
#endif

/*-----------------------------------------------------------------------------------------------------------
reset vector - either got here from a reset or power up, a wrap-around, or a call/jump to 0
determine the cause in this function, pass the cause to __vector_default in r24

__init is a weak define in gcrt1.S, set at the start of .init0, and also is what the reset vector is set
to 'jump' to, and we just give our function the name __init, and we are all set

registers modified in this function-
r24,r25,SREG,(r2)
-----------------------------------------------------------------------------------------------------------*/

.func __init //resets will end up here

/* entry from the reset vector- work out why we are here from the reset flags and either
   hand a cause character in r24 to the dump code at from_init, or go to wd_off/wd_reset
   the reset flags stay in r25 for the checks below */

__init:

#if (SAVE_SREG2R2 && DEBUG_MODE) //if want to save sreg (to check I bit in dump file)
LOAD r2,SREG //get sreg into r2 before we cli() below
#endif //wrap arounds or call/jump to 0 could have I bit set
cli //turn off irq's in case it was a wrap-around
wdr //reset watchdog in case it was wrap-around
LOAD r25,_RST_FLGS //get reset flags (MCUCSR or MCUSR, whichever the device has)
tst r25 //check if reset flags are 0 (Z flag is used by the brne in either branch below)

/*---------------------------------------------------------------------------------------------------
if reset flags = 0, was a wrap-around (or a call/jump to 0. or call/jump to this function)
---------------------------------------------------------------------------------------------------*/

#if (DEBUG_MODE) //if debug
brne init0_check_wd //flags not 0, go check wdrf now
ldi r24,'a' //load 'a' into r24 ('a' = wrap-around)
rjmp from_init //jump into the dump code below (past the 'b' load of __vector_default)

init0_check_wd:

cpi r25,(1<<WDRF) //check if WDRF is the only flag set
brne init0_other_reset //nope, go to next
ldi r24,'w' //load 'w' into r24 ('w' = watchdog reset)
rjmp from_init //jump into the dump code below

init0_other_reset:

#if (DEBUG_MODE & 2) //if wanting to debug all resets
ldi r24,'r' //load 'r' into r24 ('r' = not a watchdog only reset)
rjmp from_init //jump into the dump code below
#else //debug of other resets not wanted
rjmp wd_off //else just turn off wd
#endif

#else //no debug
brne wd_off //a reset flag was set,so just turn off wd at wd_off
//no 'rjmp wd_reset' needed- no reset flags were set, so we 'fall' through __vector_default
//(which assembles to nothing without debug) into wd_reset below

#endif //(DEBUG_MODE)

/*---------------------------------------------------------------------------------------------------
bad interrupt function, jumped to from vector table ->__bad_interrupt->__vector_default
or rjmp from above

__vector_default is a weak define in gcrt1.S, initially set to __vectors which is the reset address
0, but since we use it here, the __bad_interrupt function in gcrt1.S that originally just 'jumped'
to __vectors, now 'jumps' to __vector_default (here) instead

registers modified in this function-
r16,r17,r24,r25,r30,r31,SREG
---------------------------------------------------------------------------------------------------*/

__vector_default: //bad interrupts will now end up here

/*---------------------------------------------------------------------------------------------------
we got here because of a jump from __bad_interrupt, so load TXbyte (r24) with 'b' if debug on, else
nothing is assembled here and we fall into the part where we just cause a watchdog reset to
recover from a bad interrupt
---------------------------------------------------------------------------------------------------*/

/****************************/
#define bitcnt r16
#define temp r17
#define TXbyte r24
#define TXsave r25
/****************************/

#if (DEBUG_MODE) //if any debug wanted, dump data
ldi TXbyte,'b' //set 'reset' cause to 'b' (bad interrupt)

/*---------------------------------------------------------------------------------------------------
now r24='a' if wrap-around, 'r' if reset, 'b' if bad interrupt, 'w' if watchdog reset
---------------------------------------------------------------------------------------------------*/

from_init: //jump from __init

/*---------------------------------------------------------------------------------------------------
make sure the T flag is clear before the first character goes out

the end of putchar tests T- T set means 'that was the high nibble, now send the low nibble of
TXsave'. the cause character (and later the cr) is sent by jumping straight to putchar, so T has
to be clear for those. after a real reset sreg is 0, but after a wrap-around or a bad interrupt
T is whatever the application left behind, and a set T would put a stray hex digit (low nibble
of r25) right after the cause character and shift the whole dump by one character
---------------------------------------------------------------------------------------------------*/

clt //T clear = single character mode for putchar

/*---------------------------------------------------------------------------------------------------
setup tx pin (more work to do here- the port pin could be overriden by an alternate port function)
port bit is set first, then the ddr bit, so the pin becomes an output that is already high (idle)
---------------------------------------------------------------------------------------------------*/

SETBIT TX_PORT,TX_PIN //set TX_PIN bit in TX_PORT
SETBIT TX_PORT-1,TX_PIN //set TX_PIN bit in TX_PORT -1 (DDRx)

/*---------------------------------------------------------------------------------------------------
dump all sram starting from 0 (r0)

output will then be - (just showing 0's here)

r (reason for 'dump')
0000000000000000000000000000000000000000000000000000000000000000 (r0-r31, sram 0x00-0x1F)
0000000000000000000000000000000000000000000000000000000000000000 (i/o 0-0x1F, sram 0x20-0x3F)
0000000000000000000000000000000000000000000000000000000000000000 (i/o 0x20-0x3F, sram 0x40-0x5F)
0000000000000000000000000000000000000000000000000000000000000000 (i/o or sram 0x60-0x7F)
0000000000000000000000000000000000000000000000000000000000000000 (i/o or sram 0x80-0x9F)
................................................................
................................................................
0000000000000000000000000000000000000000000000000000000000000000 (sram 0xXXXX-RAMEND)

-----------------------------------------------------------------------------------------------------
set ram pointer to 0, jump into while loop, tx character now in r24 (reset cause)
---------------------------------------------------------------------------------------------------*/

clr ZL //Z pointer to 0
clr ZH
rjmp putchar //output r24 first (character already in r24)

dump_loop: //loop- comes back here from txdone after every character sent

/*---------------------------------------------------------------------------------------------------
one pass sends either a cr (at the start of every 32 byte row) or the next data byte
bit7 of Zh is borrowed as a 'cr already sent for this row' flag, so no extra register is needed

get low byte of ram pointer (r30), check if any bits0-4 are set, if so, jump to no_cr
---------------------------------------------------------------------------------------------------*/

mov r24,ZL //get Zl
andi r24,0x1F //Zl & 0x1F (31, 0b00011111)
brne no_cr //if not 0, we are inside a row, goto no_cr

/*---------------------------------------------------------------------------------------------------
no bits0-4 were set (r30 was 0,32,64,96,128,etc)
now check if bit7 of r31 is clear (which means this is the first time through when r30=0,32,etc
if bit7 was set (we were here already), jump to no_cr
---------------------------------------------------------------------------------------------------*/

sbrc ZH,7 //Zl&0x1F was 0, so skip next if Zh bit7 clear
rjmp no_cr //bit was set, already been here

/*---------------------------------------------------------------------------------------------------
first time through when r30=0,32,64,etc
so set bit7 of r31, load cr into r24 (TXbyte), then jump to putchar (single char only)
next time through, r30 still the same, but bit7 will be set in r31, so we don't get stuck in a loop
---------------------------------------------------------------------------------------------------*/

sbr ZH,0x80 //set bit7 of Zh (a flag to let us know we were here)
ldi r24,'\r' //load cr
rjmp putchar //tx character only (putchar sends a single raw character when T is clear)

/*---------------------------------------------------------------------------------------------------
cr already done, or not needed
clear bit7 of r31 (if last time was a cr, we now clear that bit)
Z is a plain address again after this, which is needed for the compare and the ld below
NOTE(review): using bit7 of Zh as a flag only works while RAMEND is below 0x8000
---------------------------------------------------------------------------------------------------*/

no_cr: //data byte (no cr)

cbr ZH,0x80 //clear bit7 of Zh (flag)

/*---------------------------------------------------------------------------------------------------
check if ram pointer still pointing to a valid ram location
if not, break out of this loop
---------------------------------------------------------------------------------------------------*/

ldi r24,hi8(RAMEND+1) //compare Z to RAMEND+1 (16 bit compare, r24 is free to use here)
cpi ZL,lo8(RAMEND+1)
cpc ZH,r24
brcc dump_done //if no carry, Z is >=RAMEND+1


/*---------------------------------------------------------------------------------------------------
load TXbyte with data from sram, then inc the ram pointer
execution then runs on into hex2ascii, which sends the byte as two ascii characters
---------------------------------------------------------------------------------------------------*/

ld TXbyte,Z+ //load sram byte into TXbyte, inc ram pointer (Z)

/*---------------------------------------------------------------------------------------------------
send a hex byte out as an ascii pair (0x12 -> '1' '2')
first time through (upper nibble), T bit will be set
second time through (lower nibble), T bit will be clear
---------------------------------------------------------------------------------------------------*/

hex2ascii: //in- TXbyte (r24) = byte to send as two ascii hex characters

set // set t bit in sreg (T set = low nibble still to be sent)
mov TXsave,TXbyte // save a copy of TXbyte (the low nibble is taken from this copy later)
swap TXbyte // swap TXbyte to get high nibble->low

again: // start of hex->ascii, also re-entered from the end of putchar for the low nibble

andi TXbyte,0x0F // TXbyte &= 0x0F (strip off high nibble)
ADDI TXbyte,'0' // TXbyte += '0'
cpi TXbyte,'9'+1 // test if TXbyte > '9'
brcs putchar // is not > '9', so it is '0'-'9' already, skip the adjustment
ADDI TXbyte,7 // TXbyte += 7 (was > '9', is now 'A'-'F'), then run on into putchar

/*---------------------------------------------------------------------------------------------------
AVR305 Software UART (can jump to 'putchar' instead of hex2ascii if binary output wanted)
putchar0->b_delay=5clk, b_delay->putchar0=delay loops x 3clk + 4clk
total bit delay= (delay loops x 3clk) + 9clk
bit delay clks wanted= cpu freq clk Hz / baud rate
bit delay count= (bit delay clks wanted - 9) / 3
actual bit time= (bit delay count x 3) + 9
actual baud rate= cpu freq hz / actual bit time
baud rate error%= ((actual baud rate / desired baud rate) - 1) x 100

8Mhz/38.4Kb -> bit delay count = 66 (0x42), actual bit time= (66 x 3) + 9 = 207
actual baud rate= 8000000 / 207 = 38647bps
baud rate error%= ((38647 / 38400) - 1) x 100 = ((1.006) - 1) x 100 = .6%
---------------------------------------------------------------------------------------------------*/

/* putchar- send the character in TXbyte (r24) out of TX_PIN, lsb first
   in-  TXbyte = character, T flag = what to do when the character is done (see the end)
   uses bitcnt (r16), temp (r17), TXbyte is shifted out to 0
   the loop is cycle counted (see the bit time calculation above), so instruction counts in
   here must not change without redoing BIT_DELAY */

putchar:

wdr //keep the watchdog quiet, once per character
ldi bitcnt,11 //bit counter, 1start,8data,2stop
com TXbyte //invert data (so stop bits are 'high')
sec //set carry bit in sreg(start bit)

putchar0: //loop start- carry holds the (inverted) bit to send

brcc putchar1 //if carry clear, branch to putchar1
CLRBIT TX_PORT,TX_PIN //carry was set, so set tx pin low
rjmp b_delay //goto delay

putchar1: //carry was clear, so set tx pin high

SETBIT TX_PORT,TX_PIN
nop //keep cycles same for 1 and 0

b_delay: //start of bit delay

ldi temp,BIT_DELAY //delay loops
1:
dec temp //dec loop counter
brne 1b //if not 0, dec again
lsr TXbyte //done. shift byte right, into carry bit (zeros shift in, they become the stop bits)
dec bitcnt //dec bit counter (dec leaves carry alone, so the bit from lsr survives to putchar0)
brne putchar0 //if not all bits done, repeat

/*---------------------------------------------------------------------------------------------------
AVR305 Software UART end
---------------------------------------------------------------------------------------------------*/

/* character is out, T decides what happens next-
   T set   = that was the high nibble of a hex pair, go back and send the low nibble of TXsave
   T clear = that was a low nibble or a single raw character (cause character, cr), done
   so whoever jumps straight to putchar needs T clear, else the low nibble of TXsave follows */

brtc txdone //if t bit clear, done
mov TXbyte,TXsave //else get TXbyte again
clt //clear t bit now
rjmp again //now do low nibble

/*---------------------------------------------------------------------------------------------------
hex2ascii end
---------------------------------------------------------------------------------------------------*/

txdone: //that's it

rjmp dump_loop //back to the dump loop for the next cr or data byte

/*---------------------------------------------------------------------------------------------------
put tx port/ddr back to initial state
---------------------------------------------------------------------------------------------------*/

dump_done: //Z went past RAMEND, dump is complete

/* release the tx pin- ddr bit cleared first (pin is an input again), then the port bit
   note- both bits are simply cleared, whatever they were before the dump is not restored */

CLRBIT TX_PORT-1,TX_PIN //clear TX_PIN bit in TX_PORT -1 (DDRx)
CLRBIT TX_PORT,TX_PIN //clear TX_PIN bit in TX_PORT

/*---------------------------------------------------------------------------------------------------
if was not a reset, cause a watchdog reset to get back to square 1
the reset flags are read again here because r24/r25 were used by the dump
---------------------------------------------------------------------------------------------------*/

LOAD r24,_RST_FLGS //get MCUSR (reset flags)
and r24,r24 //check if 0
brne wd_off //no, was a reset of some kind, carry on with startup
//flags were 0 (wrap-around or bad interrupt), fall into wd_reset below

#endif //(DEBUG_MODE)

/* wd_reset- force a watchdog reset to get the cpu back to a known state
   reached by falling through from above (wrap-around or bad interrupt), does not return
   the two writes to the watchdog register have to stay back to back- the first one only opens
   a short window for the change (see the device datasheet for the timed sequence) */

wd_reset:

ldi r25,(1<<_WD_CE) | (1<<WDE) //change enable + watchdog enable
ldi r24,(1<<WDE) //watchdog enable, all prescaler bits 0
STORE _WD_CTRL,r25 //enable watchdog change
STORE _WD_CTRL,r24 //enable with lowest timeout value (0, ~16ms)
1:
rjmp 1b //loop here to cause the timeout

/*---------------------------------------------------------------------------------------------------
end of this file's code in section .init0, just before running on into the following .init sections
(.init2 is where the compiler-supplied startup code starts doing its thing)
clear all reset flags, turn off watchdog
---------------------------------------------------------------------------------------------------*/

/* wd_off- normal way out after a real reset- clear the reset flags and turn the watchdog off
   the flags are cleared so that a later arrival at __init with all flags 0 can be recognised as a
   wrap-around, and they are cleared before the watchdog writes
   NOTE(review): on devices where WDRF overrides WDE the flag has to be cleared first for the
   disable to take effect - confirm against the datasheet of your device
   the two writes to the watchdog register have to stay back to back (timed sequence) */

wd_off:

clr r24 //clear r24 (0 = no reset flags, and 0 = watchdog off)
ldi r25,(1<<_WD_CE) | (1<<WDE) //set r25 to enable change in wd reg
STORE _RST_FLGS,r24 //clear all reset flags
STORE _WD_CTRL,r25 //enable watchdog change
STORE _WD_CTRL,r24 //disable it

/*-------------------------------------------------------------------------------------------------------
optionally clear/mark stack space to see in dump file how much stack space is used
-------------------------------------------------------------------------------------------------------*/

#if (CLEAR_STACK)
/* fill sram from __heap_start up to and including __stack with STACK_MARKER, so the dump file
   shows how far the stack has grown down into the marked area
   __heap_start and __stack are not defined in this file (no .extern lines needed for them)-
   __heap_start = start of stack space, __stack = top of stack (normally RAMEND)
   registers modified- r24,r25,r30,r31,SREG */

mark_stack:

ldi r25,hi8(__stack+1) //high byte of end address +1, kept in a register for cpc
ldi r24,STACK_MARKER //the byte used to 'mark' the stack
ldi ZH,hi8(__heap_start) //Z = first address to mark
ldi ZL,lo8(__heap_start)

1: //mark loop (always stores at least one byte)

st Z+,r24 //store marker, move Z to next address
cpi ZL,lo8(__stack+1) //16 bit compare of Z against __stack+1
cpc ZH,r25
brlo 1b //Z still below __stack+1, mark next byte

#endif

__init_end: //helps our lss listings- to see where this ends

.endfunc