
    //---------------------------------------------------------------------------------------------------------//
    //     __ __  ___             ___              _ _                                                         //
    //    / // / /   |_   _______/   |  __________(_|_)     ___  ___   _                                       //
    //   / // /_/ /| | | / / ___/ /| | / ___/ ___/ / /     (__ \/ __) / )                                      //
    //  /__  __/ ___ | |/ / /  / ___ |(__  ) /__/ / /      / __/\__ \/ , \                                     //
    //    /_/ /_/  |_|___/_/  /_/  |_/____/\___/_/_/       \___)(___/\___/                                     //
    //   ____        _           ____              _   _                 _                                     //
    //  | __ )  __ _| |__  _   _| __ )  ___   ___ | |_| | ___   __ _  __| | ___ _ __                           //
    //  |  _ \ / _` | '_ \| | | |  _ \ / _ \ / _ \| __| |/ _ \ / _` |/ _` |/ _ \ '__|                          //
    //  | |_) | (_| | |_) | |_| | |_) | (_) | (_) | |_| | (_) | (_| | (_| |  __/ |                             //
    //  |____/ \__,_|_.__/ \__, |____/ \___/ \___/ \__|_|\___/ \__,_|\__,_|\___|_|                             //
    //                     |___/                                                                               //
    //     __                       __                                                                         //
    //    / /  __ __  ______ ______/ /__  ____ _                                                               //
    //   / _ \/ // / / __/ // / __/ __/ |/ /  ' \                                                              //
    //  /_.__/\_, /  \__/\_,_/_/  \__/|___/_/_/_/                                                              //
    //       /___/                                                                                             //
    //                                                                                                         //
    //  Curt's ATmega88/ATmega168/ATmega16 4AvrAscii 256 BabyBootloader                                        //
    //  Intel Hex Ascii Upload Bootloader in 256 Words                                                         //
    //                                                                                                         //
    //  Copyright 2007 by Curt Van Maanen                                                                      //
    //                                                                                                         //
    //  Version 2007.12.12                                                                                     //
    //---------------------------------------------------------------------------------------------------------//



    /*-----------------------------------------------------------------------------------------------------------

     3 sections to this file, in this order-
        CONFIG  -  make your changes needed in this section
        PROGRAM -  program located here, no changes normally needed
        HELP    -  help information section

     CHECKLIST-

     1. .text section is set to bootloader start address (see help section)
     1b .jmptbl section is set to FLASHEND-7 (see help section)
     2. linker options set to -nostartfiles and -nodefaultlibs (see help section)
     3. -mshort-calls added to custom options (see help section)
     4. change whatever needed in the config section (see config section)
     5. make sure boot reset fuse is programmed (via your avr programming method)
     6. make sure boot size is correct- 256 words (via your avr programming method)
     7. for 'newer' avr's, make sure your application clears watchdog reset flag (and WDE if not using watchdog)
     8. terminal program settings-
            8 data bits, no parity, 1 stop bit
            20msec transmit line delay              no flow control used, so this is needed
            incoming cr=cr+lf                       bootloader only sends cr, so terminal program needs to add lf
            outgoing cr only                        (lf's will be flushed after cr anyway, so it doesn't matter)
            no local echo                           bootloader will do the echo
     9. goto 1

    -----------------------------------------------------------------------------------------------------------*/



    /*-----------------------------------------------------------------------------------------------------------
         ___ _  _  ___ _   _   _ ___  ___ ___
        |_ _| \| |/ __| | | | | |   \| __/ __|
         | || .` | (__| |_| |_| | |) | _|\__ \
        |___|_|\_|\___|____\___/|___/|___|___/
        
    -----------------------------------------------------------------------------------------------------------*/
    #include                        <avr/io.h>
    #include                        <avr/pgmspace.h>                    // for reading from flash memory
    #include                        <avr/boot.h>                        // for writing to flash memory
    #include                        <avr/wdt.h>                         // watchdog stuff



    //---------------------------------------------------------------------------------------------------------//
    //                        ______   ______   .__   __.  _______  __    _______                              //
    //                       /      | /  __  \  |  \ |  | |   ____||  |  /  _____|                             //
    //                      |  ,----'|  |  |  | |   \|  | |  |__   |  | |  |  __                               //
    //                      |  |     |  |  |  | |  . `  | |   __|  |  | |  | |_ |                              //
    //                      |  `----.|  `--'  | |  |\   | |  |     |  | |  |__| |                              //
    //                       \______| \______/  |__| \__| |__|     |__|  \______|                              //
    //                                                                                                         //
    //---------------------------------------------------------------------------------------------------------//
    //                     CONFIGURATION SECTION DEFINES (change defines as needed)                            //
    //---------------------------------------------------------------------------------------------------------//



    /*-----------------------------------------------------------------------------------------------------------
     baud rate defines
     page 197 of datasheet (mega88/168), 38,400 has little error, and works well with internal 8mhz clock
     set to whatever you need for your clock and desired baud rate
     high baud register is not used in bootloader, so no high cpu freq's with low baud rates
    -----------------------------------------------------------------------------------------------------------*/

    #define BAUD_LO_VALUE           12                                  // BAUD 38400 .2% error @ 8Mhz

    /*-----------------------------------------------------------------------------------------------------------
     set the 'key' (character) you want for 'triggering' the bootloader
     (is set to 'escape' character here)
    -----------------------------------------------------------------------------------------------------------*/

    #define BL_TRIGGER_KEY          '\e'                                // character to trigger bootloader

    /*-----------------------------------------------------------------------------------------------------------
     set commands here for 'go' and 'erase', and 'dump flash' (only when decrypt is off)
     they have to be an odd number, so commands will not be confused with intel hex records (as they cannot have
     odd length data records)
     the command in hex will be the same as ascii-> for erase >:EF<enter>, for go >APP<enter>
    -----------------------------------------------------------------------------------------------------------*/

    #define BL_GO_COMMAND           0x99                                // :pp (app - 0x0A,0x09,0x09)
    #define BL_ERASE_COMMAND        0xEF                                // 'erase flash'
    #define BL_READ_PGM             0xDF                                // 'dump flash'

    /*-----------------------------------------------------------------------------------------------------------
     if the intel hex file is 'encrypted', decryption will need to be on
     this will usually be off until/if needed
    -----------------------------------------------------------------------------------------------------------*/

    #define DECRYPT_ON              0                                   // 1 = enable decryption (encrypted hex)

    /*-----------------------------------------------------------------------------------------------------------
     if 'pin trigger' wanted instead of a 'trigger character' on the usart, turn it on here
     and set the desired port and pin wanted, and what state will trigger the bootloader
    -----------------------------------------------------------------------------------------------------------*/

    #define USE_PIN_TRIGGER         0                                   // 1 = use a pin to trigger bootloader

    #define PIN_TRIGGER             PINB                                // which port you want to use
    #define PINn_TRIGGER            5                                   // which pin number you want to use
    #define PINn_TRIGGER_STATE      0                                   // 0 = LOW will trigger (HIGH = bypass)
                                                                        // 1 = HIGH will trigger (LOW = bypass)

    /*-----------------------------------------------------------------------------------------------------------
     ATmega16 has UDR, ATmega88/168 has UDR0, so use defines here to get them the same
     if you have an avr with udr1/udr2, change the number below to the desired usart number
    -----------------------------------------------------------------------------------------------------------*/

    #ifdef  UDR                                                         // for avr's with just a 'plain' udr,
    #define USART_NUM                                                   // leave this blank, nothing, empty
    #else                                                               // for newer avr's, set usart number
    #define USART_NUM               0                                   // usart number( here using usart0 )
    #endif



    //---------------------------------------------------------------------------------------------------------//
    //          .______   .______        ______     _______ .______          ___       .___  ___.              //
    //          |   _  \  |   _  \      /  __  \   /  _____||   _  \        /   \      |   \/   |              //
    //          |  |_)  | |  |_)  |    |  |  |  | |  |  __  |  |_)  |      /  ^  \     |  \  /  |              //
    //          |   ___/  |      /     |  |  |  | |  | |_ | |      /      /  /_\  \    |  |\/|  |              //
    //          |  |      |  |\  \----.|  `--'  | |  |__| | |  |\  \----./  _____  \   |  |  |  |              //
    //          | _|      | _| `._____| \______/   \______| | _| `._____/__/     \__\  |__|  |__|              //
    //                                                                                                         //
    //---------------------------------------------------------------------------------------------------------//
    //                      PROGRAM (normally no changes needed in this section)                               //
    //---------------------------------------------------------------------------------------------------------//



    /*-----------------------------------------------------------------------------------------------------------
         ___  ___ ___ ___ _  _ ___ ___
        |   \| __| __|_ _| \| | __/ __|
        | |) | _|| _| | || .` | _|\__ \
        |___/|___|_| |___|_|\_|___|___/

    -----------------------------------------------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------------------------------------------
     this defines the bootloader size in bytes, and the bootloader start address in bytes
    -----------------------------------------------------------------------------------------------------------*/

    // bootloader size in bytes (the 256 word boot section from the checklist, 2 bytes per word)
    #define BL_SIZE_BYTES           512     
    // byte address where the bootloader starts: total flash size (FLASHEND+1) minus the bootloader size
    #define BL_ADDRESS              (((uint16_t)FLASHEND+1) - BL_SIZE_BYTES)
    // rjmp target used by main() to leave the bootloader (the 'app_0' asm operand)
    // NOTE(review): presumably main sits at BL_ADDRESS, so main+BL_SIZE_BYTES wraps past the end of flash
    //               to application address 0 - confirm against the .text linker setting
    #define APP_ZERO                (main+BL_SIZE_BYTES)

    /*-----------------------------------------------------------------------------------------------------------
     concat macros, used in usart defines
    -----------------------------------------------------------------------------------------------------------*/

    #define MY_CONCAT3b(a,b,c)      a ## b ## c                         // needed to expand USART_NUM
    #define MY_CONCAT3(a,b,c)       MY_CONCAT3b(a,b,c)                  // concat 3 'words'

    /*-----------------------------------------------------------------------------------------------------------
     undefine older usart names for mega64, so usart macros below work correctly for the mega64
    -----------------------------------------------------------------------------------------------------------*/

    #if defined (__AVR_ATmega64__)
        #undef RXEN
        #undef TXEN
        #undef RXC
        #undef TXC
        #undef UDR
        #undef UDRE
        #undef FE
        #undef DOR
        #undef U2X
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     usart definitions
    -----------------------------------------------------------------------------------------------------------*/

    // generic names for the usart registers and bits, built by pasting USART_NUM into the avr name
    // (USART_NUM = 0 gives UBRR0L, UCSR0B, RXEN0, UDR0 ...   USART_NUM empty gives UBRRL, UCSRB, RXEN, UDR ...)
    // NOTE(review): __CONCAT is not defined in this file, presumably it comes from the avr-libc headers
    #define BAUD_RATE_REG_LO        MY_CONCAT3(UBRR,USART_NUM,L)        // baud rate register, low byte
    #define USART_CONTROL_B         MY_CONCAT3(UCSR,USART_NUM,B)        // control/status register B
    #define USART_STATUS_B          MY_CONCAT3(UCSR,USART_NUM,B)        // same register as USART_CONTROL_B
    #define USART_CONTROL_A         MY_CONCAT3(UCSR,USART_NUM,A)        // control/status register A
    #define USART_STATUS_A          MY_CONCAT3(UCSR,USART_NUM,A)        // same register as USART_CONTROL_A
    #define RX_ENABLE               __CONCAT(RXEN,USART_NUM)            // bit number, used as (1<<RX_ENABLE)
    #define TX_ENABLE               __CONCAT(TXEN,USART_NUM)            // bit number, used as (1<<TX_ENABLE)
    #define RX_COMPLETED            __CONCAT(RXC,USART_NUM)             // bit number, part of MY_RX_FLAGS
    #define TX_COMPLETED            __CONCAT(TXC,USART_NUM)             // bit number (TXC/TXCn)
    #define RX_DATA                 __CONCAT(UDR,USART_NUM)             // data register (read side)
    #define TX_DATA                 __CONCAT(UDR,USART_NUM)             // same register as RX_DATA (write side)
    #define TX_BUFFER_EMPTY         __CONCAT(UDRE,USART_NUM)            // bit number (UDRE/UDREn)
    #define FRAMING_ERROR           __CONCAT(FE,USART_NUM)              // bit number, part of MY_RX_FLAGS
    #define OVERRUN_ERROR           __CONCAT(DOR,USART_NUM)             // bit number, part of MY_RX_FLAGS
    #define USART_2X_SPEED          __CONCAT(U2X,USART_NUM)             // bit number (U2X/U2Xn)

    // usart helper macros
    // the two assignment macros are fully parenthesized so they stay a single expression wherever they are
    // used (without the parentheses 'ENABLE_USART() == x' would store the result of the comparison)
    #define DISABLE_USART()         (USART_CONTROL_B = 0)               // clear control B: rx and tx off
    #define ENABLE_USART()          (USART_CONTROL_B = ((1<<RX_ENABLE) | (1<<TX_ENABLE)))   // rx and tx on

    // mask of the receive flags: rx complete, framing error, data overrun
    #define MY_RX_FLAGS             ((1<<RX_COMPLETED) | (1<<FRAMING_ERROR) | (1<<OVERRUN_ERROR))

    /*-----------------------------------------------------------------------------------------------------------
     set maximum size of data bytes we can receive in an intel hex record
     limit by page size, or if page is greater than 128, limit to 128 (rx_buffer needs to stay < 256)
    -----------------------------------------------------------------------------------------------------------*/

    #if (SPM_PAGESIZE > 128)                                            // max data words for a record
    #define MAX_DD_WORDS             64                                 // limit to 128 bytes (64 words)
    #else
    #define MAX_DD_WORDS             (SPM_PAGESIZE/2)                   // else limit to page size (words)
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     make our own watchdog macro, without the irq bit save/restore
    -----------------------------------------------------------------------------------------------------------*/

    // timed watchdog enable sequence-
    //   1st write sets the change enable bit together with WDE
    //   2nd write sets WDE plus the prescaler bits taken from 'value'
    //       (bit3 of value selects _WD_PS3_MASK, bits2-0 of value are used as they are)
    // wrapped in do/while(0) so both writes always act as ONE statement (safe after an unbraced if/else),
    // and 'value' is parenthesized so an expression such as (a | b) can be passed
    // 'value' is evaluated twice, so pass a constant or an expression without side effects
    #define ENABLE_WDT(value)       do {                                                                \
                                        _WD_CONTROL_REG = (_BV(_WD_CHANGE_BIT) | _BV(WDE));             \
                                        _WD_CONTROL_REG = ((uint8_t) ((((value) & 0x08) ?               \
                                                            _WD_PS3_MASK : 0x00) |                      \
                                                            _BV(WDE) | ((value) & 0x07)));              \
                                    } while (0)

    /*-----------------------------------------------------------------------------------------------------------
     pin trigger or trigger char received macros
    -----------------------------------------------------------------------------------------------------------*/

    // NO_TRIGGER is true while the bootloader trigger has NOT been seen (so the caller keeps looping)
    // every variant is fully parenthesized, so it can be negated or combined (!NO_TRIGGER, NO_TRIGGER && x)
    // without the operator binding to only part of the comparison
    // each evaluation reads PIN_TRIGGER or RX_DATA again

    #if (USE_PIN_TRIGGER)                                               // if using pin state for a trigger

    #if (PINn_TRIGGER_STATE)                                            // and trigger state is not 0
    #define NO_TRIGGER              ((PIN_TRIGGER & (1<<PINn_TRIGGER)) == 0)    // loop if 0
    #else                                                               // else trigger state is 0
    #define NO_TRIGGER              ((PIN_TRIGGER & (1<<PINn_TRIGGER)) != 0)    // so loop if not 0
    #endif

    #else                                                               // using trigger char on usart
    #define NO_TRIGGER              (RX_DATA != BL_TRIGGER_KEY)         // loop if no trigger char received
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     some error checking stuff here
    -----------------------------------------------------------------------------------------------------------*/

    #if ( (!(BL_GO_COMMAND & 1)) || (!(BL_ERASE_COMMAND & 1)) || (!(BL_READ_PGM & 1)) )
    #error Commands need to be an odd number                            // if not odd number, make error
    #endif

    #if (FLASHEND > 0xFFFF)
    #error "This AVR is not supported, is >64kb"
    #endif

    #ifndef IVSEL
    #error "This AVR is not supported, no IVSEL"
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     reset flags register
    -----------------------------------------------------------------------------------------------------------*/

    #ifdef  MCUCSR
    #define RESET_FLAGS             MCUCSR
    #else
    #define RESET_FLAGS             MCUSR
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     IVSEL register
    -----------------------------------------------------------------------------------------------------------*/

    #ifdef  GICR
    #define IRQ_CONTROL             GICR
    #else
    #define IRQ_CONTROL             MCUCR
    #endif

    /*-----------------------------------------------------------------------------------------------------------
     bootloader status defines
    -------------------------------------------------------------------------------------------------------------

          bl_status bits1-0  (Y=1, N=0)
          ------------------------------
         | >After   |   Go?   | Program?|
         |------------------------------|
         | Start    |    Y    |    N    | after bootloader starts, can go, but not program (erase first)
         |------------------------------|
         | Erase    |    N    |    Y    | after erase, cannot go (nothing to go to), can program
         |------------------------------|
         | Error    |    N    |    N    | after error, cannot go (unknown flash state), cannot program
         |------------------------------|
         | Record ok|    N    |    Y    | after a good record programmed, cannot go (unknown flash state)
         |------------------------------|
         | EOF ok   |    Y    |    N    | after eof, can go, but cannot program (erase first)
          ------------------------------

    -----------------------------------------------------------------------------------------------------------*/

    // status tests and setter, all operate on the 'bl_status' variable that is in scope where they are used
    // BL_SET_STATUS is fully parenthesized so it stays one assignment expression wherever it is used
    // (without the parentheses 'BL_SET_STATUS(x) == y' would store the result of the comparison)
    #define BL_NO_PROGRAM       (!(bl_status & 1))                      // true if bit0 not set
    #define BL_CAN_GO           (bl_status & 2)                         // true if bit1 set
    #define BL_SET_STATUS(s)    (bl_status = (s))                       // set bootloader status

    // status values: bit0 = can program, bit1 = can go
    // (record ok/erase ok share value 1, start/eof ok share value 2)
    #define BL_ERROR            0                                       // error state
    #define BL_RECORD_OK        1                                       // record ok state
    #define BL_ERASE_OK         1                                       // erase ok state
    #define BL_START            2                                       // start state (entering bootloader)
    #define BL_EOF_OK           2                                       // file ok state



    /*-----------------------------------------------------------------------------------------------------------
         ___ _   _ _  _  ___ _____ ___ ___  _  _     ___ ___  ___ _____ ___ _______   _____ ___ ___
        | __| | | | \| |/ __|_   _|_ _/ _ \| \| |   | _ \ _ \/ _ \_   _/ _ \_   _\ \ / / _ \ __/ __|
        | _|| |_| | .` | (__  | |  | | (_) | .` |   |  _/   / (_) || || (_) || |  \ V /|  _/ _|\__ \
        |_|  \___/|_|\_|\___| |_| |___\___/|_|\_|   |_| |_|_\\___/ |_| \___/ |_|   |_| |_| |___|___/

    -----------------------------------------------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------------------------------------------
     make sure main is the first to run (.init9), and has no extra compiler code (naked)
    -----------------------------------------------------------------------------------------------------------*/

    int     main        (void)                      __attribute__ ((naked,section (".init9")));

    /*-----------------------------------------------------------------------------------------------------------
     asm coded functions
    -----------------------------------------------------------------------------------------------------------*/

    void    hex2ascii   (uint8_t h)                 __attribute__ ((naked));
    void    do_spm      (uint16_t addr,uint8_t cmd) __attribute__ ((naked));

    /*-----------------------------------------------------------------------------------------------------------
     asm coded functions that are inside the above 'real' functions (to save space)
    -----------------------------------------------------------------------------------------------------------*/

    void    sendchar    (uint8_t c);
    void    safeT_check (void);

    /*-----------------------------------------------------------------------------------------------------------
     jump table (not a real function, just asm rjmp's)
    -----------------------------------------------------------------------------------------------------------*/

    void    jump_table  (void)                      __attribute__ ((naked,section (".jmptbl")));



    /*-----------------------------------------------------------------------------------------------------------
          ___ _    ___  ___   _   _     __   ___   ___ ___   _   ___ _    ___ ___
         / __| |  / _ \| _ ) /_\ | |    \ \ / /_\ | _ \_ _| /_\ | _ ) |  | __/ __|
        | (_ | |_| (_) | _ \/ _ \| |__   \ V / _ \|   /| | / _ \| _ \ |__| _|\__ \
         \___|____\___/|___/_/ \_\____|   \_/_/ \_\_|_\___/_/ \_\___/____|___|___/

    -----------------------------------------------------------------------------------------------------------*/

    /*-----------------------------------------------------------------------------------------------------------
     receive buffer to hold intel hex record (receive loop expects the buffer size to be an odd
     number, used by nibble shifting, so if changed to an even number, the receive loop needs changing)
    -----------------------------------------------------------------------------------------------------------*/

    // one intel hex record as received, fields in record order-  ':' LL AAAA TT DD.. CC  then the cr
    // NOTE(review): the size is 5 + (2 * MAX_DD_WORDS) + 2 bytes only if the compiler adds no padding
    //               in front of DDDD - confirm for the target
    struct {
        uint8_t colon;                                                  // ':' (colon)
        uint8_t LL;                                                     // LL   - record length
        uint8_t AAh;                                                    // AAxx - high byte of address
        uint8_t AAl;                                                    // xxAA - low byte of address
        uint8_t TT;                                                     // TT   - record type (0 or 1 used here)
        uint16_t DDDD[MAX_DD_WORDS];                                    // DDDD - data bytes (words)
        uint8_t CC;                                                     // CC   - used just for sizing of buffer
        uint8_t CR;                                                     // cr   - used just for sizing of buffer
    }rx_buffer;



    /*===========================================================================================================
         __  __   _   ___ _  _
        |  \/  | /_\ |_ _| \| |
        | |\/| |/ _ \ | || .` |
        |_|  |_/_/ \_\___|_|\_|

    ===========================================================================================================*/

    int main(void){

        /*-------------------------------------------------------------------------------------------------------
         local variables of main()
         r2-r17,r28,r29 are call saved registers, which means the compiler will save these if used in a function
         all others may not survive between function calls, and the compiler may optimize a register variable
         away if a function call is 'between' when that variable is used
         to prevent register variables being optimized away, the do_spm is called using asm, where we can tell
         the compiler which registers are actually 'clobbered' after the rcall
        -------------------------------------------------------------------------------------------------------*/

        uint16_t    address;                                            // address to program/erase

        register uint8_t    bl_status       asm("r2");                  // status of bootloader
        register uint8_t    recordlength    asm("r21");                 // store data length of record
        register uint8_t    checksum        asm("r16");                 // store accumulating checksum

        register uint16_t   addressV        asm("r16");                 // store address for verify
        register uint8_t    temp_r16        asm("r16");                 // used to get AAAA swapped into addressV
        register uint8_t    temp_r17        asm("r17");                 // used to get AAAA swapped into addressV

        register uint8_t    index           asm("r20");                 // DDDD array index
        register uint8_t    indexV          asm("r23");                 // DDDD array verify index

        register uint8_t    temp_r24        asm("r24");                 // r24, (use between calls only)


        /*-------------------------------------------------------------------------------------------------------

              Reset Flags 'Logic' Table
              ----------------------------------------------------------------------
             | MCU Status Register - MCUSR   (1 = set   0 = clear   x = don't care) |
             |----------------------------------------------------------------------|
             |x|x|x|x|  Bit3 |  Bit2 |  Bit1 |  Bit0 |   Run Bootloader?            |
             |----------------------------------------------------------------------|
             |x|x|x|x|  WDRF |  BORF | EXTRF |  PORF |                              |
             |----------------------------------------------------------------------|
             |x|x|x|x|   0   |   x   |   x   |   1   |  YES, this could be power up |
             |----------------------------------------------------------------------|
             |x|x|x|x|   1   |   x   |   x   |   x   |  NO, watchdog reset          |
             |----------------------------------------------------------------------|
             |x|x|x|x|   0   |   x   |   x   |   0   |  NO, not a power up reset    |
              ----------------------------------------------------------------------

        -------------------------------------------------------------------------------------------------------*/

        asm volatile(

            /*---------------------------------------------------------------------------------------------------
             first three instructions will cause an exit from the bootloader if the watchdog flag is set
             the fourth instruction sets up to enable ivsel change, the fifth sets ivce in MCUCR or GICR

             any 'errant' jump into the bootloader to any of the first five instructions will still have to
             pass the PORF=1 test which takes place below the change enable instruction
            ---------------------------------------------------------------------------------------------------*/

            "in     r0,%[flags]                 \n\t"                   // get flags into r0
            "sbrc   r0,%[wdrf]                  \n\t"                   // skip if wdrf=0,
            "rjmp   %[app_0]                    \n\t"                   // else exit (wdrf=1)
            "ldi    r31,%[ivce]                 \n\t"                   // r31=(1<<ivce),
            "out    %[irqctrl],r31              \n\t"                   // enable iv change

            /*---------------------------------------------------------------------------------------------------
             any 'errant' jump into the bootloader anywhere below this line will not cause ivsel to be set
            ---------------------------------------------------------------------------------------------------*/

            "in     r0,%[flags]                 \n\t"                   // clk+1, get flags into r0 again
            "ldi    r31,1<<%[ivsel]             \n\t"                   // clk+2, r31=ivsel
            "sbrc   r0,%[porf]                  \n\t"                   // clk+3, if porf=0 skip next
            "out    %[irqctrl],r31              \n\t"                   // clk+4, ivsel=1

            /*---------------------------------------------------------------------------------------------------
             now check if the change occurred, if not, porf was 0
             will leave bootloader with i/o registers untouched, including SREG, (GICR or MCUCR will be 0
             though, which IS the initial value of those registers), r0 will =flags,r31 will =irq_ctrl register
            ---------------------------------------------------------------------------------------------------*/

            "in     r31,%[irqctrl]              \n\t"                   // get ivsel into r31
            "sbrs   r31,%[ivsel]                \n\t"                   // if ivsel=1, skip next
            "rjmp   %[app_0]                    \n\t"                   // else ivsel=0, change didn't work
            :
            :
            [flags]     "I" (_SFR_IO_ADDR(RESET_FLAGS)),
            [irqctrl]   "I" (_SFR_IO_ADDR(IRQ_CONTROL)),
            [wdrf]      "I" (WDRF),
            [porf]      "I" (PORF),
            [ivce]      "M" (1<<IVCE),
            [ivsel]     "I" (IVSEL),
            [app_0]     "i" (APP_ZERO)
        );

        /*-------------------------------------------------------------------------------------------------------
         we got this far, watchdog reset flag NOT set AND power on reset flag IS set (WDRF=0 AND PORF=1)
         now enable and set the watchdog timeout to 1/8 second
        -------------------------------------------------------------------------------------------------------*/

        asm volatile("jmp_start:");                                     // app jump will start here
        asm volatile("clr __zero_reg__");                               // compiler needs r1 to be 0
        ENABLE_WDT(WDTO_120MS);                                         // enable watchdog, 1/8th sec timeout

        /*-------------------------------------------------------------------------------------------------------
         setup stack pointer 
        -------------------------------------------------------------------------------------------------------*/

        SP=RAMEND;

        /*-------------------------------------------------------------------------------------------------------
         setup the serial port-> baud rate, tx/rx enable
        -------------------------------------------------------------------------------------------------------*/

        BAUD_RATE_REG_LO = BAUD_LO_VALUE;                               // set baud low (baud high not used)
        ENABLE_USART();                                                 // enable usart tx/rx macro

        /*-------------------------------------------------------------------------------------------------------
         if all reset flags were clear, skip this step, and just go to the bootloader

         otherwise-
         just keep checking rx data for an escape character (or whatever you define as BL_TRIGGER_KEY)
         if no trigger character received, watchdog will timeout and cause a reset (setting WDRF flag)

         no check of RXC flag is made here, just keep reading udr over and over, the first time reading a new byte
         that came in the udr register from the shift register, the rxc flag will be cleared, allowing the shift
         register to move the next byte into udr when its received (same data IS read over and over until a new
         byte is received)

         watchdog timer was set to 1/8 second, so that's how long we have to receive the escape
         the advantages of doing this with the watchdog is 1-that we use it as a timer that has its
         own clock and will timeout to the value we want regardless of cpu frequency 2-it will also
         take care of resetting the cpu so our application will start with a fresh restart, and all
         I/O settings that the datasheet says are initial settings, will hold true

         OR

         if using a pin to trigger the bootloader, just keep checking the pin, if not in the correct
         state, will just loop until watchdog causes a reset
        -------------------------------------------------------------------------------------------------------*/

        if(RESET_FLAGS){                                                // only if 'reset' into bootloader
            do{
                safeT_check();                                          // all loops need safeT_check
            }while(NO_TRIGGER);                                         // loop while not in trigger state
        }

        /*-------------------------------------------------------------------------------------------------------
         ok, received escape, pin state correct, or just jumped here, WE ARE NOW IN THE BOOTLOADER
        -------------------------------------------------------------------------------------------------------*/

        /*-------------------------------------------------------------------------------------------------------
         bl_status will be BL_START initially (which means you HAVE TO erase before programming)
        -------------------------------------------------------------------------------------------------------*/

        BL_SET_STATUS(BL_START);

        /*-------------------------------------------------------------------------------------------------------
         now proceed with bootloader
         keep looping through this forever (well, maybe not forever)
        -------------------------------------------------------------------------------------------------------*/

        while(1){

            /*---------------------------------------------------------------------------------------------------
             goto command prompt
            ---------------------------------------------------------------------------------------------------*/

            goto _cr;                                                   // goto command prompt

            /*---------------------------------------------------------------------------------------------------
             error (erase verify failed, record length is odd number, checksum bad,
             address went into bootloader area, programming verify error, rx framing/overrun error)
            ---------------------------------------------------------------------------------------------------*/

            _error:
            asm volatile("_err:");
            sendchar('!');                                              // notify that error happened
            BL_SET_STATUS(BL_ERROR);                                    // any error clears 'bl_status'
            goto _cr;                                                   // skip _did_nothing, goto _cr

           /*---------------------------------------------------------------------------------------------------
             nothing was done, so show '?', command or record was not processed for some reason- first character
             not a ':', cr was not the last character, cannot 'go', cannot 'program'
            ---------------------------------------------------------------------------------------------------*/

            _did_nothing:
            sendchar('?');                                              // means nothing done with input 'line'

            /*---------------------------------------------------------------------------------------------------
             flush rx here, will flush out linefeeds from the hex file, and will also prevent buffer overruns-
             if multiple 'bad' characters received at command prompt, we are receiving 1 character, then
             transmitting 2 (cr + prompt), and we can't keep that up without overrun in the rx buffer, so just
             flush it (line delay in terminal program will also prevent any other characters in buffer except the
             linefeed). tx will still finish transmitting the '!' or '?' character sent above when the usart is
             turned off (datasheet says anything in shift register and buffer will still continue to transmit
             until completed)

             command prompt-  can ALWAYS erase and flash dump, any command prompt
             command prompt-  >   can go, cannot program
             command prompt-  =   can program, cannot go
             command prompt-  <   can only erase
            ---------------------------------------------------------------------------------------------------*/

            _cr:
            DISABLE_USART();                                            // flush any LF, also prevent overrun
            ENABLE_USART();                                             // enable again
            sendchar('\r');                                             // cr
            sendchar('<' + bl_status);                                  // prompt state 0= '<' 1= '=' 2= '>'


            /*---------------------------------------------------------------------------------------------------
             process intel hex record
              intel hex record   >:LLAAAATTDD...DDCC   Always starts with a ':'
               LL     = Record length (in bytes, of DD bytes)
               AAAA   = memory address (in bytes)
               TT     = record type (00 = data,01 = end of file, others not used)
               DD     = data byte , number of bytes = LL
               CC     = checksum (two's complement of the byte sum: add all bytes, keep low 8 bits, invert, add 1)
                      CC = (~(LL + AA + AA + TT + DD...DD)) + 1, so the byte sum including CC itself = 0

             get the whole record here (input limited by size of rx_buffer)
             if rx_buffer overrun, last character will not be cr, and will ignore record (back to command prompt)
             cr has to be the last char (and breaks out of do loop)
             any 'control' character (0x00-0x1F) breaks out of loop
            ---------------------------------------------------------------------------------------------------*/

            asm volatile(

                "ldi    r17,%[bufsize]          \n\t"                   // keep us inside the buffer
                "ldi    r16,0x00                \n\t"                   // temp use
                "ldi    r28,lo8(%[bufptr])      \n\t"                   // Y pointer-> &rxbuffer
                "ldi    r29,hi8(%[bufptr])      \n\t"
                "st     Y,r16                   \n\t"                   // clear first buffer position (colon)

                /*-----------------------------------------------------------------------------------------------
                 get rx status, wait until RXCn, FEn or DORn flag set (any one or more)
                 also clear watchdog, since we could be here a while
                 if any flag set, drop out of do loop, read udr to either get data or to flush rx & reset flags,
                 then check if RXCn is the only flag set, if not, framing or overrun flag(s) set, so goto _error
                -----------------------------------------------------------------------------------------------*/

                "1:                             \n\t"                   // rx loop
                "rcall  safeT_check             \n\t"                   // safeT_check inside loops
                "wdr                            \n\t"                   // reset watchdog
                "lds    r25,%[status]           \n\t"                   // get usart status
                "andi   r25,%[flags]            \n\t"                   // mask off unwanted bits
                "breq   1b                      \n\t"                   // if no flags, try again

                "lds    r24,%[udr]              \n\t"                   // read udr
                "cpi    r25,%[rxc]              \n\t"                   // check if rxc is the only flag set
                "brne   _err                    \n\t"                   // no, buffer overrun or framing error

                "cpi    r24,' '                 \n\t"                   // if rx_data >= ' '

                /*-----------------------------------------------------------------------------------------------
                 if < ' ' (0x00-0x1F), its a 'control' character, done
                -----------------------------------------------------------------------------------------------*/

                "brcs   5f                      \n\t"                   // if not, done here

                /*-----------------------------------------------------------------------------------------------
                 if >= ' ' (0x20-0xFF), echo it, subtract 7 if >= 0x40, mask off high nibble to leave 0x00-0x0F
                 '0' (0x30) converts to 0x00, '9' (0x39) to 0x09, 'A' (0x41) to 0x0A, 'F' (0x46) to 0x0F
                 ANY characters above 0x1F are converted to 0x00-0x0F
                -----------------------------------------------------------------------------------------------*/

                "rcall  sendchar                \n\t"                   // echo back character
                "cpi    r24,0x40                \n\t"                   // if 'A'-'F' (0x41-0x46)
                "brcs   2f                      \n\t"                   // if not, skip next
                "subi   r24,0x07                \n\t"                   // yes, subtract 7 ('A'=0x41->0x3A,etc)
                "2:                             \n\t"
                "andi   r24,0x0F                \n\t"                   // mask off bits3-7, make ALL 0x00-0x0F

                /*-----------------------------------------------------------------------------------------------
                 rx_buffer is odd number length, so first character (':') will be stored right away as a
                 'complete' byte (that's why rx_buffer.colon had to be cleared)
                 the second character will be shifted left into the high nibble position, third character will
                 be added to second character, then saved, and so on                 
                -----------------------------------------------------------------------------------------------*/

                "sbrc   r17,0                   \n\t"                   // if odd number
                "rjmp   3f                      \n\t"                   // jump ahead
                "swap   r24                     \n\t"                   // else even, low nibble to high nibble
                "st     Y,r24                   \n\t"                   // save it to buffer
                "rjmp   4f                      \n\t"                   // check if still in buffer
                "3:                             \n\t"                   // odd 'position'
                "ld     r25,Y                   \n\t"                   // get previous high nibble
                "add    r24,r25                 \n\t"                   // merge high nibble + low nibble
                "st     Y+,r24                  \n\t"                   // store it in buffer, inc Y pointer
                "add    r16,r24                 \n\t"                   // add to checksum
                "4:                             \n\t"                   // now check if still in buffer
                "subi   r17,0x01                \n\t"                   // buffer size - 1
                "brne   1b                      \n\t"                   // if not 0, get another character
                "5:                             \n\t"                   // done
                ::
                [bufsize]   "M" (sizeof(rx_buffer)),
                [bufptr]    "i" (&rx_buffer),
                [status]    "M" (_SFR_MEM_ADDR(USART_STATUS_A)),
                [flags]     "M" (MY_RX_FLAGS),
                [udr]       "M" (_SFR_MEM_ADDR(RX_DATA)),
                [rxc]       "M" (1<<RX_COMPLETED)

            );
    
            /*---------------------------------------------------------------------------------------------------
             if last rx character was not cr, go back to command prompt
             could be rx_buffer overrun or a 'control' character other than cr (like esc key)
            ---------------------------------------------------------------------------------------------------*/

            if(temp_r24 != '\r'){                                       // if last char not cr
                goto _did_nothing;                                      // show that nothing was done
            }

            /*---------------------------------------------------------------------------------------------------
             if first character was not ':', go back to command prompt
            ---------------------------------------------------------------------------------------------------*/

            if(rx_buffer.colon != (':' & 0x0F)){                        // if rx not ':' (0x3A & 0x0F = 0x0A)
                goto _did_nothing;                                      // show that nothing was done
            }

            /*---------------------------------------------------------------------------------------------------
             store record length - LL (data byte count)
            ---------------------------------------------------------------------------------------------------*/

            recordlength = rx_buffer.LL;                                // store recordlength

            /*---------------------------------------------------------------------------------------------------
             if LL is the 'go' command, loop and cause a wdt reset IF go bit set in bl_status
            ---------------------------------------------------------------------------------------------------*/

            if(recordlength == BL_GO_COMMAND){                          // if LL= BL_GO_COMMAND
                do{                                                     // loop if can 'go' (cause wdt reset)
                    safeT_check();                                      // for any errant jumps into loop
                }while(BL_CAN_GO);
                goto _did_nothing;                                      // show that nothing was done
            }

            /*---------------------------------------------------------------------------------------------------
             Erase Flash loop
             if LL=0xEF, erase flash, set bl_status
             address will be a byte address, SPM_PAGESIZE is a byte size
            ---------------------------------------------------------------------------------------------------*/

            if(recordlength == BL_ERASE_COMMAND){
                address = BL_ADDRESS;                                   // set to bootloader start address

                /*-----------------------------------------------------------------------------------------------
                 loop through all pages (including page 0)
                -----------------------------------------------------------------------------------------------*/
                while(address){                                         // until address == 0
                    sendchar('.');
                    wdt_reset();                                        // need to clear watchdog in this loop
                    address -= SPM_PAGESIZE;                            // back it up one page
                    do_spm(address,__BOOT_PAGE_ERASE);                  // erase page
                }

                /*-----------------------------------------------------------------------------------------------
                 set bl_status, then back to command prompt
                -----------------------------------------------------------------------------------------------*/

                BL_SET_STATUS(BL_ERASE_OK);                             // set 'bl_status'
                goto _cr;                                               // ok
            }

            /*---------------------------------------------------------------------------------------------------
             Dump Flash
             if LL=BL_READ_PGM, 'dump' (read) program flash (ascii output)
            ---------------------------------------------------------------------------------------------------*/

            #if (!(DECRYPT_ON))                                         // only do IF decryption NOT on
            if(recordlength == BL_READ_PGM){
                asm volatile(
                    "ldi    r30,0               \n\t"                   // Z=0
                    "ldi    r31,0               \n\t"        
                    "ldi    r16,%[bl_addrH]     \n\t"                   // r16= bootloader address high byte
                    "1:                         \n\t"                   // loop start
                    "wdr                        \n\t"                   // reset watchdog in this loop
                    "mov    r17,r30             \n\t"                   // get Zl
                    "andi   r17,0x1F            \n\t"                   // and with 0x1F (every 32 bytes)
                    "brne   2f                  \n\t"                   // if not 0bxxx00000, skip next
                    "ldi    r24,'\r'            \n\t"                   // cr
                    "rcall  sendchar            \n\t"                   // send it
                    "2:                         \n\t"
                    "lpm    r24,Z+              \n\t"                   // get pgm flash byte, inc Z
                    "rcall  hex2ascii           \n\t"                   // display byte as ascii hex pair
                    "cpi    r30,%[bl_addrL]     \n\t"                   // see if Z < BL_ADDRESS
                    "cpc    r31,r16             \n\t"                
                    "brcs   1b                  \n\t"                   // if so, do again
                    ::
                    [bl_addrH]  "M" (BL_ADDRESS>>8),                    // bootloader address high byte
                    [bl_addrL]  "M" (BL_ADDRESS&0xFF)                   // bootloader address low byte
                );
                goto _cr;                                               // back to command prompt
            }
            #endif

            /*---------------------------------------------------------------------------------------------------
             if program bit not set in bl_status, do not continue from here (erase flash first)
            ---------------------------------------------------------------------------------------------------*/

            if(BL_NO_PROGRAM){                                          // if program bit not set
                goto _did_nothing;                                      // show that nothing was done
            }

            /*---------------------------------------------------------------------------------------------------
             only programming words, so if LL is an odd number, that is an error
             (already limited to MAX_DD_SIZE by record receive loop, so no need to check size)
            ---------------------------------------------------------------------------------------------------*/

            if(recordlength & 1){                                       // no odd number DD bytes
                goto _error;                                            // error
            }

            /*---------------------------------------------------------------------------------------------------
             if checksum on all bytes received not 0, checksum error (':' was added, so need to check if 0x0A)
            ---------------------------------------------------------------------------------------------------*/

            if(checksum != 0x0A){                                       // if not 0 + 0x0A
                goto _error;                                            // checksum error
            }

            /*---------------------------------------------------------------------------------------------------
             TT - Record type
             if TT is 1, that is an EOF record, set bl_status, goto command prompt
            ---------------------------------------------------------------------------------------------------*/

            if(rx_buffer.TT == 1){                                      // if TT is eof record
                BL_SET_STATUS(BL_EOF_OK);                               // set bl_status
                goto _cr;                                               // ok
            }

            /*---------------------------------------------------------------------------------------------------
             now check if TT is NOT 0 (we only do data records which are type 0x00)
            ---------------------------------------------------------------------------------------------------*/

            if(rx_buffer.TT != 0){                                      // if TT not 0
                goto _did_nothing;                                      // ignore none 0 records
            }

            /*---------------------------------------------------------------------------------------------------
             so far, so good. now write to program memory
             index-> used to store rx_buffer.DDDD (word) array position
             indexV-> store  buffer position before page loop, used for verify loop
             address-> used to determine page address (byte address)
             addressV-> store address before page loop, used for verify loop
             recordlength-> number of bytes to program (byte length)
            ---------------------------------------------------------------------------------------------------*/

            index = 0;                                                  // rx_buffer.DDDD array number
            indexV = 0;                                                 // store DDDD position (for verify)

            /*---------------------------------------------------------------------------------------------------
             AAAA is in sram high byte in lower memory, low byte in higher memory
             compiler needs uint16_t low byte in lower location, high byte in higher location
             which is good for our data bytes, as they are 'reversed' in the intel hex record
             C code can swap this, but the compiler seems to want to generate too much code
             so this is my temporary solution-
             get the low AA byte, put it in the low register of the r17:r16 pair, get the high AA byte,
             put it in the high register of the r17:r16 pair (addressV), then copy the 'swapped' AAAA to 'address'
            ---------------------------------------------------------------------------------------------------*/

            temp_r16 = rx_buffer.AAl;                                   // get low AA
            temp_r17 = rx_buffer.AAh;                                   // get high AA
            address = addressV;                                         // copy it to address

            /*---------------------------------------------------------------------------------------------------
             program flash loop
            ---------------------------------------------------------------------------------------------------*/

            do{

                /*-----------------------------------------------------------------------------------------------
                 page loop, program data inside 1 page
                -----------------------------------------------------------------------------------------------*/

                do{

                    /*-------------------------------------------------------------------------------------------
                     if address tries to get into our bootloader space-> error
                    -------------------------------------------------------------------------------------------*/

                    if(address >= (BL_ADDRESS-1)){                      // keep out of our bootloader, please
                        goto _error;                                    // overwrite attempt error
                    }

                    /*-------------------------------------------------------------------------------------------
                     fill page buffer (DDDD was reversed from hex record, but uint16_t type of rx_buffer.DDDD[]
                     gets it right again- low byte from low address, high byte from high address)
                    -------------------------------------------------------------------------------------------*/

                    #if (DECRYPT_ON)                                    // IF decryption needed
                    uint16_t pgm_word;                                  // program memory word variable
                    pgm_word = pgm_read_word((address)|BL_ADDRESS);     // get word from bootloader code
                    rx_buffer.DDDD[index] ^= pgm_word;                  // need xor'd into buffer, for verify
                    #endif

                    asm volatile(
                        "movw   r0,%[buf]       \n\t"                   // data->r1:r0
                        "movw   r24,%[addr]     \n\t"                   // address->r25:r24
                        "ldi    r22,%[bpf]      \n\t"                   // command->r22
                        "rcall  do_spm          \n\t"                   // fill page buffer
                        ::
                        [buf]   "r" (rx_buffer.DDDD[index]),
                        [addr]  "r" (address),
                        [bpf]   "M" (__BOOT_PAGE_FILL)
                        : "r30","r31","r22"                             // clobbered registers
                    );
                    
                    index++;                                            // next DDDD (inc +1,for next DDDD word)
                    address += 2;                                       // next address (is byte address, so +2)
                    recordlength -=2;                                   // dec length by 2 bytes (1 word)

                /*-----------------------------------------------------------------------------------------------
                 end of page loop, do while more data words AND page not crossed
                -----------------------------------------------------------------------------------------------*/

                }while((recordlength)&&(address &((SPM_PAGESIZE) - 1)));

                /*-----------------------------------------------------------------------------------------------
                 no more data bytes OR a page boundary was crossed
                 so write the page (address needs to be backed up to previous page in case a page crossed)
                 (asm used to prevent compiler from optimizing away register variable 'recordlength')
                -----------------------------------------------------------------------------------------------*/

                asm volatile(
                    "movw   r24,%[addr]         \n\t"                   // address->r25:r24
                    "sbiw   r24,2               \n\t"                   // address -2 (in case page crossed)
                    "ldi    r22,%[bpw]          \n\t"                   // spm command->r22
                    "rcall  do_spm              \n\t"                   // write buffer to flash
                    ::
                    [addr]  "r" (address),
                    [bpw]   "M" (__BOOT_PAGE_WRITE)
                    : "r30","r31","r22"                                 // clobbered registers
                );

                /*-----------------------------------------------------------------------------------------------
                 now read back/verify what was just programmed
                 addressV holds the address before the page loading started
                 indexV holds the rx_buffer.DDDD position before the page loading started
                 do until we get to current DDDD position (indexV will = index again)
                -----------------------------------------------------------------------------------------------*/

                do{
                    if(pgm_read_word(addressV++) != rx_buffer.DDDD[indexV]){ // if read word != buffer word,
                        goto _error;                                    // verify error
                    }
                    addressV++;                                         // inc address again (reading words)
                }while(++indexV < index);                               // up to current buffer position

            /*---------------------------------------------------------------------------------------------------
             end of program flash loop
             if a page was crossed, recordlength will not be 0, so there will be more data to program
            ---------------------------------------------------------------------------------------------------*/

            }while(recordlength);                                       // while more data to do (page crossed)

            /*---------------------------------------------------------------------------------------------------
             Yeah!, record programmed without error, go do it again, and again, and again...
            ---------------------------------------------------------------------------------------------------*/

            BL_SET_STATUS(BL_RECORD_OK);                                // set bl_status (no 'go' until eof now)

        }                                                               // while(1)
    }                                                                   // main()



    /*===========================================================================================================
         ___ _   _ _  _  ___ _____ ___ ___  _  _ ___
        | __| | | | \| |/ __|_   _|_ _/ _ \| \| / __|
        | _|| |_| | .` | (__  | |  | | (_) | .` \__ \
        |_|  \___/|_|\_|\___| |_| |___\___/|_|\_|___/
        
    ===========================================================================================================*/

    /*-----------------------------------------------------------------------------------------------------------
     send a hex byte out as an ascii pair (0x12 -> '1' '2')
     void hex2ascii(uint8_t h);

     r24=h      changed
     r25        changed

     send a character on usart, wait for buffer empty first, then put in byte
     void sendchar(uint8_t c); (function inside hex2ascii, saves an rjmp in hex2ascii)

     r24=c      unchanged
     r25        changed

     (the sendchar loop is ok to leave 'unchecked' for possible 'bad jumps' inside it, as udre will always
      eventually become 1, even if the tx is turned off with data in udr, as the transmitter will always tx
      any data in udr and the shift register, which means we will never get 'stuck' inside a never ending loop)

     these functions can also be called from an app using the jump table (FLASHEND-5 and FLASHEND-7),
    -----------------------------------------------------------------------------------------------------------*/

    void hex2ascii(uint8_t h){

        /*-------------------------------------------------------------------------------------------------------
         the code at label 1 converts the low nibble of r24 to an ascii hex digit, then falls into sendchar
         to transmit it; that code is run twice-
           pass 1- entered by 'rcall 1f' with h nibble-swapped, so the HIGH nibble of h is sent first; the
                   'ret' at the end of sendchar returns to the 'pop r24', which restores the original h
           pass 2- falls from the 'pop' into label 1 again, so the LOW nibble is sent; this time the 'ret'
                   returns to whoever called hex2ascii (or to whoever jumped in through the jump table)
         the asm statement lists no outputs and no clobbers, and the function returns through the 'ret'
         inside the asm, so anything the compiler emits after the asm is never reached
         NOTE(review): this relies on the compiler emitting no prologue/epilogue around the asm- confirm in
         the generated listing if the compiler version or options change
        -------------------------------------------------------------------------------------------------------*/

        asm volatile(
            "hex2ascii_start:                   \n\t"                   // entry label used by jump_table
            "push   r24                         \n\t"                   // save a copy of h (restored by pop below)
            "swap   r24                         \n\t"                   // swap h to get high nibble->low nibble
            "rcall  1f                          \n\t"                   // pass 1: return address = the pop below
            "pop    r24                         \n\t"                   // pass 1 ret lands here, r24 = original h
            "1:                                 \n\t"                   // nibble -> ascii, runs once per pass
            "andi   r24,0x0F                    \n\t"                   // h = h & 0x0F (strip off high nibble)
            "subi   r24,-'0'                    \n\t"                   // h += '0'
            "cpi    r24,'9'+1                   \n\t"                   // test if h < '9'+1 (0x3A)
            "brcs   .+2                         \n\t"                   // carry set = is a digit '0'-'9', skip next
            "subi   r24,-7                      \n\t"                   // h += 7 (0x3A-0x3F becomes 'A'-'F')

            "sendchar:                          \n\t"                   // entry label used by jump_table (r24=c)
            "lds    r25,%[status]               \n\t"                   // get usart status a
            "sbrs   r25,%[udre]                 \n\t"                   // skip if udr empty
            "rjmp   sendchar                    \n\t"                   // else keep checking
            "sts    %[udr],r24                  \n\t"                   // store c to udr
            "ret                                \n\t"                   // pass 1: back to pop, pass 2: back to caller
            ::
            [status]    "M" (_SFR_MEM_ADDR(USART_STATUS_A)),            // memory address of usart status register
            [udre]      "I" (TX_BUFFER_EMPTY),                          // bit number tested by sbrs
            [udr]       "M" (_SFR_MEM_ADDR(TX_DATA))                    // memory address of usart data register
        );
    }



    /*-----------------------------------------------------------------------------------------------------------
     SPM function

     r1:r0 will have to be setup before this function is called (page fill needs data in r1:r0)
     r25:r24 will have the address, r22 will have the spm command

     spm will fail if IVSEL=0 (and exit bootloader)
     spm will fail on any 'errant' jump into the bootloader (assuming IVSEL=0)
     spm will fail on any 'errant' jump into the bootloader (assuming spm register already setup) as an
     rjmp/rcall takes 2 cycles, so if it 'lands' on anything other than 'spm', it will have not met the 4 cycle
     requirement, or it will exit the bootloader if it 'lands' on the rjmp instruction

     so, in order for an 'errant' jump into the bootloader to do an spm, it will either have had to set IVSEL,
     OR setup spm register, then 'jump' exactly on the spm instruction

     this function could be used in an application if wanted- save sreg,cli,get IVSEL=1,setup r1:r0 if needed,
     call this function,get IVSEL=0,restore sreg

     if cmd=__BOOT_PAGE_FILL, no spm wait or rww enable will take place- spm wait not needed for page fill, and
     enable of rww not wanted as it will erase the page buffer

     void do_spm(uint16_t addr,uint8_t cmd);

     r22=cmd            changed
     r24:r25=addr,r0    unchanged
     r30,r31            changed
     r1                 cleared

     check if we got into the bootloader through the 'front door' (ivsel will =1)
     void safeT_check(void); (function inside do_spm)

     r22                changed
     r1                 cleared

    -----------------------------------------------------------------------------------------------------------*/

    void do_spm(uint16_t addr,uint8_t cmd){ //r25:r24=addr,r22=cmd,(r1,r0 on your own if needed)

        /*-------------------------------------------------------------------------------------------------------
         flow through the asm below-
           cmd == page fill- branch straight to label 2, do the spm, 'ret' goes back to the caller
           any other cmd   - 'rcall 2f' runs label 2 (the spm for cmd) as a subroutine, its 'ret' lands on
                             label 1, which waits for spmen to clear, loads the rww enable command, then falls
                             into label 2 a second time; the second 'ret' goes back to the caller
         every spm therefore passes through safeT_check, which leaves the bootloader (rjmp to app_0) instead
         of executing spm when ivsel is 0
         the function returns through the 'ret' inside the asm, and no clobbers are declared to the compiler
         NOTE(review): relies on the compiler emitting no prologue/epilogue around the asm- confirm in the
         generated listing if the compiler version or options change
        -------------------------------------------------------------------------------------------------------*/

        asm volatile(
            "do_spm_start:                      \n\t"                   // entry label used by jump_table
            "movw   r30,r24                     \n\t"                   // put addr into Z
            "cpi    r22,%[bpf]                  \n\t"                   // if page fill,
            "breq   2f                          \n\t"                   // no rww enable wanted, just spm
            "rcall  2f                          \n\t"                   // do spm for cmd, ret comes back to label 1

            "1:                                 \n\t"                   // wait for the spm just started to finish
            "lds    r22,%[spmreg]               \n\t"                   // get spm register
            "sbrc   r22,%[spmen]                \n\t"                   // if spmen=0, skip next
            "rjmp   1b                          \n\t"                   // else check again
            "ldi    r22,%[rwwen]                \n\t"                   // next command = enable rww section

            "2:                                 \n\t"                   // common spm sequence, r22 = command
            "sts    %[spmreg],r22               \n\t"                   // load spm command

            "safeT_check:                       \n\t"
            "in     r22,%[irqctrl]              \n\t"                   // clk+1, check if ivsel=1
            "sbrs   r22,%[ivsel]                \n\t"                   // clk+2+3, skip next if ivsel=1
            "rjmp   %[app_0]                    \n\t"                   // else ivsel=0, exit bootloader
            "spm                                \n\t"                   // clk+4, one spm in bootloader, here
            "clr    r1                          \n\t"                   // need to clear r1 in case was page fill
            "ret                                \n\t"                   // to caller, or to label 1 after 'rcall 2f'
            ::
            [bpf]       "M" (__BOOT_PAGE_FILL),                         // command value compared against r22
            [spmreg]    "M" (_SFR_MEM_ADDR(__SPM_REG)),                 // memory address of spm control register
            [spmen]     "M" (SPMEN),                                    // bit number polled at label 1
            [rwwen]     "M" (__BOOT_RWW_ENABLE),                        // command value for rww enable
            [irqctrl]   "I" (_SFR_IO_ADDR(IRQ_CONTROL)),                // io address of register holding ivsel
            [ivsel]     "I" (IVSEL),                                    // bit number tested by sbrs
            [app_0]     "i" (APP_ZERO)                                  // rjmp target when ivsel=0
        );
    }



    /*-----------------------------------------------------------------------------------------------------------
     jump table for functions

        to use in applications, add the following-

        extern void* __vectors;
        #define HEX2ASCII(byte)     ( (void(*)(uint8_t h))              &__vectors-8 )(byte)
        #define SENDCHAR(byte)      ( (void(*)(uint8_t c))              &__vectors-6 )(byte)
        #define DO_SPM(addr,cmd)    ( (void(*)(uint16_t a,uint8_t c))   &__vectors-4 )(addr,cmd)
        #define BOOTLOADER()        ( (void(*)(void))                   &__vectors-2 )()

        the __vectors is needed as a reference to 0x0000, otherwise the numbers will be in reference to the
        section you are currently in (cannot put in absolute addresses like 0x1FFE)

        if not using the gcc startup files, and you need a reference to 0x0000 (absolute), another option would
        be to pass a symbol to the linker (-Wl,--defsym=__absolute_zero=0), then use it in the macros, like
        ' extern void* __absolute_zero; ( (void(*)(void)) &__absolute_zero-2 )() '

        so, for example, to use the sendchar function, just use it as you normally would- SENDCHAR('a'); -which
        will load r24 with 'a', then rcall(or call) 0x0000-6, which will be FLASHEND-5, which will rjmp to the
        sendchar function, and when the return is reached, it will go back to your app code

    -----------------------------------------------------------------------------------------------------------*/

    void jump_table(void){
        // four rjmp stubs, one per exported entry point; the order must not change, as applications reach
        // them by fixed offsets from the end of flash (each stub is 2 bytes, see the FLASHEND-n notes)
        // hex2ascii_start and sendchar are labels inside hex2ascii's asm, do_spm_start is inside do_spm's asm,
        // jmp_start is defined elsewhere in the file
        // NOTE(review): the FLASHEND-n addresses assume this function is linked into the '.jmptbl' section
        // with no compiler-generated prologue in front of the first rjmp- confirm against the prototype/attributes
        asm volatile("rjmp hex2ascii_start");   // FLASHEND-7 (for app use, no ivsel check)
        asm volatile("rjmp sendchar");          // FLASHEND-5 (for app use, no ivsel check)
        asm volatile("rjmp do_spm_start");      // FLASHEND-3 (for app spm use, will need ivsel=1)
        asm volatile("rjmp jmp_start");         // FLASHEND-1 (for app jump into bootloader)

    }




    //---------------------------------------------------------------------------------------------------------//
    //                                   __    __   _______  __      .______                                   //
    //                                  |  |  |  | |   ____||  |     |   _  \                                  //
    //                                  |  |__|  | |  |__   |  |     |  |_)  |                                 //
    //                                  |   __   | |   __|  |  |     |   ___/                                  //
    //                                  |  |  |  | |  |____ |  `----.|  |                                      //
    //                                  |__|  |__| |_______||_______|| _|                                      //
    //                                                                                                         //
    //---------------------------------------------------------------------------------------------------------//
    //      1. general information                                                                             //
    //      2. avr studio settings                                                                             //
    //      3. reset flags logic                                                                               //
    //      4. encryption                                                                                      //
    //      5. eeprom programming                                                                              //
    //      6. program and data memory lock bits                                                               //
    //---------------------------------------------------------------------------------------------------------//


    /*

    -------------------------------------------------------------------------------------------------------------
     1. general information
    -------------------------------------------------------------------------------------------------------------


    Upload intel hex file as a text file using any terminal program
    set terminal program to 38400 baud (or whatever baud value you set)
    8 bits, 1 stop bit, no parity, 20msec transmit line delay, incoming cr=cr+lf, outgoing cr only, no local echo

    make sure boot reset fuse set to correct setting

    only 4 things possible with this bootloader-
    1. erase flash
    2. program flash from an intel hex record
    3. read program flash (only if 'encryption' is off)
    4. 'go' (run application)

    that's it.

    program flash loop just processes an individual intel hex record that is done over and over (line by line)

    to enter bootloader, press (hold down) 'trigger' key in terminal program while powering on the avr
    (trigger key set to 'escape' key here, can change in defines)

    command prompt will show up >

    there are 3 command prompts-  >  =  <  which indicate the bootloader status, and what you can do
        you can ALWAYS erase or read program flash, anytime
        command prompt-  >   can go, cannot program
        command prompt-  =   can program, cannot go
        command prompt-  <   error(s), can only erase now

    first command prompt > (indicates you can 'go' because app still in place, but cannot program until erase)

    colon ':' character HAS to be the first character for any command, or start of hex record
    (':' converts to 0x0A, these also convert to 0x0A- '*','A','a','Q','q', so you can substitute these for ':')

    command to erase flash, can be changed in defines
    >:EF (erase flash)
    =  (prompt indicates you cannot 'go' because nothing to 'go' to, can program as flash is now erased)

    command prompt now =

    to program, upload an intel hex file (text), however your terminal program handles text uploads
    =:1002A000892B31F488E591E0E1DE84E67BDF089577
    =

    any errors will cause ! to be transmitted like so
    =:1002A000892B31F488E591E0E1DE84E67BDF089577!
    < (command prompt switches to < to indicate you are now in an error state, which requires an erase to clear)

    after you send an EOF record, and no previous errors, the command prompt will change to > again

    you can now 'go' ( :99 is default 'go' command, can be changed in defines, 'APP' can be used-converts to :99)

    >:DF (dump flash) will output a hex/ascii image of the application section, if decrypt is NOT on
    you can 'capture' the output to a file if needed

    any time you enter a 'bad' command, or after a hex record produces an error, a '?' after the command or
    record indicates nothing was done

    your APPLICATION code will need to clear the watchdog reset flag, and if not used, turn off the watchdog
    as recommended in the Atmel datasheet (page 51 of mega88) - because the bootloader uses the watchdog reset to
    start the application code, here is sample code to do that

        #include <avr/wdt.h>    // read this file for more information
        MCUSR = 0;              // clear all reset flags, or save first if needed
        wdt_disable();          // disable wdt (if fused on, you need to use wdt_enable(your_desired_timeout))

    here is the paragraph in the datasheet page 51- (atmega88/168)-

        Note: If the Watchdog is accidentally enabled, for example by a runaway pointer or brown-out
        condition, the device will be reset and the Watchdog Timer will stay enabled. If the code is not
        set up to handle the Watchdog, this might lead to an eternal loop of time-out resets. To avoid this
        situation, the application software should always clear the Watchdog System Reset Flag
        (WDRF) and the WDE control bit in the initialisation routine, even if the Watchdog is not in use.



    -------------------------------------------------------------------------------------------------------------
     2. avr studio settings
    -------------------------------------------------------------------------------------------------------------


    THIS WAS COMPILED USING WinAVR20060421 and AVR Studio 4 - other versions of WinAVR may compile to >256words


    to change the compiled bootloader to load in the bootloader section-

        AVR Studio - Project-Configuration Options-
        Memory Settings-
    
        Add ->   Flash .text 0xF00 (ATMega88)
        ---OR---
        Add ->   Flash .text 0x1F00 (ATMega168)
        ---OR---
        Add ->   Flash .text 0x1F00 (ATMega16)
        ---OR---
        for your avr-
        will be ((FLASHEND+1) - 0x200) / 2
        (end of program memory +1, minus the bootloader size (in bytes), divided by 2 to get the word size)
    
        AVR studio uses a word size for this address (which is converted to a byte size for gcc)

     Use WORD address for this setting
         \  /
          ||
           \------------------------------\-----------------------------------------------\
     -------------------------------------||-------  -------------------------------------||-------
     | ATMega88                                   |  | ATMega168, ATMega16                        |
     ----------------------------------------------  ----------------------------------------------
     | Bootloader   | BYTE Address | WORD Address |  | Bootloader   | BYTE Address | WORD Address |
     | Size - WORDS | Start        | Start        |  | Size - WORDS | Start        | Start        |
     ----------------------------------------------  ----------------------------------------------
     |    1024      |     1800     |     C00      |  |    1024      |     3800     |     1C00     |
     ----------------------------------------------  ----------------------------------------------
     |     512      |     1C00     |     E00      |  |     512      |     3C00     |     1E00     |
     ----------------------------------------------  ----------------------------------------------
     |    *256      |     1E00     |  -> F00 <-   |  |    *256      |     3E00     |  -> 1F00 <-  |
     ----------------------------------------------  ----------------------------------------------
     |     128      |     1F00     |     F80      |  |     128      |     3F00     |     1F80     |
     ----------------------------------------------  ----------------------------------------------

 

    also, to set our jump table up, we need to also add a Flash section called '.jmptbl', and its address
     will be FLASHEND-7

        AVR Studio - Project-Configuration Options-
        Memory Settings-
    
        Add ->   Flash .jmptbl 0xFFC (ATMega88)
        ---OR---
        Add ->   Flash .jmptbl 0x1FFC (ATMega168)
        ---OR---
        Add ->   Flash .jmptbl 0x1FFC (ATMega16)
        ---OR---
        for your avr-
        will be ((FLASHEND+1)/2)-4
        that is, end of program memory +1, divided by 2 to get the word size, -4 words (4 rjmp's)


     to save space/prevent interrupt vectors (not using interrupts), and normal C startup code-
     AVR Studio - Project-Configuration Options-
     Custom Options- Linker Options

     Add->    -nostartfiles
     Add->    -nodefaultlibs

     for avr's >8K, to make all calls 'rcall' instead of call (save some space- only need rcall in bootloader)
     AVR Studio - Project-Configuration Options-
     Custom Options- All Files

     Add-> -mshort-calls


     RESULTS COMPILED USING WinAVR20060421 and AVR Studio 4

      ------------------------------------------------------------------------------------
     | Device       Data Bytes  Program Bytes  DECRYPT_ON=1  USE_PIN_TRIGGER=1     MAX    |
     |------------------------------------------------------------------------------------|
     | ATmega88         71           510           -18              -4             510    |
     |------------------------------------------------------------------------------------|
     | ATmega168       135           510           -18              -4             510    |
     |------------------------------------------------------------------------------------|
     | ATmega16        135           496           -18              -2             496    |
     |------------------------------------------------------------------------------------|
     | ATmega32        135           472           -18              -2             496    |
     |------------------------------------------------------------------------------------|
     |                                                                                    |
     |------------------------------------------------------------------------------------|
     |                                                                                    |
      ------------------------------------------------------------------------------------



    -------------------------------------------------------------------------------------------------------------
     3. reset flags logic
    -------------------------------------------------------------------------------------------------------------


     the bootloader WILL 'run' IF the watchdog flag NOT set, AND the power up flag IS set (power up)


    -------------------------------------------------------------------------------------------------------------
     
     POWER UP-

     on a power up, the watchdog flag will NOT be set, the power up flag WILL be set, so the
     bootloader will then check for esc key for 1/8 second (or whatever key you define)

     IF no esc key is pressed, the watchdog timer will cause a watchdog reset, which brings
     us right back to the bootloader code, since the BOOTRST fuse will be programmed

     the bootloader will see the watchdog reset flag IS set, so just rjmp to 0x0000

     your APPLICATION will have to clear the WDRF flag and WDE control bit (if not using wdt) on startup as
     recommended in the Atmel datasheet (to prevent an 'eternal loop of time-out resets' on newer avr's)

     IF the APPLICATION clears NO other flags (besides WDRF), on a watchdog reset (if app enabled it, or
     is set in fuses to be always on), the bootloader will NOT attempt to run (because WDRF is set)

     IF the APPLICATION clears NO other flags (besides WDRF), on a brown out or external reset, the
     bootloader WILL attempt to run because the application turned off the watchdog and did nothing to
     the power reset flags

     so, to prevent the bootloader from attempting to run (checking for 'trigger'), clear all reset flags in
     your application startup code

    -------------------------------------------------------------------------------------------------------------

     APPLICATION JUMP-

     IF you want to 'jump' to the bootloader FROM your APPLICATION-
     turn off interrupts            cli();              // no interrupts in bootloader
     clear MCU status register      MCUSR=0;            // clear reset flags (if not already cleared)
     IF you have watchdog on        wdt_reset();        // just in case its about to timeout
     change vectors                 MCUCR = (1<<IVCE);  // enable ivsel change (or use GICR, depending on avr)
                                    MCUCR = (1<<IVSEL); // change it
                                    'jump to FLASHEND-1'// however you want to do it


     IF you use some other usart settings in your APPLICATION, you will need to make sure baud high,
     usart control A and usart C are set correctly (only if you use something different than the
     bootloader, which would be unusual, but possible if your 'app' uses different usart settings)

     now just jump to FLASHEND-1 (jump table at the end of the bootloader)
     this method, which works on 'any' avr (as jmp is not available on all avr's), and doesn't depend on C code
     assuming 256word bootloader size-
          asm volatile("ldi r31,%0" : : "M" ((((FLASHEND-1)/2) >> 8)) );
          asm volatile("ldi r30,%0" : : "M" ((((FLASHEND-1)/2) & 0x00FF)) );
          asm volatile("ijmp");
        OR
          goto *FLASHEND-1;
        OR
          //use macro to jump to FLASHEND-1
          BOOTLOADER();

     so, altogether now-
          cli();        // must
          MCUSR=0;      // if not already cleared
          wdt_reset();  // if watchdog used in app, not a bad idea
          MCUCR = (1<<IVCE);
          MCUCR = (1<<IVSEL);
          asm volatile("ldi r31,%0" : : "M" ((((FLASHEND-1)/2) >> 8)) );
          asm volatile("ldi r30,%0" : : "M" ((((FLASHEND-1)/2) & 0x00FF)) );
          asm volatile("ijmp");
        OR
          cli();        // must
          MCUSR=0;      // if not already cleared
          wdt_reset();  // if watchdog used in app, not a bad idea
          MCUCR = (1<<IVCE);
          MCUCR = (1<<IVSEL);
          goto *FLASHEND-1; //does the same thing as above, but easier
        OR
          cli();        // must
          MCUSR=0;      // if not already cleared
          wdt_reset();  // if watchdog used in app, not a bad idea
          MCUCR = (1<<IVCE);
          MCUCR = (1<<IVSEL);
          BOOTLOADER(); // using macro



    -------------------------------------------------------------------------------------------------------------
     4. encryption
    -------------------------------------------------------------------------------------------------------------


     the term 'encryption' will be loosely used here, 'obfuscation' may better describe what's going on here,
     but that doesn't sound as cool

     !!!! I am NOT an 'encryption' expert, so this MAY ALL BE BOGUS thinking by me, 'BUYER BEWARE' !!!!

     the idea here, is to prevent others from using your 'firmware updates' to program their own avr

     for an example, lets say I made a very cool device using a mega168, I spent a lot of time developing the
     code, but want to be able to do 'firmware updates'. If I let my firmware updates out, anyone could just
     copy my hardware, use my firmware update to program the avr, and have their very own 'cool device' without
     a dime going to me. So I'm selling my device for $150, and then a copy of it starts showing up on ebay for
     $75. I just spent all my time programming, so I could help some ebayer make money? AND decrease my sales
     at the same time? (I'm usually not very paranoid, or think I could produce a 'cool device', or that my
     programming skills are so great they couldn't be duplicated, but I would want someone else to at least
     write their own code, and not use mine.)

     first, the ground rules- the bootloader code is a secret 'key' that has to remain a secret, so fuses have
     to be set in such a way as to prevent reading of the bootloader section from the application section, the
     bootloader itself cannot read and display the application section (which it does not do when 'encryption' is
     turned on), and reading of the bootloader code from a device programmer has to be prevented (see section 6,
     'program and data memory lock bits', for security)

     here's the general idea- take the 'secret key' (bootloader code), xor it with my application code, to
     produce a 'firmware update'. The 'firmware update' is uploaded to the avr, the avr xor's that code with
     its own bootloader code, and programs the 'unencrypted' application. Unless someone has the bootloader
     code (all 4096 bits exactly), they will not be able to program other avr's with the 'firmware update',
     or be able to upload their own firmware (using the bootloader, at least).


     let's take a simple 1 word (2 bytes) data record ('unencrypted')-

                        LL AAAA TT DDDD CC
     :0202A000892BA8    02 02A0 00 892B A8

        xor the data bytes with our bootloader code (at its 'mirrored' bootloader address on a '168)

                        LL AAAA TT DDDD
                           3EA0    3286

        result = 'encrypted' record

                                   DDDD
     :0202A000BBADF4               BBAD


        we now upload our 'encrypted' record (only DDDD is 'encrypted')


     :0202A000BBADF4

        the bootloader 'decrypts' the record with its own code
        BBAD xor 3286 = 892B (original data bytes)
        and programs the code

        the 'decryption' takes place inside the avr, so it is not revealed anywhere


     a simple way to test this, is to turn on 'decryption' (DECRYPT_ON set to 1 in defines), recompile
     take a working application, upload it with the bootloader
     use your programmer (isp or debugwire or whatever) to read the avr to a hex file
     this hex file will be 'encoded' with the bootloader code
     with a text editor, remove everything from the hex file EXCEPT the application AND the EOF record
     now go back to the bootloader, upload the 'encrypted' hex file
     test it, you should now have a working application
     you can also use your programmer to read the avr again, and compare it to the original hex file


     there may be a flaw in my thinking, but I am not smart enough to know what that would be

     the only thing I can think of, is with the address of the hex record known, you are giving away 4 bits
     for every rjmp in the reset vector table, and 10 bits for the parts that use jmp (and those bits would
     reveal the mirrored bits in the bootloader code). So, we would be giving away 260 bits for a part like
     the '168, but, none of those bits make up a 'complete' byte, so would not reveal any 'complete' bytes

     another step that could be taken, is to 'encrypt' the address in the hex record, and have the hex records
     in a random order in the hex file, but a way would be needed to decide how that address is 'decrypted',
     as we wouldn't have an address to lookup for decryption.

     another question that can be raised, is how can you have a bootloader listed on the internet, and intend
     to keep the bootloader code 'secret'? I guess the answer would be- unless YOUR compiled version compiles
     exactly like everybody else's, there will be enough difference in the compiled code to make it more work to
     'decrypt' than it would be to write their own code. (don't 'release' YOUR bootloader code - that's a
     secret). another way would be to sprinkle some "nop's" in your bootloader code, shifting things around
     a little bit.


     there may be other ways/techniques that could be simply added to make this 'encryption' more secure


     the main idea, is to make it a lot of work to 'decrypt', not necessarily impossible



    -------------------------------------------------------------------------------------------------------------
     5. eeprom programming
    -------------------------------------------------------------------------------------------------------------

     run bootloader, erase flash, upload 4AvrAsciiBootloaderBuddy, type 'go' command,
     eeprom app runs (prompt will be #) , when done, type 'go' command
     back to bootloader



    -------------------------------------------------------------------------------------------------------------
     6. program and data memory lock bits
    -------------------------------------------------------------------------------------------------------------

     All info here is from the ATmega88 datasheet

     the lock bits can ONLY be 'erased' (back to a '1') with a chip erase command (serial/parallel programming)

     --------------------------------------------------------------------------------
     | Lock Bit Bytes  (unprogrammed=1, programmed=0)                               |
     |------------------------------------------------------------------------------|
     |        bit-> |   7       6   |   5       4   |   3       2   |   1       0   |
     |------------------------------------------------------------------------------|
     |       name-> |   -       -   | BLB12   BLB11 | BLB02   BLB01 |  LB2     LB1  |
     |------------------------------------------------------------------------------|
     |    default-> |   1       1   |   1       1   |   1       1   |   1       1   |
     |------------------------------------------------------------------------------|
     | applies to-> |     unused    |  Bootloader   |  Application  |    Memory     |
     |------------------------------------------------------------------------------|
     | app can't read the bootloader -> 0       0 <-no writing to bootloader area   |
     |------------------------------------------------------------------------------|
     |     bootloader is allowed to read application -> 1       1 <-write allowed   |
     |------------------------------------------------------------------------------|
     |  programmer is unable to read/write flash/eeprom (erase first)   0       0   |
     |------------------------------------------------------------------------------|
     |  my secure-> |   x       x   |   0       0   |   1       1   |   0       0   |
     --------------------------------------------------------------------------------


     -Boot Loader Section- BLB1 Mode 3 (BLB12=0,BLB11=0, both programmed)
     this will prevent the bootloader from writing over itself (even though it already protects itself as
     currently programmed), and prevent the APPLICATION from reading the bootloader section (security)

     -Application Section- BLB0 Mode 1 (BLB02=1,BLB01=1, both unprogrammed)
     these have to be left unprogrammed, otherwise the bootloader won't work (can't read or write to app space)

     -Memory Lock Bits- LB Mode 3 (LB2=0,LB1=0, both programmed)
     this will prevent programming and reading by a programmer; the only way to regain programmer access will
     be a 'chip erase' command (which will wipe out the bootloader and application)



    */



