;-------------------------------------------------------------------------------
; FFT routines for C-Control Pro.
; Adapted from : Fixed-point FFT routines for megaAVRs, (C)ChaN, 2005
;-------------------------------------------------------------------------------

;-------------------------------------------------------------------------------
; Set FFT_N, the number of samples below (64, 128, 256, 512 or 1024).
; Conform this setting:
;  - set FFT_B below (2^FFT_B = FFT_N)
;  - adapt flash constant tables in FFT.cc
;  - adapt length of arrays in FFT function calls.
; Notice: in each FFT routine, the global interrupt flag is disabled. As a
; consequence, timer interrupts could be delayed and the clock, multitasking
; and servos might be not accurate.
;-------------------------------------------------------------------------------
;.equ FFT_N = 1024
;.equ FFT_B = 10
;.equ FFT_N = 512
;.equ FFT_B = 9
.equ FFT_N = 256
.equ FFT_B = 8
;.equ FFT_N = 128
;.equ FFT_B = 7
;.equ FFT_N = 64
;.equ FFT_B = 6

;-------------------------------------------------------------------------------
; Definitions
;-------------------------------------------------------------------------------
.def    T0L   = r0    ; free to use
.def    T0H   = r1    ; free to use
.def    T2L   = r2
.def    T2H   = r3
.def    T4L   = r4
.def    T4H   = r5
.def    T6L   = r6    ; stack pointer low
.def    T6H   = r7    ; stack pointer high
.def    T8L   = r8    ; RAM top low
.def    T8H   = r9    ; RAM top high
.def    T10L  = r10   ; parameter stack pointer low
.def    T10H  = r11   ; parameter stack pointer high
.def    T12L  = r12
.def    T12H  = r13
.def    T14L  = r14
.def    T14H  = r15
.def    AL    = r16
.def    AH    = r17
.def    BL    = r18
.def    BH    = r19
.def    CL    = r20
.def    CH    = r21
.def    DL    = r22   ; free to use
.def    DH    = r23   ; free to use
.def    EL    = r24   ; free to use
.def    EH    = r25   ; free to use
.def    XL    = r26   ; free to use
.def    XH    = r27   ; free to use
.def    YL    = r28
.def    YH    = r29
.def    ZL    = r30   ; free to use
.def    ZH    = r31   ; free to use

.equ ADCSRA = 0x06     ; ADC Control and Status Register A
.equ ADPS0  = 0        ; ADC Prescaler Select Bits
.equ ADPS1  = 1        ; ADC Prescaler Select Bits
.equ ADPS2  = 2        ; ADC Prescaler Select Bits
.equ ADIE   = 3        ; ADC Interrupt Enable
.equ ADIF   = 4        ; ADC Interrupt Flag
.equ ADFR   = 5        ; ADC Free Running Select
.equ ADSC   = 6        ; ADC Start Conversion
.equ ADEN   = 7        ; ADC Enable

.equ ADCH   = 0x05     ; ADC Data Register High Byte
.equ ADCL   = 0x04     ; ADC Data Register Low Byte

;-------------------------------------------------------------------------------
; Macros
;-------------------------------------------------------------------------------
.macro    ldiw    ;dh,dl, abs
    ldi    @1, LOW(@2)
    ldi    @0, HIGH(@2)
.endm

.macro    subiw   ;dh,dl, abs
    subi   @1, LOW(@2)
    sbci   @0, HIGH(@2)
.endm

.macro    addw    ;dh,dl, sh,sl
    add    @1, @3
    adc    @0, @2
.endm

.macro    addd    ;d3,d2,d1,d0, s3,s2,s1,s0
    add    @3, @7
    adc    @2, @6
    adc    @1, @5
    adc    @0, @4
.endm

.macro    subw    ;dh,dl, sh,sl
    sub    @1, @3
    sbc    @0, @2
.endm

.macro    subd    ;d3,d2,d1,d0, s3,s2,s1,s0
    sub    @3, @7
    sbc    @2, @6
    sbc    @1, @5
    sbc    @0, @4
.endm

.macro    lddwz   ;dh,dl
    ldd    @1, Z+0
    ldd    @0, Z+1
.endm

.macro    lddwy   ;dh,dl
    ldd    @1, Y+0
    ldd    @0, Y+1
.endm

.macro    lddwy2  ;dh,dl
    ldd    @1, Y+2
    ldd    @0, Y+3
.endm

.macro    ldwx    ;dh,dl
    ld     @1, X+
    ld     @0, X+
.endm

.macro    ldwy    ;dh,dl
    ld     @1, Y+
    ld     @0, Y+
.endm

.macro    ldwz    ;dh,dl
    ld     @1, Z+
    ld     @0, Z+
.endm

.macro    stwx    ;dh,dl
    st     X+, @1
    st     X+, @0
.endm

.macro    stwy    ;dh,dl
    st     Y+, @1
    st     Y+, @0
.endm

.macro    stwz    ;dh,dl
    st     Z+, @1
    st     Z+, @0
.endm

.macro    clrw    ;dh, dl
    clr    @0
    clr    @1
.endm

.macro    lsrw    ;dh, dl
    lsr    @0
    ror    @1
.endm

.macro    asrw    ;dh, dl
    asr    @0
    ror    @1
.endm

.macro    lslw    ;dh, dl
    lsl    @1
    rol    @0
.endm

.macro    pushw   ;dh,dl
    push   @0
    push   @1
.endm

.macro    popw    ;dh,dl
    pop    @1
    pop    @0
.endm

.macro    rjne    ;lbl
    breq   lbl_99
    rjmp   @0
 lbl_99:
.endm

.macro    FMULS16 ;d3,d2,d1,d0 ,s1h,s1l, s2h,s2l    ;Fractional Multiply (19clk)
    fmuls   @4, @6
    movw    @1, T0L
    fmul    @5, @7
    movw    @3, T0L
    adc     @1, EH ;EH: zero reg.
    fmulsu  @4, @7
    sbc     @0, EH
    add     @2, T0L
    adc     @1, T0H
    adc     @0, EH
    fmulsu  @6, @5
    sbc     @0, EH
    add     @2, T0L
    adc     @1, T0H
    adc     @0, EH
.endm

.macro    SQRT32  ; 32bit square root (526..542clk)
    clr     T6L
    clr     T6H
    clr     T8L
    clr     T8H
    ldi     BL, 1
    ldi     BH, 0
    clr     CL
    clr     CH
    ldi     DH, 16
 SQRT32_1:
    lsl     T2L
    rol     T2H
    rol     T4L
    rol     T4H
    rol     T6L
    rol     T6H
    rol     T8L
    rol     T8H
    lsl     T2L
    rol     T2H
    rol     T4L
    rol     T4H
    rol     T6L
    rol     T6H
    rol     T8L
    rol     T8H
    brpl    SQRT32_2
    add     T6L, BL
    adc     T6H, BH
    adc     T8L, CL
    adc     T8H, CH
    rjmp    SQRT32_3
 SQRT32_2:
    sub     T6L, BL
    sbc     T6H, BH
    sbc     T8L, CL
    sbc     T8H, CH
 SQRT32_3:
    lsl     BL
    rol     BH
    rol     CL
    andi    BL, 0b11111000
    ori     BL, 0b00000101
    sbrc    T8H, 7
    subi    BL, 2
    dec     DH
    brne    SQRT32_1
    lsr     CL
    ror     BH
    ror     BL
	lsr	    CL
	ror	    BH
	ror	    BL
.endm

;-------------------------------------------------------------------------------
; Hamming windowed input from AD converter in Free Running Mode
; The sample rate is reduced from the ADC sample clock by a factor decimate
; (1,2,3,...,255).
;-------------------------------------------------------------------------------
.ifdef Tag_fft_input
fft_input:
    cli                         ; disable interrupts
    pushw   T2H, T2L
    pushw   T4H, T4L
    pushw   AH, AL
    pushw   BH, BL
    pushw   CH, CL
    pushw   YH, YL

    movw    XL, T10L            ; get pointer to parameter stack
    ld      T4H, X+             ; T4H = decimate
    ld      ZL, X+              ; Z = *tbl_window
    ld      ZH, X+
    ld      YL, X+              ; Y = *array_bfly
    ld      YH, X+

    clr     EH                  ; Zero

    ldiw    AH,AL, FFT_N        ; A = FFT_N;

    sbi     ADCSRA, ADFR        ; set Free Running mode
    sbi     ADCSRA, ADEN        ; set ADC Enable
    sbi     ADCSRA, ADSC        ; set Start Conversion

 fft_input_1:
    mov     EL, T4H             ; set counter EL to value of decimate
 fft_input_2:                   ; wait for ADC to complete
    sbis    ADCSRA, ADIF        ;
    rjmp    fft_input_2         ; /

    sbi     ADCSRA, ADIF        ; clear ADC interrupt flag

    subi    EL, 1               ; while(--EL)
    brne    fft_input_2

    in      CL, ADCL            ; load ADC value in C (I-axis)
    in      CH, ADCH
    ldwz    BH, BL              ; B = (Z++); (window)
    FMULS16 DH,DL,T2H,T2L,BH,BL,CH,CL  ; D = B * C;
    stwy    DH, DL              ; (Y++) = D;
    stwy    DH, DL              ; (Y++) = D;
    subiw   AH, AL, 1           ; while(--A)
    brne    fft_input_1

    popw    YH, YL
    popw    CH, CL
    popw    BH, BL
    popw    AH, AL
    popw    T4H, T4L
    popw    T2H, T2L
    sei                         ; enable interrupts
    ret
.endif

;-------------------------------------------------------------------------------
; Butterfly operations on sampled wave form
;-------------------------------------------------------------------------------
.ifdef Tag_fft_execute
fft_execute:
    cli                         ; disable interrupts
    pushw    T2H,T2L
    pushw    T4H,T4L
    pushw    T6H,T6L
    pushw    T8H,T8L
    pushw    T10H,T10L
    pushw    T12H,T12L
    pushw    T14H,T14L
    pushw    AH,AL
    pushw    BH,BL
    pushw    CH,CL
    pushw    YH,YL

    movw    XL, T10L            ; get pointer to parameter stack
    ld      DL, X+              ; D = *tbl_cos_sin
    ld      DH, X+              ;
    ld      ZL, X+              ; Z = *bfly_buff
    ld      ZH, X               ;

    ldiw    EH,EL, 1            ; E = 1;
    ldiw    XH,XL, FFT_N/2      ; X = FFT_N/2;
 fft_execute_1:
    ldi     AL, 4               ; T12 = E; (angular speed)
    mul     EL, AL              ;
    movw    T12L, T0L           ;
    mul     EH, AL              ;
    add     T12H, T0L           ; /
    movw    T14L, EL            ; T14 = E;
    pushw   EH,EL
    movw    YL, ZL              ; Z = &array_bfly[0];
    mul     XL, AL              ; Y = &array_bfly[X];
    addw    YH,YL, T0H,T0L      ;
    mul     XH, AL              ;
    add     YH, T0L             ; /
    pushw   ZH,ZL
 fft_execute_2:
    clrw    T10H,T10L           ; T10 = 0 (angle)
    clr     EH                  ; Zero reg.
    pushw   XH,XL               ; extra: save X
    movw    XL, DL              ; extra: X  = *tbl_cos_sin
 fft_execute_3:
    lddwz   AH,AL               ; A = *Z - *Y; *Z++ += *Y;
    asrw    AH,AL               ;
    lddwy   DH,DL               ;
    asrw    DH,DL               ;
    movw    CL, AL              ;
    subw    AH,AL, DH,DL        ;
    addw    CH,CL, DH,DL        ;
    stwz    CH,CL               ; /
    lddwz   BH,BL               ; B = *Z - *Y; *Z++ += *Y;
    asrw    BH,BL               ;
    lddwy2  DH,DL               ;
    asrw    DH,DL               ;
    movw    CL, BL              ;
    subw    BH,BL, DH,DL        ;
    addw    CH,CL, DH,DL        ;
    stwz    CH,CL               ; /
    movw    T0L, XL             ; Save X
    addw    XH,XL, T10H,T10L    ; C = cos(T10); D = sin(T10);
    ldwx    CH,CL               ;
    ldwx    DH,DL               ; /
    movw    XL, T0L             ; Restore X
    FMULS16 T4H,T4L,T2H,T2L, AH,AL, CH,CL    ;*Y++ = A * C + B * D;
    FMULS16 T8H,T8L,T6H,T6L, BH,BL, DH,DL    ;
    addd    T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L ;
    stwy    T4H,T4L                          ;/
    FMULS16 T4H,T4L,T2H,T2L, BH,BL, CH,CL    ;*Y++ = B * C - A * D;
    FMULS16 T8H,T8L,T6H,T6L, AH,AL, DH,DL    ;
    subd    T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L ;
    stwy    T4H,T4L                          ;/
    addw    T10H,T10L, T12H,T12L        ;T10 += T12; (next angle)
.if FFT_N >= 128
    sbrs    T10H, FFT_B - 7     ;while(T10 < pi)
.else
    sbrs    T10L, FFT_B + 1
.endif
    rjmp    fft_execute_3       ; /
    movw    DL,XL               ; extra: make D pointer to tbl_cos_sin
    popw    XH,XL               ; extra: restore X
    ldi     AL, 4               ; Y += X; Z += X; (skip split segment)
    mul     XL, AL
    addw    YH,YL, T0H,T0L      ;
    addw    ZH,ZL, T0H,T0L      ;
    mul     XH, AL              ;
    add     YH, T0L             ;
    add     ZH, T0L             ; /
    ldi     EL, 1               ; while(--T14)
    subw    T14H,T14L, EH,EL    ;
    rjne    fft_execute_2       ; /
    popw    ZH,ZL
    popw    EH,EL
    lslw    EH,EL               ; E *= 2;
    lsrw    XH,XL               ; while(X /= 2)
    adiw    XL, 0               ;
    rjne    fft_execute_1       ; /

    popw    YH,YL
    popw    CH,CL
    popw    BH,BL
    popw    AH,AL
    popw    T14H,T14L
    popw    T12H,T12L
    popw    T10H,T10L
    popw    T8H,T8L
    popw    T6H,T6L
    popw    T4H,T4L
    popw    T2H,T2L

    sei                         ; enable interrupts
    ret
.endif

;-------------------------------------------------------------------------------
; Conversion of results of butterfly operations to spectrum.
; The frequency scale is obtained by multiplying the index (0..FFT_N/2) with
; sample rate/FFT_N.
;-------------------------------------------------------------------------------
.ifdef Tag_fft_output;(int p_bfly_buff[], int p_tbl_bitrev[])
fft_output:
    cli                         ; disable interrupts
    pushw   T2H,T2L
    pushw   T4H,T4L
    pushw   T6H,T6L
    pushw   T8H,T8L
    pushw   T10H,T10L
    pushw   T12H,T12L
    pushw   T14H,T14L
    pushw   AH,AL
    pushw   BH,BL
    pushw   CH,CL
    pushw   YH,YL

    movw    ZL, T10L            ; get pointer to parameter stack
    ldwz    YH,YL               ; Y = *tbl_bitrev
    ldwz    T10H, T10L          ; T10 = *bfly_buff
    movw    ZL, YL              ; Z = *tbl_bitrev (used to store the spectrum)

    clr     EH                  ; Zero
    ldiw    AH,AL, FFT_N/2      ; A = FFN / 2; (plus only)
 fft_output_1:
    ldwz    XH,XL               ; X = *Z++;
    addw    XH,XL, T10H,T10L    ; X += bfly_buff;
    ldwx    BH,BL               ; B = *X++;
    ldwx    CH,CL               ; C = *X++;
    FMULS16 T4H,T4L,T2H,T2L,BH,BL,BH,BL     ;T4:T2 = B * B;
    FMULS16 T8H,T8L,T6H,T6L,CH,CL,CH,CL     ;T8:T6 = C * C;
    addd    T4H,T4L,T2H,T2L,T8H,T8L,T6H,T6L ;T4:T2 += T8:T6;
    SQRT32                      ; B = sqrt(T4:T2);
    stwy    BH,BL               ; *Y++ = B;
    subiw   AH,AL, 1            ; while(--A)
    rjne    fft_output_1        ;/

    popw    YH,YL
    popw    CH,CL
    popw    BH,BL
    popw    AH,AL
    popw    T14H,T14L
    popw    T12H,T12L
    popw    T10H,T10L
    popw    T8H,T8L
    popw    T6H,T6L
    popw    T4H,T4L
    popw    T2H,T2L

    sei                         ; enable interrupts
    ret
.endif

;-------------------------------------------------------------------------------
; Conversion of results of butterfly operations to spectrum.
; The frequency scale is obtained by multiplying the index (0..FFT_N/2) with
; sample rate/FFT_N.
; The routine returns the index of the highest peak, skipping points 0 and 1
; because these belong to the DC component.
;-------------------------------------------------------------------------------
.ifdef Tag_fft_i_max_output
fft_i_max_output:
    cli                         ; disable interrupts
    pushw   T2H,T2L
    pushw   T4H,T4L
    pushw   T8H,T8L
    pushw   T10H,T10L
    pushw   T12H,T12L
    pushw   T14H,T14L
    pushw   AH,AL
    pushw   BH,BL
    pushw   CH,CL
    pushw   YH,YL
    pushw   T6H,T6L             ; save stack pointer

    movw    ZL, T10L            ; copy parameter stack pointer from R10 to D
    ldwz    T2H,T2L             ; max = threshold
    ldwz    YH,YL               ; Y = *tbl_bitrev (used to store the spectrum)
    ldwz    T10H,T10L           ; T10 = *bfly_buff
    movw    ZL, YL              ; Z = *tbl_bitrev

    clr     EH                  ; Zero
    ldi     EL, 1               ; One
    clrw    T4H,T4L             ; index of max = zero
    clrw    DH,DL               ; index = zero
    ldiw    AH,AL, FFT_N/2      ; A = FFN / 2; (plus only)

 fft_output_1:
    pushw   T2H,T2L             ; Save max
    pushw   T4H,T4L             ; Save index of max

    ldwz    XH,XL               ; X = *Z++;
    addw    XH,XL, T10H,T10L    ; X += bfly_buff;
    ldwx    BH,BL               ; B = *X++;
    ldwx    CH,CL               ; C = *X++;
    FMULS16 T4H,T4L,T2H,T2L, BH,BL,BH,BL     ;T4:T2 = B * B;
    FMULS16 T8H,T8L,T6H,T6L, CH,CL,CH,CL     ;T8:T6 = C * C;
    addd    T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L ;T4:T2 += T8:T6;
    SQRT32                      ; B = sqrt(T4:T2);
    stwy    BH,BL               ; *Y++ = B;

    popw    T4H,T4L             ; Restore index of max
    popw    T2H,T2L             ; Restore max

    ldiw    CH,CL, 2            ; Skip index 0 and 1 with finding max
    cp      DL, CL              ; Index < 2?
    cpc     DH, CH              ;
    brlo    fft_output_2        ; /

    cp      T2L, BL             ; Compare max with new value
    cpc     T2H, BH             ;
    brsh    fft_output_2        ; /
    movw    T2L, BL             ; max = new value
    movw    T4L, DL             ; index of max = index

 fft_output_2:
    add     DL, EL              ; index++
    adc     DH, EH

    subiw   AH,AL, 1            ; while(--A)
    rjne    fft_output_1        ; /

    popw    ZH,ZL               ; get stack pointer
    adiw    ZL, 4               ; add 4 to SP
    movw    T6L, ZL             ; copy result to SP location R6
    stwz    T4H,T4L             ; store function result (index of max) on stack

    popw    YH,YL
    popw    CH,CL
    popw    BH,BL
    popw    AH,AL
    popw    T14H,T14L
    popw    T12H,T12L
    popw    T10H,T10L
    popw    T8H,T8L
    popw    T4H,T4L
    popw    T2H,T2L

    sei                         ; enable interrupts
    ret
.endif
;-------------------------------------------------------------------------------
