;; Quick and dirty version of Kalman filter for PIC16 processors.
	;; Uses 32X16 multiply routine extracted from Microchip application
	;; note.
	;;
	;; The following register storage is required:
	;;
	;; AARGB0
	;; AARGB1
	;; AARGB2
	;; AARGB3
	;; AARGB4
	;; AARGB5
	;;
	;; BARGB0
	;; BARGB1
	;; BARGB2
	;; BARGB3
	;;
	;; LOOPCOUNT
	;;
	;; TEMPB3
	;; TEMPB2
	;; TEMPB1
	;; TEMPB0
	;;
	;; SIGN
	;;
	;; T3
	;; T2
	;; T1
	;; T0
	;;
	;; ALTB3
	;; ALTB2
	;; ALTB1
	;; ALTB0
	;; 
	;; VELB3
	;; VELB2
	;; VELB1
	;; VELB0
	;;
	;; ACCELB3
	;; ACCELB2
	;; ACCELB1
	;; ACCELB0
	;;
	;; There may be something else hiding in the multiply code but
	;; I think I found all of them. That adds up to 32 locations.
	;;
	;; Execution time will be driven by the multiplications. These take
	;; a maximum of 76 machines cycles per each. A PIC operating on a
	;; 20MHz clock with at 64 samples/second has around 78,000 machine
	;; cycles per sample. More than enough time. In fact it should be
	;; possible to decrease the clock rate to save power and/or increase
	;; the sample rate.
	;;
	;; The primary limitation is code and data space availability. The
	;; 16F84 has 1024 words code space and 68 bytes data space. This code
	;; takes 309 words of code space. Could be pretty
	;; tight. A 16F87X would be a better fit.
	;;
	;; There are some fairly obvious oportunities to tighten up this code.
	;; I was primarily interested in getting it all down.
	;; It HAS been run through MPASM.
	;;
	;; Changing the sample rate requires changing the number of bits
	;; to shift in the state update code. It is currently set for
	;; 64 SPS
	;;
	;; Kalman gains are computed by a "C" application and are determined
	;; by sample rate, measurement noise, and model noise. To compute
	;; gains, select sample rate, measure pressure sensor noise
	;; (standard deviation), and select measurement/model noise ratio. I
	;; recomend a ratio of 50,000. Input parameters into kgain.exe and
	;; note results. Gains shown here are based on 64 SPS, noise of 1.6
	;; counts (standard deviation), and noise ratio of 50,000
	;;
	;; Initialization:
	;;
	;; Clear VELB0-VELB3 and ACCELB0-ACCELB3. Read an ADC value and load
	;; into ALTB0-3. Then you can start running the filter.
	;;
	;; If you start the filter with random garbage for the intial values
	;; you will get random garbage out for quite a while.
	;;
	;; A note on number representation.
	;;
	;; Altitude, velocity, and acceleration are stored in 16.16 fixed
	;; point format. The two most significant bytes represent the integer
	;; portion of the number and the two LSB's the fractional portion. It's
	;; just like base ten if you are missing 8 fingers. :-)
	;;
	;; The Kalman gains are only 16 bit values because the integer part is
	;; zero. This allows use of a 32X16 multiply with 48 bit result instead
	;; of a 32x32 bit multiply with 64 bit result.
	;; 
	;; Because pressure decreases with altitude, upward velocity and
	;; positive acceleration will be NEGATIVE numbers.
	;; 
	;; Buyer beware.
	;; Batteries not included.
	;; Your miledge may vary.
	;;

	#include p16f84.inc

	;; Data space declarations
	
	cblock	0x0c
	AARGB0, AARGB1, AARGB2, AARGB3, AARGB4, AARGB5
	BARGB0, BARGB1, BARGB2, BARGB3
	LOOPCOUNT
	TEMPB3, TEMPB2, TEMPB1, TEMPB0
	SIGN
	T3, T2, T1, T0
	ALTB3, ALTB2, ALTB1, ALTB0
	VELB3, VELB2, VELB1, VELB0
	ACCELB3, ACCELB2, ACCELB1, ACCELB0
	endc

K1_HI	equ	0x70
K1_LO	equ	0x59
K2_HI	equ	0x07
K2_LO	equ	0xe4
K3_HI	equ	0x00
K3_LO	equ	0x0f
	
MSB	equ	7	



;	RCS Header $Id: fxm26.a16 2.3 1996/10/16 14:23:23 F.J.Testa Exp $

;	$Revision: 2.3 $

;       32x16 PIC16 FIXED POINT MULTIPLY ROUTINES
	;; extracted from Microchip apllication note AN617
;
;       Input:  fixed point arguments in AARG and BARG
;
;       Output: product AARGxBARG in AARG
;
;       All timings are worst case cycle counts
;
;       It is useful to note that the additional unsigned routines requiring a non-power of two
;       argument can be called in a signed multiply application where it is known that the
;       respective argument is nonnegative, thereby offering some improvement in
;       performance.
;
;         Routine            Clocks     Function
;
;       FXM3216S     423        32x16 -> 48 bit signed fixed point multiply
;       The above timings are based on the looped macros. If space permits,
;       approximately 65-88 clocks can be saved by using the unrolled macros.
;

;**********************************************************************************************
;**********************************************************************************************

;       32x16 Bit Multiplication Macros

SMUL3216L        macro

;       Max Timing:     2+13+6*26+25+2+6*27+26+7 = 393 clks

;       Min Timing:     2+7*6+5+2+6*6+5+6 = 98 clks

;       PM: 19+60 = 79            DM: 11


                MOVLW   0x8
                MOVWF   LOOPCOUNT

LOOPSM3216A
                RRF     BARGB1,F
                BTFSC  STATUS,C
                GOTO    ALSM3216NA
                DECFSZ  LOOPCOUNT,F
                GOTO    LOOPSM3216A

                MOVLW   0x7
                MOVWF   LOOPCOUNT

LOOPSM3216B
                RRF     BARGB0,F
                BTFSC  STATUS,C
                GOTO    BLSM3216NA
                DECFSZ  LOOPCOUNT,F
                GOTO    LOOPSM3216B

                CLRF    AARGB0
                CLRF    AARGB1
                CLRF    AARGB2
                CLRF    AARGB3
                RETLW   0x00

ALOOPSM3216
                RRF             BARGB1,F
                BTFSS  STATUS,C
                GOTO    ALSM3216NA
                MOVF   TEMPB3,W
                ADDWF   AARGB3,F
                MOVF            TEMPB2,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB2,W
                ADDWF           AARGB2,F
                MOVF            TEMPB1,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB1,W
                ADDWF           AARGB1,F
                MOVF            TEMPB0,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB0,W
                ADDWF           AARGB0,F

ALSM3216NA      RLF    SIGN,W
                RRF    AARGB0,F
                RRF    AARGB1,F
                RRF    AARGB2,F
                RRF    AARGB3,F
                RRF    AARGB4,F
                DECFSZ  LOOPCOUNT,F
                GOTO    ALOOPSM3216

                MOVLW   0x7
                MOVWF   LOOPCOUNT

BLOOPSM3216
                RRF             BARGB0,F
                BTFSS  STATUS,C
                GOTO    BLSM3216NA
                MOVF   TEMPB3,W
                ADDWF   AARGB3,F
                MOVF            TEMPB2,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB2,W
                ADDWF           AARGB2,F
                MOVF            TEMPB1,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB1,W
                ADDWF           AARGB1,F
                MOVF            TEMPB0,W
                BTFSC          STATUS,C
                INCFSZ          TEMPB0,W
                ADDWF           AARGB0,F

BLSM3216NA      RLF    SIGN,W
                RRF    AARGB0,F
                RRF    AARGB1,F
                RRF    AARGB2,F
                RRF             AARGB3,F
                RRF             AARGB4,F
                RRF             AARGB5,F
                DECFSZ  LOOPCOUNT,F
                GOTO    BLOOPSM3216

                RLF    TEMPB0,W
                RRF    AARGB0,F
                RRF    AARGB1,F
                RRF    AARGB2,F
                RRF             AARGB3,F
                RRF             AARGB4,F
                RRF             AARGB5,F

                endm

;**********************************************************************************************
;**********************************************************************************************
        
;       32x16 Bit Signed Fixed Point Multiply 32x16 -> 32

;       Input:  16 bit signed fixed point multiplicand in AARGB0
;                       16 bit signed fixed point multiplier in BARGB0

;       Use:    CALL    FXM3216S

;       Output: 32 bit signed fixed point product in AARGB0

;       Result: AARG  <--  AARG x BARG

;       Max Timing:     13+393+2 = 408 clks                B > 0
;                       28+393+2 = 423 clks                B < 0

;       Min Timing:     13+98 = 111 clks

;       PM: 18+79+1 = 98              DM: 9


FXM3216S	CLRF	AARGB4          ; clear partial product
		CLRF	AARGB5
		CLRF	SIGN
		MOVF	AARGB0,W
		IORWF	AARGB1,W
		IORWF	AARGB2,W
		IORWF	AARGB3,W
		BTFSC	STATUS,Z
		RETLW	0x00
		
		MOVF	AARGB0,W
		XORWF	BARGB0,W
		MOVWF	TEMPB0
		BTFSC	TEMPB0,MSB
		COMF	SIGN,F

	        BTFSS   BARGB0,MSB
                GOTO    M3216SOK

                COMF            BARGB1,F
                COMF            BARGB0,F
                INCF            BARGB1,F
		BTFSC		STATUS,Z
		INCF		BARGB0,F

                COMF            AARGB3,F
                COMF            AARGB2,F
                COMF            AARGB1,F
                COMF            AARGB0,F
		INCF		AARGB3,F
		BTFSC		STATUS,Z
                INCF            AARGB2,F
                BTFSC          STATUS,Z
                INCF            AARGB1,F
		BTFSC		STATUS,Z
		INCF		AARGB0,F

		BTFSC	BARGB0,MSB
		GOTO	M3216SX

M3216SOK        MOVF   AARGB0,W
                MOVWF   TEMPB0
                MOVF   AARGB1,W
                MOVWF   TEMPB1
                MOVF   AARGB2,W
                MOVWF   TEMPB2
                MOVF   AARGB3,W
                MOVWF   TEMPB3

                SMUL3216L

                RETLW           0x00


M3216SX		CLRF	AARGB4
		CLRF	AARGB5
		RLF	SIGN,W
		RRF	AARGB0,F
		RRF	AARGB1,F
		RRF	AARGB2,F
		RRF	AARGB3,F
		RRF	AARGB4,F

		RETLW	0x00
	

	;;
	;; Shift TEMP right LOOPCOUNT places
RSTEMP:	movf	T0,W		; Copy MSB to use for sign extension
	movwf	SIGN
RSLOOP:	rlf	SIGN,W		; Pick up the sign bit in C
	rrf	T0,F
	rrf	T1,F
	rrf	T2,F
	rrf	T3,F
	decfsz	LOOPCOUNT,F
	goto	RSLOOP
	retlw	0
	
	;;
	;; Begin state update
UPDATE:	movf	VELB3,W		; Copy velocity to temp location
	movwf	T3
	movf	VELB2,W
	movwf	T2
	movf	VELB1,W
	movwf	T1
	movf	VELB0,W
	movwf	T0
	movlw	6		;  load count for right shift
	movwf	LOOPCOUNT
	call	RSTEMP		; shift temp right 5 bits
	
	movf	T3,W		; add temp to altitude
	addwf	ALTB3,F

	movf	T2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	T2,W
	addwf	ALTB2,F

	movf	T1,W
	btfsc	STATUS,C
	incfsz	T1,W
	addwf	ALTB1,F

	movf	T0,W
	btfsc	STATUS,C
	incfsz	T0,W
	addwf	ALTB0,F


	movf	ACCELB3,W	; Copy acceleration to temp location
	movwf	T3
	movf	ACCELB2,W
	movwf	T2
	movf	ACCELB1,W
	movwf	T1
	movf	ACCELB0,W
	movwf	T0
	movlw	13		;  load count for right shift
	movwf	LOOPCOUNT
	call	RSTEMP		; shift temp right 11 bits
	
	movf	T3,W		; add temp to altitude
	addwf	ALTB3,F

	movf	T2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	T2,W
	addwf	ALTB2,F

	movf	T1,W
	btfsc	STATUS,C
	incfsz	T1,W
	addwf	ALTB1,F

	movf	T0,W
	btfsc	STATUS,C
	incfsz	T0,W
	addwf	ALTB0,F

	movf	ACCELB3,W	; Copy acceleration to temp location
	movwf	T3
	movf	ACCELB2,W
	movwf	T2
	movf	ACCELB1,W
	movwf	T1
	movf	ACCELB0,W
	movwf	T0
	movlw	6		;  load count for right shift
	movwf	LOOPCOUNT
	call	RSTEMP		; shift temp right 5 bits
	
	movf	T3,W		; add temp to altitude
	addwf	VELB3,F

	movf	T2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	T2,W
	addwf	VELB2,F

	movf	T1,W
	btfsc	STATUS,C
	incfsz	T1,W
	addwf	VELB1,F

	movf	T0,W
	btfsc	STATUS,C
	incfsz	T0,W
	addwf	VELB0,F

	;; This completes the state update

	;; Begin state correction
	;; A miracle occurs and the latest ADC value for the
	;; pressure appears in the upper two bytes of T0-T3 (T0 MSB)
	;; with the lower two bytes zeroed. Actual length of ADC value
	;; is not critical so long as Kalman gains are correct. However,
	;; it should not be greater than a 15 bit unsigned value.
	;; 
	;; First step is to subtract altitude from pressure reading to
	;; get the inovation. This is left in T0-T3 for later use.

	movf	ALTB3,W
	subwf	T3,F

	movf	ALTB2,W
	btfss	STATUS,C
	incfsz	ALTB2,W
	subwf	T2,F

	movf	ALTB1,W
	btfss	STATUS,C
	incfsz	ALTB1,W
	subwf	T1,F

	movf	ALTB0,W
	btfss	STATUS,C
	incfsz	ALTB0,W
	subwf	T0,F

	;; Altitude correction
			
	movf	T0,W		; Copy T0-T3 to AARG in preperation for multiply
	movwf	AARGB0
	movf	T1,W
	movwf	AARGB1
	movf	T2,W
	movwf	AARGB2
	movf	T3,W
	movwf	AARGB3

	movlw	K1_HI		;  Load Kalman gain
	movwf	BARGB0
	movlw	K1_LO
	movwf	BARGB1

	call	FXM3216S	; Perform the signed multiply

	movf	AARGB3,W	; add upper 32 bits of result to altitude
	addwf	ALTB3,F
	
	movf	AARGB2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	AARGB2,W
	addwf	ALTB2,F

	movf	AARGB1,W
	btfsc	STATUS,C
	incfsz	AARGB1,W
	addwf	ALTB1,F

	movf	AARGB0,W
	btfsc	STATUS,C
	incfsz	AARGB0,W
	addwf	ALTB0,F

	;; Velocity correction

	movf	T0,W		; Copy T0-T3 to AARG in preperation for multiply
	movwf	AARGB0
	movf	T1,W
	movwf	AARGB1
	movf	T2,W
	movwf	AARGB2
	movf	T3,W
	movwf	AARGB3

	movlw	K2_HI		;  Load Kalman gain
	movwf	BARGB0
	movlw	K2_LO
	movwf	BARGB1

	call	FXM3216S	; Perform the signed multiply

	movf	AARGB3,W	; add upper 32 bits of result to altitude
	addwf	VELB3,F
	
	movf	AARGB2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	AARGB2,W
	addwf	VELB2,F

	movf	AARGB1,W
	btfsc	STATUS,C
	incfsz	AARGB1,W
	addwf	VELB1,F

	movf	AARGB0,W
	btfsc	STATUS,C
	incfsz	AARGB0,W
	addwf	VELB0,F

	;;  Acceleration correction

	
	movf	T0,W		; Copy T0-T3 to AARG in preperation for multiply
	movwf	AARGB0
	movf	T1,W
	movwf	AARGB1
	movf	T2,W
	movwf	AARGB2
	movf	T3,W
	movwf	AARGB3

	movlw	K3_HI		;  Load Kalman gain
	movwf	BARGB0
	movlw	K3_LO
	movwf	BARGB1

	call	FXM3216S	; Perform the signed multiply

	movf	AARGB3,W	; add upper 32 bits of result to altitude
	addwf	VELB3,F
	
	movf	AARGB2,W
	btfsc	STATUS,C	; handle the carry
	incfsz	AARGB2,W
	addwf	VELB2,F

	movf	AARGB1,W
	btfsc	STATUS,C
	incfsz	AARGB1,W
	addwf	VELB1,F

	movf	AARGB0,W
	btfsc	STATUS,C
	incfsz	AARGB0,W
	addwf	VELB0,F

	;; Fine'
	;; Done
	;; Compleate
	end