;;########################################################################### ;; ;; FILE: IQNisqrt.asm ;; ;; TITLE: C Callable IQ Inverse Square Root Math Function ;; ;;########################################################################### ;; ;; Ver | Date | Who | Description of changes ;; =====|==============|=======|============================================= ;; 1.2 | 11 Oct 2001 | A. T. | Original Release. ;; -----|--------------|-------|--------------------------------------------- ;; 1.3 | 19 Nov 2001 | A. T. | Improved accuracy. ;; -----|--------------|-------|--------------------------------------------- ;; 1.4 | 17 May 2002 | A. T. | Corrected cycle benchmarks. ;; -----|--------------|-------|--------------------------------------------- ;; | | | ;; ;;########################################################################### ;;=========================================================================== ;; Function: _IQNisqrt ;;=========================================================================== ;; ;; C Usage: extern long _IQNisqrt(long X); // with saturation and rounding ;; ;;--------------------------------------------------------------------------- ;; ;; On Entry: ACC = Val in IQ format ;; ;; Regs Used: XAR7, XAR6, XAR5, XAR4 ;; XT, P, XAR0 ;; ;; On Exit: ACC = 1/sqrt(X) result in IQ format ;; ACC = 0 if input is -ve or 0 ;; ACC = max value "0x7FFFFFFF" if result saturates ;; ;; Q range: 30 to 1 ;; ;;--------------------------------------------------------------------------- ;; Algorithm: The procedure for calculating "Y = 1/sqrt(X)" is as follows: ;; ;; Step 1) Normalize Input: ;; ;; V = X * 2^n ;; ;; Step 2) Obtain initial estimate from "_IQisqrtTable" ;; using the upper 8-bits of the normalized V value. ;; ;; Step 3) Use Newton-Raphson algorithm to improve accuracy. ;; Repeat following equation two times. First iteration ;; gives 16-bit accuracy. 
Second iteration gives 32-bit ;; accuracy: ;; ;; Yn = Yn(1.5 - Yn*Yn*V/2) ;; ;; Yn = 1/sqrt(V) ;; ;; Step 4) Denormalize result, round and saturate: ;; ;; Y = Yn / sqrt(2^n) ;; ;;--------------------------------------------------------------------------- ;; Benchmark: ;; ;; Assumptions: ;; * Code executes from 0-wait SARAM block. ;; * _IQisqrt Tables located in separate 0-wait SARAM or 1-wait ROM block. ;; * Stack located in separate 0-wait SARAM block. ;; * Includes LCR/LRETR. ;; ;; Cycles = 66 (30 >= Q >= 18 if _IQisqrt Tables in 0-wait SARAM) ;; 67 (17 >= Q >= 0 if _IQisqrt Tables in 0-wait SARAM) ;; ;; Cycles = 73 (30 >= Q >= 18 if _IQisqrt Tables in 1-wait ROM) ;; 74 (17 >= Q >= 0 if _IQisqrt Tables in 1-wait ROM) ;; ;;=========================================================================== ;;############################################################################# ;;! ;;! Copyright: Copyright (C) 2023 Texas Instruments Incorporated - ;;! All rights reserved not granted herein. ;;! Limited License. ;;! ;;! Texas Instruments Incorporated grants a world-wide, royalty-free, ;;! non-exclusive license under copyrights and patents it now or hereafter ;;! owns or controls to make, have made, use, import, offer to sell and sell ;;! ("Utilize") this software subject to the terms herein. With respect to the ;;! foregoing patent license, such license is granted solely to the extent that ;;! any such patent is necessary to Utilize the software alone. The patent ;;! license shall not apply to any combinations which include this software, ;;! other than combinations with devices manufactured by or for TI ;;! ("TI Devices"). ;;! No hardware patent is licensed hereunder. ;;! ;;! Redistributions must preserve existing copyright notices and reproduce this ;;! license (including the above copyright notice and the disclaimer and ;;! (if applicable) source code license limitations below) in the documentation ;;! and/or other materials provided with the distribution. ;;! ;;! 
Redistribution and use in binary form, without modification, are permitted ;;! provided that the following conditions are met: ;;! ;;! * No reverse engineering, decompilation, or disassembly of this software is ;;! permitted with respect to any software provided in binary form. ;;! * Any redistribution and use are licensed by TI for use only ;;! with TI Devices. ;;! * Nothing shall obligate TI to provide you with source code for the ;;! software licensed and provided to you in object code. ;;! ;;! If software source code is provided to you, modification and redistribution ;;! of the source code are permitted provided that the following conditions ;;! are met: ;;! ;;! * any redistribution and use of the source code, including any resulting ;;! derivative works, are licensed by TI for use only with TI Devices. ;;! * any redistribution and use of any object code compiled from the source ;;! code and any resulting derivative works, are licensed by TI for use ;;! only with TI Devices. ;;! ;;! Neither the name of Texas Instruments Incorporated nor the names of its ;;! suppliers may be used to endorse or promote products derived from this ;;! software without specific prior written permission. 
;;#############################################################################

;;---------------------------------------------------------------------------
;; Macro: IQNisqrt q_value
;;
;; Expands to the complete body of one inverse-square-root routine,
;; including the final LRETR, for the IQ format selected by "q_value".
;;
;; Parameter:
;;   q_value - Q format of the input and of the result. It is only
;;             evaluated at assembly time, to pick the final scale
;;             constant and the fixed 64-bit shift count.
;;
;; On entry: ACC = input value.
;; On exit:  ACC = result.
;;
;; Registers written: ACC, P, XT, XAR0 (AR0), XAR4, XAR5, XAR6, XAR7.
;; Stack: one 32-bit push (MOVL *SP++) matched by one pop (MOVL *--SP).
;; External data: _IQisqrtTable, _IQisqrtRoundSatTable.
;;
;; Ordering constraints (do not re-schedule without re-checking):
;;   * MOVB ...,LEQ tests the flags left by the preceding ASR AH,#6.
;;   * TC is loaded by TBIT and is still tested by the conditional MOVB
;;     pair further down.
;;   * XAR7 is post-incremented when 1.5 is loaded, so every later
;;     *+XAR7[...] offset is relative to the advanced pointer.
;;---------------------------------------------------------------------------
IQNisqrt        .macro  q_value
        ;; Step 1: normalize the input.
        CSB       ACC                   ; Count sign bits, T = n
        LSLL      ACC,T                 ; ACC = V/2 (Q32)
        MOVL      XAR6,@ACC             ; XAR6 = V/2 (Q32)

        ;; Step 2: build the table index for the initial estimate.
        ASR       AH,#6
        MOVB      @AH,#0xFE,LEQ         ; AR0 -> zero value if -ve or 0 input
        SUB       @AH,#254              ; Scale to table offset
        MOVZ      AR0,@AH               ; AR0 -> index into _IQisqrtTable for
                                        ;        initial estimate of Yn
        TBIT      @T,#0                 ; TC = odd/even shift indicator
        MOV       AH,@T                 ; AH = n
        LSR       AH,#1                 ; AH = n/2
        MOVL      *SP++,ACC             ; Push ACC; AH = n/2 is reloaded into
                                        ; T by the MOVL XT,*--SP below
        MOVL      XAR7,#_IQisqrtTable   ; XAR7 = pointer to _IQisqrtTable
        MOVL      XAR4,*+XAR7[AR0]      ; (1)  XAR4 = initial Yn estimate (Q30)
        MOVL      XAR7,#_IQisqrtRoundSatTable
        MOVL      XAR5,*XAR7++          ; XAR5 = 1.5 (Q30), XAR7 advanced

        ;; Select the final scale constant from the parity of Q and of n.
        ;; AR0 becomes an offset from the advanced XAR7.
    .if (q_value & 0x0001) == 0         ; Q even
        MOVB      @AR0,#12,NTC          ; AR0 -> 1/sqrt(2) if n == even (Q30)
        MOVB      @AR0,#8,TC            ; AR0 -> 1.0       if n == odd  (Q30)
    .endif
    .if (q_value & 0x0001) == 1         ; Q odd
        MOVB      @AR0,#12,TC           ; AR0 -> 1/sqrt(2) if n == odd  (Q30)
        MOVB      @AR0,#10,NTC          ; AR0 -> 0.5       if n == even (Q30)
    .endif

        ;; Step 3a: first Newton-Raphson iteration.
        MOVL      XT,@XAR4              ; (2)  XT  = Yn                  (Q30)
        QMPYL     ACC,XT,@XT            ; (3)  ACC = Yn*Yn               (Q28)
        MOVL      XT,@XAR6              ; (4)  XT  = V/2                 (Q32)
        LSL       ACC,#2                ; (5)  ACC = Yn*Yn               (Q30)
        QMPYL     ACC,XT,@ACC           ; (6)  ACC = Yn*Yn*V/2           (Q30)
        MOVL      XT,@XAR5              ; (7)  XT  = 1.5                 (Q30)
        SUBL      @XT,ACC               ; (8)  XT  = 1.5 - Yn*Yn*V/2     (Q30)
        QMPYL     ACC,XT,@XAR4          ; (9)  ACC = Yn(1.5 - Yn*Yn*V/2) (Q28)
        LSL       ACC,#2                ; (10) ACC = Yn(1.5 - Yn*Yn*V/2) (Q30)

        ;; Step 3b: second Newton-Raphson iteration, same sequence.
        MOVL      XAR4,@ACC             ; (1)  XAR4 = Yn                 (Q30)
        MOVL      XT,@XAR4              ; (2)  XT  = Yn                  (Q30)
        QMPYL     ACC,XT,@XT            ; (3)  ACC = Yn*Yn               (Q28)
        MOVL      XT,@XAR6              ; (4)  XT  = V/2                 (Q32)
        LSL       ACC,#2                ; (5)  ACC = Yn*Yn               (Q30)
        QMPYL     ACC,XT,@ACC           ; (6)  ACC = Yn*Yn*V/2           (Q30)
        MOVL      XT,@XAR5              ; (7)  XT  = 1.5                 (Q30)
        SUBL      @XT,ACC               ; (8)  XT  = 1.5 - Yn*Yn*V/2     (Q30)
        QMPYL     ACC,XT,@XAR4          ; (9)  ACC = Yn(1.5 - Yn*Yn*V/2) (Q28)
        LSL       ACC,#2                ; (10) ACC = Yn(1.5 - Yn*Yn*V/2) (Q30)

        ;; Step 4: denormalize, round and saturate.
        MOVL      XT,*+XAR7[AR0]        ; Scale with 1.0 or 1/sqrt(2) or 0.5
        IMPYL     P,XT,@ACC             ; P   = low  32 bits of the product
        QMPYL     ACC,XT,@ACC           ; ACC = high 32 bits of the product

        ;; Fixed shift chosen by Q. Right shifts larger than 16 are
        ;; issued as several ASR64 instructions.
    .if q_value == 30                   ; Scale final result based on Q value
        LSL64     ACC:P,#2
    .endif
    .if q_value == 29
        LSL64     ACC:P,#1
    .endif
    .if (q_value >= 18) & (q_value <= 28)
        ASR64     ACC:P,#((86 - q_value*3)/2)
    .endif
    .if (q_value >= 7) & (q_value <= 17)
        ASR64     ACC:P,#16
        ASR64     ACC:P,#(((86 - q_value*3)/2) - 16)
    .endif
    .if q_value <= 6
        ASR64     ACC:P,#16
        ASR64     ACC:P,#16
        ASR64     ACC:P,#(((86 - q_value*3)/2) - 32)
    .endif

        ASR64     ACC:P,#16
        MOVL      XT,*--SP              ; T = n/2 (value pushed above)
        LSL64     ACC:P,T               ; Scale final result based on n/2
        ADDUL     P,*+XAR7[0]           ; Round result
        ADDCL     ACC,*+XAR7[2]
        MINL      ACC,*+XAR7[6]         ; Saturate if exceeds max pos value
        MINCUL    P,*+XAR7[4]
        LSL64     ACC:P,#16
        LRETR
        .endm

;; The value "GLOBAL_Q" needs to be supplied by the assembler
;; using the "-dGLOBAL_Q=q_value" directive:
;;
;; Exactly one of the blocks below is assembled: the one whose number
;; equals GLOBAL_Q. It exports __IQ<GLOBAL_Q>isqrt and expands the macro
;; at that label. Nothing is emitted when GLOBAL_Q is outside 1..30.

        .sect     "IQmath"
        .ref      _IQisqrtTable
        .ref      _IQisqrtRoundSatTable

    .if GLOBAL_Q == 30
        .def      __IQ30isqrt
__IQ30isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 29
        .def      __IQ29isqrt
__IQ29isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 28
        .def      __IQ28isqrt
__IQ28isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 27
        .def      __IQ27isqrt
__IQ27isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 26
        .def      __IQ26isqrt
__IQ26isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 25
        .def      __IQ25isqrt
__IQ25isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 24
        .def      __IQ24isqrt
__IQ24isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 23
        .def      __IQ23isqrt
__IQ23isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 22
        .def      __IQ22isqrt
__IQ22isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 21
        .def      __IQ21isqrt
__IQ21isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 20
        .def      __IQ20isqrt
__IQ20isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 19
        .def      __IQ19isqrt
__IQ19isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 18
        .def      __IQ18isqrt
__IQ18isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 17
        .def      __IQ17isqrt
__IQ17isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 16
        .def      __IQ16isqrt
__IQ16isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 15
        .def      __IQ15isqrt
__IQ15isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 14
        .def      __IQ14isqrt
__IQ14isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 13
        .def      __IQ13isqrt
__IQ13isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 12
        .def      __IQ12isqrt
__IQ12isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 11
        .def      __IQ11isqrt
__IQ11isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 10
        .def      __IQ10isqrt
__IQ10isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 9
        .def      __IQ9isqrt
__IQ9isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 8
        .def      __IQ8isqrt
__IQ8isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 7
        .def      __IQ7isqrt
__IQ7isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 6
        .def      __IQ6isqrt
__IQ6isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 5
        .def      __IQ5isqrt
__IQ5isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 4
        .def      __IQ4isqrt
__IQ4isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 3
        .def      __IQ3isqrt
__IQ3isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 2
        .def      __IQ2isqrt
__IQ2isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

    .if GLOBAL_Q == 1
        .def      __IQ1isqrt
__IQ1isqrt:
        IQNisqrt  GLOBAL_Q
    .endif

;;###########################################################################
;; No More.
;;###########################################################################