| /* SPDX-License-Identifier: BSD-2-Clause */ |
| /* |
| * Copyright (c) 2014, STMicroelectronics International N.V. |
| */ |
| .syntax unified |
| .thumb |
| .section .text |
| .align 2 |
| |
| .global __mpa_full_adder |
| .global __mpa_full_sub |
| .global __mpa_full_adder_ackum |
| .global __mpa_div_dword |
| .global __mpa_mul_add_word |
| .global __mpa_mul_add_word_cum |
| .global __mpa_montgomery_mul_add |
| .global __mpa_montgomery_sub_ack |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_full_adder |
| @ |
| @ A word_t sized full adder: computes a + b + *carry. |
| @ The low 32 bits of the result are written to *sum and the |
| @ accumulated carry out of both additions is written back to *carry. |
| @ |
| @ void __mpa_full_adder( mpa_word_t a, |
| @ mpa_word_t b, |
| @ mpa_word_t* sum, |
| @ mpa_word_t* carry) |
| @ |
| @ At entry: r0 = a, r1 = b, r2 = sum, r3 = carry |
| @ Only r0-r3 and r12 are written, so nothing is saved on the stack. |
| @ *carry is read before either store, *sum is stored before *carry. |
| @ |
| .thumb_func |
| .type __mpa_full_adder, %function |
| __mpa_full_adder: |
| ldr r12, [r3] @ r12 holds incoming carry |
| adds r1, r0, r1 @ r1 holds a + b, C holds its carry out |
| mov r0, #0 @ a is dead now; mov without S leaves C intact |
| adc r0, r0, #0 @ r0 holds carry of a + b |
| adds r1, r1, r12 @ r1 holds a + b + incoming carry |
| str r1, [r2] @ *sum <- r1 |
| adc r0, r0, #0 @ r0 holds accumulated carry |
| str r0, [r3] @ *carry <- r0 |
| bx lr |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_full_sub |
| @ |
| @ A word_t sized full subtraction: computes a - b - *carry. |
| @ The low 32 bits of the difference are written to *diff and the number |
| @ of borrows that occurred is written back to *carry. |
| @ |
| @ void __mpa_full_sub(mpa_word_t a, |
| @ mpa_word_t b, |
| @ mpa_word_t* diff, |
| @ mpa_word_t* carry); |
| @ |
| @ At entry: r0 = a, r1 = b, r2 = diff, r3 = carry |
| @ Only r0-r3 and r12 are written, so nothing is saved on the stack. |
| @ |
| .thumb_func |
| .type __mpa_full_sub, %function |
| __mpa_full_sub: |
| ldr r12, [r3] @ r12 holds incoming carry (borrow) |
| subs r1, r0, r1 @ r1 holds a - b, C is cleared on borrow |
| sbc r0, r0, r0 @ r0 <- r0 - r0 - borrow = 0 or -1 |
| subs r1, r1, r12 @ r1 holds a - b - incoming carry |
| str r1, [r2] @ *diff <- r1 |
| sbc r0, r0, #0 @ r0 <- r0 - borrow of the second subtraction |
| rsbs r0, r0, #0 @ r0 <- 0 - r0, the outgoing carry |
| str r0, [r3] @ *carry <- r0 |
| bx lr |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_full_adder_ackum |
| @ |
| @ A word_t sized full adder with accumulate: *d = *d + e + *carry |
| @ The accumulated carry out of both additions is written to *carry. |
| @ |
| @ void __mpa_full_adder_ackum( mpa_word_t* d, |
| @ mpa_word_t e, |
| @ mpa_word_t* carry) |
| @ |
| @ At entry: r0 = d, r1 = e, r2 = carry |
| @ Only r0-r3 and r12 are written, so nothing is saved on the stack. |
| @ Both loads happen before either store; *d is stored before *carry. |
| @ |
| .thumb_func |
| .type __mpa_full_adder_ackum, %function |
| __mpa_full_adder_ackum: |
| ldr r12, [r0] @ r12 <- *d |
| ldr r3, [r2] @ r3 holds incoming carry |
| adds r1, r1, r12 @ r1 <- e + *d, C holds its carry out |
| mov r12, #0 @ *d is consumed; mov without S leaves C intact |
| adc r12, r12, #0 @ r12 <- carry of e + *d |
| adds r1, r1, r3 @ r1 <- r1 + incoming carry |
| str r1, [r0] @ *d <- sum |
| adc r0, r12, #0 @ r0 <- outgoing carry |
| str r0, [r2] @ *carry <- r0 |
| bx lr |
| |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_div_dword |
| @ |
| @ Software shift-and-subtract division. Calculates quotient and remainder of |
| @ ((x1 << WORD_SIZE) + x0 ) / y |
| @ |
| @ mpa_word_t __mpa_div_dword(mpa_word_t x0, |
| @ mpa_word_t x1, |
| @ mpa_word_t y, |
| @ mpa_word_t* rem) |
| @ |
| @ returns the low 32 bits of the quotient in r0 and, when rem is not NULL, the remainder in *rem |
| @ NOTE: y == 0 is not handled; den then stays zero and the normalize loop never terminates |
| @ At entry |
| @ r0 low32 of num |
| @ r1 high32 of num |
| @ r2 y (becomes high32 of den) |
| @ r3 addr of rem |
| .thumb_func |
| .type __mpa_div_dword, %function |
| __mpa_div_dword: |
| push {r4, r5, r6, r7, r8, r9, lr}@ save work registers and return address |
| |
| mov r12, #0 @ r12 holds low32 of den, so den = y << 32 |
| @ r2 holds high32 of den |
| mov lr, #1 @ lr holds high32 of qbit, so qbit = 1 << 32 |
| movs r4, #0 @ r4 holds low32 of qbit |
| cmp r2, #-1 @ if den >= 0 as a signed value (top bit clear) |
| bgt normalize @ branch to normalize |
| mov r9, #0 @ r9 holds low of quot |
| mov r8, #0 @ r8 holds high of quot |
| |
| soft_div_main: |
| cmp r12, r0 @ cmp low(den) low(num), unsigned |
| mov r6, #0 @ mov without S keeps the flags of the cmp |
| it hi @ |
| movhi r6, #1 @ r6 is 1 if low(den) > low(num) |
| cmp r2, r1 @ cmp high(den) high(num), unsigned |
| mov r5, #0 @ mov without S keeps the flags of the cmp |
| it hi @ |
| movhi r5, #1 @ r5 is 1 if high(den) > high(num) |
| it eq @ |
| moveq r5, r6 @ if high is equal let low decide |
| cbnz r5, shift_right @ if den > num skip the subtraction |
| |
| adds r9, r9, r4 @ quot += qbit |
| adc r8, r8, lr @ high32 of quot, with carry from low32 |
| subs r0, r0, r12 @ num -= den |
| sbcs r1, r2 @ high32 of num, with borrow from low32 |
| |
| shift_right: |
| @ den and qbit are both shifted right by one bit as 64-bit values. |
| @ These right shifts should be done as |
| @ |
| @ lsrs r2, r2, #1 @ right shift den >>= 1 |
| @ rrx r12, r12 |
| @ lsrs lr, lr, #1 @ right shift qbit >>= 1 |
| @ rrx r4, r4 |
| @ |
| @ but due to a compiler error in the STE toolchain (2012-05-03) |
| @ we must implement this as (r7 carries bit 0 of the high word into bit 31 of the low word): |
| |
| and r7, r2, #1 |
| mov r2, r2, lsr #1 |
| mov r7, r7, lsl #31 |
| mov r12, r12, lsr #1 |
| orr r12, r12, r7 |
| |
| and r7, lr, #1 |
| mov lr, lr, lsr #1 |
| mov r7, r7, lsl #31 |
| mov r4, r4, lsr #1 |
| orr r4, r4, r7 |
| |
| @ |
| @ end of compiler bug fix |
| @ |
| orrs r5, r4, lr @ while qbit != 0 |
| bne soft_div_main @ do mainloop |
| |
| store_and_exit: |
| cmp r3, #0 @ if (r != NULL) |
| it ne @ |
| strne r0, [r3] @ store remainder (low32 of num) into *r |
| mov r0, r9 @ return low32 of quot |
| pop {r4, r5, r6, r7, r8, r9, pc}@ clean up and exit |
| |
| normalize: |
| adds r4, r4, r4 @ qbit <<= 1 |
| adc lr, lr, lr @ high32 of qbit, with carry from low32 |
| adds r12, r12, r12 @ den <<= 1 |
| adcs r2, r2 @ high32 of den, with carry from low32 |
| cmp r2, #-1 @ while den >= 0 as a signed value |
| bgt normalize @ do normalize |
| |
| mov r9, #0 @ r9 holds low of quot, also the result of the early exit below |
| orrs r6, r4, lr @ if qbit == 0 |
| beq store_and_exit @ done, branch to store r and exit |
| |
| mov r8, r9 @ set quot = 0 (r9 = 0 r8 = 0) |
| b soft_div_main @ and branch to main loop |
| |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_mul_add_word |
| @ |
| @ Multiplies a and b and adds the incoming carry to produce the product. |
| @ The low 32 bits of a * b + *carry are stored in *p and the high |
| @ 32 bits are stored in *carry as the outgoing carry. |
| @ |
| @ void __mpa_mul_add_word(mpa_word_t a, |
| @ mpa_word_t b, |
| @ mpa_word_t* p, |
| @ mpa_word_t* carry) |
| @ |
| @ At entry: r0 = a, r1 = b, r2 = p, r3 = carry |
| @ Only r0-r3 and r12 are written, so nothing is saved on the stack. |
| @ |
| .thumb_func |
| .type __mpa_mul_add_word, %function |
| __mpa_mul_add_word: |
| umull r1, r12, r0, r1 @ r12:r1 <- a * b (high32 in r12, low32 in r1) |
| ldr r0, [r3] @ r0 <- incoming carry |
| adds r1, r1, r0 @ add incoming carry to low32 of product |
| str r1, [r2] @ *p <- low32 of product |
| adc r0, r12, #0 @ r0 <- high32 plus carry of the addition |
| str r0, [r3] @ *carry <- outgoing carry |
| bx lr |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_mul_add_word_cum |
| @ |
| @ Multiplies a and b and adds the incoming carry and the cumulative |
| @ product: the low 32 bits of a * b + *p + *carry are stored in *p |
| @ and the high 32 bits are stored in *carry as the outgoing carry. |
| @ |
| @ void __mpa_mul_add_word_cum(mpa_word_t a, |
| @ mpa_word_t b, |
| @ mpa_word_t* p, |
| @ mpa_word_t* carry) |
| @ |
| @ At entry: r0 = a, r1 = b, r2 = p, r3 = carry |
| @ Only r0-r3 and r12 are written, so nothing is saved on the stack. |
| @ Both loads happen before either store; *p is stored before *carry. |
| @ |
| .thumb_func |
| .type __mpa_mul_add_word_cum, %function |
| __mpa_mul_add_word_cum: |
| umull r12, r1, r0, r1 @ r1:r12 <- a * b (high32 in r1, low32 in r12) |
| ldr r0, [r2] @ r0 holds incoming product |
| adds r12, r12, r0 @ r12 holds incoming product + new low32 |
| adc r1, r1, #0 @ add carry to high32 |
| ldr r0, [r3] @ r0 holds incoming carry |
| adds r12, r12, r0 @ add incoming carry |
| str r12, [r2] @ *p <- outgoing product |
| adc r0, r1, #0 @ r0 <- high32 plus carry = outgoing carry |
| str r0, [r3] @ *carry <- r0 |
| bx lr |
| |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_montgomery_mul_add |
| @ |
| @ Calculates dest = dest + src * w |
| @ Dest must be big enough to hold the result |
| @ |
| @ void __mpa_montgomery_mul_add(mpanum dest, |
| @ mpanum src, |
| @ mpa_word_t w) |
| @ |
| @ At entry: r0 = dest, r1 = src, r2 = w |
| @ The code reads the size field at offset 4 of an mpanum and treats |
| @ offset 8 as the start of its word array d[]. |
| @ Returns at once when w == 0. dest->size is only rewritten when the |
| @ number of words touched is greater than the current dest->size. |
| @ |
| .thumb_func |
| .type __mpa_montgomery_mul_add, %function |
| __mpa_montgomery_mul_add: |
| push {r4, r5, r6, r7, lr} |
| add r7, sp, #12 @ r7 <- address of the saved r7 slot; not read again below |
| push {r8, r9, r10, r11} |
| cbz r2, mm_mul_add_exit @ if w == 0 return |
| ldr r3, [r1, #4] @ r3 holds src->size |
| add r9, r0, #8 @ r9 holds &dest->d (dest_begin) |
| cmp r3, #1 @ flags are consumed by the blt below |
| mov r3, r9 @ r3 holds &dest->d (ddig), flags untouched |
| blt mm_mul_add_check_size @ if src->size < 1 skip the multiply loop |
| add r12, r1, #8 @ r12 holds &src->d |
| mov lr, #0 @ lr is a constant zero for the adc in the loop |
| movs r3, #0 @ r3 holds byte offset of idx (4 * idx) |
| movs r4, #0 @ r4 holds carry |
| movs r5, #0 @ r5 holds idx |
| |
| mm_main_loop: |
| ldr r6, [r12, r5, lsl #2] @ r6 holds src->d[idx] |
| adds r3, #4 @ byte offset follows idx |
| ldr r11, [r9, r5, lsl #2] @ r11 holds dest->d[idx] |
| umull r10, r8, r6, r2 @ r8, r10 holds src->d[idx] * w |
| adds r6, r11, r4 @ r6 <- dest->d[idx] + carry |
| adc r4, lr, #0 @ r4 holds C of that addition |
| adds r6, r6, r10 @ r6 holds low32 of result |
| str r6, [r9, r5, lsl #2] @ store into dest->d[idx] |
| add r5, r5, #1 @ idx++ (no S, C is needed by the adc below) |
| ldr r6, [r1, #4] @ r6 holds src->size |
| adc r4, r4, r8 @ r4 holds next carry: high32 plus both carries |
| cmp r5, r6 @ while (idx < src->size) |
| blt mm_main_loop @ do jump |
| |
| adds r1, r0, r3 @ r1 <- dest + 4 * idx |
| cmp r4, #0 @ check carry |
| add r3, r1, #8 @ r3 holds &dest->d[idx], flags untouched |
| beq mm_mul_add_check_size @ jump if no carry |
| movs r1, #0 @ r1 is a constant zero for the adcs below |
| |
| mm_carry_loop: |
| ldr r2, [r3] @ r2 holds dest->d[idx] |
| adds r2, r2, r4 @ r2 <- r2 + carry |
| str r2, [r3], #4 @ store at dest->d[idx++] |
| adcs r4, r1, #0 @ r4 holds new C |
| bne mm_carry_loop @ if r4 != 0 jump |
| |
| mm_mul_add_check_size: |
| sub r1, r3, r9 @ r1 holds ddig - dest_begin in bytes |
| ldr r2, [r0, #4] @ r2 holds dest->size |
| asrs r1, r1, #2 @ divide by 4 to go from byte offset to word count |
| cmp r1, r2 @ |
| it gt @ |
| strgt r1, [r0, #4] @ store new size if idx > dest->size |
| |
| mm_mul_add_exit: |
| pop {r8, r9, r10, r11} |
| pop {r4, r5, r6, r7, pc} |
| |
| |
| @ -------------------------------------------------------------------- |
| @ Function: __mpa_montgomery_sub_ack |
| @ Calculates dest = dest - src |
| @ Assumption: both numbers are non-negative and dest >= src |
| @ (the original note also lists dest > src). |
| @ |
| @ void __mpa_montgomery_sub_ack(mpanum dest, |
| @ mpanum src) |
| @ |
| @ At entry: r0 = dest, r1 = src |
| @ The code reads the size field at offset 4 of an mpanum and treats |
| @ offset 8 as the start of its word array d[]. |
| @ Three phases: subtract src word by word, ripple a remaining borrow |
| @ through the following dest words, then lower dest->size past zero |
| @ words at the top when every word up to dest->size was processed. |
| @ |
| .thumb_func |
| .type __mpa_montgomery_sub_ack, %function |
| __mpa_montgomery_sub_ack: |
| push {r4, r5, r7, r9, lr} |
| add r7, sp, #8 @ r7 <- address of the saved r7 slot; not read again below |
| ldr r2, [r1, #4] @ r2 <- src->size |
| add r9, r0, #8 @ r9 <- &dest->d[0] (ddig) |
| mov lr, #0 @ lr is a constant zero for the sbc below |
| cmp r2, #0 |
| ble ms_src_empty @ src->size <= 0: nothing to subtract |
| movs r3, #8 @ r3 <- byte offset of dest->d[idx] from dest |
| movs r2, #0 @ r2 <- idx |
| mov r12, #0 @ r12 <- incoming borrow |
| ms_sub_loop: |
| add r5, r1, r2, lsl #2 @ r5 <- src + 4 * idx |
| ldr r4, [r9] @ r4 <- dest word at ddig |
| adds r3, #4 @ byte offset follows idx |
| adds r2, #1 @ idx++ |
| ldr r5, [r5, #8] @ r5 <- src->d[] word for the old idx |
| subs r5, r4, r5 @ r5 <- dest word - src word |
| sbc r4, lr, #0 @ r4 <- 0 - borrow of that subtraction |
| subs r5, r5, r12 @ r5 <- r5 - incoming borrow |
| str r5, [r9], #4 @ store result and advance ddig |
| sbc r4, r4, #0 @ r4 <- r4 - borrow of the second subtraction |
| ldr r5, [r1, #4] @ r5 <- src->size |
| rsb r12, r4, #0 @ r12 <- outgoing borrow (no S, flags not needed) |
| cmp r2, r5 |
| blt ms_sub_loop @ while (idx < src->size) |
| cbz r4, ms_trim_check @ no borrow left: skip the ripple |
| movs r1, #0 @ r1 is a constant zero for the sbcs below |
| ms_borrow_loop: |
| add r5, r0, r2, lsl #2 @ r5 <- dest + 4 * idx |
| adds r3, #4 @ byte offset follows idx |
| adds r2, #1 @ idx++ |
| ldr r4, [r5, #8] @ r4 <- dest->d[] word for the old idx |
| subs r4, r4, r12 @ r4 <- r4 - borrow |
| str r4, [r5, #8] @ store it back |
| sbcs r5, r1, #0 @ r5 <- 0 - new borrow, Z set when no borrow |
| rsb r12, r5, #0 @ r12 <- new borrow (no S, Z is kept for bne) |
| bne ms_borrow_loop @ keep going while a borrow remains |
| add r9, r0, r3 @ r9 <- &dest->d[idx] (ddig) |
| ms_trim_check: |
| ldr r3, [r0, #4] @ r3 <- dest->size |
| cmp r2, r3 |
| blt ms_exit @ idx < dest->size: words above idx untouched, done |
| sub r1, r9, #4 @ r1 <- ddig - 4 |
| subs r2, r3, #1 @ r2 <- dest->size - 1 |
| ms_trim_loop: |
| adds r3, r2, #1 |
| cmp r3, #1 |
| blt ms_exit @ stop when r2 + 1 < 1 |
| ldr r3, [r1] @ r3 <- word under inspection |
| cbnz r3, ms_exit @ non-zero word found: size is final |
| str r2, [r0, #4] @ dest->size <- r2 |
| subs r2, #1 |
| subs r1, #4 @ step down one word |
| b ms_trim_loop |
| ms_exit: |
| pop {r4, r5, r7, r9, pc} |
| ms_src_empty: |
| movs r2, #0 @ idx <- 0, ddig still points at dest->d[0] |
| b ms_trim_check |
| |
| |