progs/sum: 92 -> 74 cycles (thanks to loop unrolling)
[calu.git] / progs / sum.s
index d762c60a1d69b779c7992c9329a757235599d877..c03df084ea3f8ce97ce947940f4c482c3de535c1 100644 (file)
@@ -81,12 +81,42 @@ sum:
        ldis r0, 0
        addi r3, r1, 0
        retzs- ; check len != 0
+
+       andx r1, 0x3
+
+       cmpi r1, 1
+       addideq r3, r3, 3
+       subideq r2, r2, 12
+       breq- loop1
+
+       cmpi r1, 2
+       addideq r3, r3, 2
+       subideq r2, r2, 8
+       breq- loop2
+
+       cmpi r1, 3
+       addideq r3, r3, 1
+       subideq r2, r2, 4
+       breq- loop3
+
+       addi r3, r3, 4
 loop:
-       ;load arr data
+       subi r3, r3, 4
+       retzs-
        ldw r4, 0(r2)
-       ;sum += arr[i];
        add r0, r0, r4
-       addi r2, r2, 4
-       subi r3, r3, 1
-       brnz+ loop
-       ret
+
+loop3:
+       ldw r4, 4(r2)
+       add r0, r0, r4
+
+loop2:
+       ldw r4, 8(r2)
+       add r0, r0, r4
+
+loop1:
+       ldw r4, 12(r2)
+       add r0, r0, r4
+
+       addi r2, r2, 4*4
+       br+ loop