ldis r0, 0 ; r0 = running sum of the array words, starts at 0
addi r3, r1, 0 ; r3 = r1 + 0: working copy of len, used as the loop counter
retzs- ; check len != 0 (returns early on zero; presumably tests the flag set by the addi above)
+
+ andx r1, 0x3 ; presumably r1 = len & 3 (leftover count) - NOTE(review): appears to overwrite r1; confirm callers do not need len afterwards
+
+ cmpi r1, 1 ; leftover == 1 ?
+ addideq r3, r3, 3 ; if so: round counter up to a multiple of 4 - NOTE(review): assumes the "eq" forms leave the cmpi flags intact for the next two instructions; confirm
+ subideq r2, r2, 12 ; if so: bias pointer back 3 words so that 12(r2) is arr[0]
+ breq- loop1 ; if so: enter the unrolled body at its last load
+
+ cmpi r1, 2 ; leftover == 2 ?
+ addideq r3, r3, 2 ; if so: round counter up to a multiple of 4
+ subideq r2, r2, 8 ; if so: bias pointer back 2 words so that 8(r2) is arr[0]
+ breq- loop2 ; if so: enter the unrolled body at its last two loads
+
+ cmpi r1, 3 ; leftover == 3 ?
+ addideq r3, r3, 1 ; if so: round counter up to a multiple of 4
+ subideq r2, r2, 4 ; if so: bias pointer back 1 word so that 4(r2) is arr[0]
+ breq- loop3 ; if so: enter the unrolled body at its last three loads
+
+ addi r3, r3, 4 ; leftover == 0: pre-add 4 to cancel the first subi at loop
loop:
- ;load arr data
+ subi r3, r3, 4 ; each full pass through the body consumes 4 elements
+ retzs- ; counter reached zero: return, sum is in r0
ldw r4, 0(r2) ; r4 = word at r2 + 0
- ;sum += arr[i];
add r0, r0, r4 ; sum += r4
- addi r2, r2, 4
- subi r3, r3, 1
- brnz+ loop
- ret
+
+loop3:
+ ldw r4, 4(r2) ; r4 = word at r2 + 4
+ add r0, r0, r4 ; sum += r4
+
+loop2:
+ ldw r4, 8(r2) ; r4 = word at r2 + 8
+ add r0, r0, r4 ; sum += r4
+
+loop1:
+ ldw r4, 12(r2) ; r4 = word at r2 + 12
+ add r0, r0, r4 ; sum += r4
+
+ addi r2, r2, 4*4 ; advance pointer past the 4 words (4 bytes each) covered by this pass
+ br+ loop ; back to the counter check at loop