progs/max: loop unrolling (102->84)
author Bernhard Urban <lewurm@gmail.com>
Wed, 19 Jan 2011 11:06:24 +0000 (12:06 +0100)
committer Bernhard Urban <lewurm@gmail.com>
Wed, 19 Jan 2011 11:06:24 +0000 (12:06 +0100)
progs/max.s

index 0008d64da5479a394a7c682e49012e2d8ca293e8..7192ae3b70d75903acadd1cb1941a339ffa3c323 100644 (file)
@@ -80,12 +80,61 @@ max:
        ; r4 = arr[x]
        ; r0 = x = arr[0]
        ldw r0, 0(r2)
-       subi r1, r1, 1
+
+       ; r5 = (r1 - 1) & 3: elements after arr[0] that do not fill a whole
+       ; 4-way pass (assumes r1 = element count, r2 = &arr[0] -- TODO confirm)
+       subi r5, r1, 1
+       andx r5, 0x3
+
+       cmpi r5, 0
+       breq+ loop0
+
+       ; one leftover: enter at the last slot so that 16(r2 - 12) = arr[1];
+       ; the bias makes the next "subi r1, r1, 4" at maxloop leave r1 - 2
+       cmpi r5, 1
+       addideq r1, r1, 3-1
+       subideq r2, r2, 12
+       breq+ loop3
+
+       ; two leftovers: 12(r2 - 8) = arr[1], 16(r2 - 8) = arr[2]
+       cmpi r5, 2
+       addideq r1, r1, 2-1
+       subideq r2, r2, 8
+       breq+ loop2
+
+       ; three leftovers: 8(r2 - 4) = arr[1] ... 16(r2 - 4) = arr[3]
+       cmpi r5, 3
+       ; addideq r1, r1, 1-1
+       subideq r2, r2, 4
+       breq+ loop1
+
+       ; no leftover: bias r1 so the first "subi r1, r1, 4" leaves r1 - 1
+loop0:
+       addi r1, r1, 4-1
 maxloop:
+       ; r1 -= 4 per trip; retzs- presumably returns once r1 hits zero
+       subi r1, r1, 4
+       retzs-
+
        ldw r4, 4(r2)
        cmp r0, r4
        movlt r0, r4
-       subi r1, r1, 1
-       addid r2, r2, 4
-       brnq+ maxloop
-       ret+
+
+loop1:
+       ldw r4, 8(r2)
+       cmp r0, r4
+       movlt r0, r4
+
+loop2:
+       ldw r4, 12(r2)
+       cmp r0, r4
+       movlt r0, r4
+
+loop3:
+       ldw r4, 16(r2)
+       cmp r0, r4
+       movlt r0, r4
+
+       ; step r2 past the four words just examined
+       addi r2, r2, 4*4
+       br+ maxloop