aboutsummaryrefslogtreecommitdiff
path: root/pixman/pixman/pixman-arm-neon-asm.h
diff options
context:
space:
mode:
Diffstat (limited to 'pixman/pixman/pixman-arm-neon-asm.h')
-rw-r--r-- pixman/pixman/pixman-arm-neon-asm.h 45
1 files changed, 32 insertions, 13 deletions
diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h
index 97adc6a87..1673b080f 100644
--- a/pixman/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman/pixman-arm-neon-asm.h
@@ -212,27 +212,39 @@
.macro pixld1_s elem_size, reg1, mem_operand
.if elem_size == 16
mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP1, mem_operand, TMP1, asl #1
mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP2, mem_operand, TMP2, asl #1
vld1.16 {d&reg1&[0]}, [TMP1, :16]
mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP1, mem_operand, TMP1, asl #1
vld1.16 {d&reg1&[1]}, [TMP2, :16]
mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP2, mem_operand, TMP2, asl #1
vld1.16 {d&reg1&[2]}, [TMP1, :16]
vld1.16 {d&reg1&[3]}, [TMP2, :16]
.elseif elem_size == 32
mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP1, mem_operand, TMP1, asl #2
mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP2, mem_operand, TMP2, asl #2
vld1.32 {d&reg1&[0]}, [TMP1, :32]
vld1.32 {d&reg1&[1]}, [TMP2, :32]
@@ -242,7 +254,7 @@
.endm
.macro pixld2_s elem_size, reg1, reg2, mem_operand
-.if elem_size == 32
+.if 0 /* elem_size == 32 */
mov TMP1, VX, asr #16
add VX, VX, UNIT_X, asl #1
add TMP1, mem_operand, TMP1, asl #2
@@ -268,12 +280,16 @@
.macro pixld0_s elem_size, reg1, idx, mem_operand
.if elem_size == 16
mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP1, mem_operand, TMP1, asl #1
vld1.16 {d&reg1&[idx]}, [TMP1, :16]
.elseif elem_size == 32
mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
+ adds VX, VX, UNIT_X
+5: subpls VX, VX, SRC_WIDTH_FIXED
+ bpl 5b
add TMP1, mem_operand, TMP1, asl #2
vld1.32 {d&reg1&[idx]}, [TMP1, :32]
.endif
@@ -964,15 +980,17 @@ fname:
TMP1 .req r4
TMP2 .req r5
DST_R .req r6
+ SRC_WIDTH_FIXED .req r7
.macro pixld_src x:vararg
pixld_s x
.endm
ldr UNIT_X, [sp]
- push {r4-r6, lr}
+ push {r4-r8, lr}
+ ldr SRC_WIDTH_FIXED, [sp, #(24 + 4)]
.if mask_bpp != 0
- ldr MASK, [sp, #(16 + 4)]
+ ldr MASK, [sp, #(24 + 8)]
.endif
.else
/*
@@ -1044,7 +1062,7 @@ fname:
cleanup
.if use_nearest_scaling != 0
- pop {r4-r6, pc} /* exit */
+ pop {r4-r8, pc} /* exit */
.else
bx lr /* exit */
.endif
@@ -1058,7 +1076,7 @@ fname:
cleanup
.if use_nearest_scaling != 0
- pop {r4-r6, pc} /* exit */
+ pop {r4-r8, pc} /* exit */
.unreq DST_R
.unreq SRC
@@ -1069,6 +1087,7 @@ fname:
.unreq TMP2
.unreq DST_W
.unreq MASK
+ .unreq SRC_WIDTH_FIXED
.else
bx lr /* exit */