aboutsummaryrefslogtreecommitdiff
path: root/pixman/pixman/pixman-arm-neon-asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'pixman/pixman/pixman-arm-neon-asm.S')
-rw-r--r-- pixman/pixman/pixman-arm-neon-asm.S 100
1 files changed, 73 insertions, 27 deletions
diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S
index 8ddbefc87..0229bedfa 100644
--- a/pixman/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman/pixman-arm-neon-asm.S
@@ -253,7 +253,7 @@
vld1.16 {d4, d5}, [DST_R, :128]!
vqadd.u8 q9, q0, q11
vshrn.u16 d6, q2, #8
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
vshrn.u16 d7, q2, #3
vsli.u16 q2, q2, #5
vshll.u8 q14, d16, #8
@@ -295,7 +295,7 @@
pixman_composite_over_8888_0565_process_pixblock_tail
vst1.16 {d28, d29}, [DST_W, :128]!
vld1.16 {d4, d5}, [DST_R, :128]!
- vld4.32 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
pixman_composite_over_8888_0565_process_pixblock_head
cache_preload 8, 8
.endm
@@ -433,7 +433,7 @@ generate_composite_function \
vsri.u16 q14, q8, #5
PF add PF_X, PF_X, #8
PF tst PF_CTL, #0xF
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
PF addne PF_X, PF_X, #8
PF subne PF_CTL, PF_CTL, #1
vsri.u16 q14, q9, #11
@@ -478,7 +478,7 @@ generate_composite_function \
.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
pixman_composite_src_0565_8888_process_pixblock_tail
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld1.16 {d0, d1}, [SRC]!
+ fetch_src_pixblock /* takes the place of the direct vld1.16 {d0, d1} post-increment load from [SRC]; fetch_src_pixblock is defined outside this diff */
pixman_composite_src_0565_8888_process_pixblock_head
cache_preload 8, 8
.endm
@@ -505,7 +505,7 @@ generate_composite_function \
.endm
.macro pixman_composite_add_8_8_process_pixblock_tail_head
- vld1.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
PF add PF_X, PF_X, #32
PF tst PF_CTL, #0xF
vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
@@ -537,13 +537,13 @@ generate_composite_function \
/******************************************************************************/
.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
- vld1.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
PF add PF_X, PF_X, #8
PF tst PF_CTL, #0xF
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.32 {d4, d5, d6, d7}, [DST_R, :128]!
PF addne PF_X, PF_X, #8
PF subne PF_CTL, PF_CTL, #1
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vst1.32 {d28, d29, d30, d31}, [DST_W, :128]!
PF cmp PF_X, ORIG_W
PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
@@ -613,7 +613,7 @@ generate_composite_function_single_scanline \
PF cmp PF_X, ORIG_W
vraddhn.u16 d30, q12, q10
vraddhn.u16 d31, q13, q11
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
vmvn.8 d22, d3
PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
@@ -667,7 +667,7 @@ generate_composite_function_single_scanline \
vraddhn.u16 d31, q13, q11
vqadd.u8 q14, q0, q14
vqadd.u8 q15, q1, q15
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
vmvn.8 d22, d3
PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
@@ -887,7 +887,7 @@ generate_composite_function \
.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
vld1.16 {d4, d5}, [DST_R, :128]!
pixman_composite_over_n_8_0565_process_pixblock_tail
- vld4.8 {d8, d9, d10, d11}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
vld1.8 {d24}, [MASK]!
pixman_composite_over_n_8_0565_process_pixblock_head
@@ -919,7 +919,7 @@ generate_composite_function \
.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
- vld1.16 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock /* takes the place of the direct vld1.16 {d0-d3} post-increment load from [SRC]; presumably refills the registers just stored above -- confirm against the fetch_src_pixblock definition */
cache_preload 16, 16
.endm
@@ -1065,7 +1065,7 @@ generate_composite_function \
.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- vld1.32 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock /* takes the place of the direct vld1.32 {d0-d3} post-increment load from [SRC]; presumably refills the registers just stored above -- confirm against the fetch_src_pixblock definition */
cache_preload 8, 8
.endm
@@ -1096,7 +1096,7 @@ generate_composite_function \
.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- vld1.32 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
vorr q0, q0, q2
vorr q1, q1, q2
cache_preload 8, 8
@@ -1395,7 +1395,7 @@ generate_composite_function \
vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
vld1.8 {d24, d25, d26, d27}, [MASK]!
- vld1.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 32, 32
pixman_composite_add_8_8_8_process_pixblock_head
.endm
@@ -1448,7 +1448,7 @@ generate_composite_function \
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
vld4.8 {d24, d25, d26, d27}, [MASK]!
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
pixman_composite_add_8888_8888_8888_process_pixblock_head
.endm
@@ -1517,7 +1517,7 @@ generate_composite_function_single_scanline \
.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
vld4.8 {d12, d13, d14, d15}, [MASK]!
pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
@@ -1554,7 +1554,7 @@ generate_composite_function_single_scanline \
.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
pixman_composite_over_8888_n_8888_process_pixblock_tail
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
pixman_composite_over_8888_n_8888_process_pixblock_head
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
@@ -1588,7 +1588,7 @@ generate_composite_function \
.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
pixman_composite_over_8888_n_8888_process_pixblock_tail
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
vld4.8 {d12, d13, d14, d15}, [MASK]!
pixman_composite_over_8888_n_8888_process_pixblock_head
@@ -1630,7 +1630,7 @@ generate_composite_function_single_scanline \
.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
pixman_composite_over_8888_n_8888_process_pixblock_tail
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
cache_preload 8, 8
vld1.8 {d15}, [MASK]!
pixman_composite_over_8888_n_8888_process_pixblock_head
@@ -1662,7 +1662,7 @@ generate_composite_function \
.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
vst3.8 {d0, d1, d2}, [DST_W]!
- vld3.8 {d0, d1, d2}, [SRC]!
+ fetch_src_pixblock /* takes the place of the direct 3-way deinterleaving vld3.8 {d0, d1, d2} load from [SRC]; fetch_src_pixblock is defined outside this diff */
cache_preload 8, 8
.endm
@@ -1692,7 +1692,7 @@ generate_composite_function \
.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
vst4.8 {d0, d1, d2, d3}, [DST_W]!
- vld3.8 {d0, d1, d2}, [SRC]!
+ fetch_src_pixblock /* takes the place of the direct vld3.8 {d0, d1, d2} load from [SRC]; the vswp below then exchanges d0 and d2 of the freshly fetched block */
vswp d0, d2
cache_preload 8, 8
.endm
@@ -1731,7 +1731,7 @@ generate_composite_function \
.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
vshll.u8 q14, d0, #8
- vld3.8 {d0, d1, d2}, [SRC]!
+ fetch_src_pixblock
vsri.u16 q14, q8, #5
vsri.u16 q14, q9, #11
vshll.u8 q8, d1, #8
@@ -1777,7 +1777,7 @@ generate_composite_function \
vswp d3, d31
vrshr.u16 q12, q9, #8
vrshr.u16 q13, q10, #8
- vld4.8 {d0, d1, d2, d3}, [SRC]!
+ fetch_src_pixblock
vraddhn.u16 d30, q11, q8
PF add PF_X, PF_X, #8
PF tst PF_CTL, #0xF
@@ -1851,7 +1851,7 @@ generate_composite_function \
.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
vld1.8 {d15}, [MASK]!
pixman_composite_over_0565_8_0565_process_pixblock_tail
- vld1.16 {d8, d9}, [SRC]!
+ fetch_src_pixblock
vld1.16 {d10, d11}, [DST_R, :128]!
cache_preload 8, 8
pixman_composite_over_0565_8_0565_process_pixblock_head
@@ -1903,7 +1903,7 @@ generate_composite_function \
.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
vld1.8 {d15}, [MASK]!
pixman_composite_add_0565_8_0565_process_pixblock_tail
- vld1.16 {d8, d9}, [SRC]!
+ fetch_src_pixblock
vld1.16 {d10, d11}, [DST_R, :128]!
cache_preload 8, 8
pixman_composite_add_0565_8_0565_process_pixblock_head
@@ -1951,7 +1951,7 @@ generate_composite_function \
/* TODO: expand macros and do better instructions scheduling */
.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
- vld1.8 {d15}, [SRC]!
+ fetch_src_pixblock
pixman_composite_out_reverse_8_0565_process_pixblock_tail
vld1.16 {d10, d11}, [DST_R, :128]!
cache_preload 8, 8
@@ -1973,3 +1973,49 @@ generate_composite_function \
10, /* dst_r_basereg */ \
15, /* src_basereg */ \
0 /* mask_basereg */
+
+/******************************************************************************/
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, /* function name, then presumably src/mask/dst bits per pixel -- confirm against the generator macro */ \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, /* head/tail/tail_head macros carry the over_8888_8888 composite name; they are defined outside this diff */ \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head /* no *_basereg arguments passed; presumably the generator's defaults apply -- confirm */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, /* function name, then presumably src/mask/dst bits per pixel -- confirm against the generator macro */ \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_0565_process_pixblock_head, /* head/tail/tail_head macros carry the over_8888_0565 composite name */ \
+ pixman_composite_over_8888_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_0565_process_pixblock_tail_head, /* this tail_head reloads the destination into d4, d5 and stores d28, d29, in line with the base registers below */ \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, /* function name, then presumably src/mask/dst bits per pixel -- confirm against the generator macro */ \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, /* write-only destination flag here, whereas the OVER variants use FLAG_DST_READWRITE */ \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, /* head/tail/tail_head macros carry the src_8888_0565 composite name; they are defined outside this diff */ \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head /* no *_basereg arguments passed; presumably the generator's defaults apply -- confirm */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, /* function name, then presumably src/mask/dst bits per pixel -- confirm against the generator macro */ \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, /* write-only destination flag here, whereas the OVER variants use FLAG_DST_READWRITE */ \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, /* head/tail/tail_head macros carry the src_0565_8888 composite name */ \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head /* no *_basereg arguments passed; presumably the generator's defaults apply -- confirm */