aboutsummaryrefslogtreecommitdiff
path: root/pixman/pixman/pixman-mips-dspr2-asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'pixman/pixman/pixman-mips-dspr2-asm.S')
-rw-r--r--pixman/pixman/pixman-mips-dspr2-asm.S624
1 files changed, 624 insertions, 0 deletions
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S
index 3a6b26a30..b5cae1690 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman/pixman-mips-dspr2-asm.S
@@ -1209,6 +1209,630 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
END(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
+/*
+ * a0 - dst (a8)
+ * a1 - src (a8)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0, v1
+ li t9, 0x00ff00ff
+ beqz a3, 3f
+ nop
+
+ srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */
+ beqz v0, 1f /* branch if less than 4 src pixels */
+ nop
+
+0:
+ beqz v0, 1f
+ addiu v0, v0, -1
+ lbu t0, 0(a2)
+ lbu t1, 1(a2)
+ lbu t2, 2(a2)
+ lbu t3, 3(a2)
+ lbu t4, 0(a0)
+ lbu t5, 1(a0)
+ lbu t6, 2(a0)
+ lbu t7, 3(a0)
+
+ addiu a2, a2, 4
+
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr_sra.ph.w t5, t4, 0
+ precr_sra.ph.w t7, t6, 0
+
+ precr.qb.ph t0, t3, t1
+ precr.qb.ph t1, t7, t5
+
+ lbu t4, 0(a1)
+ lbu v1, 1(a1)
+ lbu t7, 2(a1)
+ lbu t8, 3(a1)
+
+ addiu a1, a1, 4
+
+ precr_sra.ph.w v1, t4, 0
+ precr_sra.ph.w t8, t7, 0
+
+ muleu_s.ph.qbl t2, t0, t8
+ muleu_s.ph.qbr t3, t0, v1
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
+ shra_r.ph t2, t2, 8
+ shra_r.ph t3, t3, 8
+ precr.qb.ph t0, t2, t3
+
+ addu_s.qb t2, t0, t1
+
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a3, a3, -4
+ b 0b
+ addiu a0, a0, 4
+
+1:
+ beqz a3, 3f
+ nop
+2:
+ lbu t8, 0(a1)
+ lbu t0, 0(a2)
+ lbu t1, 0(a0)
+ addiu a1, a1, 1
+ addiu a2, a2, 1
+
+ mul t2, t0, t8
+ shra_r.ph t3, t2, 8
+ andi t3, t3, 0xff
+ addq.ph t2, t2, t3
+ shra_r.ph t2, t2, 8
+ andi t2, t2, 0xff
+
+ addu_s.qb t2, t2, t1
+ sb t2, 0(a0)
+ addiu a3, a3, -1
+ bnez a3, 2b
+ addiu a0, a0, 1
+
+3:
+ RESTORE_REGS_FROM_STACK 0, v0, v1
+ j ra
+ nop
+
+END(pixman_composite_add_8_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
+/*
+ * a0 - dst (a8)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li t9, 0x00ff00ff
+ beqz a3, 3f
+ nop
+
+ srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */
+ beqz v0, 1f /* branch if less than 4 src pixels */
+ nop
+
+ srl t8, a1, 24
+ replv.ph t8, t8
+
+0:
+ beqz v0, 1f
+ addiu v0, v0, -1
+ lbu t0, 0(a2)
+ lbu t1, 1(a2)
+ lbu t2, 2(a2)
+ lbu t3, 3(a2)
+ lbu t4, 0(a0)
+ lbu t5, 1(a0)
+ lbu t6, 2(a0)
+ lbu t7, 3(a0)
+
+ addiu a2, a2, 4
+
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr_sra.ph.w t5, t4, 0
+ precr_sra.ph.w t7, t6, 0
+
+ precr.qb.ph t0, t3, t1
+ precr.qb.ph t1, t7, t5
+
+ muleu_s.ph.qbl t2, t0, t8
+ muleu_s.ph.qbr t3, t0, t8
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
+ shra_r.ph t2, t2, 8
+ shra_r.ph t3, t3, 8
+ precr.qb.ph t0, t2, t3
+
+ addu_s.qb t2, t0, t1
+
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a3, a3, -4
+ b 0b
+ addiu a0, a0, 4
+
+1:
+ beqz a3, 3f
+ nop
+ srl t8, a1, 24
+2:
+ lbu t0, 0(a2)
+ lbu t1, 0(a0)
+ addiu a2, a2, 1
+
+ mul t2, t0, t8
+ shra_r.ph t3, t2, 8
+ andi t3, t3, 0xff
+ addq.ph t2, t2, t3
+ shra_r.ph t2, t2, 8
+ andi t2, t2, 0xff
+
+ addu_s.qb t2, t2, t1
+ sb t2, 0(a0)
+ addiu a3, a3, -1
+ bnez a3, 2b
+ addiu a0, a0, 1
+
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_add_n_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ /* a1 = source (32bit constant) */
+ lbu t0, 0(a2) /* t0 = mask (a8) */
+ lbu t1, 1(a2) /* t1 = mask (a8) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+ addiu a2, a2, 2
+
+ MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
+ t0, t1, \
+ t2, t3, \
+ t5, t6, \
+ t4, t7, t8, t9, s0, s1, s2
+
+ sw t5, 0(a0)
+ sw t6, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ /* a1 = source (32bit constant) */
+ lbu t0, 0(a2) /* t0 = mask (a8) */
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6
+
+ sw t2, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+ j ra
+ nop
+
+END(pixman_composite_add_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (r5g6b5)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+ li t4, 0xf800f800
+ li t5, 0x07e007e0
+ li t6, 0x001F001F
+ li t7, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ lhu t0, 0(a1) /* t0 = source (r5g6b5) */
+ lhu t1, 2(a1) /* t1 = source (r5g6b5) */
+ lbu t2, 0(a2) /* t2 = mask (a8) */
+ lbu t3, 1(a2) /* t3 = mask (a8) */
+ lhu t8, 0(a0) /* t8 = destination (r5g6b5) */
+ lhu t9, 2(a0) /* t9 = destination (r5g6b5) */
+ addiu a1, a1, 4
+ addiu a2, a2, 2
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
+ CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
+ MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s0, s1, \
+ t2, t3, \
+ s2, s3, \
+ t0, t1, \
+ t7, s4, s5, s6, s7, t8, t9
+ CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
+
+ sh s0, 0(a0)
+ sh s1, 2(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 4
+2:
+ beqz a3, 3f
+ nop
+ lhu t0, 0(a1) /* t0 = source (r5g6b5) */
+ lbu t1, 0(a2) /* t1 = mask (a8) */
+ lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
+
+ CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
+ CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t3, t1, t4, t0, t7, t2, t5, t6
+ CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
+
+ sh t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+ j ra
+ nop
+
+END(pixman_composite_add_0565_8_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ lbu t2, 0(a2) /* t2 = mask (a8) */
+ lbu t3, 1(a2) /* t3 = mask (a8) */
+ lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+ lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+ addiu a1, a1, 8
+ addiu a2, a2, 2
+
+ MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+ t2, t3, \
+ t5, t6, \
+ t7, t8, \
+ t4, t9, s0, s1, s2, t0, t1
+
+ sw t7, 0(a0)
+ sw t8, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lbu t1, 0(a2) /* t1 = mask (a8) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
+
+ sw t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+ j ra
+ nop
+
+END(pixman_composite_add_8888_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - mask (32bit constant)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ srl a2, a2, 24
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ /* a2 = mask (32bit constant) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+ addiu a1, a1, 8
+
+ MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+ a2, a2, \
+ t2, t3, \
+ t5, t6, \
+ t4, t7, t8, t9, s0, s1, s2
+
+ sw t5, 0(a0)
+ sw t6, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ /* a2 = mask (32bit constant) */
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7
+
+ sw t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+ j ra
+ nop
+
+END(pixman_composite_add_8888_n_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - mask (a8r8g8b8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2
+ li t4, 0x00ff00ff
+ beqz a3, 3f
+ nop
+ addiu t1, a3, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 4(a1) /* t1 = source (a8r8g8b8) */
+ lw t2, 0(a2) /* t2 = mask (a8r8g8b8) */
+ lw t3, 4(a2) /* t3 = mask (a8r8g8b8) */
+ lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+ lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+ addiu a1, a1, 8
+ addiu a2, a2, 8
+ srl t2, t2, 24
+ srl t3, t3, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+ t2, t3, \
+ t5, t6, \
+ t7, t8, \
+ t4, t9, s0, s1, s2, t0, t1
+
+ sw t7, 0(a0)
+ sw t8, 4(a0)
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a3, 3f
+ nop
+ lw t0, 0(a1) /* t0 = source (a8r8g8b8) */
+ lw t1, 0(a2) /* t1 = mask (a8r8g8b8) */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ srl t1, t1, 24
+
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
+
+ sw t3, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+ j ra
+ nop
+
+END(pixman_composite_add_8888_8888_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
+/*
+ * a0 - dst (a8)
+ * a1 - src (a8)
+ * a2 - w
+ */
+
+ beqz a2, 3f
+ nop
+ srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */
+ beqz t9, 1f /* branch if less than 4 src pixels */
+ nop
+
+0:
+ beqz t9, 1f
+ addiu t9, t9, -1
+ lbu t0, 0(a1)
+ lbu t1, 1(a1)
+ lbu t2, 2(a1)
+ lbu t3, 3(a1)
+ lbu t4, 0(a0)
+ lbu t5, 1(a0)
+ lbu t6, 2(a0)
+ lbu t7, 3(a0)
+
+ addiu a1, a1, 4
+
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr_sra.ph.w t5, t4, 0
+ precr_sra.ph.w t7, t6, 0
+
+ precr.qb.ph t0, t3, t1
+ precr.qb.ph t1, t7, t5
+
+ addu_s.qb t2, t0, t1
+
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a2, a2, -4
+ b 0b
+ addiu a0, a0, 4
+
+1:
+ beqz a2, 3f
+ nop
+2:
+ lbu t0, 0(a1)
+ lbu t1, 0(a0)
+ addiu a1, a1, 1
+
+ addu_s.qb t2, t0, t1
+ sb t2, 0(a0)
+ addiu a2, a2, -1
+ bnez a2, 2b
+ addiu a0, a0, 1
+
+3:
+ j ra
+ nop
+
+END(pixman_composite_add_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ beqz a2, 4f
+ nop
+
+ srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */
+ beqz t9, 3f /* branch if less than 4 src pixels */
+ nop
+1:
+ addiu t9, t9, -1
+ beqz t9, 2f
+ addiu a2, a2, -4
+
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 0(a0)
+ lw t5, 4(a0)
+ lw t6, 8(a0)
+ lw t7, 12(a0)
+ addiu a1, a1, 16
+
+ addu_s.qb t4, t4, t0
+ addu_s.qb t5, t5, t1
+ addu_s.qb t6, t6, t2
+ addu_s.qb t7, t7, t3
+
+ sw t4, 0(a0)
+ sw t5, 4(a0)
+ sw t6, 8(a0)
+ sw t7, 12(a0)
+ b 1b
+ addiu a0, a0, 16
+2:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 0(a0)
+ lw t5, 4(a0)
+ lw t6, 8(a0)
+ lw t7, 12(a0)
+ addiu a1, a1, 16
+
+ addu_s.qb t4, t4, t0
+ addu_s.qb t5, t5, t1
+ addu_s.qb t6, t6, t2
+ addu_s.qb t7, t7, t3
+
+ sw t4, 0(a0)
+ sw t5, 4(a0)
+ sw t6, 8(a0)
+ sw t7, 12(a0)
+
+ beqz a2, 4f
+ addiu a0, a0, 16
+3:
+ lw t0, 0(a1)
+ lw t1, 0(a0)
+ addiu a1, a1, 4
+ addiu a2, a2, -1
+ addu_s.qb t1, t1, t0
+ sw t1, 0(a0)
+ bnez a2, 3b
+ addiu a0, a0, 4
+4:
+ jr ra
+ nop
+
+END(pixman_composite_add_8888_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
/*
* a0 - *dst