aboutsummaryrefslogtreecommitdiff
path: root/xorg-server/miext/rootless/accel/rlBlt.c
diff options
context:
space:
mode:
Diffstat (limited to 'xorg-server/miext/rootless/accel/rlBlt.c')
-rw-r--r--xorg-server/miext/rootless/accel/rlBlt.c408
1 files changed, 408 insertions, 0 deletions
diff --git a/xorg-server/miext/rootless/accel/rlBlt.c b/xorg-server/miext/rootless/accel/rlBlt.c
new file mode 100644
index 000000000..b5fe74085
--- /dev/null
+++ b/xorg-server/miext/rootless/accel/rlBlt.c
@@ -0,0 +1,408 @@
+/*
+ * Accelerated rootless blit
+ */
+/*
+ * This code is largely copied from fbBlt.c.
+ *
+ * Copyright © 1998 Keith Packard
+ * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
+ * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#endif
+
+#include <stddef.h> /* For NULL */
+#include <string.h>
+#include "fb.h"
+#include "rootlessCommon.h"
+#include "rlAccel.h"
+
+#define InitializeShifts(sx,dx,ls,rs) { \
+ if (sx != dx) { \
+ if (sx > dx) { \
+ ls = sx - dx; \
+ rs = FB_UNIT - ls; \
+ } else { \
+ rs = dx - sx; \
+ ls = FB_UNIT - rs; \
+ } \
+ } \
+}
+
+void
+rlBlt (FbBits *srcLine,
+ FbStride srcStride,
+ int srcX,
+
+ ScreenPtr pDstScreen,
+ FbBits *dstLine,
+ FbStride dstStride,
+ int dstX,
+
+ int width,
+ int height,
+
+ int alu,
+ FbBits pm,
+ int bpp,
+
+ Bool reverse,
+ Bool upsidedown)
+{
+ FbBits *src, *dst;
+ int leftShift, rightShift;
+ FbBits startmask, endmask;
+ FbBits bits, bits1;
+ int n, nmiddle;
+ Bool destInvarient;
+ int startbyte, endbyte;
+ FbDeclareMergeRop ();
+
+#ifdef FB_24BIT
+ if (bpp == 24 && !FbCheck24Pix (pm))
+ {
+ fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
+ width, height, alu, pm, reverse, upsidedown);
+ return;
+ }
+#endif
+
+ if (alu == GXcopy && pm == FB_ALLONES && !reverse &&
+ !(srcX & 7) && !(dstX & 7) && !(width & 7)) {
+ int i;
+ CARD8 *src = (CARD8 *) srcLine;
+ CARD8 *dst = (CARD8 *) dstLine;
+
+ srcStride *= sizeof(FbBits);
+ dstStride *= sizeof(FbBits);
+ width >>= 3;
+ src += (srcX >> 3);
+ dst += (dstX >> 3);
+
+ if (!upsidedown)
+ for (i = 0; i < height; i++)
+ memcpy(dst + i * dstStride, src + i * srcStride, width);
+ else
+ for (i = height - 1; i >= 0; i--)
+ memcpy(dst + i * dstStride, src + i * srcStride, width);
+
+ return;
+ }
+
+ FbInitializeMergeRop(alu, pm);
+ destInvarient = FbDestInvarientMergeRop();
+ if (upsidedown)
+ {
+ srcLine += (height - 1) * (srcStride);
+ dstLine += (height - 1) * (dstStride);
+ srcStride = -srcStride;
+ dstStride = -dstStride;
+ }
+ FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
+ nmiddle, endmask, endbyte);
+
+ /*
+ * Beginning of the rootless acceleration code
+ */
+ if (!startmask && !endmask && alu == GXcopy &&
+ height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
+ {
+ if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
+ {
+ SCREENREC(pDstScreen)->imp->CopyBytes(
+ nmiddle * sizeof(*dst), height,
+ (char *) srcLine + (srcX >> 3),
+ srcStride * sizeof (*src),
+ (char *) dstLine + (dstX >> 3),
+ dstStride * sizeof (*dst));
+ return;
+ }
+
+ /* FIXME: the pm test here isn't super-wonderful - just because
+ we don't care about the top eight bits doesn't necessarily
+ mean we want them set to 255. But doing this does give a
+ factor of two performance improvement when copying from a
+ pixmap to a window, which is pretty common.. */
+
+ else if (bpp == 32 && sizeof(FbBits) == 4 &&
+ pm == 0x00FFFFFFUL && !reverse &&
+ SCREENREC(pDstScreen)->imp->CompositePixels)
+ {
+ /* need to copy XRGB to ARGB. */
+
+ void *src[2], *dest[2];
+ unsigned int src_rowbytes[2], dest_rowbytes[2];
+ unsigned int fn;
+
+ src[0] = (char *) srcLine + (srcX >> 3);
+ src[1] = NULL;
+ src_rowbytes[0] = srcStride * sizeof(*src);
+ src_rowbytes[1] = 0;
+
+ dest[0] = (char *) dstLine + (dstX >> 3);
+ dest[1] = dest[0];
+ dest_rowbytes[0] = dstStride * sizeof(*dst);
+ dest_rowbytes[1] = dest_rowbytes[0];
+
+ fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
+ RL_DEPTH_NIL, RL_DEPTH_ARGB8888);
+
+ if (SCREENREC(pDstScreen)->imp->CompositePixels(
+ nmiddle, height,
+ fn, src, src_rowbytes,
+ NULL, 0, dest, dest_rowbytes) == Success)
+ {
+ return;
+ }
+ }
+ }
+ /* End of the rootless acceleration code */
+
+ if (reverse)
+ {
+ srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
+ dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
+ srcX = (srcX + width - 1) & FB_MASK;
+ dstX = (dstX + width - 1) & FB_MASK;
+ }
+ else
+ {
+ srcLine += srcX >> FB_SHIFT;
+ dstLine += dstX >> FB_SHIFT;
+ srcX &= FB_MASK;
+ dstX &= FB_MASK;
+ }
+ if (srcX == dstX)
+ {
+ while (height--)
+ {
+ src = srcLine;
+ srcLine += srcStride;
+ dst = dstLine;
+ dstLine += dstStride;
+ if (reverse)
+ {
+ if (endmask)
+ {
+ bits = *--src;
+ --dst;
+ FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+ }
+ n = nmiddle;
+ if (destInvarient)
+ {
+ while (n--)
+ *--dst = FbDoDestInvarientMergeRop(*--src);
+ }
+ else
+ {
+ while (n--)
+ {
+ bits = *--src;
+ --dst;
+ *dst = FbDoMergeRop (bits, *dst);
+ }
+ }
+ if (startmask)
+ {
+ bits = *--src;
+ --dst;
+ FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
+ }
+ }
+ else
+ {
+ if (startmask)
+ {
+ bits = *src++;
+ FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
+ dst++;
+ }
+ n = nmiddle;
+ if (destInvarient)
+ {
+#if 0
+ /*
+ * This provides some speedup on screen->screen blts
+ * over the PCI bus, usually about 10%. But fb
+ * isn't usually used for this operation...
+ */
+ if (_ca2 + 1 == 0 && _cx2 == 0)
+ {
+ FbBits t1, t2, t3, t4;
+ while (n >= 4)
+ {
+ t1 = *src++;
+ t2 = *src++;
+ t3 = *src++;
+ t4 = *src++;
+ *dst++ = t1;
+ *dst++ = t2;
+ *dst++ = t3;
+ *dst++ = t4;
+ n -= 4;
+ }
+ }
+#endif
+ while (n--)
+ *dst++ = FbDoDestInvarientMergeRop(*src++);
+ }
+ else
+ {
+ while (n--)
+ {
+ bits = *src++;
+ *dst = FbDoMergeRop (bits, *dst);
+ dst++;
+ }
+ }
+ if (endmask)
+ {
+ bits = *src;
+ FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+ }
+ }
+ }
+ }
+ else
+ {
+ if (srcX > dstX)
+ {
+ leftShift = srcX - dstX;
+ rightShift = FB_UNIT - leftShift;
+ }
+ else
+ {
+ rightShift = dstX - srcX;
+ leftShift = FB_UNIT - rightShift;
+ }
+ while (height--)
+ {
+ src = srcLine;
+ srcLine += srcStride;
+ dst = dstLine;
+ dstLine += dstStride;
+
+ bits1 = 0;
+ if (reverse)
+ {
+ if (srcX < dstX)
+ bits1 = *--src;
+ if (endmask)
+ {
+ bits = FbScrRight(bits1, rightShift);
+ if (FbScrRight(endmask, leftShift))
+ {
+ bits1 = *--src;
+ bits |= FbScrLeft(bits1, leftShift);
+ }
+ --dst;
+ FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+ }
+ n = nmiddle;
+ if (destInvarient)
+ {
+ while (n--)
+ {
+ bits = FbScrRight(bits1, rightShift);
+ bits1 = *--src;
+ bits |= FbScrLeft(bits1, leftShift);
+ --dst;
+ *dst = FbDoDestInvarientMergeRop(bits);
+ }
+ }
+ else
+ {
+ while (n--)
+ {
+ bits = FbScrRight(bits1, rightShift);
+ bits1 = *--src;
+ bits |= FbScrLeft(bits1, leftShift);
+ --dst;
+ *dst = FbDoMergeRop(bits, *dst);
+ }
+ }
+ if (startmask)
+ {
+ bits = FbScrRight(bits1, rightShift);
+ if (FbScrRight(startmask, leftShift))
+ {
+ bits1 = *--src;
+ bits |= FbScrLeft(bits1, leftShift);
+ }
+ --dst;
+ FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
+ }
+ }
+ else
+ {
+ if (srcX > dstX)
+ bits1 = *src++;
+ if (startmask)
+ {
+ bits = FbScrLeft(bits1, leftShift);
+ if (FbScrLeft(startmask, rightShift))
+ {
+ bits1 = *src++;
+ bits |= FbScrRight(bits1, rightShift);
+ }
+ FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
+ dst++;
+ }
+ n = nmiddle;
+ if (destInvarient)
+ {
+ while (n--)
+ {
+ bits = FbScrLeft(bits1, leftShift);
+ bits1 = *src++;
+ bits |= FbScrRight(bits1, rightShift);
+ *dst = FbDoDestInvarientMergeRop(bits);
+ dst++;
+ }
+ }
+ else
+ {
+ while (n--)
+ {
+ bits = FbScrLeft(bits1, leftShift);
+ bits1 = *src++;
+ bits |= FbScrRight(bits1, rightShift);
+ *dst = FbDoMergeRop(bits, *dst);
+ dst++;
+ }
+ }
+ if (endmask)
+ {
+ bits = FbScrLeft(bits1, leftShift);
+ if (FbScrLeft(endmask, rightShift))
+ {
+ bits1 = *src;
+ bits |= FbScrRight(bits1, rightShift);
+ }
+ FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
+ }
+ }
+ }
+ }
+}