1 files changed, 370 insertions, 0 deletions
diff --git a/nx-X11/programs/Xserver/miext/rootless/accel/rlBlt.c b/nx-X11/programs/Xserver/miext/rootless/accel/rlBlt.c
new file mode 100644
index 000000000..e5fa5f7bd
--- /dev/null
+++ b/nx-X11/programs/Xserver/miext/rootless/accel/rlBlt.c
@@ -0,0 +1,370 @@
+/*
+ * Accelerated rootless blit
+ */
+/*
+ * This code is largely copied from fbBlt.c.
+ *
+ * Copyright © 1998 Keith Packard
+ * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
+ * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+/* $XFree86: xc/programs/Xserver/fb/fbblt.c,v 1.8 2000/09/28 00:47:22 keithp Exp $ */
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#endif
+
+#include "fb.h"
+#include "rootlessCommon.h"
+#include "rlAccel.h"
+
+/* rlBlt: fb-style bit blit that first offers large, whole-word GXcopy copies to the rootless imp. */
+void
+rlBlt (FbBits   *srcLine,	/* first source scanline */
+       FbStride	srcStride,	/* source stride, in FbBits units */
+       int	srcX,		/* source x offset, in bits */
+
+       ScreenPtr pDstScreen,	/* screen whose rootless imp may perform the copy */
+       FbBits   *dstLine,	/* first destination scanline */
+       FbStride dstStride,	/* destination stride, in FbBits units */
+       int	dstX,		/* destination x offset, in bits */
+
+       int	width,		/* width, in bits */
+       int	height,		/* number of scanlines */
+
+       int	alu,		/* raster operation (GXcopy enables the fast path) */
+       FbBits	pm,		/* plane mask */
+       int	bpp,		/* bits per pixel */
+
+       Bool	reverse,	/* walk each scanline right to left */
+       Bool	upsidedown)	/* walk the scanlines bottom to top */
+{
+    FbBits  *src, *dst;
+    int	    leftShift, rightShift;
+    FbBits  startmask, endmask;
+    FbBits  bits, bits1;
+    int	    n, nmiddle;
+    Bool    destInvarient;	/* when set, the loops below store without reading *dst */
+    int	    startbyte, endbyte;
+    FbDeclareMergeRop ();
+
+#ifdef FB_24BIT
+    if (bpp == 24 && !FbCheck24Pix (pm))	/* this 24bpp case is delegated to fbBlt24 */
+    {
+	fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
+		 width, height, alu, pm, reverse, upsidedown);
+	return;
+    }
+#endif
+    FbInitializeMergeRop(alu, pm);
+    destInvarient = FbDestInvarientMergeRop();
+    if (upsidedown)	/* start on the last scanline and step backwards */
+    {
+	srcLine += (height - 1) * (srcStride);
+	dstLine += (height - 1) * (dstStride);
+	srcStride = -srcStride;
+	dstStride = -dstStride;
+    }
+    FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
+		     nmiddle, endmask, endbyte);	/* presumably fills in the edge masks/bytes and nmiddle */
+
+    /*
+     * Beginning of the rootless acceleration code: only GXcopy blits with no edge masks and above the size threshold
+     */
+    if (!startmask && !endmask && alu == GXcopy &&
+        height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
+    {
+	if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
+	{
+	    SCREENREC(pDstScreen)->imp->CopyBytes(
+                            nmiddle * sizeof(*dst), height,
+                            (char *) srcLine + (srcX >> 3),	/* bit offset -> byte offset */
+                            srcStride * sizeof (*src),
+                            (char *) dstLine + (dstX >> 3),
+                            dstStride * sizeof (*dst));
+	    return;
+	}
+
+	/* FIXME: the pm test here isn't super-wonderful - just because
+	   we don't care about the top eight bits doesn't necessarily
+	   mean we want them set to 255. But doing this does give a
+	   factor of two performance improvement when copying from a
+	   pixmap to a window, which is pretty common.. */
+
+	else if (bpp == 32 && sizeof(FbBits) == 4 &&
+                 pm == 0x00FFFFFFUL && !reverse &&
+                 SCREENREC(pDstScreen)->imp->CompositePixels)
+	{
+	    /* need to copy XRGB to ARGB. */
+
+	    void *src[2], *dest[2];	/* NB: this src shadows the outer FbBits *src */
+	    unsigned int src_rowbytes[2], dest_rowbytes[2];	/* NOTE(review): unsigned, but strides are negated when upsidedown - confirm the imp copes */
+            unsigned int fn;
+
+	    src[0] = (char *) srcLine + (srcX >> 3);
+	    src[1] = NULL;
+	    src_rowbytes[0] = srcStride * sizeof(*srcLine);	/* not sizeof(*src): in this scope that is sizeof(void *) */
+	    src_rowbytes[1] = 0;
+
+	    dest[0] = (char *) dstLine + (dstX >> 3);
+	    dest[1] = dest[0];
+	    dest_rowbytes[0] = dstStride * sizeof(*dst);
+	    dest_rowbytes[1] = dest_rowbytes[0];
+
+	    fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
+                                       RL_DEPTH_NIL, RL_DEPTH_ARGB8888);
+
+            if (SCREENREC(pDstScreen)->imp->CompositePixels(
+                                nmiddle, height,
+                                fn, src, src_rowbytes,
+                                NULL, 0, dest, dest_rowbytes) == Success)
+            {
+                return;
+            }	/* any other result falls through to the software blit below */
+	}
+    }
+    /* End of the rootless acceleration code */
+
+    if (reverse)	/* point one word past the last word touched; the loops pre-decrement */
+    {
+	srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
+	dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
+	srcX = (srcX + width - 1) & FB_MASK;
+	dstX = (dstX + width - 1) & FB_MASK;
+    }
+    else	/* point at the first word touched and keep only the in-word bit offset */
+    {
+	srcLine += srcX >> FB_SHIFT;
+	dstLine += dstX >> FB_SHIFT;
+	srcX &= FB_MASK;
+	dstX &= FB_MASK;
+    }
+    if (srcX == dstX)	/* same in-word bit offset: words are copied without shifting */
+    {
+	while (height--)
+	{
+	    src = srcLine;
+	    srcLine += srcStride;
+	    dst = dstLine;
+	    dstLine += dstStride;
+	    if (reverse)
+	    {
+		if (endmask)	/* right-to-left: the partial end word comes first */
+		{
+		    bits = *--src;
+		    --dst;
+		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+		}
+		n = nmiddle;
+		if (destInvarient)
+		{
+		    while (n--)
+			*--dst = FbDoDestInvarientMergeRop(*--src);
+		}
+		else
+		{
+		    while (n--)
+		    {
+			bits = *--src;
+			--dst;
+			*dst = FbDoMergeRop (bits, *dst);
+		    }
+		}
+		if (startmask)
+		{
+		    bits = *--src;
+		    --dst;
+		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
+		}
+	    }
+	    else
+	    {
+		if (startmask)	/* left-to-right: the partial start word comes first */
+		{
+		    bits = *src++;
+		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
+		    dst++;
+		}
+		n = nmiddle;
+		if (destInvarient)
+		{
+#if 0
+		    /*
+		     * This provides some speedup on screen->screen blts
+		     * over the PCI bus, usually about 10%.  But fb
+		     * isn't usually used for this operation...
+		     */
+		    if (_ca2 + 1 == 0 && _cx2 == 0)
+		    {
+			FbBits	t1, t2, t3, t4;
+			while (n >= 4)
+			{
+			    t1 = *src++;
+			    t2 = *src++;
+			    t3 = *src++;
+			    t4 = *src++;
+			    *dst++ = t1;
+			    *dst++ = t2;
+			    *dst++ = t3;
+			    *dst++ = t4;
+			    n -= 4;
+			}
+		    }
+#endif
+		    while (n--)
+			*dst++ = FbDoDestInvarientMergeRop(*src++);
+		}
+		else
+		{
+		    while (n--)
+		    {
+			bits = *src++;
+			*dst = FbDoMergeRop (bits, *dst);
+			dst++;
+		    }
+		}
+		if (endmask)
+		{
+		    bits = *src;
+		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+		}
+	    }
+	}
+    }
+    else	/* offsets differ: each destination word is assembled from two source words */
+    {
+	if (srcX > dstX)
+	{
+	    leftShift = srcX - dstX;
+	    rightShift = FB_UNIT - leftShift;
+	}
+	else
+	{
+	    rightShift = dstX - srcX;
+	    leftShift = FB_UNIT - rightShift;
+	}
+	while (height--)
+	{
+	    src = srcLine;
+	    srcLine += srcStride;
+	    dst = dstLine;
+	    dstLine += dstStride;
+	    
+	    bits1 = 0;	/* bits1 carries the previously fetched source word between steps */
+	    if (reverse)
+	    {
+		if (srcX < dstX)
+		    bits1 = *--src;
+		if (endmask)
+		{
+		    bits = FbScrRight(bits1, rightShift);
+		    if (FbScrRight(endmask, leftShift))	/* fetch another word only if the mask needs its bits */
+		    {
+			bits1 = *--src;
+			bits |= FbScrLeft(bits1, leftShift);
+		    }
+		    --dst;
+		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
+		}
+		n = nmiddle;
+		if (destInvarient)
+		{
+		    while (n--)
+		    {
+			bits = FbScrRight(bits1, rightShift);
+			bits1 = *--src;
+			bits |= FbScrLeft(bits1, leftShift);
+			--dst;
+			*dst = FbDoDestInvarientMergeRop(bits);
+		    }
+		}
+		else
+		{
+		    while (n--)
+		    {
+			bits = FbScrRight(bits1, rightShift);
+			bits1 = *--src;
+			bits |= FbScrLeft(bits1, leftShift);
+			--dst;
+			*dst = FbDoMergeRop(bits, *dst);
+		    }
+		}
+		if (startmask)
+		{
+		    bits = FbScrRight(bits1, rightShift);
+		    if (FbScrRight(startmask, leftShift))
+		    {
+			bits1 = *--src;
+			bits |= FbScrLeft(bits1, leftShift);
+		    }
+		    --dst;
+		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
+		}
+	    }
+	    else
+	    {
+		if (srcX > dstX)
+		    bits1 = *src++;
+		if (startmask)
+		{
+		    bits = FbScrLeft(bits1, leftShift);
+		    bits1 = *src++;
+		    bits |= FbScrRight(bits1, rightShift);
+		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
+		    dst++;
+		}
+		n = nmiddle;
+		if (destInvarient)
+		{
+		    while (n--)
+		    {
+			bits = FbScrLeft(bits1, leftShift);
+			bits1 = *src++;
+			bits |= FbScrRight(bits1, rightShift);
+			*dst = FbDoDestInvarientMergeRop(bits);
+			dst++;
+		    }
+		}
+		else
+		{
+		    while (n--)
+		    {
+			bits = FbScrLeft(bits1, leftShift);
+			bits1 = *src++;
+			bits |= FbScrRight(bits1, rightShift);
+			*dst = FbDoMergeRop(bits, *dst);
+			dst++;
+		    }
+		}
+		if (endmask)
+		{
+		    bits = FbScrLeft(bits1, leftShift);
+		    if (FbScrLeft(endmask, rightShift))	/* fetch another word only if the mask needs its bits */
+		    {
+			bits1 = *src;
+			bits |= FbScrRight(bits1, rightShift);
+		    }
+		    FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
+		}
+	    }
+	}
+    }
+}