From 020ef045e0df09bc2f664d8d6e4b6600da53b41a Mon Sep 17 00:00:00 2001
From: Ulrich Sibiller
Date: Tue, 18 Dec 2018 21:32:37 +0100
Subject: fb: Fix memcpy abuse

Fixes ArcticaProject/nx-libs#750

Backport of this commit:

  commit e32cc0b4c85c78cd8743a6e1680dcc79054b57ce
  Author: Adam Jackson
  Date: Thu Apr 21 16:37:11 2011 -0400

    fb: Fix memcpy abuse

    The memcpy fast path implicitly assumes that the copy walks
    left-to-right. That's not something memcpy guarantees, and newer glibc
    on some processors will indeed break that assumption. Since we walk a
    line at a time, check the source and destination against the width of
    the blit to determine whether we can be sloppy enough to allow memcpy.
    (Having done this, we can remove the check for !reverse as well.)

    On an Intel Core i7-2630QM with an NVIDIA GeForce GTX 460M running in
    NoAccel, the broken code and various fixes for -copywinwin{10,100,500}
    gives (edited to fit in 80 columns):

    1: Disable the fastpath entirely
    2: Replace memcpy with memmove
    3: This fix
    4: The code before this fix

      1             2                 3                 4          Operation
    ------   ---------------   ---------------   ---------------   ------------
    258000    269000 ( 1.04)    544000 ( 2.11)    552000 ( 2.14)   Copy 10x10
     21300     23000 ( 1.08)     43700 ( 2.05)     47100 ( 2.21)   Copy 100x100
       960       962 ( 1.00)      1990 ( 2.09)      1990 ( 2.07)   Copy 500x500

    So it's a modest performance hit, but correctness demands it, and it's
    probably worth keeping the 2x speedup from having the fast path in the
    first place.
Signed-off-by: Adam Jackson Signed-off-by: Keith Packard --- nx-X11/programs/Xserver/fb/fbblt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'nx-X11/programs') diff --git a/nx-X11/programs/Xserver/fb/fbblt.c b/nx-X11/programs/Xserver/fb/fbblt.c index e820a85eb..acec23bf2 100644 --- a/nx-X11/programs/Xserver/fb/fbblt.c +++ b/nx-X11/programs/Xserver/fb/fbblt.c @@ -67,6 +67,7 @@ fbBlt (FbBits *srcLine, int n, nmiddle; Bool destInvarient; int startbyte, endbyte; + int careful; FbDeclareMergeRop (); #ifdef FB_24BIT @@ -78,7 +79,11 @@ fbBlt (FbBits *srcLine, } #endif - if (alu == GXcopy && pm == FB_ALLONES && !reverse && + careful = !((srcLine < dstLine && srcLine + width * (bpp>>3) > dstLine) || + (dstLine < srcLine && dstLine + width * (bpp>>3) > srcLine)) || + (bpp & 7); + + if (alu == GXcopy && pm == FB_ALLONES && !careful && !(srcX & 7) && !(dstX & 7) && !(width & 7)) { int i; CARD8 *src = (CARD8 *) srcLine; -- cgit v1.2.3 From 034228d75ba1be6b035bdd387c183f14379abdc4 Mon Sep 17 00:00:00 2001 From: Ulrich Sibiller Date: Tue, 18 Dec 2018 21:39:04 +0100 Subject: fb: fix fast-path blt detection Backport of this commit: commit a2880699e8f1f576e1a48ebf25e8982463323f84 Author: Keith Packard Date: Tue Mar 25 08:21:16 2014 -0700 fb: fix fast-path blt detection The width parameter is used to disable the blit fast-path (memcpy) when source and destination rows overlap in memory. This check was added in [0]. Unfortunately, the calculation to determine if source and destination lines overlapped was incorrect: (1) it converts width from pixels to bytes, but width is actually in bits, not pixels. (2) it adds this byte offset to dst/srcLine, which implicitly converts the offset from bytes to sizeof(FbBits). Fix both of these by converting addresses to byte pointers and width to bytes and doing comparisons on the resulting byte address. 
For example:
  A 32-bpp 1366 pixel-wide row will have
    width = 1366 * 32 = 43712 bits
    bpp = 32
    (bpp >> 3) = 4
    width * (bpp >> 3) = 174848 FbBits
    (FbBits *)width => 699392 bytes

So, "careful" was true if the destination line was within 699392 bytes,
instead of just within its 1366 * 4 = 5464 byte row.

This bug causes us to take the slow path for large non-overlapping rows
that are "close" in memory. As a data point, XGetImage(1366x768) on my
ARM chromebook was taking ~140 ms, but with this fixed, it now takes
about 60 ms.
  XGetImage() -> exaGetImage() -> fbGetImage -> fbBlt()

[0] commit e32cc0b4c85c78cd8743a6e1680dcc79054b57ce
    Author: Adam Jackson
    Date: Thu Apr 21 16:37:11 2011 -0400

      fb: Fix memcpy abuse

      The memcpy fast path implicitly assumes that the copy walks
      left-to-right. That's not something memcpy guarantees, and newer glibc
      on some processors will indeed break that assumption. Since we walk a
      line at a time, check the source and destination against the width of
      the blit to determine whether we can be sloppy enough to allow memcpy.
      (Having done this, we can remove the check for !reverse as well.)

v3: Convert to byte units

This first checks to make sure the blt is byte aligned, converts all
of the data to byte units and then compares for byte address range
overlap between source and dest.
Signed-off-by: Keith Packard Reviewed-by: Daniel Kurtz --- nx-X11/programs/Xserver/fb/fbblt.c | 61 +++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'nx-X11/programs') diff --git a/nx-X11/programs/Xserver/fb/fbblt.c b/nx-X11/programs/Xserver/fb/fbblt.c index acec23bf2..6241af8eb 100644 --- a/nx-X11/programs/Xserver/fb/fbblt.c +++ b/nx-X11/programs/Xserver/fb/fbblt.c @@ -67,9 +67,42 @@ fbBlt (FbBits *srcLine, int n, nmiddle; Bool destInvarient; int startbyte, endbyte; - int careful; + FbDeclareMergeRop (); + if (alu == GXcopy && pm == FB_ALLONES && + !(srcX & 7) && !(dstX & 7) && !(width & 7)) + { + CARD8 *src_byte = (CARD8 *) srcLine + (srcX >> 3); + CARD8 *dst_byte = (CARD8 *) dstLine + (dstX >> 3); + FbStride src_byte_stride = srcStride << (FB_SHIFT - 3); + FbStride dst_byte_stride = dstStride << (FB_SHIFT - 3); + int width_byte = (width >> 3); + + /* Make sure there's no overlap; we can't use memcpy in that + * case as it's not well defined, so fall through to the + * general code + */ + if (src_byte + width_byte <= dst_byte || + dst_byte + width_byte <= src_byte) + { + int i; + + if (!upsidedown) + for (i = 0; i < height; i++) + memcpy(dst_byte + i * dst_byte_stride, + src_byte + i * src_byte_stride, + width_byte); + else + for (i = height - 1; i >= 0; i--) + memcpy(dst_byte + i * dst_byte_stride, + src_byte + i * src_byte_stride, + width_byte); + + return; + } + } + #ifdef FB_24BIT if (bpp == 24 && !FbCheck24Pix (pm)) { @@ -79,32 +112,6 @@ fbBlt (FbBits *srcLine, } #endif - careful = !((srcLine < dstLine && srcLine + width * (bpp>>3) > dstLine) || - (dstLine < srcLine && dstLine + width * (bpp>>3) > srcLine)) || - (bpp & 7); - - if (alu == GXcopy && pm == FB_ALLONES && !careful && - !(srcX & 7) && !(dstX & 7) && !(width & 7)) { - int i; - CARD8 *src = (CARD8 *) srcLine; - CARD8 *dst = (CARD8 *) dstLine; - - srcStride *= sizeof(FbBits); - dstStride *= sizeof(FbBits); - width >>= 3; - src += (srcX >> 3); - dst 
+= (dstX >> 3); - - if (!upsidedown) - for (i = 0; i < height; i++) - memcpy(dst + i * dstStride, src + i * srcStride, width); - else - for (i = height - 1; i >= 0; i--) - memcpy(dst + i * dstStride, src + i * srcStride, width); - - return; - } - FbInitializeMergeRop(alu, pm); destInvarient = FbDestInvarientMergeRop(); if (upsidedown) -- cgit v1.2.3