diff options
author | marha <marha@users.sourceforge.net> | 2013-11-08 11:09:17 +0100 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2013-11-08 11:09:17 +0100 |
commit | 401eb04e4dfb179291befb19d74e2e3148c4e268 (patch) | |
tree | bb9056b67a7bdf37cba96fecc69ce81b1809fb03 /mesalib/src/mesa/main/streaming-load-memcpy.c | |
parent | f7050e0ff2d1dd147ff5ef45f8ff7d8d7833db48 (diff) | |
download | vcxsrv-401eb04e4dfb179291befb19d74e2e3148c4e268.tar.gz vcxsrv-401eb04e4dfb179291befb19d74e2e3148c4e268.tar.bz2 vcxsrv-401eb04e4dfb179291befb19d74e2e3148c4e268.zip |
libxtrans libxcb xcb-proto mesa git update 8 nov 2013
libxcb commit e8663a935890ff366f49e356211049dfd0d9756a
libxcb/xcb-proto commit 29beba6bf02bda86a5b163ace63e1d0a4d3eee5b
libxtrans commit 0153d1670e4a1883e1bb6dd971435d6268eac5ba
mesa commit 035cce83f7b3d9a037c9e7cc17a212d6cf7e927f
Diffstat (limited to 'mesalib/src/mesa/main/streaming-load-memcpy.c')
-rw-r--r-- | mesalib/src/mesa/main/streaming-load-memcpy.c | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/mesalib/src/mesa/main/streaming-load-memcpy.c b/mesalib/src/mesa/main/streaming-load-memcpy.c new file mode 100644 index 000000000..d7147afdc --- /dev/null +++ b/mesalib/src/mesa/main/streaming-load-memcpy.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Matt Turner <mattst88@gmail.com> + * + */ + +#include "main/macros.h" +#include "main/streaming-load-memcpy.h" +#include <smmintrin.h> + +/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming + * read performance from uncached memory. + */ +void +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len) +{ + char *restrict d = dst; + char *restrict s = src; + + /* If dst and src are not co-aligned, fallback to memcpy(). */ + if (((uintptr_t)d & 15) != ((uintptr_t)s & 15)) { + memcpy(d, s, len); + return; + } + + /* memcpy() the misaligned header. At the end of this if block, <d> and <s> + * are aligned to a 16-byte boundary or <len> == 0. + */ + if ((uintptr_t)d & 15) { + uintptr_t bytes_before_alignment_boundary = 16 - ((uintptr_t)d & 15); + assert(bytes_before_alignment_boundary < 16); + + memcpy(d, s, MIN2(bytes_before_alignment_boundary, len)); + + d = (char *)ALIGN((uintptr_t)d, 16); + s = (char *)ALIGN((uintptr_t)s, 16); + len -= MIN2(bytes_before_alignment_boundary, len); + } + + while (len >= 64) { + __m128i *dst_cacheline = (__m128i *)d; + __m128i *src_cacheline = (__m128i *)s; + + __m128i temp1 = _mm_stream_load_si128(src_cacheline + 0); + __m128i temp2 = _mm_stream_load_si128(src_cacheline + 1); + __m128i temp3 = _mm_stream_load_si128(src_cacheline + 2); + __m128i temp4 = _mm_stream_load_si128(src_cacheline + 3); + + _mm_store_si128(dst_cacheline + 0, temp1); + _mm_store_si128(dst_cacheline + 1, temp2); + _mm_store_si128(dst_cacheline + 2, temp3); + _mm_store_si128(dst_cacheline + 3, temp4); + + d += 64; + s += 64; + len -= 64; + } + + /* memcpy() the tail. */ + if (len) { + memcpy(d, s, len); + } +} |