diff options
author | marha <marha@users.sourceforge.net> | 2012-02-27 07:40:19 +0100 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2012-02-27 07:40:19 +0100 |
commit | b3e4046e8fdcd369e61c237f8690772bf1d8c64f (patch) | |
tree | 5ff8cf47771eede4f2ea6e7c5980fd584f2b4992 | |
parent | b436c1b7e3adefe7068fb030b83ac12a826251d3 (diff) | |
parent | c4f44c07c6662d1ce08603945ccc4fa5afaa742a (diff) | |
download | vcxsrv-b3e4046e8fdcd369e61c237f8690772bf1d8c64f.tar.gz vcxsrv-b3e4046e8fdcd369e61c237f8690772bf1d8c64f.tar.bz2 vcxsrv-b3e4046e8fdcd369e61c237f8690772bf1d8c64f.zip |
Merge remote-tracking branch 'origin/released'
Conflicts:
pixman/pixman/pixman-mmx.c
50 files changed, 2001 insertions, 550 deletions
diff --git a/fontconfig/fc-lang/Makefile.am b/fontconfig/fc-lang/Makefile.am index 8f53688a7..1f662c193 100644 --- a/fontconfig/fc-lang/Makefile.am +++ b/fontconfig/fc-lang/Makefile.am @@ -302,5 +302,7 @@ ORTH = \ lah.orth \ nqo.orth \ brx.orth \ - sat.orth + sat.orth \ + doi.orth \ + mni.orth # ^-------------- Add new orth files here diff --git a/fontconfig/fc-lang/doi.orth b/fontconfig/fc-lang/doi.orth new file mode 100644 index 000000000..d4a274af9 --- /dev/null +++ b/fontconfig/fc-lang/doi.orth @@ -0,0 +1,40 @@ +# +# fontconfig/fc-lang/doi.orth +# +# Copyright © 2012 Pravin Satpute <psatpute@redhat.com> +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of the author(s) not be used in +# advertising or publicity pertaining to distribution of the software without +# specific, written prior permission. The author(s) make(s) no +# representations about the suitability of this software for any purpose. It +# is provided "as is" without express or implied warranty. +# +# THE AUTHOR(S) DISCLAIM(S) ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR +# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +# Dogri (doi) +# +# Source: Enhanced inscript: http://malayalam.kerala.gov.in/images/8/80/Qwerty_enhancedinscriptkeyboardlayout.pdf Page No. 
58 +# Encircled these characters in Unicode chart: http://pravins.fedorapeople.org/Dogri-characters.pdf +# documents +0902-0903 +0905-090c +090f-0910 +0913-0928 +092a-0930 +0932 +0935-0939 +093c-0944 +0947-0948 +094b-094d +0950-0952 +095b-096f diff --git a/fontconfig/fc-lang/mni.orth b/fontconfig/fc-lang/mni.orth new file mode 100644 index 000000000..4388269ca --- /dev/null +++ b/fontconfig/fc-lang/mni.orth @@ -0,0 +1,35 @@ +# +# fontconfig/fc-lang/mni.orth +# +# Copyright © 2012 Pravin Satpute <psatpute@redhat.com> +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of the author(s) not be used in +# advertising or publicity pertaining to distribution of the software without +# specific, written prior permission. The author(s) make(s) no +# representations about the suitability of this software for any purpose. It +# is provided "as is" without express or implied warranty. +# +# THE AUTHOR(S) DISCLAIM(S) ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR +# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. 
+# +# Maniputi (mni) +# +# Source: Script grammer: http://tdil-dc.in/tdildcMain/articles/283709Script_Grammar_for_Manipuri.pdf 6th page +# Characters are encirled in Unicode chart http://pravins.fedorapeople.org/Manipuri-characters.pdf +# documents +include bn.orth +0964 +- 09c4 +09bd +09ce +09e6-09ef +09f1 diff --git a/mesalib/configure.ac b/mesalib/configure.ac index 846b62300..0caa1b1bd 100644 --- a/mesalib/configure.ac +++ b/mesalib/configure.ac @@ -1582,7 +1582,7 @@ fi if test "x$enable_xvmc" = xyes; then PKG_CHECK_MODULES([XVMC], [xvmc >= 1.0.6]) - GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xorg/xvmc" + GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xvmc" HAVE_ST_XVMC="yes" fi @@ -1932,6 +1932,7 @@ CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS" dnl Substitute the config AC_CONFIG_FILES([configs/autoconf + src/gallium/drivers/r300/Makefile src/gbm/Makefile src/gbm/main/gbm.pc src/egl/wayland/Makefile diff --git a/mesalib/docs/relnotes-8.0.1.html b/mesalib/docs/relnotes-8.0.1.html new file mode 100644 index 000000000..29a314c06 --- /dev/null +++ b/mesalib/docs/relnotes-8.0.1.html @@ -0,0 +1,153 @@ +<HTML> + +<head> +<TITLE>Mesa Release Notes</TITLE> +<link rel="stylesheet" type="text/css" href="mesa.css"> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +</head> + +<BODY> + +<body bgcolor="#eeeeee"> + +<H1>Mesa 8.0.1 Release Notes / February 16, 2012</H1> + +<p> +Mesa 8.0.1 is a bug fix release which fixes bugs found since the 8.0 release. +</p> +<p> +Mesa 8.0 implements the OpenGL 3.0 API, but the version reported by +glGetString(GL_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.0. +</p> +<p> +See the <a href="install.html">Compiling/Installing page</a> for prerequisites +for DRI hardware acceleration. 
+</p> + + +<h2>MD5 checksums</h2> +<pre> +4855c2d93bd2ebd43f384bdcc92c9a27 MesaLib-8.0.1.tar.gz +24eeebf66971809d8f40775a379b36c9 MesaLib-8.0.1.tar.bz2 +54e745d14dac5717f7f65b4e2d5c1df2 MesaLib-8.0.1.zip +</pre> + +<h2>New features</h2> +<p>None.</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28924">Bug 28924</a> - [ILK] piglit tex-border-1 fail</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40864">Bug 40864</a> - [bisected pineview] oglc pxconv-gettex(basic.allCases) fails on pineview</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=43327">Bug 43327</a> - [bisected SNB] HiZ make many oglc cases regressed</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44333">Bug 44333</a> - [bisected] Color distortion with xbmc mediaplayer</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44927">Bug 44927</a> - [SNB IVB regression] gl-117 abort when click</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45221">Bug 45221</a> - [bisected IVB] glean/fbo regression in stencil-only case</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45877">Bug 45877</a> - main/image.c:1597: _mesa_convert_colors: Assertion `dstType == 0x1406' failed.</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45578">Bug 45578</a> - main/image.c:1659: _mesa_convert_colors: Assertion `dstType == 0x1403' failed.</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45872">Bug 45872</a> - [bisected PNV] oglc mustpass(basic.stipple) regressed on pineview</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45876">Bug 45876</a> - [PNV]oglc texenv(basic.allCases) regressed on pineview</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45917">Bug 45917</a> - [PNV] Regression in Piglit test general/two-sided-lighting-separate-specular</li> + +<li><a 
href="https://bugs.freedesktop.org/show_bug.cgi?id=45943">Bug 45943</a> - [r300g] r300_emit.c:365:r300_emit_aa_state: Assertion `(aa-d>dest)->cs_buf' failed.</li> + +<!-- <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=">Bug </a> - </li> --> + +</ul> + + +<h2>Changes</h2> +<p>The full set of changes can be viewed by using the following GIT command:</p> + +<pre> + git log mesa-8.0..mesa-8.0.1 +</pre> + +<p>Alex Deucher (2): +<ul> + <li>r600g: fix tex tile_type offset for cayman</li> + <li>r600g: 128 bit formats require tile_type = 1 on cayman</li> +</ul></p> + +<p>Anuj Phogat (2): +<ul> + <li>meta: Add pixel store/pack operations in decompress_texture_image</li> + <li>meta: Avoid FBO resizing/reallocating in decompress_texture_image</li> +</ul></p> + +<p>Brian Paul (6): +<ul> + <li>docs: add news item for 8.0 release</li> + <li>docs: update info about supported systems, GPUs, APIs</li> + <li>docs: add VMware link</li> + <li>docs: remove link to the GLSL compiler page</li> + <li>mesa: fix proxy texture target initialization</li> + <li>swrast: fix span color type selection</li> +</ul></p> + +<p>Chad Versace (2): +<ul> + <li>i965: Rewrite the HiZ op</li> + <li>i965: Remove file i965/junk, accidentally added in 7b36c68</li> +</ul></p> + +<p>Dave Airlie (1): +<ul> + <li>st/mesa: only resolve if number of samples is > 1</li> +</ul></p> + +<p>Eric Anholt (3): +<ul> + <li>i965: Fix HiZ change compiler warning.</li> + <li>i965: Report the failure message when failing to compile the fragment shader.</li> + <li>i965/fs: Enable register spilling on gen7 too.</li> +</ul></p> + +<p>Ian Romanick (4): +<ul> + <li>docs: Add 8.0 MD5 checksums</li> + <li>glapi: Include GLES2 headers for ES2 extension functions</li> + <li>swrast: Only avoid empty _TexEnvPrograms</li> + <li>mesa: Bump version number to 8.0.1</li> +</ul></p> + +<p>Kenneth Graunke (4): +<ul> + <li>i965: Fix border color on Ironlake.</li> + <li>i965/fs: Add a new fs_inst::regs_written function.</li> + 
<li>i965/fs: Take # of components into account in try_rewrite_rhs_to_dst.</li> + <li>i965: Emit Ivybridge VS workaround flushes.</li> +</ul></p> + +<p>Mathias Fröhlich (1): +<ul> + <li>state_stracker: Fix access to uninitialized memory.</li> +</ul></p> + +<p>Paul Berry (1): +<ul> + <li>i915: Fix type of "specoffset" variable.</li> +</ul></p> + +<p>Simon Farnsworth (1): +<ul> + <li>r600g: Use a fake reloc to sleep for fences</li> +</ul></p> + +</body> +</html> diff --git a/mesalib/src/mapi/glapi/SConscript b/mesalib/src/mapi/glapi/SConscript index 9882806ce..4097a7fe1 100644 --- a/mesalib/src/mapi/glapi/SConscript +++ b/mesalib/src/mapi/glapi/SConscript @@ -46,7 +46,7 @@ for s in mapi_sources: # # Assembly sources # -if env['gcc'] and env['platform'] != 'windows': +if env['gcc'] and env['platform'] not in ('darwin', 'windows'): if env['machine'] == 'x86': env.Append(CPPDEFINES = [ 'USE_X86_ASM', diff --git a/mesalib/src/mesa/SConscript b/mesalib/src/mesa/SConscript index e9b1f6aaf..10a04689d 100644 --- a/mesalib/src/mesa/SConscript +++ b/mesalib/src/mesa/SConscript @@ -384,7 +384,7 @@ if env['gles']: # # Assembly sources # -if env['gcc'] and env['platform'] != 'windows': +if env['gcc'] and env['platform'] not in ('darwin', 'windows'): if env['machine'] == 'x86': env.Append(CPPDEFINES = [ 'USE_X86_ASM', diff --git a/mesalib/src/mesa/drivers/dri/swrast/swrast.c b/mesalib/src/mesa/drivers/dri/swrast/swrast.c index ff26e2769..bfac47c08 100644 --- a/mesalib/src/mesa/drivers/dri/swrast/swrast.c +++ b/mesalib/src/mesa/drivers/dri/swrast/swrast.c @@ -474,7 +474,7 @@ dri_create_buffer(__DRIscreen * sPriv, dPriv->driverPrivate = drawable; drawable->dPriv = dPriv; - drawable->row = malloc(MAX_WIDTH * 4); + drawable->row = malloc(SWRAST_MAX_WIDTH * 4); if (drawable->row == NULL) goto drawable_fail; diff --git a/mesalib/src/mesa/drivers/windows/gdi/wgl.c b/mesalib/src/mesa/drivers/windows/gdi/wgl.c index 61850c26f..d99473bb3 100644 --- 
a/mesalib/src/mesa/drivers/windows/gdi/wgl.c +++ b/mesalib/src/mesa/drivers/windows/gdi/wgl.c @@ -33,6 +33,7 @@ #include "main/config.h" #include "glapi/glapi.h" +#include "swrast/swrast.h" #include "GL/wmesa.h" /* protos for wmesa* functions */ /* diff --git a/mesalib/src/mesa/main/config.h b/mesalib/src/mesa/main/config.h index 7b7740ebe..8bf741f99 100644 --- a/mesalib/src/mesa/main/config.h +++ b/mesalib/src/mesa/main/config.h @@ -46,9 +46,6 @@ /** Maximum texture matrix stack depth */ #define MAX_TEXTURE_STACK_DEPTH 10 -/** Maximum color matrix stack depth */ -#define MAX_COLOR_STACK_DEPTH 4 - /** Maximum attribute stack depth */ #define MAX_ATTRIB_STACK_DEPTH 16 @@ -74,11 +71,7 @@ #define MAX_AUX_BUFFERS 1 /** Maximum order (degree) of curves */ -#ifdef AMIGA -# define MAX_EVAL_ORDER 12 -#else -# define MAX_EVAL_ORDER 30 -#endif +#define MAX_EVAL_ORDER 30 /** Maximum Name stack depth */ #define MAX_NAME_STACK_DEPTH 64 @@ -115,7 +108,9 @@ /** Maximum rectangular texture size - GL_NV_texture_rectangle */ #define MAX_TEXTURE_RECT_SIZE 16384 -/** Maximum number of layers in a 1D or 2D array texture - GL_MESA_texture_array */ +/** + * Maximum number of layers in a 1D or 2D array texture - GL_MESA_texture_array + */ #define MAX_ARRAY_TEXTURE_LAYERS 64 /** @@ -141,34 +136,9 @@ #define MAX_TEXTURE_UNITS ((MAX_TEXTURE_COORD_UNITS > MAX_TEXTURE_IMAGE_UNITS) ? MAX_TEXTURE_COORD_UNITS : MAX_TEXTURE_IMAGE_UNITS) -/** - * Maximum viewport/image width. Must accomodate all texture sizes too. 
- */ - -#ifndef MAX_WIDTH -# define MAX_WIDTH 16384 -#endif -/** Maximum viewport/image height */ -#ifndef MAX_HEIGHT -# define MAX_HEIGHT 16384 -#endif - -/* XXX: hack to prevent stack overflow on windows until all temporary arrays - * [MAX_WIDTH] are allocated from the heap */ -#ifdef WIN32 -#undef MAX_TEXTURE_LEVELS -#undef MAX_3D_TEXTURE_LEVELS -#undef MAX_CUBE_TEXTURE_LEVELS -#undef MAX_TEXTURE_RECT_SIZE -#undef MAX_WIDTH -#undef MAX_HEIGHT -#define MAX_TEXTURE_LEVELS 13 -#define MAX_3D_TEXTURE_LEVELS 9 -#define MAX_CUBE_TEXTURE_LEVELS 13 -#define MAX_TEXTURE_RECT_SIZE 4096 -#define MAX_WIDTH 4096 -#define MAX_HEIGHT 4096 -#endif +/** Maximum viewport size */ +#define MAX_VIEWPORT_WIDTH 16384 +#define MAX_VIEWPORT_HEIGHT 16384 /** Maxmimum size for CVA. May be overridden by the drivers. */ #define MAX_ARRAY_LOCK_SIZE 3000 @@ -176,14 +146,6 @@ /** Subpixel precision for antialiasing, window coordinate snapping */ #define SUB_PIXEL_BITS 4 -/** Size of histogram tables */ -#define HISTOGRAM_TABLE_SIZE 256 - -/** Max convolution filter width */ -#define MAX_CONVOLUTION_WIDTH 9 -/** Max convolution filter height */ -#define MAX_CONVOLUTION_HEIGHT 9 - /** For GL_ARB_texture_compression */ #define MAX_COMPRESSED_TEXTURE_FORMATS 25 @@ -280,6 +242,7 @@ /** For GL_EXT_framebuffer_object */ /*@{*/ #define MAX_COLOR_ATTACHMENTS 8 +#define MAX_RENDERBUFFER_SIZE 16384 /*@}*/ /** For GL_ATI_envmap_bump - support bump mapping on first 8 units */ @@ -299,46 +262,6 @@ /*@}*/ -/** - * \name Mesa-specific parameters - */ -/*@{*/ - - -/** - * If non-zero use GLdouble for walking triangle edges, for better accuracy. - */ -#define TRIANGLE_WALK_DOUBLE 0 - - -/** - * Bits per depth buffer value (max is 32). 
- */ -#ifndef DEFAULT_SOFTWARE_DEPTH_BITS -#define DEFAULT_SOFTWARE_DEPTH_BITS 16 -#endif -/** Depth buffer data type */ -#if DEFAULT_SOFTWARE_DEPTH_BITS <= 16 -#define DEFAULT_SOFTWARE_DEPTH_TYPE GLushort -#else -#define DEFAULT_SOFTWARE_DEPTH_TYPE GLuint -#endif - - -/** - * Bits per stencil value: 8 - */ -#define STENCIL_BITS 8 - - -/** - * For swrast, bits per color channel: 8, 16 or 32 - */ -#ifndef CHAN_BITS -#define CHAN_BITS 8 -#endif - - /* * Color channel component order * diff --git a/mesalib/src/mesa/main/context.c b/mesalib/src/mesa/main/context.c index 43e7438ad..8d48904f8 100644 --- a/mesalib/src/mesa/main/context.c +++ b/mesalib/src/mesa/main/context.c @@ -268,7 +268,7 @@ _mesa_initialize_visual( struct gl_config *vis, if (depthBits < 0 || depthBits > 32) { return GL_FALSE; } - if (stencilBits < 0 || stencilBits > STENCIL_BITS) { + if (stencilBits < 0 || stencilBits > 8) { return GL_FALSE; } assert(accumRedBits >= 0); @@ -586,8 +586,8 @@ _mesa_init_constants(struct gl_context *ctx) ctx->Const.MaxLights = MAX_LIGHTS; ctx->Const.MaxShininess = 128.0; ctx->Const.MaxSpotExponent = 128.0; - ctx->Const.MaxViewportWidth = MAX_WIDTH; - ctx->Const.MaxViewportHeight = MAX_HEIGHT; + ctx->Const.MaxViewportWidth = MAX_VIEWPORT_WIDTH; + ctx->Const.MaxViewportHeight = MAX_VIEWPORT_HEIGHT; #if FEATURE_ARB_vertex_program init_program_limits(GL_VERTEX_PROGRAM_ARB, &ctx->Const.VertexProgram); #endif @@ -608,7 +608,7 @@ _mesa_init_constants(struct gl_context *ctx) #if FEATURE_EXT_framebuffer_object ctx->Const.MaxColorAttachments = MAX_COLOR_ATTACHMENTS; - ctx->Const.MaxRenderbufferSize = MAX_WIDTH; + ctx->Const.MaxRenderbufferSize = MAX_RENDERBUFFER_SIZE; #endif #if FEATURE_ARB_vertex_shader @@ -712,20 +712,15 @@ check_context_limits(struct gl_context *ctx) assert(ctx->Const.MaxCubeTextureLevels <= MAX_CUBE_TEXTURE_LEVELS); assert(ctx->Const.MaxTextureRectSize <= MAX_TEXTURE_RECT_SIZE); - /* make sure largest texture image is <= MAX_WIDTH in size */ - assert((1 << 
(ctx->Const.MaxTextureLevels - 1)) <= MAX_WIDTH); - assert((1 << (ctx->Const.MaxCubeTextureLevels - 1)) <= MAX_WIDTH); - assert((1 << (ctx->Const.Max3DTextureLevels - 1)) <= MAX_WIDTH); - /* Texture level checks */ assert(MAX_TEXTURE_LEVELS >= MAX_3D_TEXTURE_LEVELS); assert(MAX_TEXTURE_LEVELS >= MAX_CUBE_TEXTURE_LEVELS); /* Max texture size should be <= max viewport size (render to texture) */ - assert((1 << (MAX_TEXTURE_LEVELS - 1)) <= MAX_WIDTH); - - assert(ctx->Const.MaxViewportWidth <= MAX_WIDTH); - assert(ctx->Const.MaxViewportHeight <= MAX_WIDTH); + assert((1 << (ctx->Const.MaxTextureLevels - 1)) + <= ctx->Const.MaxViewportWidth); + assert((1 << (ctx->Const.MaxTextureLevels - 1)) + <= ctx->Const.MaxViewportHeight); assert(ctx->Const.MaxDrawBuffers <= MAX_DRAW_BUFFERS); diff --git a/mesalib/src/mesa/main/readpix.c b/mesalib/src/mesa/main/readpix.c index 48708a6eb..0f429ab22 100644 --- a/mesalib/src/mesa/main/readpix.c +++ b/mesalib/src/mesa/main/readpix.c @@ -110,6 +110,7 @@ read_depth_pixels( struct gl_context *ctx, GLint j; GLubyte *dst, *map; int dstStride, stride; + GLfloat *depthValues; if (!rb) return; @@ -119,8 +120,6 @@ read_depth_pixels( struct gl_context *ctx, ASSERT(y >= 0); ASSERT(x + width <= (GLint) rb->Width); ASSERT(y + height <= (GLint) rb->Height); - /* width should never be > MAX_WIDTH since we did clipping earlier */ - ASSERT(width <= MAX_WIDTH); if (fast_read_depth_pixels(ctx, x, y, width, height, type, pixels, packing)) return; @@ -136,16 +135,24 @@ read_depth_pixels( struct gl_context *ctx, return; } - /* General case (slower) */ - for (j = 0; j < height; j++, y++) { - GLfloat depthValues[MAX_WIDTH]; - _mesa_unpack_float_z_row(rb->Format, width, map, depthValues); - _mesa_pack_depth_span(ctx, width, dst, type, depthValues, packing); + depthValues = (GLfloat *) malloc(width * sizeof(GLfloat)); - dst += dstStride; - map += stride; + if (depthValues) { + /* General case (slower) */ + for (j = 0; j < height; j++, y++) { + 
_mesa_unpack_float_z_row(rb->Format, width, map, depthValues); + _mesa_pack_depth_span(ctx, width, dst, type, depthValues, packing); + + dst += dstStride; + map += stride; + } + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels"); } + free(depthValues); + ctx->Driver.UnmapRenderbuffer(ctx, rb); } @@ -163,15 +170,12 @@ read_stencil_pixels( struct gl_context *ctx, struct gl_framebuffer *fb = ctx->ReadBuffer; struct gl_renderbuffer *rb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; GLint j; - GLubyte *map; + GLubyte *map, *stencil; GLint stride; if (!rb) return; - /* width should never be > MAX_WIDTH since we did clipping earlier */ - ASSERT(width <= MAX_WIDTH); - ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT, &map, &stride); if (!map) { @@ -179,19 +183,27 @@ read_stencil_pixels( struct gl_context *ctx, return; } - /* process image row by row */ - for (j = 0; j < height; j++) { - GLvoid *dest; - GLubyte stencil[MAX_WIDTH]; + stencil = (GLubyte *) malloc(width * sizeof(GLubyte)); - _mesa_unpack_ubyte_stencil_row(rb->Format, width, map, stencil); - dest = _mesa_image_address2d(packing, pixels, width, height, - GL_STENCIL_INDEX, type, j, 0); + if (stencil) { + /* process image row by row */ + for (j = 0; j < height; j++) { + GLvoid *dest; - _mesa_pack_stencil_span(ctx, width, type, dest, stencil, packing); + _mesa_unpack_ubyte_stencil_row(rb->Format, width, map, stencil); + dest = _mesa_image_address2d(packing, pixels, width, height, + GL_STENCIL_INDEX, type, j, 0); - map += stride; + _mesa_pack_stencil_span(ctx, width, type, dest, stencil, packing); + + map += stride; + } } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels"); + } + + free(stencil); ctx->Driver.UnmapRenderbuffer(ctx, rb); } @@ -378,7 +390,7 @@ fast_read_depth_stencil_pixels_separate(struct gl_context *ctx, struct gl_framebuffer *fb = ctx->ReadBuffer; struct gl_renderbuffer *depthRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer; struct gl_renderbuffer 
*stencilRb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; - GLubyte *depthMap, *stencilMap; + GLubyte *depthMap, *stencilMap, *stencilVals; int depthStride, stencilStride, i, j; if (_mesa_get_format_datatype(depthRb->Format) != GL_UNSIGNED_NORMALIZED) @@ -399,21 +411,28 @@ fast_read_depth_stencil_pixels_separate(struct gl_context *ctx, return GL_TRUE; /* don't bother trying the slow path */ } - for (j = 0; j < height; j++) { - GLubyte stencilVals[MAX_WIDTH]; + stencilVals = (GLubyte *) malloc(width * sizeof(GLubyte)); - _mesa_unpack_uint_z_row(depthRb->Format, width, depthMap, dst); - _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width, - stencilMap, stencilVals); + if (stencilVals) { + for (j = 0; j < height; j++) { + _mesa_unpack_uint_z_row(depthRb->Format, width, depthMap, dst); + _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width, + stencilMap, stencilVals); - for (i = 0; i < width; i++) { - dst[i] = (dst[i] & 0xffffff00) | stencilVals[i]; - } + for (i = 0; i < width; i++) { + dst[i] = (dst[i] & 0xffffff00) | stencilVals[i]; + } - depthMap += depthStride; - stencilMap += stencilStride; - dst += dstStride / 4; + depthMap += depthStride; + stencilMap += stencilStride; + dst += dstStride / 4; + } } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels"); + } + + free(stencilVals); ctx->Driver.UnmapRenderbuffer(ctx, depthRb); ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); @@ -434,6 +453,9 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, struct gl_renderbuffer *stencilRb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; GLubyte *depthMap, *stencilMap; int depthStride, stencilStride, j; + GLubyte *stencilVals; + GLfloat *depthVals; + /* The depth and stencil buffers might be separate, or a single buffer. * If one buffer, only map it once. 
@@ -460,21 +482,29 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, stencilStride = depthStride; } - for (j = 0; j < height; j++) { - GLubyte stencilVals[MAX_WIDTH]; - GLfloat depthVals[MAX_WIDTH]; + stencilVals = (GLubyte *) malloc(width * sizeof(GLubyte)); + depthVals = (GLfloat *) malloc(width * sizeof(GLfloat)); - _mesa_unpack_float_z_row(depthRb->Format, width, depthMap, depthVals); - _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width, - stencilMap, stencilVals); + if (stencilVals && depthVals) { + for (j = 0; j < height; j++) { + _mesa_unpack_float_z_row(depthRb->Format, width, depthMap, depthVals); + _mesa_unpack_ubyte_stencil_row(stencilRb->Format, width, + stencilMap, stencilVals); - _mesa_pack_depth_stencil_span(ctx, width, type, (GLuint *)dst, - depthVals, stencilVals, packing); + _mesa_pack_depth_stencil_span(ctx, width, type, (GLuint *)dst, + depthVals, stencilVals, packing); - depthMap += depthStride; - stencilMap += stencilStride; - dst += dstStride; + depthMap += depthStride; + stencilMap += stencilStride; + dst += dstStride; + } } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels"); + } + + free(stencilVals); + free(depthVals); ctx->Driver.UnmapRenderbuffer(ctx, depthRb); if (stencilRb != depthRb) { diff --git a/mesalib/src/mesa/main/texstore.c b/mesalib/src/mesa/main/texstore.c index e8190c392..d368b1868 100644 --- a/mesalib/src/mesa/main/texstore.c +++ b/mesalib/src/mesa/main/texstore.c @@ -2797,6 +2797,15 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) } else if (srcFormat == GL_DEPTH_COMPONENT || srcFormat == GL_STENCIL_INDEX) { + GLuint *depth = (GLuint *) malloc(srcWidth * sizeof(GLuint)); + GLubyte *stencil = (GLubyte *) malloc(srcWidth * sizeof(GLubyte)); + + if (!depth || !stencil) { + free(depth); + free(stencil); + return GL_FALSE; + } + /* In case we only upload depth we need to preserve the stencil */ for (img = 0; img < srcDepth; img++) { GLuint *dstRow = (GLuint *) dstSlices[img]; @@ -2806,8 +2815,6 @@ 
_mesa_texstore_z24_s8(TEXSTORE_PARAMS) srcFormat, srcType, img, 0, 0); for (row = 0; row < srcHeight; row++) { - GLuint depth[MAX_WIDTH]; - GLubyte stencil[MAX_WIDTH]; GLint i; GLboolean keepdepth = GL_FALSE, keepstencil = GL_FALSE; @@ -2845,6 +2852,9 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) dstRow += dstRowStride / sizeof(GLuint); } } + + free(depth); + free(stencil); } return GL_TRUE; } @@ -2860,6 +2870,8 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; + GLuint *depth; + GLubyte *stencil; ASSERT(dstFormat == MESA_FORMAT_S8_Z24); ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT || @@ -2868,6 +2880,15 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) ASSERT(srcFormat != GL_DEPTH_STENCIL_EXT || srcType == GL_UNSIGNED_INT_24_8_EXT); + depth = (GLuint *) malloc(srcWidth * sizeof(GLuint)); + stencil = (GLubyte *) malloc(srcWidth * sizeof(GLubyte)); + + if (!depth || !stencil) { + free(depth); + free(stencil); + return GL_FALSE; + } + for (img = 0; img < srcDepth; img++) { GLuint *dstRow = (GLuint *) dstSlices[img]; const GLubyte *src @@ -2876,8 +2897,6 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) srcFormat, srcType, img, 0, 0); for (row = 0; row < srcHeight; row++) { - GLuint depth[MAX_WIDTH]; - GLubyte stencil[MAX_WIDTH]; GLint i; GLboolean keepdepth = GL_FALSE, keepstencil = GL_FALSE; @@ -2916,6 +2935,10 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) dstRow += dstRowStride / sizeof(GLuint); } } + + free(depth); + free(stencil); + return GL_TRUE; } @@ -2944,7 +2967,11 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; - + GLubyte *stencil = (GLubyte *) malloc(srcWidth * sizeof(GLubyte)); + + if (!stencil) + return GL_FALSE; + for (img = 0; img < srcDepth; img++) { GLubyte *dstRow = dstSlices[img]; const GLubyte *src @@ -2953,7 +2980,6 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) srcFormat, srcType, 
img, 0, 0); for (row = 0; row < srcHeight; row++) { - GLubyte stencil[MAX_WIDTH]; GLint i; /* get the 8-bit stencil values */ @@ -2971,6 +2997,7 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) } } + free(stencil); } return GL_TRUE; diff --git a/mesalib/src/mesa/program/prog_execute.h b/mesalib/src/mesa/program/prog_execute.h index 6365b0741..1ae302c32 100644 --- a/mesalib/src/mesa/program/prog_execute.h +++ b/mesalib/src/mesa/program/prog_execute.h @@ -1,87 +1,91 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.0.3
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef PROG_EXECUTE_H
-#define PROG_EXECUTE_H
-
-#include "main/config.h"
-#include "main/mtypes.h"
-
-
-typedef void (*FetchTexelLodFunc)(struct gl_context *ctx, const GLfloat texcoord[4],
- GLfloat lambda, GLuint unit, GLfloat color[4]);
-
-typedef void (*FetchTexelDerivFunc)(struct gl_context *ctx, const GLfloat texcoord[4],
- const GLfloat texdx[4],
- const GLfloat texdy[4],
- GLfloat lodBias,
- GLuint unit, GLfloat color[4]);
-
-
-/**
- * Virtual machine state used during execution of vertex/fragment programs.
- */
-struct gl_program_machine
-{
- const struct gl_program *CurProgram;
-
- /** Fragment Input attributes */
- GLfloat (*Attribs)[MAX_WIDTH][4];
- GLfloat (*DerivX)[4];
- GLfloat (*DerivY)[4];
- GLuint NumDeriv; /**< Max index into DerivX/Y arrays */
- GLuint CurElement; /**< Index into Attribs arrays */
-
- /** Vertex Input attribs */
- GLfloat VertAttribs[VERT_ATTRIB_MAX][4];
-
- GLfloat Temporaries[MAX_PROGRAM_TEMPS][4];
- GLfloat Outputs[MAX_PROGRAM_OUTPUTS][4];
- GLfloat (*EnvParams)[4]; /**< Vertex or Fragment env parameters */
- GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */
- GLint AddressReg[MAX_PROGRAM_ADDRESS_REGS][4];
- GLfloat SystemValues[SYSTEM_VALUE_MAX][4];
-
- const GLubyte *Samplers; /** Array mapping sampler var to tex unit */
-
- GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */
- GLuint StackDepth; /**< Index/ptr to top of CallStack[] */
-
- /** Texture fetch functions */
- FetchTexelLodFunc FetchTexelLod;
- FetchTexelDerivFunc FetchTexelDeriv;
-};
-
-
-extern void
-_mesa_get_program_register(struct gl_context *ctx, gl_register_file file,
- GLuint index, GLfloat val[4]);
-
-extern GLboolean
-_mesa_execute_program(struct gl_context *ctx,
- const struct gl_program *program,
- struct gl_program_machine *machine);
-
-
-#endif /* PROG_EXECUTE_H */
+/* + * Mesa 3-D graphics library + * Version: 7.0.3 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef PROG_EXECUTE_H +#define PROG_EXECUTE_H + +#include "main/config.h" +#include "main/mtypes.h" + + +typedef void (*FetchTexelLodFunc)(struct gl_context *ctx, const GLfloat texcoord[4], + GLfloat lambda, GLuint unit, GLfloat color[4]); + +typedef void (*FetchTexelDerivFunc)(struct gl_context *ctx, const GLfloat texcoord[4], + const GLfloat texdx[4], + const GLfloat texdy[4], + GLfloat lodBias, + GLuint unit, GLfloat color[4]); + + +/** NOTE: This must match SWRAST_MAX_WIDTH */ +#define PROG_MAX_WIDTH 16384 + + +/** + * Virtual machine state used during execution of vertex/fragment programs. 
+ */ +struct gl_program_machine +{ + const struct gl_program *CurProgram; + + /** Fragment Input attributes */ + GLfloat (*Attribs)[PROG_MAX_WIDTH][4]; + GLfloat (*DerivX)[4]; + GLfloat (*DerivY)[4]; + GLuint NumDeriv; /**< Max index into DerivX/Y arrays */ + GLuint CurElement; /**< Index into Attribs arrays */ + + /** Vertex Input attribs */ + GLfloat VertAttribs[VERT_ATTRIB_MAX][4]; + + GLfloat Temporaries[MAX_PROGRAM_TEMPS][4]; + GLfloat Outputs[MAX_PROGRAM_OUTPUTS][4]; + GLfloat (*EnvParams)[4]; /**< Vertex or Fragment env parameters */ + GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */ + GLint AddressReg[MAX_PROGRAM_ADDRESS_REGS][4]; + GLfloat SystemValues[SYSTEM_VALUE_MAX][4]; + + const GLubyte *Samplers; /** Array mapping sampler var to tex unit */ + + GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */ + GLuint StackDepth; /**< Index/ptr to top of CallStack[] */ + + /** Texture fetch functions */ + FetchTexelLodFunc FetchTexelLod; + FetchTexelDerivFunc FetchTexelDeriv; +}; + + +extern void +_mesa_get_program_register(struct gl_context *ctx, gl_register_file file, + GLuint index, GLfloat val[4]); + +extern GLboolean +_mesa_execute_program(struct gl_context *ctx, + const struct gl_program *program, + struct gl_program_machine *machine); + + +#endif /* PROG_EXECUTE_H */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index de4c189a8..18d0e53d4 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -824,9 +824,10 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, enum pipe_transfer_usage usage; struct pipe_transfer *pt; const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; - GLint skipPixels; ubyte *stmap; struct gl_pixelstore_attrib clippedUnpack = *unpack; + GLubyte *sValues; + GLuint *zValues; if (!zoom) { if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height, @@ 
-862,22 +863,19 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels); assert(pixels); - /* if width > MAX_WIDTH, have to process image in chunks */ - skipPixels = 0; - while (skipPixels < width) { - const GLint spanX = skipPixels; - const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); + sValues = (GLubyte *) malloc(width * sizeof(GLubyte)); + zValues = (GLuint *) malloc(width * sizeof(GLuint)); + + if (sValues && zValues) { GLint row; for (row = 0; row < height; row++) { - GLubyte sValues[MAX_WIDTH]; - GLuint zValues[MAX_WIDTH]; GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, format, type, - row, skipPixels); - _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues, + row, 0); + _mesa_unpack_stencil_span(ctx, width, destType, sValues, type, source, &clippedUnpack, ctx->_ImageTransferState); @@ -886,7 +884,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 
GL_FLOAT : GL_UNSIGNED_INT; - _mesa_unpack_depth_span(ctx, spanWidth, ztype, zValues, + _mesa_unpack_depth_span(ctx, width, ztype, zValues, (1 << 24) - 1, type, source, &clippedUnpack); } @@ -910,63 +908,63 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, switch (pt->resource->format) { case PIPE_FORMAT_S8_UINT: { - ubyte *dest = stmap + spanY * pt->stride + spanX; + ubyte *dest = stmap + spanY * pt->stride; assert(usage == PIPE_TRANSFER_WRITE); - memcpy(dest, sValues, spanWidth); + memcpy(dest, sValues, width); } break; case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (format == GL_DEPTH_STENCIL) { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLint k; assert(usage == PIPE_TRANSFER_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { dest[k] = zValues[k] | (sValues[k] << 24); } } else { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLint k; assert(usage == PIPE_TRANSFER_READ_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24); } } break; case PIPE_FORMAT_S8_UINT_Z24_UNORM: if (format == GL_DEPTH_STENCIL) { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLint k; assert(usage == PIPE_TRANSFER_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff); } } else { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLint k; assert(usage == PIPE_TRANSFER_READ_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff); } } break; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (format == GL_DEPTH_STENCIL) { - uint *dest = (uint *) (stmap + spanY * pt->stride + 
spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLfloat *destf = (GLfloat*)dest; GLint k; assert(usage == PIPE_TRANSFER_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { destf[k*2] = zValuesFloat[k]; dest[k*2+1] = sValues[k] & 0xff; } } else { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + uint *dest = (uint *) (stmap + spanY * pt->stride); GLint k; assert(usage == PIPE_TRANSFER_READ_WRITE); - for (k = 0; k < spanWidth; k++) { + for (k = 0; k < width; k++) { dest[k*2+1] = sValues[k] & 0xff; } } @@ -976,8 +974,13 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } } - skipPixels += spanWidth; } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels()"); + } + + free(sValues); + free(zValues); _mesa_unmap_pbo_source(ctx, &clippedUnpack); diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index d887455e6..ea59ccf25 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -684,9 +684,16 @@ decompress_with_blit(struct gl_context * ctx, /* format translation via floats */ GLuint row; enum pipe_format pformat = util_format_linear(dst_texture->format); + GLfloat *rgba; + + rgba = (GLfloat *) malloc(width * 4 * sizeof(GLfloat)); + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()"); + goto end; + } + for (row = 0; row < height; row++) { const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */ - GLfloat rgba[4 * MAX_WIDTH]; GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width, height, format, type, row, 0); @@ -700,8 +707,11 @@ decompress_with_blit(struct gl_context * ctx, _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format, type, dest, &ctx->Pack, transferOps); } + + free(rgba); } +end: _mesa_unmap_pbo_dest(ctx, &ctx->Pack); pipe->transfer_destroy(pipe, tex_xfer); @@ -763,8 +773,6 @@ fallback_copy_texsubimage(struct 
gl_context *ctx, if (ST_DEBUG & DEBUG_FALLBACK) debug_printf("%s: fallback processing\n", __FUNCTION__); - assert(width <= MAX_WIDTH); - if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { srcY = strb->Base.Height - srcY - height; } @@ -793,6 +801,7 @@ fallback_copy_texsubimage(struct gl_context *ctx, const GLboolean scaleOrBias = (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F); GLint row, yStep; + uint *data; /* determine bottom-to-top vs. top-to-bottom order for src buffer */ if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { @@ -804,15 +813,23 @@ fallback_copy_texsubimage(struct gl_context *ctx, yStep = 1; } - /* To avoid a large temp memory allocation, do copy row by row */ - for (row = 0; row < height; row++, srcY += yStep) { - uint data[MAX_WIDTH]; - pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data); - if (scaleOrBias) { - _mesa_scale_and_bias_depth_uint(ctx, width, data); + data = (uint *) malloc(width * sizeof(uint)); + + if (data) { + /* To avoid a large temp memory allocation, do copy row by row */ + for (row = 0; row < height; row++, srcY += yStep) { + pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data); + if (scaleOrBias) { + _mesa_scale_and_bias_depth_uint(ctx, width, data); + } + pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data); } - pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data); } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage()"); + } + + free(data); } else { /* RGBA format */ diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c index fb36a6809..33bc6ed72 100644 --- a/mesalib/src/mesa/state_tracker/st_extensions.c +++ b/mesalib/src/mesa/state_tracker/st_extensions.c @@ -108,6 +108,14 @@ void st_init_limits(struct st_context *st) c->MaxTextureUnits = _min(c->MaxTextureImageUnits, c->MaxTextureCoordUnits); + /* Define max viewport size and max renderbuffer size in terms of + * max texture size (note: max tex RECT 
size = max tex 2D size). + * If this isn't true for some hardware we'll need new PIPE_CAP_ queries. + */ + c->MaxViewportWidth = + c->MaxViewportHeight = + c->MaxRenderbufferSize = c->MaxTextureRectSize; + c->MaxDrawBuffers = _clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), 1, MAX_DRAW_BUFFERS); diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f139e95fe..b0227855c 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4746,8 +4746,6 @@ get_mesa_program(struct gl_context *ctx, { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); struct gl_program *prog; - struct pipe_screen * screen = st_context(ctx)->pipe->screen; - unsigned pipe_shader_type; GLenum target; const char *target_string; bool progress; @@ -4758,17 +4756,14 @@ get_mesa_program(struct gl_context *ctx, case GL_VERTEX_SHADER: target = GL_VERTEX_PROGRAM_ARB; target_string = "vertex"; - pipe_shader_type = PIPE_SHADER_VERTEX; break; case GL_FRAGMENT_SHADER: target = GL_FRAGMENT_PROGRAM_ARB; target_string = "fragment"; - pipe_shader_type = PIPE_SHADER_FRAGMENT; break; case GL_GEOMETRY_SHADER: target = GL_GEOMETRY_PROGRAM_NV; target_string = "geometry"; - pipe_shader_type = PIPE_SHADER_GEOMETRY; break; default: assert(!"should not be reached"); @@ -4792,12 +4787,8 @@ get_mesa_program(struct gl_context *ctx, _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); - if (!screen->get_shader_param(screen, pipe_shader_type, - PIPE_SHADER_CAP_OUTPUT_READ)) { - /* Remove reads to output registers, and to varyings in vertex shaders. */ - lower_output_reads(shader->ir); - } - + /* Remove reads from output registers. */ + lower_output_reads(shader->ir); /* Emit intermediate IR for main(). 
*/ visit_exec_list(shader->ir, v); diff --git a/mesalib/src/mesa/swrast/s_aalinetemp.h b/mesalib/src/mesa/swrast/s_aalinetemp.h index ba9f8abb9..a517fb697 100644 --- a/mesalib/src/mesa/swrast/s_aalinetemp.h +++ b/mesalib/src/mesa/swrast/s_aalinetemp.h @@ -91,7 +91,7 @@ NAME(plot)(struct gl_context *ctx, struct LineInfo *line, int ix, int iy) ATTRIB_LOOP_END #endif - if (line->span.end == MAX_WIDTH) { + if (line->span.end == SWRAST_MAX_WIDTH) { _swrast_write_rgba_span(ctx, &(line->span)); line->span.end = 0; /* reset counter */ } diff --git a/mesalib/src/mesa/swrast/s_aatritemp.h b/mesalib/src/mesa/swrast/s_aatritemp.h index 77b3ae6ec..9cdb35fd2 100644 --- a/mesalib/src/mesa/swrast/s_aatritemp.h +++ b/mesalib/src/mesa/swrast/s_aatritemp.h @@ -196,7 +196,7 @@ span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); #endif /* skip over fragments with zero coverage */ - while (startX < MAX_WIDTH) { + while (startX < SWRAST_MAX_WIDTH) { coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); if (coverage > 0.0F) break; diff --git a/mesalib/src/mesa/swrast/s_bitmap.c b/mesalib/src/mesa/swrast/s_bitmap.c index 18f1c1866..3a2792913 100644 --- a/mesalib/src/mesa/swrast/s_bitmap.c +++ b/mesalib/src/mesa/swrast/s_bitmap.c @@ -123,7 +123,7 @@ _swrast_Bitmap( struct gl_context *ctx, GLint px, GLint py, src++; } - if (count + width >= MAX_WIDTH || row + 1 == height) { + if (count + width >= SWRAST_MAX_WIDTH || row + 1 == height) { /* flush the span */ span.end = count; _swrast_write_rgba_span(ctx, &span); diff --git a/mesalib/src/mesa/swrast/s_blend.c b/mesalib/src/mesa/swrast/s_blend.c index cd6e6f036..54fffbb45 100644 --- a/mesalib/src/mesa/swrast/s_blend.c +++ b/mesalib/src/mesa/swrast/s_blend.c @@ -998,7 +998,7 @@ _swrast_blend_span(struct gl_context *ctx, struct gl_renderbuffer *rb, SWspan *s SWcontext *swrast = SWRAST_CONTEXT(ctx); void *rbPixels; - ASSERT(span->end <= MAX_WIDTH); + ASSERT(span->end <= SWRAST_MAX_WIDTH); ASSERT(span->arrayMask & SPAN_RGBA); 
ASSERT(!ctx->Color.ColorLogicOpEnabled); diff --git a/mesalib/src/mesa/swrast/s_chan.h b/mesalib/src/mesa/swrast/s_chan.h index 94ac8b65b..1db7fae4d 100644 --- a/mesalib/src/mesa/swrast/s_chan.h +++ b/mesalib/src/mesa/swrast/s_chan.h @@ -36,6 +36,14 @@ /** + * Default bits per color channel: 8, 16 or 32 + */ +#ifndef CHAN_BITS +#define CHAN_BITS 8 +#endif + + +/** * Color channel data type. */ #if CHAN_BITS == 8 diff --git a/mesalib/src/mesa/swrast/s_context.c b/mesalib/src/mesa/swrast/s_context.c index cc304d70c..63350b2e7 100644 --- a/mesalib/src/mesa/swrast/s_context.c +++ b/mesalib/src/mesa/swrast/s_context.c @@ -726,6 +726,18 @@ _swrast_CreateContext( struct gl_context *ctx ) const GLuint maxThreads = 1; #endif + assert(ctx->Const.MaxViewportWidth <= SWRAST_MAX_WIDTH); + assert(ctx->Const.MaxViewportHeight <= SWRAST_MAX_WIDTH); + + assert(ctx->Const.MaxRenderbufferSize <= SWRAST_MAX_WIDTH); + + /* make sure largest texture image is <= SWRAST_MAX_WIDTH in size */ + assert((1 << (ctx->Const.MaxTextureLevels - 1)) <= SWRAST_MAX_WIDTH); + assert((1 << (ctx->Const.MaxCubeTextureLevels - 1)) <= SWRAST_MAX_WIDTH); + assert((1 << (ctx->Const.Max3DTextureLevels - 1)) <= SWRAST_MAX_WIDTH); + + assert(PROG_MAX_WIDTH == SWRAST_MAX_WIDTH); + if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); } @@ -790,6 +802,19 @@ _swrast_CreateContext( struct gl_context *ctx ) ctx->swrast_context = swrast; + swrast->stencil_temp.buf1 = (GLubyte *) malloc(SWRAST_MAX_WIDTH * sizeof(GLubyte)); + swrast->stencil_temp.buf2 = (GLubyte *) malloc(SWRAST_MAX_WIDTH * sizeof(GLubyte)); + swrast->stencil_temp.buf3 = (GLubyte *) malloc(SWRAST_MAX_WIDTH * sizeof(GLubyte)); + swrast->stencil_temp.buf4 = (GLubyte *) malloc(SWRAST_MAX_WIDTH * sizeof(GLubyte)); + + if (!swrast->stencil_temp.buf1 || + !swrast->stencil_temp.buf2 || + !swrast->stencil_temp.buf3 || + !swrast->stencil_temp.buf4) { + _swrast_DestroyContext(ctx); + return GL_FALSE; + } + return GL_TRUE; } @@ -806,6 +831,12 @@ 
_swrast_DestroyContext( struct gl_context *ctx ) if (swrast->ZoomedArrays) FREE( swrast->ZoomedArrays ); FREE( swrast->TexelBuffer ); + + free(swrast->stencil_temp.buf1); + free(swrast->stencil_temp.buf2); + free(swrast->stencil_temp.buf3); + free(swrast->stencil_temp.buf4); + FREE( swrast ); ctx->swrast_context = 0; diff --git a/mesalib/src/mesa/swrast/s_context.h b/mesalib/src/mesa/swrast/s_context.h index 9388c3569..26b97f78d 100644 --- a/mesalib/src/mesa/swrast/s_context.h +++ b/mesalib/src/mesa/swrast/s_context.h @@ -306,6 +306,13 @@ typedef struct /** State used during execution of fragment programs */ struct gl_program_machine FragProgMachine; + /** Temporary arrays for stencil operations. To avoid large stack + * allocations. + */ + struct { + GLubyte *buf1, *buf2, *buf3, *buf4; + } stencil_temp; + } SWcontext; diff --git a/mesalib/src/mesa/swrast/s_copypix.c b/mesalib/src/mesa/swrast/s_copypix.c index 592d35a98..cf419c328 100644 --- a/mesalib/src/mesa/swrast/s_copypix.c +++ b/mesalib/src/mesa/swrast/s_copypix.c @@ -158,7 +158,7 @@ copy_rgba_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, p = NULL; } - ASSERT(width < MAX_WIDTH); + ASSERT(width < SWRAST_MAX_WIDTH); for (row = 0; row < height; row++, sy += stepy, dy += stepy) { GLvoid *rgba = span.array->attribs[FRAG_ATTRIB_COL0]; @@ -246,7 +246,7 @@ copy_depth_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, { struct gl_framebuffer *fb = ctx->ReadBuffer; struct gl_renderbuffer *readRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer; - GLfloat *p, *tmpImage; + GLfloat *p, *tmpImage, *depth; GLint sy, dy, stepy; GLint j; const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F; @@ -303,8 +303,13 @@ copy_depth_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, p = NULL; } + depth = (GLfloat *) malloc(width * sizeof(GLfloat)); + if (!depth) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels()"); + goto end; + } + for (j = 0; j < height; j++, sy += stepy, dy += stepy) { - 
GLfloat depth[MAX_WIDTH]; /* get depth values */ if (overlapping) { memcpy(depth, p, width * sizeof(GLfloat)); @@ -327,6 +332,9 @@ copy_depth_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, _swrast_write_rgba_span(ctx, &span); } + free(depth); + +end: if (overlapping) free(tmpImage); } @@ -342,7 +350,7 @@ copy_stencil_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, struct gl_renderbuffer *rb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; GLint sy, dy, stepy; GLint j; - GLubyte *p, *tmpImage; + GLubyte *p, *tmpImage, *stencil; const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F; GLint overlapping; @@ -392,9 +400,13 @@ copy_stencil_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, p = NULL; } - for (j = 0; j < height; j++, sy += stepy, dy += stepy) { - GLubyte stencil[MAX_WIDTH]; + stencil = (GLubyte *) malloc(width * sizeof(GLubyte)); + if (!stencil) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels()"); + goto end; + } + for (j = 0; j < height; j++, sy += stepy, dy += stepy) { /* Get stencil values */ if (overlapping) { memcpy(stencil, p, width * sizeof(GLubyte)); @@ -416,6 +428,9 @@ copy_stencil_pixels( struct gl_context *ctx, GLint srcx, GLint srcy, } } + free(stencil); + +end: if (overlapping) free(tmpImage); } diff --git a/mesalib/src/mesa/swrast/s_depth.c b/mesalib/src/mesa/swrast/s_depth.c index c90388209..26126a932 100644 --- a/mesalib/src/mesa/swrast/s_depth.c +++ b/mesalib/src/mesa/swrast/s_depth.c @@ -419,9 +419,15 @@ _swrast_depth_bounds_test( struct gl_context *ctx, SWspan *span ) const GLuint count = span->end; GLuint i; GLboolean anyPass = GL_FALSE; - GLuint zBufferTemp[MAX_WIDTH]; + GLuint *zBufferTemp; const GLuint *zBufferVals; + zBufferTemp = (GLuint *) malloc(count * sizeof(GLuint)); + if (!zBufferTemp) { + /* don't generate a stream of OUT_OF_MEMORY errors here */ + return GL_FALSE; + } + if (span->arrayMask & SPAN_XY) zStart = NULL; else @@ -453,6 +459,8 @@ _swrast_depth_bounds_test( struct 
gl_context *ctx, SWspan *span ) } } + free(zBufferTemp); + return anyPass; } diff --git a/mesalib/src/mesa/swrast/s_drawpix.c b/mesalib/src/mesa/swrast/s_drawpix.c index c19808bd4..1fa64c378 100644 --- a/mesalib/src/mesa/swrast/s_drawpix.c +++ b/mesalib/src/mesa/swrast/s_drawpix.c @@ -264,34 +264,35 @@ draw_stencil_pixels( struct gl_context *ctx, GLint x, GLint y, { const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; const GLenum destType = GL_UNSIGNED_BYTE; - GLint skipPixels; + GLint row; + GLubyte *values; - /* if width > MAX_WIDTH, have to process image in chunks */ - skipPixels = 0; - while (skipPixels < width) { - const GLint spanX = x + skipPixels; - const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); - GLint row; - for (row = 0; row < height; row++) { - const GLint spanY = y + row; - GLubyte values[MAX_WIDTH]; - const GLvoid *source = _mesa_image_address2d(unpack, pixels, - width, height, - GL_STENCIL_INDEX, type, - row, skipPixels); - _mesa_unpack_stencil_span(ctx, spanWidth, destType, values, - type, source, unpack, - ctx->_ImageTransferState); - if (zoom) { - _swrast_write_zoomed_stencil_span(ctx, x, y, spanWidth, - spanX, spanY, values); - } - else { - _swrast_write_stencil_span(ctx, spanWidth, spanX, spanY, values); - } + values = (GLubyte *) malloc(width * sizeof(GLubyte)); + if (!values) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + return; + } + + for (row = 0; row < height; row++) { + const GLvoid *source = _mesa_image_address2d(unpack, pixels, + width, height, + GL_STENCIL_INDEX, type, + row, 0); + _mesa_unpack_stencil_span(ctx, width, destType, values, + type, source, unpack, + ctx->_ImageTransferState); + if (zoom) { + _swrast_write_zoomed_stencil_span(ctx, x, y, width, + x, y, values); + } + else { + _swrast_write_stencil_span(ctx, width, x, y, values); } - skipPixels += spanWidth; + + y++; } + + free(values); } @@ -318,7 +319,7 @@ draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y, && 
ctx->DrawBuffer->Visual.depthBits == 16 && !scaleOrBias && !zoom - && width <= MAX_WIDTH + && width <= SWRAST_MAX_WIDTH && !unpack->SwapBytes) { /* Special case: directly write 16-bit depth values */ GLint row; @@ -338,7 +339,7 @@ draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y, else if (type == GL_UNSIGNED_INT && !scaleOrBias && !zoom - && width <= MAX_WIDTH + && width <= SWRAST_MAX_WIDTH && !unpack->SwapBytes) { /* Special case: shift 32-bit values down to Visual.depthBits */ const GLint shift = 32 - ctx->DrawBuffer->Visual.depthBits; @@ -366,11 +367,11 @@ draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y, const GLuint depthMax = ctx->DrawBuffer->_DepthMax; GLint skipPixels = 0; - /* in case width > MAX_WIDTH do the copy in chunks */ + /* in case width > SWRAST_MAX_WIDTH do the copy in chunks */ while (skipPixels < width) { - const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); + const GLint spanWidth = MIN2(width - skipPixels, SWRAST_MAX_WIDTH); GLint row; - ASSERT(span.end <= MAX_WIDTH); + ASSERT(span.end <= SWRAST_MAX_WIDTH); for (row = 0; row < height; row++) { const GLvoid *zSrc = _mesa_image_address2d(unpack, pixels, width, height, @@ -452,9 +453,9 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y, /* use span array for temp color storage */ GLfloat *rgba = (GLfloat *) span.array->attribs[FRAG_ATTRIB_COL0]; - /* if the span is wider than MAX_WIDTH we have to do it in chunks */ + /* if the span is wider than SWRAST_MAX_WIDTH we have to do it in chunks */ while (skipPixels < width) { - const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); + const GLint spanWidth = MIN2(width - skipPixels, SWRAST_MAX_WIDTH); const GLubyte *source = (const GLubyte *) _mesa_image_address2d(unpack, pixels, width, height, format, @@ -588,15 +589,21 @@ draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, * Separate depth/stencil buffers, or pixel transfer ops required. 
*/ /* XXX need to handle very wide images (skippixels) */ + GLuint *zValues; /* 32-bit Z values */ GLint i; + zValues = (GLuint *) malloc(width * sizeof(GLuint)); + if (!zValues) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + return; + } + for (i = 0; i < height; i++) { const GLuint *depthStencilSrc = (const GLuint *) _mesa_image_address2d(&clippedUnpack, pixels, width, height, GL_DEPTH_STENCIL_EXT, type, i, 0); if (ctx->Depth.Mask) { - GLuint zValues[MAX_WIDTH]; /* 32-bit Z values */ _mesa_unpack_depth_span(ctx, width, GL_UNSIGNED_INT, /* dest type */ zValues, /* dest addr */ @@ -615,7 +622,7 @@ draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } if (stencilMask != 0x0) { - GLubyte stencilValues[MAX_WIDTH]; + GLubyte *stencilValues = (GLubyte *) zValues; /* re-use buffer */ /* get stencil values, with shift/offset/mapping */ _mesa_unpack_stencil_span(ctx, width, stencilType, stencilValues, type, depthStencilSrc, &clippedUnpack, @@ -627,6 +634,8 @@ draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, _swrast_write_stencil_span(ctx, width, x, y + i, stencilValues); } } + + free(zValues); } } diff --git a/mesalib/src/mesa/swrast/s_lines.c b/mesalib/src/mesa/swrast/s_lines.c index ee997b08a..2078be43b 100644 --- a/mesalib/src/mesa/swrast/s_lines.c +++ b/mesalib/src/mesa/swrast/s_lines.c @@ -67,7 +67,7 @@ draw_wide_line( struct gl_context *ctx, SWspan *span, GLboolean xMajor ) ctx->Const.MaxLineWidth); GLint start; - ASSERT(span->end < MAX_WIDTH); + ASSERT(span->end < SWRAST_MAX_WIDTH); if (width & 1) start = width / 2; diff --git a/mesalib/src/mesa/swrast/s_logic.c b/mesalib/src/mesa/swrast/s_logic.c index e908a0efe..8791630a4 100644 --- a/mesalib/src/mesa/swrast/s_logic.c +++ b/mesalib/src/mesa/swrast/s_logic.c @@ -193,7 +193,7 @@ _swrast_logicop_rgba_span(struct gl_context *ctx, struct gl_renderbuffer *rb, { void *rbPixels; - ASSERT(span->end < MAX_WIDTH); + ASSERT(span->end < SWRAST_MAX_WIDTH); ASSERT(span->arrayMask & 
SPAN_RGBA); rbPixels = _swrast_get_dest_rgba(ctx, rb, span); diff --git a/mesalib/src/mesa/swrast/s_masking.c b/mesalib/src/mesa/swrast/s_masking.c index 2d962ebc5..4f262fa97 100644 --- a/mesalib/src/mesa/swrast/s_masking.c +++ b/mesalib/src/mesa/swrast/s_masking.c @@ -46,7 +46,7 @@ _swrast_mask_rgba_span(struct gl_context *ctx, struct gl_renderbuffer *rb, const GLuint n = span->end; void *rbPixels; - ASSERT(n < MAX_WIDTH); + ASSERT(n < SWRAST_MAX_WIDTH); ASSERT(span->arrayMask & SPAN_RGBA); rbPixels = _swrast_get_dest_rgba(ctx, rb, span); diff --git a/mesalib/src/mesa/swrast/s_points.c b/mesalib/src/mesa/swrast/s_points.c index 11b7ef7b1..acbdb2d42 100644 --- a/mesalib/src/mesa/swrast/s_points.c +++ b/mesalib/src/mesa/swrast/s_points.c @@ -439,7 +439,7 @@ large_point(struct gl_context *ctx, const SWvertex *vert) span.end++; } } - assert(span.end <= MAX_WIDTH); + assert(span.end <= SWRAST_MAX_WIDTH); _swrast_write_rgba_span(ctx, &span); } } @@ -475,7 +475,7 @@ pixel_point(struct gl_context *ctx, const SWvertex *vert) span->attrStepY[FRAG_ATTRIB_WPOS][3] = 0.0F; /* check if we need to flush */ - if (span->end >= MAX_WIDTH || + if (span->end >= SWRAST_MAX_WIDTH || (swrast->_RasterMask & (BLEND_BIT | LOGIC_OP_BIT | MASKING_BIT)) || span->facing != swrast->PointLineFacing) { if (span->end > 0) { @@ -504,7 +504,7 @@ pixel_point(struct gl_context *ctx, const SWvertex *vert) span->array->z[count] = (GLint) (vert->attrib[FRAG_ATTRIB_WPOS][2] + 0.5F); span->end = count + 1; - ASSERT(span->end <= MAX_WIDTH); + ASSERT(span->end <= SWRAST_MAX_WIDTH); } diff --git a/mesalib/src/mesa/swrast/s_span.c b/mesalib/src/mesa/swrast/s_span.c index 025e7b207..627ef1136 100644 --- a/mesalib/src/mesa/swrast/s_span.c +++ b/mesalib/src/mesa/swrast/s_span.c @@ -1164,7 +1164,7 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span) return; } - ASSERT(span->end <= MAX_WIDTH); + ASSERT(span->end <= SWRAST_MAX_WIDTH); /* Depth bounds test */ if (ctx->Depth.BoundsTest && 
fb->Visual.depthBits > 0) { @@ -1319,7 +1319,8 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span) /* color[fragOutput] will be written to buffer[buf] */ if (rb) { - GLchan rgbaSave[MAX_WIDTH][4]; + /* re-use one of the attribute array buffers for rgbaSave */ + GLchan (*rgbaSave)[4] = (GLchan (*)[4]) span->array->attribs[0]; struct swrast_renderbuffer *srb = swrast_renderbuffer(rb); GLenum colorType = srb->ColorType; diff --git a/mesalib/src/mesa/swrast/s_span.h b/mesalib/src/mesa/swrast/s_span.h index ff0fe6cd9..0763c7161 100644 --- a/mesalib/src/mesa/swrast/s_span.h +++ b/mesalib/src/mesa/swrast/s_span.h @@ -32,6 +32,7 @@ #include "main/glheader.h" #include "main/mtypes.h" #include "swrast/s_chan.h" +#include "swrast/swrast.h" struct gl_context; @@ -71,24 +72,24 @@ typedef struct sw_span_arrays /* XXX someday look at transposing first two indexes for better memory * access pattern. */ - GLfloat attribs[FRAG_ATTRIB_MAX][MAX_WIDTH][4]; + GLfloat attribs[FRAG_ATTRIB_MAX][SWRAST_MAX_WIDTH][4]; /** This mask indicates which fragments are alive or culled */ - GLubyte mask[MAX_WIDTH]; + GLubyte mask[SWRAST_MAX_WIDTH]; GLenum ChanType; /**< Color channel type, GL_UNSIGNED_BYTE, GL_FLOAT */ /** Attribute arrays that don't fit into attribs[] array above */ /*@{*/ - GLubyte rgba8[MAX_WIDTH][4]; - GLushort rgba16[MAX_WIDTH][4]; + GLubyte rgba8[SWRAST_MAX_WIDTH][4]; + GLushort rgba16[SWRAST_MAX_WIDTH][4]; GLchan (*rgba)[4]; /** either == rgba8 or rgba16 */ - GLint x[MAX_WIDTH]; /**< fragment X coords */ - GLint y[MAX_WIDTH]; /**< fragment Y coords */ - GLuint z[MAX_WIDTH]; /**< fragment Z coords */ - GLuint index[MAX_WIDTH]; /**< Color indexes */ - GLfloat lambda[MAX_TEXTURE_COORD_UNITS][MAX_WIDTH]; /**< Texture LOD */ - GLfloat coverage[MAX_WIDTH]; /**< Fragment coverage for AA/smoothing */ + GLint x[SWRAST_MAX_WIDTH]; /**< fragment X coords */ + GLint y[SWRAST_MAX_WIDTH]; /**< fragment Y coords */ + GLuint z[SWRAST_MAX_WIDTH]; /**< fragment Z coords */ + 
GLuint index[SWRAST_MAX_WIDTH]; /**< Color indexes */ + GLfloat lambda[MAX_TEXTURE_COORD_UNITS][SWRAST_MAX_WIDTH]; /**< Texture LOD */ + GLfloat coverage[SWRAST_MAX_WIDTH]; /**< Fragment coverage for AA/smoothing */ /*@}*/ } SWspanarrays; diff --git a/mesalib/src/mesa/swrast/s_stencil.c b/mesalib/src/mesa/swrast/s_stencil.c index bbfbf44cc..3423737ee 100644 --- a/mesalib/src/mesa/swrast/s_stencil.c +++ b/mesalib/src/mesa/swrast/s_stencil.c @@ -210,7 +210,8 @@ static GLboolean do_stencil_test(struct gl_context *ctx, GLuint face, GLuint n, GLubyte stencil[], GLubyte mask[], GLint stride) { - GLubyte fail[MAX_WIDTH]; + SWcontext *swrast = SWRAST_CONTEXT(ctx); + GLubyte *fail = swrast->stencil_temp.buf2; GLboolean allfail = GL_FALSE; GLuint i, j; const GLuint valueMask = ctx->Stencil.ValueMask[face]; @@ -347,6 +348,7 @@ put_s8_values(struct gl_context *ctx, struct gl_renderbuffer *rb, GLboolean _swrast_stencil_and_ztest_span(struct gl_context *ctx, SWspan *span) { + SWcontext *swrast = SWRAST_CONTEXT(ctx); struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_renderbuffer *rb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; const GLint stencilOffset = get_stencil_offset(rb->Format); @@ -354,7 +356,7 @@ _swrast_stencil_and_ztest_span(struct gl_context *ctx, SWspan *span) const GLuint face = (span->facing == 0) ? 0 : ctx->Stencil._BackFace; const GLuint count = span->end; GLubyte *mask = span->array->mask; - GLubyte stencilTemp[MAX_WIDTH]; + GLubyte *stencilTemp = swrast->stencil_temp.buf1; GLubyte *stencilBuf; if (span->arrayMask & SPAN_XY) { @@ -402,7 +404,10 @@ _swrast_stencil_and_ztest_span(struct gl_context *ctx, SWspan *span) /* * Perform depth buffering, then apply zpass or zfail stencil function. 
*/ - GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; + SWcontext *swrast = SWRAST_CONTEXT(ctx); + GLubyte *passMask = swrast->stencil_temp.buf2; + GLubyte *failMask = swrast->stencil_temp.buf3; + GLubyte *origMask = swrast->stencil_temp.buf4; /* save the current mask bits */ memcpy(origMask, mask, count * sizeof(GLubyte)); @@ -488,6 +493,7 @@ void _swrast_write_stencil_span(struct gl_context *ctx, GLint n, GLint x, GLint y, const GLubyte stencil[] ) { + SWcontext *swrast = SWRAST_CONTEXT(ctx); struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_renderbuffer *rb = fb->Attachment[BUFFER_STENCIL].Renderbuffer; const GLuint stencilMax = (1 << fb->Visual.stencilBits) - 1; @@ -517,7 +523,8 @@ _swrast_write_stencil_span(struct gl_context *ctx, GLint n, GLint x, GLint y, if ((stencilMask & stencilMax) != stencilMax) { /* need to apply writemask */ - GLubyte destVals[MAX_WIDTH], newVals[MAX_WIDTH]; + GLubyte *destVals = swrast->stencil_temp.buf1; + GLubyte *newVals = swrast->stencil_temp.buf2; GLint i; _mesa_unpack_ubyte_stencil_row(rb->Format, n, stencilBuf, destVals); diff --git a/mesalib/src/mesa/swrast/s_texcombine.c b/mesalib/src/mesa/swrast/s_texcombine.c index 1fce5c565..8fbf988b4 100644 --- a/mesalib/src/mesa/swrast/s_texcombine.c +++ b/mesalib/src/mesa/swrast/s_texcombine.c @@ -49,9 +49,9 @@ static inline float4_array get_texel_array(SWcontext *swrast, GLuint unit) { #ifdef _OPENMP - return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); + return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4 * omp_get_num_threads() + (SWRAST_MAX_WIDTH * 4 * omp_get_thread_num())); #else - return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); + return (float4_array) (swrast->TexelBuffer + unit * SWRAST_MAX_WIDTH * 4); #endif } @@ -611,7 +611,7 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span ) */ swrast->TexelBuffer = (GLfloat *) 
MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * - MAX_WIDTH * 4 * sizeof(GLfloat)); + SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); return; @@ -625,7 +625,7 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span ) return; } - ASSERT(span->end <= MAX_WIDTH); + ASSERT(span->end <= SWRAST_MAX_WIDTH); /* * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR) diff --git a/mesalib/src/mesa/swrast/s_triangle.c b/mesalib/src/mesa/swrast/s_triangle.c index ddb4792f7..e89a999a9 100644 --- a/mesalib/src/mesa/swrast/s_triangle.c +++ b/mesalib/src/mesa/swrast/s_triangle.c @@ -142,7 +142,7 @@ _swrast_culltriangle( struct gl_context *ctx, #define RENDER_SPAN( span ) \ GLuint i; \ - GLubyte rgba[MAX_WIDTH][4]; \ + GLubyte (*rgba)[4] = swrast->SpanArrays->rgba8; \ span.intTex[0] -= FIXED_HALF; /* off-by-one error? */ \ span.intTex[1] -= FIXED_HALF; \ for (i = 0; i < span.end; i++) { \ @@ -200,7 +200,8 @@ _swrast_culltriangle( struct gl_context *ctx, #define RENDER_SPAN( span ) \ GLuint i; \ - GLubyte rgba[MAX_WIDTH][4]; \ + GLubyte (*rgba)[4] = swrast->SpanArrays->rgba8; \ + GLubyte *mask = swrast->SpanArrays->mask; \ span.intTex[0] -= FIXED_HALF; /* off-by-one error? 
*/ \ span.intTex[1] -= FIXED_HALF; \ for (i = 0; i < span.end; i++) { \ @@ -215,17 +216,17 @@ _swrast_culltriangle( struct gl_context *ctx, rgba[i][BCOMP] = texture[pos+0]; \ rgba[i][ACOMP] = 0xff; \ zRow[i] = z; \ - span.array->mask[i] = 1; \ + mask[i] = 1; \ } \ else { \ - span.array->mask[i] = 0; \ + mask[i] = 0; \ } \ span.intTex[0] += span.intTexStep[0]; \ span.intTex[1] += span.intTexStep[1]; \ span.z += span.zStep; \ } \ _swrast_put_row(ctx, rb, GL_UNSIGNED_BYTE, \ - span.end, span.x, span.y, rgba, span.array->mask); + span.end, span.x, span.y, rgba, mask); #include "s_tritemp.h" diff --git a/mesalib/src/mesa/swrast/s_zoom.c b/mesalib/src/mesa/swrast/s_zoom.c index 73bff482b..768bbbafd 100644 --- a/mesalib/src/mesa/swrast/s_zoom.c +++ b/mesalib/src/mesa/swrast/s_zoom.c @@ -150,7 +150,7 @@ zoom_span( struct gl_context *ctx, GLint imgX, GLint imgY, const SWspan *span, zoomedWidth = x1 - x0; ASSERT(zoomedWidth > 0); - ASSERT(zoomedWidth <= MAX_WIDTH); + ASSERT(zoomedWidth <= SWRAST_MAX_WIDTH); /* no pixel arrays! must be horizontal spans. 
*/ ASSERT((span->arrayMask & SPAN_XY) == 0); @@ -362,7 +362,7 @@ _swrast_write_zoomed_stencil_span(struct gl_context *ctx, GLint imgX, GLint imgY GLint width, GLint spanX, GLint spanY, const GLubyte stencil[]) { - GLubyte zoomedVals[MAX_WIDTH]; + GLubyte *zoomedVals; GLint x0, x1, y0, y1, y; GLint i, zoomedWidth; @@ -373,7 +373,11 @@ _swrast_write_zoomed_stencil_span(struct gl_context *ctx, GLint imgX, GLint imgY zoomedWidth = x1 - x0; ASSERT(zoomedWidth > 0); - ASSERT(zoomedWidth <= MAX_WIDTH); + ASSERT(zoomedWidth <= SWRAST_MAX_WIDTH); + + zoomedVals = (GLubyte *) malloc(zoomedWidth * sizeof(GLubyte)); + if (!zoomedVals) + return; /* zoom the span horizontally */ for (i = 0; i < zoomedWidth; i++) { @@ -387,6 +391,8 @@ _swrast_write_zoomed_stencil_span(struct gl_context *ctx, GLint imgX, GLint imgY for (y = y0; y < y1; y++) { _swrast_write_stencil_span(ctx, zoomedWidth, x0, y, zoomedVals); } + + free(zoomedVals); } @@ -401,7 +407,7 @@ _swrast_write_zoomed_z_span(struct gl_context *ctx, GLint imgX, GLint imgY, { struct gl_renderbuffer *rb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; - GLuint zoomedVals[MAX_WIDTH]; + GLuint *zoomedVals; GLint x0, x1, y0, y1, y; GLint i, zoomedWidth; @@ -412,7 +418,11 @@ _swrast_write_zoomed_z_span(struct gl_context *ctx, GLint imgX, GLint imgY, zoomedWidth = x1 - x0; ASSERT(zoomedWidth > 0); - ASSERT(zoomedWidth <= MAX_WIDTH); + ASSERT(zoomedWidth <= SWRAST_MAX_WIDTH); + + zoomedVals = (GLuint *) malloc(zoomedWidth * sizeof(GLuint)); + if (!zoomedVals) + return; /* zoom the span horizontally */ for (i = 0; i < zoomedWidth; i++) { @@ -427,4 +437,6 @@ _swrast_write_zoomed_z_span(struct gl_context *ctx, GLint imgX, GLint imgY, GLubyte *dst = _swrast_pixel_address(rb, x0, y); _mesa_pack_uint_z_row(rb->Format, zoomedWidth, zoomedVals, dst); } + + free(zoomedVals); } diff --git a/mesalib/src/mesa/swrast/swrast.h b/mesalib/src/mesa/swrast/swrast.h index ad19eeecc..a299e6fda 100644 --- a/mesalib/src/mesa/swrast/swrast.h +++ 
b/mesalib/src/mesa/swrast/swrast.h @@ -35,6 +35,34 @@ #include "main/mtypes.h" #include "swrast/s_chan.h" + +/** + * If non-zero use GLdouble for walking triangle edges, for better accuracy. + */ +#define TRIANGLE_WALK_DOUBLE 0 + + +/** + * Bits per depth buffer value (max is 32). + */ +#ifndef DEFAULT_SOFTWARE_DEPTH_BITS +#define DEFAULT_SOFTWARE_DEPTH_BITS 16 +#endif +/** Depth buffer data type */ +#if DEFAULT_SOFTWARE_DEPTH_BITS <= 16 +#define DEFAULT_SOFTWARE_DEPTH_TYPE GLushort +#else +#define DEFAULT_SOFTWARE_DEPTH_TYPE GLuint +#endif + + +/** + * Max image/surface/texture size. + */ +#define SWRAST_MAX_WIDTH 16384 +#define SWRAST_MAX_HEIGHT 16384 + + /** * \struct SWvertex * \brief Data-structure to handle vertices in the software rasterizer. diff --git a/pixman/configure.ac b/pixman/configure.ac index f39f43739..5eeb6a54e 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -294,6 +294,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) #error "Need GCC >= 3.4 for MMX intrinsics" #endif +#if defined(__clang__) +#error "clang chokes on the inline assembly in pixman-mmx.c" +#endif #include <mmintrin.h> int main () { __m64 v = _mm_cvtsi32_si64 (1); @@ -592,6 +595,51 @@ fi AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes) +dnl ========================================================================== +dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions + +have_mips_dspr2=no +AC_MSG_CHECKING(whether to use MIPS DSPr2 assembler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="-mdspr2 $CFLAGS" + +AC_COMPILE_IFELSE([[ +#if !(defined(__mips__) && __mips_isa_rev >= 2) +#error MIPS DSPr2 is currently only available on MIPS32r2 platforms. 
+#endif +int +main () +{ + int c = 0, a = 0, b = 0; + __asm__ __volatile__ ( + "precr.qb.ph %[c], %[a], %[b] \n\t" + : [c] "=r" (c) + : [a] "r" (a), [b] "r" (b) + ); + return c; +}]], have_mips_dspr2=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(mips-dspr2, + [AC_HELP_STRING([--disable-mips-dspr2], + [disable MIPS DSPr2 fast paths])], + [enable_mips_dspr2=$enableval], [enable_mips_dspr2=auto]) + +if test $enable_mips_dspr2 = no ; then + have_mips_dspr2=disabled +fi + +if test $have_mips_dspr2 = yes ; then + AC_DEFINE(USE_MIPS_DSPR2, 1, [use MIPS DSPr2 assembly optimizations]) +fi + +AM_CONDITIONAL(USE_MIPS_DSPR2, test $have_mips_dspr2 = yes) + +AC_MSG_RESULT($have_mips_dspr2) +if test $enable_mips_dspr2 = yes && test $have_mips_dspr2 = no ; then + AC_MSG_ERROR([MIPS DSPr2 instructions not detected]) +fi + dnl ========================================================================================= dnl Check for GNU-style inline assembly support diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index 286b7cf36..fb7e04723 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -102,5 +102,21 @@ libpixman_1_la_LIBADD += libpixman-iwmmxt.la ASM_CFLAGS_IWMMXT=$(IWMMXT_CFLAGS) endif +# mips dspr2 code +if USE_MIPS_DSPR2 +noinst_LTLIBRARIES += libpixman-mips-dspr2.la +libpixman_mips_dspr2_la_SOURCES = \ + pixman-mips-dspr2.c \ + pixman-mips-dspr2.h \ + pixman-mips-dspr2-asm.S \ + pixman-mips-dspr2-asm.h \ + pixman-mips-memcpy-asm.S +libpixman_mips_dspr2_la_CFLAGS = $(DEP_CFLAGS) +libpixman_mips_dspr2_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LIBADD += libpixman-mips-dspr2.la + +ASM_CFLAGS_mips_dspr2= +endif + .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c index 92942b217..fcf591a99 100644 --- 
a/pixman/pixman/pixman-cpu.c +++ b/pixman/pixman/pixman-cpu.c @@ -427,6 +427,54 @@ pixman_have_arm_iwmmxt (void) #endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ +#if defined(USE_MIPS_DSPR2) + +#if defined (__linux__) /* linux ELF */ + +pixman_bool_t +pixman_have_mips_dspr2 (void) +{ + const char *search_string = "MIPS 74K"; + const char *file_name = "/proc/cpuinfo"; + /* Simple detection of MIPS DSP ASE (revision 2) at runtime for Linux. + * It is based on /proc/cpuinfo, which reveals hardware configuration + * to user-space applications. According to MIPS (early 2010), no similar + * facility is universally available on the MIPS architectures, so it's up + * to individual OSes to provide such. + * + * Only currently available MIPS core that supports DSPr2 is 74K. + */ + + char cpuinfo_line[256]; + + FILE *f = NULL; + + if ((f = fopen (file_name, "r")) == NULL) + return FALSE; + + while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) + { + if (strstr (cpuinfo_line, search_string) != NULL) + { + fclose (f); + return TRUE; + } + } + + fclose (f); + + /* Did not find string in the proc file. 
*/ + return FALSE; +} + +#else /* linux ELF */ + +#define pixman_have_mips_dspr2() FALSE + +#endif /* linux ELF */ + +#endif /* USE_MIPS_DSPR2 */ + #if defined(USE_X86_MMX) || defined(USE_SSE2) /* The CPU detection code needs to be in a file not compiled with * "-mmmx -msse", as gcc would generate CMOV instructions otherwise @@ -696,6 +744,11 @@ _pixman_choose_implementation (void) imp = _pixman_implementation_create_arm_neon (imp); #endif +#ifdef USE_MIPS_DSPR2 + if (pixman_have_mips_dspr2 ()) + imp = _pixman_implementation_create_mips_dspr2 (imp); +#endif + #ifdef USE_VMX if (pixman_have_vmx ()) imp = _pixman_implementation_create_vmx (imp); diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S new file mode 100644 index 000000000..0a4c87e37 --- /dev/null +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#include "pixman-mips-dspr2-asm.h" + +LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001f001f +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + + CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8 + + sh t2, 0(a0) + sh t3, 2(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + sh t1, 0(a0) +3: + j ra + nop + +END(pixman_composite_src_8888_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + */ + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + lhu t0, 0(a1) + lhu t1, 2(a1) + addiu a1, a1, 4 + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lhu t0, 0(a1) + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + j ra + nop + +END(pixman_composite_src_0565_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (x8r8g8b8) + * a2 - w + */ + + beqz a2, 4f + nop + li t9, 0xff000000 
+ srl t8, a2, 3 /* t1 = how many multiples of 8 src pixels */ + beqz t8, 3f /* branch if less than 8 src pixels */ + nop +1: + addiu t8, t8, -1 + beqz t8, 2f + addiu a2, a2, -8 + pref 0, 32(a1) + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + or t0, t0, t9 + or t1, t1, t9 + or t2, t2, t9 + or t3, t3, t9 + or t4, t4, t9 + or t5, t5, t9 + or t6, t6, t9 + or t7, t7, t9 + pref 30, 32(a0) + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + b 1b + addiu a0, a0, 32 +2: + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + or t0, t0, t9 + or t1, t1, t9 + or t2, t2, t9 + or t3, t3, t9 + or t4, t4, t9 + or t5, t5, t9 + or t6, t6, t9 + or t7, t7, t9 + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + beqz a2, 4f + addiu a0, a0, 32 +3: + lw t0, 0(a1) + addiu a1, a1, 4 + addiu a2, a2, -1 + or t1, t0, t9 + sw t1, 0(a0) + bnez a2, 3b + addiu a0, a0, 4 +4: + jr ra + nop + +END(pixman_composite_src_x888_8888_asm_mips) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h new file mode 100644 index 000000000..e07cda470 --- /dev/null +++ b/pixman/pixman/pixman-mips-dspr2-asm.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifndef PIXMAN_MIPS_DSPR2_ASM_H +#define PIXMAN_MIPS_DSPR2_ASM_H + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 + */ +#define LEAF_MIPS32R2(symbol) \ + .globl symbol; \ + .align 2; \ + .type symbol, @function; \ + .ent symbol, 0; \ +symbol: .frame sp, 0, ra; \ + .set push; \ + .set arch=mips32r2; \ + .set noreorder; \ + .set noat; + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2 + */ +#define LEAF_MIPS_DSPR2(symbol) \ +LEAF_MIPS32R2(symbol) \ + .set dspr2; + +/* + * END - mark end of function + */ +#define END(function) \ + .set pop; \ + .end function; \ + .size function,.-function + +/* + * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel + * returned in (out_8888) register. Requires two temporary registers + * (scratch1 and scratch2). 
+ */ +.macro CONVERT_1x0565_TO_1x8888 in_565, \ + out_8888, \ + scratch1, scratch2 + lui \out_8888, 0xff00 + sll \scratch1, \in_565, 0x3 + andi \scratch2, \scratch1, 0xff + ext \scratch1, \in_565, 0x2, 0x3 + or \scratch1, \scratch2, \scratch1 + or \out_8888, \out_8888, \scratch1 + + sll \scratch1, \in_565, 0x5 + andi \scratch1, \scratch1, 0xfc00 + srl \scratch2, \in_565, 0x1 + andi \scratch2, \scratch2, 0x300 + or \scratch2, \scratch1, \scratch2 + or \out_8888, \out_8888, \scratch2 + + andi \scratch1, \in_565, 0xf800 + srl \scratch2, \scratch1, 0x5 + andi \scratch2, \scratch2, 0xff00 + or \scratch1, \scratch1, \scratch2 + sll \scratch1, \scratch1, 0x8 + or \out_8888, \out_8888, \scratch1 +.endm + +/* + * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels + * returned in (out1_8888 and out2_8888) registers. Requires four scratch + * registers (scratch1 ... scratch4). It also requires maskG and maskB for + * color component extractions. These masks must have following values: + * li maskG, 0x07e007e0 + * li maskB, 0x001F001F + */ +.macro CONVERT_2x0565_TO_2x8888 in1_565, in2_565, \ + out1_8888, out2_8888, \ + maskG, maskB, \ + scratch1, scratch2, scratch3, scratch4 + sll \scratch1, \in1_565, 16 + or \scratch1, \scratch1, \in2_565 + lui \out2_8888, 0xff00 + ori \out2_8888, \out2_8888, 0xff00 + shrl.ph \scratch2, \scratch1, 11 + and \scratch3, \scratch1, \maskG + shra.ph \scratch4, \scratch2, 2 + shll.ph \scratch2, \scratch2, 3 + shll.ph \scratch3, \scratch3, 5 + or \scratch2, \scratch2, \scratch4 + shrl.qb \scratch4, \scratch3, 6 + or \out2_8888, \out2_8888, \scratch2 + or \scratch3, \scratch3, \scratch4 + and \scratch1, \scratch1, \maskB + shll.ph \scratch2, \scratch1, 3 + shra.ph \scratch4, \scratch1, 2 + or \scratch2, \scratch2, \scratch4 + or \scratch3, \scratch2, \scratch3 + precrq.ph.w \out1_8888, \out2_8888, \scratch3 + precr_sra.ph.w \out2_8888, \scratch3, 0 +.endm + +/* + * Conversion of single a8r8g8b8 pixel (in_8888) to single 
r5g6b5 pixel + * returned in (out_565) register. Requires two temporary registers + * (scratch1 and scratch2). + */ +.macro CONVERT_1x8888_TO_1x0565 in_8888, \ + out_565, \ + scratch1, scratch2 + ext \out_565, \in_8888, 0x3, 0x5 + srl \scratch1, \in_8888, 0x5 + andi \scratch1, \scratch1, 0x07e0 + srl \scratch2, \in_8888, 0x8 + andi \scratch2, \scratch2, 0xf800 + or \out_565, \out_565, \scratch1 + or \out_565, \out_565, \scratch2 +.endm + +/* + * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5 + * pixels returned in (out1_565 and out2_565) registers. Requires two temporary + * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB + * for color component extractions. These masks must have following values: + * li maskR, 0xf800f800 + * li maskG, 0x07e007e0 + * li maskB, 0x001F001F + * Value of input register in2_8888 is lost. + */ +.macro CONVERT_2x8888_TO_2x0565 in1_8888, in2_8888, \ + out1_565, out2_565, \ + maskR, maskG, maskB, \ + scratch1, scratch2 + precrq.ph.w \scratch1, \in2_8888, \in1_8888 + precr_sra.ph.w \in2_8888, \in1_8888, 0 + shll.ph \scratch1, \scratch1, 8 + srl \in2_8888, \in2_8888, 3 + and \scratch2, \in2_8888, \maskB + and \scratch1, \scratch1, \maskR + srl \in2_8888, \in2_8888, 2 + and \out2_565, \in2_8888, \maskG + or \out2_565, \out2_565, \scratch2 + or \out1_565, \out2_565, \scratch1 + srl \out2_565, \out1_565, 16 +.endm + +#endif //PIXMAN_MIPS_DSPR2_ASM_H diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c new file mode 100644 index 000000000..e331853b7 --- /dev/null +++ b/pixman/pixman/pixman-mips-dspr2.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" +#include "pixman-mips-dspr2.h" + +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, + uint8_t, 3, uint8_t, 3) + +static const pixman_fast_path_t mips_dspr2_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, mips_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, 
a8r8g8b8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, mips_dspr2_fast_paths); + + return imp; +} diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h new file mode 100644 index 000000000..449c42a56 --- /dev/null +++ b/pixman/pixman/pixman-mips-dspr2.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nemanja Lukic (nlukic@mips.com) + */ + +#ifndef PIXMAN_MIPS_DSPR2_H +#define PIXMAN_MIPS_DSPR2_H + +#include "pixman-private.h" +#include "pixman-inlines.h" + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 +#define DO_FAST_MEMCPY 3 + +void +pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes); + +/****************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + int32_t dst_stride, src_stride; \ + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + src = src_line; \ + src_line += src_stride; \ + \ + if (flags == DO_FAST_MEMCPY) \ + pixman_mips_fast_memcpy (dst, src, width * bpp); \ + else \ + pixman_composite_##name##_asm_mips (dst, src, width); \ + } \ +} + +#endif //PIXMAN_MIPS_DSPR2_H diff --git a/pixman/pixman/pixman-mips-memcpy-asm.S 
b/pixman/pixman/pixman-mips-memcpy-asm.S new file mode 100644 index 000000000..9ad6da537 --- /dev/null +++ b/pixman/pixman/pixman-mips-memcpy-asm.S @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "pixman-mips-dspr2-asm.h" + +/* + * This routine could be optimized for MIPS64. The current code only + * uses MIPS32 instructions. 
+ */ + +#ifdef EB +# define LWHI lwl /* high part is left in big-endian */ +# define SWHI swl /* high part is left in big-endian */ +# define LWLO lwr /* low part is right in big-endian */ +# define SWLO swr /* low part is right in big-endian */ +#else +# define LWHI lwr /* high part is right in little-endian */ +# define SWHI swr /* high part is right in little-endian */ +# define LWLO lwl /* low part is left in big-endian */ +# define SWLO swl /* low part is left in big-endian */ +#endif + +LEAF_MIPS32R2(pixman_mips_fast_memcpy) + + slti AT, a2, 8 + bne AT, zero, $last8 + move v0, a0 /* memcpy returns the dst pointer */ + +/* Test if the src and dst are word-aligned, or can be made word-aligned */ + xor t8, a1, a0 + andi t8, t8, 0x3 /* t8 is a0/a1 word-displacement */ + + bne t8, zero, $unaligned + negu a3, a0 + + andi a3, a3, 0x3 /* we need to copy a3 bytes to make a0/a1 aligned */ + beq a3, zero, $chk16w /* when a3=0 then the dst (a0) is word-aligned */ + subu a2, a2, a3 /* now a2 is the remining bytes count */ + + LWHI t8, 0(a1) + addu a1, a1, a3 + SWHI t8, 0(a0) + addu a0, a0, a3 + +/* Now the dst/src are mutually word-aligned with word-aligned addresses */ +$chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? 
*/ + /* t8 is the byte count after 64-byte chunks */ + + beq a2, t8, $chk8w /* if a2==t8, no 64-byte chunks */ + /* There will be at most 1 32-byte chunk after it */ + subu a3, a2, t8 /* subtract from a2 the reminder */ + /* Here a3 counts bytes in 16w chunks */ + addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ + + addu t0, a0, a2 /* t0 is the "past the end" address */ + +/* + * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past + * the "t0-32" address + * This means: for x=128 the last "safe" a0 address is "t0-160" + * Alternatively, for x=64 the last "safe" a0 address is "t0-96" + * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit + */ + subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ + + pref 0, 0(a1) /* bring the first line of src, addr 0 */ + pref 0, 32(a1) /* bring the second line of src, addr 32 */ + pref 0, 64(a1) /* bring the third line of src, addr 64 */ + pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ +/* In case the a0 > t9 don't use "pref 30" at all */ + sgtu v1, a0, t9 + bgtz v1, $loop16w /* skip "pref 30, 64(a0)" for too short arrays */ + nop +/* otherwise, start with using pref30 */ + pref 30, 64(a0) +$loop16w: + pref 0, 96(a1) + lw t0, 0(a1) + bgtz v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */ + lw t1, 4(a1) + pref 30, 96(a0) /* continue setting up the dest, addr 96 */ +$skip_pref30_96: + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + pref 0, 128(a1) /* bring the next lines of src, addr 128 */ + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + + lw t0, 32(a1) + bgtz v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */ + lw t1, 36(a1) + pref 30, 128(a0) /* continue setting up the dest, addr 128 */ +$skip_pref30_128: + lw t2, 40(a1) + lw t3, 44(a1) + lw t4, 48(a1) + lw t5, 52(a1) + lw t6, 56(a1) + lw t7, 60(a1) + pref 0, 
160(a1) /* bring the next lines of src, addr 160 */ + + sw t0, 32(a0) + sw t1, 36(a0) + sw t2, 40(a0) + sw t3, 44(a0) + sw t4, 48(a0) + sw t5, 52(a0) + sw t6, 56(a0) + sw t7, 60(a0) + + addiu a0, a0, 64 /* adding 64 to dest */ + sgtu v1, a0, t9 + bne a0, a3, $loop16w + addiu a1, a1, 64 /* adding 64 to src */ + move a2, t8 + +/* Here we have src and dest word-aligned but less than 64-bytes to go */ + +$chk8w: + pref 0, 0x0(a1) + andi t8, a2, 0x1f /* is there a 32-byte chunk? */ + /* the t8 is the reminder count past 32-bytes */ + beq a2, t8, $chk1w /* when a2=t8, no 32-byte chunk */ + nop + + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a1, a1, 32 + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + addiu a0, a0, 32 + +$chk1w: + andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ + beq a2, t8, $last8 + subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ + addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ + +/* copying in words (4-byte chunks) */ +$wordCopy_loop: + lw t3, 0(a1) /* the first t3 may be equal t0 ... optimize? */ + addiu a1, a1, 4 + addiu a0, a0, 4 + bne a0, a3, $wordCopy_loop + sw t3, -4(a0) + +/* For the last (<8) bytes */ +$last8: + blez a2, leave + addu a3, a0, a2 /* a3 is the last dst address */ +$last8loop: + lb v1, 0(a1) + addiu a1, a1, 1 + addiu a0, a0, 1 + bne a0, a3, $last8loop + sb v1, -1(a0) + +leave: j ra + nop + +/* + * UNALIGNED case + */ + +$unaligned: + /* got here with a3="negu a0" */ + andi a3, a3, 0x3 /* test if the a0 is word aligned */ + beqz a3, $ua_chk16w + subu a2, a2, a3 /* bytes left after initial a3 bytes */ + + LWHI v1, 0(a1) + LWLO v1, 3(a1) + addu a1, a1, a3 /* a3 may be here 1, 2 or 3 */ + SWHI v1, 0(a0) + addu a0, a0, a3 /* below the dst will be word aligned (NOTE1) */ + +$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? 
*/ + /* t8 is the byte count after 64-byte chunks */ + beq a2, t8, $ua_chk8w /* if a2==t8, no 64-byte chunks */ + /* There will be at most 1 32-byte chunk after it */ + subu a3, a2, t8 /* subtract from a2 the reminder */ + /* Here a3 counts bytes in 16w chunks */ + addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */ + + addu t0, a0, a2 /* t0 is the "past the end" address */ + + subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */ + + pref 0, 0(a1) /* bring the first line of src, addr 0 */ + pref 0, 32(a1) /* bring the second line of src, addr 32 */ + pref 0, 64(a1) /* bring the third line of src, addr 64 */ + pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */ +/* In case the a0 > t9 don't use "pref 30" at all */ + sgtu v1, a0, t9 + bgtz v1, $ua_loop16w /* skip "pref 30, 64(a0)" for too short arrays */ + nop +/* otherwise, start with using pref30 */ + pref 30, 64(a0) +$ua_loop16w: + pref 0, 96(a1) + LWHI t0, 0(a1) + LWLO t0, 3(a1) + LWHI t1, 4(a1) + bgtz v1, $ua_skip_pref30_96 + LWLO t1, 7(a1) + pref 30, 96(a0) /* continue setting up the dest, addr 96 */ +$ua_skip_pref30_96: + LWHI t2, 8(a1) + LWLO t2, 11(a1) + LWHI t3, 12(a1) + LWLO t3, 15(a1) + LWHI t4, 16(a1) + LWLO t4, 19(a1) + LWHI t5, 20(a1) + LWLO t5, 23(a1) + LWHI t6, 24(a1) + LWLO t6, 27(a1) + LWHI t7, 28(a1) + LWLO t7, 31(a1) + pref 0, 128(a1) /* bring the next lines of src, addr 128 */ + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + + LWHI t0, 32(a1) + LWLO t0, 35(a1) + LWHI t1, 36(a1) + bgtz v1, $ua_skip_pref30_128 + LWLO t1, 39(a1) + pref 30, 128(a0) /* continue setting up the dest, addr 128 */ +$ua_skip_pref30_128: + LWHI t2, 40(a1) + LWLO t2, 43(a1) + LWHI t3, 44(a1) + LWLO t3, 47(a1) + LWHI t4, 48(a1) + LWLO t4, 51(a1) + LWHI t5, 52(a1) + LWLO t5, 55(a1) + LWHI t6, 56(a1) + LWLO t6, 59(a1) + LWHI t7, 60(a1) + LWLO t7, 63(a1) + pref 0, 160(a1) /* bring the next lines of src, 
addr 160 */ + + sw t0, 32(a0) + sw t1, 36(a0) + sw t2, 40(a0) + sw t3, 44(a0) + sw t4, 48(a0) + sw t5, 52(a0) + sw t6, 56(a0) + sw t7, 60(a0) + + addiu a0, a0, 64 /* adding 64 to dest */ + sgtu v1, a0, t9 + bne a0, a3, $ua_loop16w + addiu a1, a1, 64 /* adding 64 to src */ + move a2, t8 + +/* Here we have src and dest word-aligned but less than 64-bytes to go */ + +$ua_chk8w: + pref 0, 0x0(a1) + andi t8, a2, 0x1f /* is there a 32-byte chunk? */ + /* the t8 is the reminder count */ + beq a2, t8, $ua_chk1w /* when a2=t8, no 32-byte chunk */ + + LWHI t0, 0(a1) + LWLO t0, 3(a1) + LWHI t1, 4(a1) + LWLO t1, 7(a1) + LWHI t2, 8(a1) + LWLO t2, 11(a1) + LWHI t3, 12(a1) + LWLO t3, 15(a1) + LWHI t4, 16(a1) + LWLO t4, 19(a1) + LWHI t5, 20(a1) + LWLO t5, 23(a1) + LWHI t6, 24(a1) + LWLO t6, 27(a1) + LWHI t7, 28(a1) + LWLO t7, 31(a1) + addiu a1, a1, 32 + + sw t0, 0(a0) + sw t1, 4(a0) + sw t2, 8(a0) + sw t3, 12(a0) + sw t4, 16(a0) + sw t5, 20(a0) + sw t6, 24(a0) + sw t7, 28(a0) + addiu a0, a0, 32 + +$ua_chk1w: + andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */ + beq a2, t8, $ua_smallCopy + subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */ + addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */ + +/* copying in words (4-byte chunks) */ +$ua_wordCopy_loop: + LWHI v1, 0(a1) + LWLO v1, 3(a1) + addiu a1, a1, 4 + addiu a0, a0, 4 /* note: dst=a0 is word aligned here, see NOTE1 */ + bne a0, a3, $ua_wordCopy_loop + sw v1, -4(a0) + +/* Now less than 4 bytes (value in a2) left to copy */ +$ua_smallCopy: + beqz a2, leave + addu a3, a0, a2 /* a3 is the last dst address */ +$ua_smallCopy_loop: + lb v1, 0(a1) + addiu a1, a1, 1 + addiu a0, a0, 1 + bne a0, a3, $ua_smallCopy_loop + sb v1, -1(a0) + + j ra + nop + +END(pixman_mips_fast_memcpy) diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 8fd85776d..bcfeb56f0 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -353,9 +353,16 @@ static __inline__ uint32_t ldl_u(uint32_t 
*p) } static force_inline __m64 -load8888 (uint32_t v) +load8888 (const uint32_t *v) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ()); + return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (*v), _mm_setzero_si64 ()); +} + +static force_inline __m64 +load8888u (const uint32_t *v) +{ + uint32_t l = ldl_u(v); + return load8888(&l); } static force_inline __m64 @@ -364,15 +371,12 @@ pack8888 (__m64 lo, __m64 hi) return _mm_packs_pu16 (lo, hi); } -#ifdef _MSC_VER -#define store8888(v) _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())) -#else -static force_inline uint32_t -store8888 (__m64 v) +static force_inline void +store8888 (uint32_t *dest, __m64 v) { - return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())); + v = pack8888 (v, _mm_setzero_si64()); + *dest = _mm_cvtsi64_si32 (v); } -#endif /* Expand 16 bits positioned at @pos (0-3) of a mmx register into * @@ -475,13 +479,6 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) /* --------------- MMX code patch for fbcompose.c --------------------- */ -#ifdef _MSC_VER -#define combine(src, mask) \ - ((mask) ? 
\ - store8888 (pix_multiply (load8888 (*src), expand_alpha (load8888 (*mask)))) \ - : \ - *src) -#else static force_inline uint32_t combine (const uint32_t *src, const uint32_t *mask) { @@ -489,18 +486,17 @@ combine (const uint32_t *src, const uint32_t *mask) if (mask) { - __m64 m = load8888 (*mask); - __m64 s = load8888 (ssrc); + __m64 m = load8888 (mask); + __m64 s = load8888 (&ssrc); m = expand_alpha (m); s = pix_multiply (s, m); - ssrc = store8888 (s); + store8888 (&ssrc, s); } return ssrc; } -#endif static void mmx_combine_over_u (pixman_implementation_t *imp, @@ -524,9 +520,9 @@ mmx_combine_over_u (pixman_implementation_t *imp, else if (ssrc) { __m64 s, sa; - s = load8888 (ssrc); + s = load8888 (&ssrc); sa = expand_alpha (s); - *dest = store8888 (over (s, sa, load8888 (*dest))); + store8888 (dest, over (s, sa, load8888 (dest))); } ++dest; @@ -552,9 +548,9 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp, __m64 d, da; uint32_t s = combine (src, mask); - d = load8888 (*dest); + d = load8888 (dest); da = expand_alpha (d); - *dest = store8888 (over (d, da, load8888 (s))); + store8888 (dest, over (d, da, load8888 (&s))); ++dest; ++src; @@ -577,13 +573,14 @@ mmx_combine_in_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + x = load8888 (&ssrc); + a = load8888 (dest); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -606,12 +603,13 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); + a = load8888 (&ssrc); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -634,13 +632,14 @@ mmx_combine_out_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; 
+ uint32_t ssrc = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + x = load8888 (&ssrc); + a = load8888 (dest); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -663,14 +662,15 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); + a = load8888 (&ssrc); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -693,14 +693,15 @@ mmx_combine_atop_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, da, d, sia; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sia = expand_alpha (s); sia = negate (sia); da = expand_alpha (d); s = pix_add_mul (s, da, d, sia); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -725,14 +726,15 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, dia, d, sa; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sa = expand_alpha (s); dia = expand_alpha (d); dia = negate (dia); s = pix_add_mul (s, dia, d, sa); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -755,15 +757,16 @@ mmx_combine_xor_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, dia, d, sia; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sia = expand_alpha (s); dia = expand_alpha (d); sia = negate (sia); dia = negate (dia); s = pix_add_mul (s, dia, d, sia); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -786,11 +789,12 @@ 
mmx_combine_add_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, d; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); s = pix_add (s, d); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -814,20 +818,21 @@ mmx_combine_saturate_u (pixman_implementation_t *imp, { uint32_t s = combine (src, mask); uint32_t d = *dest; - __m64 ms = load8888 (s); - __m64 md = load8888 (d); + __m64 ms = load8888 (&s); + __m64 md = load8888 (&d); uint32_t sa = s >> 24; uint32_t da = ~d >> 24; if (sa > da) { - __m64 msa = load8888 (DIV_UN8 (da, sa) << 24); + uint32_t quot = DIV_UN8 (da, sa) << 24; + __m64 msa = load8888 (&quot); msa = expand_alpha (msa); ms = pix_multiply (ms, msa); } md = pix_add (md, ms); - *dest = store8888 (md); + store8888 (dest, md); ++src; ++dest; @@ -849,11 +854,11 @@ mmx_combine_src_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); s = pix_multiply (s, a); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++mask; @@ -874,12 +879,12 @@ mmx_combine_over_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); - *dest = store8888 (in_over (s, sa, a, d)); + store8888 (dest, in_over (s, sa, a, d)); ++src; ++dest; @@ -900,12 +905,12 @@ mmx_combine_over_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); - *dest = store8888 (over (d, da, in (s, a))); + store8888 (dest, over (d, da, in (s, a))); ++src; ++dest; @@ -926,14
+931,14 @@ mmx_combine_in_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); s = pix_multiply (s, a); s = pix_multiply (s, da); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++dest; @@ -954,14 +959,14 @@ mmx_combine_in_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); a = pix_multiply (a, sa); d = pix_multiply (d, a); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -982,15 +987,15 @@ mmx_combine_out_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); da = negate (da); s = pix_multiply (s, a); s = pix_multiply (s, da); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++dest; @@ -1011,15 +1016,15 @@ mmx_combine_out_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); a = pix_multiply (a, sa); a = negate (a); d = pix_multiply (d, a); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1040,9 +1045,9 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha 
(s); @@ -1050,7 +1055,7 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1071,9 +1076,9 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1081,7 +1086,7 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); da = negate (da); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1102,9 +1107,9 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1113,7 +1118,7 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, da = negate (da); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1134,13 +1139,13 @@ mmx_combine_add_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); s = pix_multiply (s, a); d = pix_add (s, d); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1171,7 +1176,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1184,7 +1189,7 @@ mmx_composite_over_n_8888 
(pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); w--; dst++; @@ -1210,7 +1215,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, if (w) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); } } @@ -1237,7 +1242,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1316,7 +1321,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1331,9 +1336,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1352,9 +1357,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, __m64 dest0, dest1; __m64 vdest = *(__m64 *)q; - dest0 = in_over (vsrc, vsrca, load8888 (m0), + dest0 = in_over (vsrc, vsrca, load8888 (&m0), expand8888 (vdest, 0)); - dest1 = in_over (vsrc, vsrca, load8888 (m1), + dest1 = in_over (vsrc, vsrca, load8888 (&m1), expand8888 (vdest, 1)); *(__m64 *)q = pack8888 (dest0, dest1); @@ -1371,9 +1376,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + 
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1408,7 +1413,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask &= 0xff000000; mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); + vmask = load8888 (&mask); while (height--) { @@ -1420,10 +1425,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); w--; dst++; @@ -1448,10 +1453,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); } } @@ -1479,7 +1484,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, mask &= 0xff000000; mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); + vmask = load8888 (&mask); srca = MC (4x00ff); while (height--) @@ -1492,10 +1497,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, srca, vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1570,10 +1576,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, while (w) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = 
load8888 (dst); - *dst = store8888 (in_over (s, srca, vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1621,9 +1628,9 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp, else if (s) { __m64 ms, sa; - ms = load8888 (s); + ms = load8888 (&s); sa = expand_alpha (ms); - *dst = store8888 (over (ms, sa, load8888 (*dst))); + store8888 (dst, over (ms, sa, load8888 (dst))); } dst++; @@ -1664,7 +1671,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1685,10 +1692,10 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, __m64 vsrc0, vsrc1, vsrc2, vsrc3; __m64 vdest; - vsrc0 = load8888 (*(src + 0)); - vsrc1 = load8888 (*(src + 1)); - vsrc2 = load8888 (*(src + 2)); - vsrc3 = load8888 (*(src + 3)); + vsrc0 = load8888 ((src + 0)); + vsrc1 = load8888 ((src + 1)); + vsrc2 = load8888 ((src + 2)); + vsrc3 = load8888 ((src + 3)); vdest = *(__m64 *)dst; @@ -1708,7 +1715,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1751,7 +1758,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1772,9 +1779,9 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)), - load8888 (*dst)); + load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } w--; @@ -1823,11 +1830,11 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 vdest = 
load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in_over ( vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest); - *dst = store8888 (vdest); + store8888 (dst, vdest); } } } @@ -2016,7 +2023,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); while (height--) { @@ -2036,7 +2043,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { @@ -2087,10 +2094,10 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { @@ -2126,7 +2133,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); @@ -2265,7 +2272,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2298,10 +2305,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, if ((a0 & a1 & a2 & a3) == 0xFF) { __m64 vdest; - vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0); - vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1); - vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2); - vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3); + 
vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0); + vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1); + vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2); + vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2309,10 +2316,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3); + vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2326,7 +2333,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2373,10 +2380,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); + store8888 (dst, over_rev_non_pre (s, d)); w--; dst++; @@ -2385,7 +2392,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, while (w >= 2) { - uint64_t s0, s1; + uint32_t s0, s1; unsigned char a0, a1; __m64 d0, d1; @@ -2397,8 +2404,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, if ((a0 & a1) == 0xFF) { - d0 = invert_colors 
(load8888 (s0)); - d1 = invert_colors (load8888 (s1)); + d0 = invert_colors (load8888 (&s0)); + d1 = invert_colors (load8888 (&s1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2406,8 +2413,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0)); - d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1)); + d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2419,10 +2426,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); + store8888 (dst, over_rev_non_pre (s, d)); } } @@ -2450,7 +2457,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2467,7 +2474,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2489,10 +2496,10 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)q; - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (in_over (vsrc, vsrca, 
load8888 (m3), expand565 (vdest, 3)), vdest, 3); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)q = vdest; } @@ -2510,7 +2517,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2546,7 +2553,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, sa = src >> 24; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2578,10 +2585,10 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, __m64 vmask; __m64 vdest; - vmask = load8888 (ldl_u((uint32_t *)mask)); - vdest = load8888 (*(uint32_t *)dst); + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); - *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest)); dst += 4; mask += 4; @@ -2648,7 +2655,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, uint32_t *s = (uint32_t *)src; uint32_t *d = (uint32_t *)dst; - *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d))); + store8888 (d, in (load8888u (s), load8888 (d))); w -= 4; dst += 4; @@ -2696,7 +2703,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, if (src == 0) return; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2729,10 +2736,10 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, __m64 vmask; __m64 vdest; - vmask = load8888 (ldl_u((uint32_t *)mask)); - vdest 
= load8888 (*(uint32_t *)dst); + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); - *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest)); dst += 4; mask += 4; @@ -3073,19 +3080,20 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 s = load8888 (*src | 0xff000000); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); if (m == 0xff) { - *dst = store8888 (s); + store8888 (dst, s); } else { __m64 sa = expand_alpha (s); __m64 vm = expand_alpha_rev (to_m64 (m)); - __m64 vdest = in_over (s, sa, vm, load8888 (*dst)); + __m64 vdest = in_over (s, sa, vm, load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } } diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 856038547..9d96a9312 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -559,6 +559,11 @@ pixman_implementation_t * _pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); #endif +#ifdef USE_MIPS_DSPR2 +pixman_implementation_t * +_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback); +#endif + #ifdef USE_VMX pixman_implementation_t * _pixman_implementation_create_vmx (pixman_implementation_t *fallback); diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index ba7f30716..95513ba10 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -626,6 +626,7 @@ tests_tbl[] = { "over_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "over_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, { "over_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "over_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "over_8888_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, 
PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, { "over_x888_8_0565", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, { "over_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 }, @@ -649,6 +650,7 @@ tests_tbl[] = { "over_8888_n_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_x8r8g8b8 }, { "over_8888_n_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_r5g6b5 }, { "over_8888_n_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a1r5g5b5 }, + { "over_x888_n_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 }, { "outrev_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, { "outrev_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, { "outrev_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 }, |