svn merge ^/branches/released .

author: marha <marha@users.sourceforge.net> 2011-02-14 14:46:43 +0000
committer: marha <marha@users.sourceforge.net> 2011-02-14 14:46:43 +0000
commit: 0f63823d473bf956ec5bddf06da6e4b591e994f8 (patch)
tree: c8fa2b17686616a7443c583e09bbf6eceec883c2 /mesalib/src/mesa
parent: 9acb2b3cd11b530debce5008074fa03587ac3331 (diff)
parent: 026b85e62b3d8812afb5f04df29aeac28c52b331 (diff)
download: vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.tar.gz
vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.tar.bz2
vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.zip
10 files changed, 947 insertions, 923 deletions
diff --git a/mesalib/src/mesa/Makefile b/mesalib/src/mesa/Makefile
index a6025e990..36cbdd994 100644
--- a/mesalib/src/mesa/Makefile
+++ b/mesalib/src/mesa/Makefile
@@ -1,188 +1,224 @@
-# src/mesa/Makefile
-
-TOP = ../..
-include $(TOP)/configs/current
-
-MESA_LIBS := libmesa.a libmesagallium.a
-DEPENDS := depend
-
-MESA_OBJ_DIR := .
-
-
-include sources.mak
-
-# adjust object dirs
-MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
-MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
-
-# define preprocessor flags
-MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
-
-# append include dirs
-MESA_CPPFLAGS += $(INCLUDE_DIRS)
-
-# tidy compiler flags
-CFLAGS := $(filter-out $(DEFINES), $(CFLAGS))
-CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS))
-
-# LLVM is needed for the state tracker
-MESA_CFLAGS := $(LLVM_CFLAGS)
-
-define mesa-cc-c
-	@mkdir -p $(dir $@)
-	$(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $(CFLAGS)
-endef
-
-define mesa-cxx-c
-	@mkdir -p $(dir $@)
-	$(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $(CXXFLAGS)
-endef
-
-$(MESA_OBJ_DIR)/%.o: %.c
-	$(call mesa-cc-c,MESA)
-
-$(MESA_OBJ_DIR)/%.o: %.cpp
-	$(call mesa-cxx-c,MESA)
-
-$(MESA_OBJ_DIR)/%.o: %.S
-	$(call mesa-cc-c,MESA)
-
-# Default: build dependencies, then asm_subdirs, GLSL built-in lib,
-# then convenience libs (.a) and finally the device drivers:
-default: $(DEPENDS) asm_subdirs $(MESA_LIBS) driver_subdirs
-
-main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
-	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
-
-main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
-	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@
-
-######################################################################
-# Helper libraries used by many drivers:
-
-# Make archive of core mesa object files
-libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS)
-	@ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS)
-
-# Make archive of subset of core mesa object files for gallium
-libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
-	@ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
-
-######################################################################
-# Device drivers
-driver_subdirs: $(MESA_LIBS)
-	@ (cd drivers && $(MAKE))
-
-
-######################################################################
-# Assembly subdirs
-asm_subdirs:
-	@ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \
-		(cd x86 && $(MAKE)) || exit 1 ; \
-	fi
-	@ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \
-		(cd x86 && $(MAKE)) || exit 1 ; \
-		(cd x86-64 && $(MAKE)) || exit 1 ; \
-	fi
-
-
-######################################################################
-# Dependency generation
-
-depend: $(ALL_SOURCES)
-	@ echo "running $(MKDEP)"
-	@ touch depend
-	@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
-		$(ALL_SOURCES) > /dev/null 2>/dev/null
-
-######################################################################
-# Installation rules
-
-# this isn't fleshed out yet but is probably the way to go in the future
-new_install:
-	(cd drivers && $(MAKE) install)
-
-
-# XXX replace this with new_install above someday
-install: default
-	@for driver in $(DRIVER_DIRS) ; do \
-	  case "$$driver" in \
-	    osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \
-	              $(MAKE) install-headers install-osmesa || exit 1 ; \
-	            else \
-	              $(MAKE) install-osmesa || exit 1 ; \
-	            fi ;; \
-	    dri)    $(MAKE) install-libgl install-dri || exit 1 ;; \
-	    *)      $(MAKE) install-libgl || exit 1 ;; \
-	  esac ; \
-	done
-
-pcedit = \
-	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
-	-e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
-	-e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
-	-e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \
-
-
-gl_pcedit = sed \
-	$(pcedit) \
-	-e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \
-	-e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \
-	-e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \
-	-e 's,@GLX_TLS@,$(GLX_TLS),' \
-	-e 's,@GL_LIB@,$(GL_LIB),'
-
-gl.pc: gl.pc.in
-	$(gl_pcedit) $< > $@
-
-osmesa_pcedit = sed \
-	$(pcedit) \
-	-e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \
-	-e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \
-	-e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),'
-
-osmesa.pc: osmesa.pc.in
-	$(osmesa_pcedit) $< > $@
-
-install-headers:
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
-	$(INSTALL) -m 644 $(TOP)/include/GL/*.h \
-		$(DESTDIR)$(INSTALL_INC_DIR)/GL
-
-install-libgl: default gl.pc install-headers
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \
-		$(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-install-osmesa: default osmesa.pc
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \
-		$(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-install-dri: default
-	cd drivers/dri && $(MAKE) install
-
-
-
-# Emacs tags
-tags:
-	etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
-
-clean:
-	-rm -f */*.o
-	-rm -f */*/*.o
-	-rm -f depend depend.bak libmesa.a libmesagallium.a
-	-rm -f drivers/*/*.o
-	-rm -f *.pc
-	-@cd drivers/dri && $(MAKE) clean
-	-@cd drivers/x11 && $(MAKE) clean
-	-@cd drivers/osmesa && $(MAKE) clean
-	-@cd x86 && $(MAKE) clean
-	-@cd x86-64 && $(MAKE) clean
-
-
--include $(DEPENDS)
+# src/mesa/Makefile
+
+TOP = ../..
+include $(TOP)/configs/current
+
+MESA_LIBS := libmesa.a libmesagallium.a
+DEPENDS := depend
+
+MESA_OBJ_DIR := .
+DRICORE_OBJ_DIR := objs-dricore
+
+include sources.mak
+
+# adjust object dirs
+MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
+MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
+
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
+
+# define preprocessor flags
+MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
+
+# append include dirs
+MESA_CPPFLAGS += $(INCLUDE_DIRS)
+
+DRICORE_CPPFLAGS = $(MESA_CPPFLAGS)
+
+# tidy compiler flags
+CFLAGS := $(filter-out $(DEFINES), $(CFLAGS))
+CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS))
+
+# LLVM is needed for the state tracker
+MESA_CFLAGS := $(LLVM_CFLAGS) $(CFLAGS)
+DRICORE_CFLAGS := $(LLVM_CFLAGS) $(DRI_CFLAGS)
+
+MESA_CXXFLAGS := $(LLVM_CFLAGS) $(CXXFLAGS)
+DRICORE_CXXFLAGS := $(LLVM_CFLAGS) $(DRI_CXXFLAGS)
+
+define mesa-cc-c
+	@mkdir -p $(dir $@)
+	$(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS)
+endef
+
+define mesa-cxx-c
+	@mkdir -p $(dir $@)
+	$(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $($(1)_CXXFLAGS)
+endef
+
+$(MESA_OBJ_DIR)/%.o: %.c
+	$(call mesa-cc-c,MESA)
+
+$(MESA_OBJ_DIR)/%.o: %.cpp
+	$(call mesa-cxx-c,MESA)
+
+$(MESA_OBJ_DIR)/%.o: %.S
+	$(call mesa-cc-c,MESA)
+
+$(DRICORE_OBJ_DIR)/%.o: %.c
+	$(call mesa-cc-c,DRICORE)
+
+$(DRICORE_OBJ_DIR)/%.o: %.cpp
+	$(call mesa-cxx-c,DRICORE)
+
+$(DRICORE_OBJ_DIR)/%.o: %.S
+	$(call mesa-cc-c,DRICORE)
+
+# Default: build dependencies, then asm_subdirs, GLSL built-in lib,
+# then convenience libs (.a) and finally the device drivers:
+default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
+
+main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
+	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
+
+main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
+	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@
+
+######################################################################
+# Helper libraries used by many drivers:
+
+# Make archive of core mesa object files
+libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS)
+	@ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS)
+
+# Shared dricore library for classic DRI drivers
+$(TOP)/$(LIB_DIR)/libdricore.so: $(DRICORE_OBJECTS) $(DRICORE_GLSL_LIBS)
+	@$(MKLIB) -o $@ -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
+		-cplusplus -noprefix \
+		-install $(TOP)/$(LIB_DIR) -id $(DRI_DRIVER_INSTALL_DIR)/$@.dylib \
+		$(DRICORE_LIB_DEPS) $(DRICORE_OBJECTS)
+
+# Make archive of subset of core mesa object files for gallium
+libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
+	@ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
+
+######################################################################
+# Device drivers
+driver_subdirs: $(MESA_LIBS) $(DRICORE_LIBS)
+	@ (cd drivers && $(MAKE))
+
+
+######################################################################
+# Assembly subdirs
+asm_subdirs:
+	@ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \
+		(cd x86 && $(MAKE)) || exit 1 ; \
+	fi
+	@ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \
+		(cd x86 && $(MAKE)) || exit 1 ; \
+		(cd x86-64 && $(MAKE)) || exit 1 ; \
+	fi
+
+
+######################################################################
+# Dependency generation
+
+depend: $(ALL_SOURCES)
+	@ echo "running $(MKDEP)"
+	@ touch depend
+	@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+		$(ALL_SOURCES) > /dev/null 2>/dev/null
+
+######################################################################
+# Installation rules
+
+# this isn't fleshed out yet but is probably the way to go in the future
+new_install:
+	(cd drivers && $(MAKE) install)
+
+ifneq (,$(DRICORE_LIBS))
+DRICORE_INSTALL_TARGET = install-dricore
+endif
+
+# XXX replace this with new_install above someday
+install: default $(DRICORE_INSTALL_TARGET)
+	@for driver in $(DRIVER_DIRS) ; do \
+	  case "$$driver" in \
+	    osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \
+	              $(MAKE) install-headers install-osmesa || exit 1 ; \
+	            else \
+	              $(MAKE) install-osmesa || exit 1 ; \
+	            fi ;; \
+	    dri)    $(MAKE) install-libgl install-dri || exit 1 ;; \
+	    *)      $(MAKE) install-libgl || exit 1 ;; \
+	  esac ; \
+	done
+
+pcedit = \
+	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
+	-e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
+	-e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
+	-e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \
+
+
+gl_pcedit = sed \
+	$(pcedit) \
+	-e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \
+	-e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \
+	-e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \
+	-e 's,@GLX_TLS@,$(GLX_TLS),' \
+	-e 's,@GL_LIB@,$(GL_LIB),'
+
+gl.pc: gl.pc.in
+	$(gl_pcedit) $< > $@
+
+osmesa_pcedit = sed \
+	$(pcedit) \
+	-e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \
+	-e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \
+	-e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),'
+
+osmesa.pc: osmesa.pc.in
+	$(osmesa_pcedit) $< > $@
+
+install-headers:
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
+	$(INSTALL) -m 644 $(TOP)/include/GL/*.h \
+		$(DESTDIR)$(INSTALL_INC_DIR)/GL
+
+install-libgl: default gl.pc install-headers
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \
+		$(DESTDIR)$(INSTALL_LIB_DIR)
+	$(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+
+install-osmesa: default osmesa.pc
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \
+		$(DESTDIR)$(INSTALL_LIB_DIR)
+	$(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+
+install-dri: default
+	cd drivers/dri && $(MAKE) install
+
+# We don't need MINSTALL here because we're not installing symbolic links
+install-dricore: default
+	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+	$(INSTALL) -m 755 $(DRICORE_LIBS) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
+
+clean-dricore:
+	-rm -f libdricore.so
+	-rm -f $(DRICORE_LIBS)
+	-rm -rf $(DRICORE_OBJ_DIR)
+
+clean: clean-dricore
+	-rm -f */*.o
+	-rm -f */*/*.o
+	-rm -f depend depend.bak libmesa.a libmesagallium.a
+	-rm -f drivers/*/*.o
+	-rm -f *.pc
+	-@cd drivers/dri && $(MAKE) clean
+	-@cd drivers/x11 && $(MAKE) clean
+	-@cd drivers/osmesa && $(MAKE) clean
+	-@cd x86 && $(MAKE) clean
+	-@cd x86-64 && $(MAKE) clean
+
+
+-include $(DEPENDS)
diff --git a/mesalib/src/mesa/drivers/dri/Makefile.template b/mesalib/src/mesa/drivers/dri/Makefile.template
index 6be554af7..588210f8a 100644
--- a/mesalib/src/mesa/drivers/dri/Makefile.template
+++ b/mesalib/src/mesa/drivers/dri/Makefile.template
@@ -1,7 +1,5 @@
 # -*-makefile-*-
 
-MESA_MODULES = $(TOP)/src/mesa/libmesa.a
-
 COMMON_GALLIUM_SOURCES = \
         ../common/utils.c \
         ../common/vblank.c \
@@ -39,13 +37,13 @@ CXXFLAGS += $(API_DEFINES)
 ##### RULES #####
 
 .c.o:
-	$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+	$(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@
 
 .cpp.o:
-	$(CC) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
+	$(CC) -c $(INCLUDES) $(DRI_CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
 
 .S.o:
-	$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+	$(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@
 
 
 ##### TARGETS #####
@@ -57,10 +55,10 @@ default: subdirs lib
 lib: symlinks subdirs depend
 	@$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME)
 
-$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \
+$(LIBNAME): $(OBJECTS) $(EXTRA_MODULES) $(MESA_MODULES) Makefile \
 		$(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
 	$(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
-		$(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
+		$(OBJECTS) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
 	$(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS)
 	@rm -f $@.test
 	mv -f $@.tmp $@
diff --git a/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c b/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
index aaed87028..256ffcc38 100644
--- a/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
+++ b/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
@@ -62,7 +62,6 @@
 #include "swrast/s_depth.h"
 #include "swrast/s_lines.h"
 #include "swrast/s_triangle.h"
-#include "swrast/s_trispan.h"
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
diff --git a/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
index 36ebe9f31..02f0dfacd 100644
--- a/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
+++ b/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
@@ -61,7 +61,6 @@
 #include "swrast/s_depth.h"
 #include "swrast/s_lines.h"
 #include "swrast/s_triangle.h"
-#include "swrast/s_trispan.h"
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index 7199a05fb..020595bd0 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -2103,8 +2103,6 @@ struct gl_shader
    GLint RefCount;  /**< Reference count */
    GLboolean DeletePending;
    GLboolean CompileStatus;
-   GLboolean Main;  /**< shader defines main() */
-   GLboolean UnresolvedRefs;
    const GLchar *Source;  /**< Source code string */
    GLuint SourceChecksum;       /**< for debug/logging purposes */
    struct gl_program *Program;  /**< Post-compile assembly code */
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 77946ad13..aa5f007e9 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -2754,6 +2754,9 @@ ir_to_mesa_visitor::copy_propagate(void)
 	     */
 	    for (int r = 0; r < this->next_temp; r++) {
 	       for (int c = 0; c < 4; c++) {
+		  if (!acp[4 * r + c])
+		     continue;
+
 		  if (acp[4 * r + c]->src_reg[0].file == PROGRAM_OUTPUT)
 		     acp[4 * r + c] = NULL;
 	       }
diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
index b778ecf0b..2934f9ffb 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
@@ -448,11 +448,14 @@ st_finish_render_texture(struct gl_context *ctx,
  * Validate a renderbuffer attachment for a particular set of bindings.
  */
 static GLboolean
-st_validate_attachment(struct pipe_screen *screen,
+st_validate_attachment(struct gl_context *ctx,
+		       struct pipe_screen *screen,
 		       const struct gl_renderbuffer_attachment *att,
 		       unsigned bindings)
 {
    const struct st_texture_object *stObj = st_texture_object(att->Texture);
+   enum pipe_format format;
+   gl_format texFormat;
 
    /* Only validate texture attachments for now, since
     * st_renderbuffer_alloc_storage makes sure that
@@ -464,7 +467,20 @@ st_validate_attachment(struct pipe_screen *screen,
    if (!stObj)
       return GL_FALSE;
 
-   return screen->is_format_supported(screen, stObj->pt->format,
+   format = stObj->pt->format;
+   texFormat =
+      stObj->base.Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+
+   /* If the encoding is sRGB and sRGB rendering cannot be enabled,
+    * check for linear format support instead.
+    * Later when we create a surface, we change the format to a linear one. */
+   if (!ctx->Const.sRGBCapable &&
+       _mesa_get_format_color_encoding(texFormat) == GL_SRGB) {
+      const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat);
+      format = st_mesa_format_to_pipe_format(linearFormat);
+   }
+
+   return screen->is_format_supported(screen, format,
                                       PIPE_TEXTURE_2D,
                                       stObj->pt->nr_samples, bindings, 0);
 }
@@ -528,20 +544,23 @@ st_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
       return;
    }
 
-   if (!st_validate_attachment(screen,
+   if (!st_validate_attachment(ctx,
+                               screen,
                                depth,
 			       PIPE_BIND_DEPTH_STENCIL)) {
       fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
       return;
    }
-   if (!st_validate_attachment(screen,
+   if (!st_validate_attachment(ctx,
+                               screen,
                                stencil,
 			       PIPE_BIND_DEPTH_STENCIL)) {
       fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
       return;
    }
    for (i = 0; i < ctx->Const.MaxColorAttachments; i++) {
-      if (!st_validate_attachment(screen,
+      if (!st_validate_attachment(ctx,
+                                  screen,
 				  &fb->Attachment[BUFFER_COLOR0 + i],
 				  PIPE_BIND_RENDER_TARGET)) {
 	 fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
diff --git a/mesalib/src/mesa/state_tracker/st_manager.c b/mesalib/src/mesa/state_tracker/st_manager.c
index 3b54eb7b8..5684e9768 100644
--- a/mesalib/src/mesa/state_tracker/st_manager.c
+++ b/mesalib/src/mesa/state_tracker/st_manager.c
@@ -426,19 +426,13 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi)
    struct gl_config mode;
    gl_buffer_index idx;
 
+   if (!stfbi)
+      return NULL;
+
    stfb = CALLOC_STRUCT(st_framebuffer);
    if (!stfb)
       return NULL;
 
-   /* for FBO-only context */
-   if (!stfbi) {
-      struct gl_framebuffer *base = _mesa_get_incomplete_framebuffer();
-
-      stfb->Base = *base;
-
-      return stfb;
-   }
-
    st_visual_to_context_mode(stfbi->visual, &mode);
    _mesa_initialize_window_framebuffer(&stfb->Base, &mode);
 
@@ -764,7 +758,8 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
          ret = _mesa_make_current(st->ctx, &stdraw->Base, &stread->Base);
       }
       else {
-         ret = FALSE;
+         struct gl_framebuffer *incomplete = _mesa_get_incomplete_framebuffer();
+         ret = _mesa_make_current(st->ctx, incomplete, incomplete);
       }
 
       st_framebuffer_reference(&stdraw, NULL);
diff --git a/mesalib/src/mesa/swrast/s_trispan.h b/mesalib/src/mesa/swrast/s_trispan.h
deleted file mode 100644
index 15207e863..000000000
--- a/mesalib/src/mesa/swrast/s_trispan.h
+++ /dev/null
@@ -1,31 +0,0 @@
-
-/*
- * Mesa 3-D graphics library
- * Version:  3.5
- *
- * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef S_TRISPAN_H
-#define S_TRISPAN_H
-
-
-#endif /* S_TRISPAN_H */
diff --git a/mesalib/src/mesa/x86/read_rgba_span_x86.S b/mesalib/src/mesa/x86/read_rgba_span_x86.S
index 817729973..04571afb7 100644
--- a/mesalib/src/mesa/x86/read_rgba_span_x86.S
+++ b/mesalib/src/mesa/x86/read_rgba_span_x86.S
@@ -1,678 +1,686 @@
-/*
- * (C) Copyright IBM Corporation 2004
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
- 
-/**
- * \file read_rgba_span_x86.S
- * Optimized routines to transfer pixel data from the framebuffer to a
- * buffer in main memory.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
-	.file	"read_rgba_span_x86.S"
-#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
-/* Kevin F. Quinn 2nd July 2006
- * Replaced data segment constants with text-segment instructions.
- */
-#define	LOAD_MASK(mvins,m1,m2) \
-   	pushl	$0xff00ff00 ;\
-   	pushl	$0xff00ff00 ;\
-   	pushl	$0xff00ff00 ;\
-   	pushl	$0xff00ff00 ;\
-	mvins	(%esp), m1	;\
-   	pushl	$0x00ff0000 ;\
-   	pushl	$0x00ff0000 ;\
-   	pushl	$0x00ff0000 ;\
-   	pushl	$0x00ff0000 ;\
-	mvins	(%esp), m2	;\
-	addl	$32, %esp
-
-/* I implemented these as macros because they appear in several places,
- * and I've tweaked them a number of times.  I got tired of changing every
- * place they appear. :)
- */
-
-#define DO_ONE_PIXEL() \
-	movl	(%ebx), %eax ; \
-	addl	$4, %ebx ; \
-	bswap	%eax          /* ARGB -> BGRA */ ; \
-	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
-	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
-	addl	$4, %ecx
-
-#define DO_ONE_LAST_PIXEL() \
-	movl	(%ebx), %eax ; \
-	bswap	%eax          /* ARGB -> BGRA */ ; \
-	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
-	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
-
-
-/**
- * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
- * 
- * \warning
- * This function assumes that the caller will issue the EMMS instruction
- * at the correct places.
- */
-
-.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
-.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
-	.type	_generic_read_RGBA_span_BGRA8888_REV_MMX, @function
-_generic_read_RGBA_span_BGRA8888_REV_MMX:
-	pushl	%ebx
-
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-	LOAD_MASK(movq,%mm1,%mm2)
-
-	movl	8(%esp), %ebx	/* source pointer */
-	movl	16(%esp), %edx	/* number of pixels to copy */
-	movl	12(%esp), %ecx	/* destination pointer */
-
-	testl	%edx, %edx
-	jle	.L20		/* Bail if there's nothing to do. */
-
-	movl	%ebx, %eax
-
-	negl	%eax
-	sarl	$2, %eax
-	andl	$1, %eax
-	je	.L17
-
-	subl	%eax, %edx
-	DO_ONE_PIXEL()
-.L17:
-
-	/* Would it be faster to unroll this loop once and process 4 pixels
-	 * per pass, instead of just two?
-	 */
-
-	movl	%edx, %eax
-	shrl	%eax
-	jmp	.L18
-.L19:
-	movq	(%ebx), %mm0
-	addl	$8, %ebx
-
-	/* These 9 instructions do what PSHUFB (if there were such an
-	 * instruction) could do in 1. :(
-	 */
-
-	movq	%mm0, %mm3
-	movq	%mm0, %mm4
-
-	pand	%mm2, %mm3
-	psllq	$16, %mm4
-	psrlq	$16, %mm3
-	pand	%mm2, %mm4
-
-	pand	%mm1, %mm0
-	por	%mm4, %mm3
-	por	%mm3, %mm0
-
-	movq	%mm0, (%ecx)
-	addl	$8, %ecx
-	subl	$1, %eax
-.L18:
-	jne	.L19
-
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-
-	/* At this point there are either 1 or 0 pixels remaining to be
-	 * converted.  Convert the last pixel, if needed.
-	 */
-
-	testl	$1, %edx
-	je	.L20
-
-	DO_ONE_LAST_PIXEL()
-
-.L20:
-	popl	%ebx
-	ret
-	.size	_generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
-
-
-/**
- * SSE optimized version of the BGRA8888_REV to RGBA copy routine.  SSE
- * instructions are only actually used to read data from the framebuffer.
- * In practice, the speed-up is pretty small.
- *
- * \todo
- * Do some more testing and determine if there's any reason to have this
- * function in addition to the MMX version.
- *
- * \warning
- * This function assumes that the caller will issue the EMMS instruction
- * at the correct places.
- */
-
-.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
-.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
-	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE, @function
-_generic_read_RGBA_span_BGRA8888_REV_SSE:
-	pushl	%esi
-	pushl	%ebx
-	pushl	%ebp
-
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-
-	LOAD_MASK(movq,%mm1,%mm2)
-
-	movl	16(%esp), %ebx	/* source pointer */
-	movl	24(%esp), %edx	/* number of pixels to copy */
-	movl	20(%esp), %ecx	/* destination pointer */
-
-	testl	%edx, %edx
-	jle	.L35		/* Bail if there's nothing to do. */
-
-	movl	%esp, %ebp
-	subl	$16, %esp
-	andl	$0xfffffff0, %esp
-
-	movl	%ebx, %eax
-	movl	%edx, %esi
-
-	negl	%eax
-	andl	$15, %eax
-	sarl	$2, %eax
-	cmpl	%edx, %eax
-	cmovle	%eax, %esi
-
-	subl	%esi, %edx
-
-	testl	$1, %esi
-	je	.L32
-
-	DO_ONE_PIXEL()
-.L32:
-
-	testl	$2, %esi
-	je	.L31
-
-	movq	(%ebx), %mm0
-	addl	$8, %ebx
-
-	movq	%mm0, %mm3
-	movq	%mm0, %mm4
-	
-	pand	%mm2, %mm3
-	psllq	$16, %mm4
-	psrlq	$16, %mm3
-	pand	%mm2, %mm4
-
-	pand	%mm1, %mm0
-	por	%mm4, %mm3
-	por	%mm3, %mm0
-
-	movq	%mm0, (%ecx)
-	addl	$8, %ecx
-.L31:
-
-	movl	%edx, %eax
-	shrl	$2, %eax
-	jmp	.L33
-.L34:
-	movaps	(%ebx), %xmm0
-	addl	$16, %ebx
-
-	/* This would be so much better if we could just move directly from
-	 * an SSE register to an MMX register.  Unfortunately, that
-	 * functionality wasn't introduced until SSE2 with the MOVDQ2Q
-	 * instruction.
-	 */
-
-	movaps	%xmm0, (%esp)
-	movq	(%esp), %mm0
-	movq	8(%esp), %mm5
-
-	movq	%mm0, %mm3
-	movq	%mm0, %mm4
-	movq	%mm5, %mm6
-	movq	%mm5, %mm7
-
-	pand	%mm2, %mm3
-	pand	%mm2, %mm6
-
-	psllq	$16, %mm4
-	psllq	$16, %mm7
-
-	psrlq	$16, %mm3
-	psrlq	$16, %mm6
-
-	pand	%mm2, %mm4
-	pand	%mm2, %mm7
-
-	pand	%mm1, %mm0
-	pand	%mm1, %mm5
-
-	por	%mm4, %mm3
-	por	%mm7, %mm6
-
-	por	%mm3, %mm0
-	por	%mm6, %mm5
-
-	movq	%mm0, (%ecx)
-	movq	%mm5, 8(%ecx)
-	addl	$16, %ecx
-
-	subl	$1, %eax
-.L33:
-	jne	.L34
-
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-	movl	%ebp, %esp
-
-	/* At this point there are either [0, 3] pixels remaining to be
-	 * converted.
-	 */
-
-	testl	$2, %edx
-	je	.L36
-
-	movq	(%ebx), %mm0
-	addl	$8, %ebx
-
-	movq	%mm0, %mm3
-	movq	%mm0, %mm4
-	
-	pand	%mm2, %mm3
-	psllq	$16, %mm4
-	psrlq	$16, %mm3
-	pand	%mm2, %mm4
-
-	pand	%mm1, %mm0
-	por	%mm4, %mm3
-	por	%mm3, %mm0
-
-	movq	%mm0, (%ecx)
-	addl	$8, %ecx
-.L36:
-
-	testl	$1, %edx
-	je	.L35
-
-	DO_ONE_LAST_PIXEL()
-.L35:
-	popl	%ebp
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
-
-
-/**
- * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
- */
-
-	.text
-.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
-.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
-	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
-_generic_read_RGBA_span_BGRA8888_REV_SSE2:
-	pushl	%esi
-	pushl	%ebx
-
-	LOAD_MASK(movdqu,%xmm1,%xmm2)
-
-	movl	12(%esp), %ebx	/* source pointer */
-	movl	20(%esp), %edx	/* number of pixels to copy */
-	movl	16(%esp), %ecx	/* destination pointer */
-
-	movl	%ebx, %eax
-	movl	%edx, %esi
-
-	testl	%edx, %edx
-	jle	.L46		/* Bail if there's nothing to do. */
-
-	/* If the source pointer isn't a multiple of 16 we have to process
-	 * a few pixels the "slow" way to get the address aligned for
-	 * the SSE fetch intsructions.
-	 */
-
-	negl	%eax
-	andl	$15, %eax
-	sarl	$2, %eax
-
-	cmpl	%edx, %eax
-	cmovbe	%eax, %esi
-	subl	%esi, %edx
-
-	testl	$1, %esi
-	je	.L41
-
-	DO_ONE_PIXEL()  
-.L41:
-	testl	$2, %esi
-	je	.L40
-
-	movq	(%ebx), %xmm0
-	addl	$8, %ebx
-
-	movdqa	%xmm0, %xmm3
-	movdqa	%xmm0, %xmm4
-	andps	%xmm1, %xmm0
-
-	andps	%xmm2, %xmm3
-	pslldq	$2, %xmm4
-	psrldq	$2, %xmm3
-	andps	%xmm2, %xmm4
-
-	orps	%xmm4, %xmm3
-	orps	%xmm3, %xmm0
-
-	movq	%xmm0, (%ecx)
-	addl	$8, %ecx
-.L40:
-
-	/* Would it be worth having a specialized version of this loop for
-	 * the case where the destination is 16-byte aligned?  That version
-	 * would be identical except that it could use movedqa instead of
-	 * movdqu.
-	 */
-
-	movl	%edx, %eax
-	shrl	$2, %eax
-	jmp	.L42
-.L43:
-	movdqa	(%ebx), %xmm0
-	addl	$16, %ebx
-
-	movdqa	%xmm0, %xmm3
-	movdqa	%xmm0, %xmm4
-	andps	%xmm1, %xmm0
-
-	andps	%xmm2, %xmm3
-	pslldq	$2, %xmm4
-	psrldq	$2, %xmm3
-	andps	%xmm2, %xmm4
-
-	orps	%xmm4, %xmm3
-	orps	%xmm3, %xmm0
-
-	movdqu	%xmm0, (%ecx)
-	addl	$16, %ecx
-	subl	$1, %eax
-.L42:
-	jne	.L43
-
-
-	/* There may be upto 3 pixels remaining to be copied.  Take care
-	 * of them now.  We do the 2 pixel case first because the data
-	 * will be aligned.
-	 */
-
-	testl	$2, %edx
-	je	.L47
-
-	movq	(%ebx), %xmm0
-	addl	$8, %ebx
-        
-	movdqa	%xmm0, %xmm3
-	movdqa	%xmm0, %xmm4
-	andps	%xmm1, %xmm0
-
-	andps	%xmm2, %xmm3
-	pslldq	$2, %xmm4
-	psrldq	$2, %xmm3
-	andps	%xmm2, %xmm4
-
-	orps	%xmm4, %xmm3
-	orps	%xmm3, %xmm0
-
-	movq	%xmm0, (%ecx)
-	addl	$8, %ecx        
-.L47:
-
-	testl	$1, %edx
-	je	.L46
-
-	DO_ONE_LAST_PIXEL()  
-.L46:
-
-	popl	%ebx
-	popl	%esi
-	ret
-	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
-
-
-
-#define MASK_565_L	0x07e0f800	/* lanes 0-1: red mask 0xf800, green mask 0x07e0 */
-#define MASK_565_H	0x0000001f	/* lanes 2-3: blue mask 0x001f, lane 3 cleared */
-/* Each *_L / *_H pair in this group is pushed as the low / high dword of
- * one 64-bit MMX constant, i.e. four 16-bit lanes with lane 0 in the low
- * word of *_L.
- *
- * Setting SCALE_ADJUST to 5 gives a perfect match with the
- * classic C implementation in Mesa.  Setting SCALE_ADJUST
- * to 0 is slightly faster but at a small cost to accuracy.
- */
-#define SCALE_ADJUST	5
-#if SCALE_ADJUST == 5
-#define PRESCALE_L 0x00100001
-#define PRESCALE_H 0x00000200
-#define SCALE_L 0x40C620E8
-#define SCALE_H 0x0000839d
-#elif SCALE_ADJUST == 0
-#define PRESCALE_L 0x00200001
-#define PRESCALE_H 0x00000800
-#define SCALE_L 0x01040108
-#define SCALE_H 0x00000108
-#else
-#error SCALE_ADJUST must either be 5 or 0.
-#endif
-#define ALPHA_L 0x00000000	/* lanes 0-1: nothing OR-ed into these lanes */
-#define ALPHA_H 0x00ff0000	/* lane 3: alpha forced to 0x00ff */
-
-/**
- * MMX optimized version of the RGB565 to RGBA copy routine.
- *
- * Arguments are read from the stack: 4(%esp) is the source pointer,
- * 8(%esp) the destination pointer and 12(%esp) the number of pixels.
- * Four pixels are converted per pass of the main loop; a 2-pixel and a
- * 1-pixel tail handle the remainder.  Every pixel written is 4 bytes with
- * its last byte (alpha) set to 0xff.
- *
- * Uses %eax, %ecx, %edx and %mm0, %mm2-%mm7 as scratch.  EMMS is issued on
- * entry and on exit only when USE_INNER_EMMS is defined.
- *
- * NOTE(review): pshufw and pmulhuw are, as far as I know, SSE-era additions
- * to the MMX instruction set - confirm the caller checks for them.
- */
-
-	.text
-	.globl	_generic_read_RGBA_span_RGB565_MMX
-        .hidden _generic_read_RGBA_span_RGB565_MMX
-	.type	_generic_read_RGBA_span_RGB565_MMX, @function
-
-_generic_read_RGBA_span_RGB565_MMX:
-
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-
-	movl	4(%esp), %eax	/* source pointer */
-	movl	8(%esp), %edx	/* destination pointer */
-	movl	12(%esp), %ecx	/* number of pixels to copy */
-
-	pushl	$MASK_565_H
-	pushl	$MASK_565_L
-	movq	(%esp), %mm5	/* %mm5 = per-lane 5:6:5 component masks */
-	pushl	$PRESCALE_H
-	pushl	$PRESCALE_L
-	movq	(%esp), %mm6	/* %mm6 = per-lane prescale multipliers */
-	pushl	$SCALE_H
-	pushl	$SCALE_L
-	movq	(%esp), %mm7	/* %mm7 = per-lane scale factors */
-	pushl	$ALPHA_H
-	pushl	$ALPHA_L
-	movq	(%esp), %mm3	/* %mm3 = alpha lane constant */
-	addl	$32,%esp	/* drop the eight dwords pushed above */
-
-	sarl	$2, %ecx	/* %ecx = number of 4-pixel groups */
-	jl	.L01		/* Bail early if the count is negative.  NOTE(review): OF is undefined after a shift by more than 1, so "js" looks like the strictly defined test - confirm. */
-	jmp	.L02		/* enter the loop at its test; flags still come from sarl */
-
-.L03:
-	/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
-	 * second pixels into the four words of %mm0 and %mm2.
-      	 */
-
-	movq	(%eax), %mm4
-	addl	$8, %eax
-
-	pshufw	$0x00, %mm4, %mm0
-	pshufw	$0x55, %mm4, %mm2
-
-
-	/* Mask the pixels so that each word of each register contains only
-	 * one color component.
-	 */
-
-	pand	%mm5, %mm0
-	pand	%mm5, %mm2
-
-
-	/* Adjust the component values so that they are as small as possible,
-	 * but large enough so that we can multiply them by an unsigned 16-bit
-	 * number and get a value as large as 0x00ff0000.
- 	 */
-
-	pmullw	%mm6, %mm0
-	pmullw	%mm6, %mm2
-#if SCALE_ADJUST > 0
-	psrlw	$SCALE_ADJUST, %mm0
-	psrlw	$SCALE_ADJUST, %mm2
-#endif
-
-	/* Scale the input component values to be on the range
-	 * [0, 0x00ff0000].  This is the real magic of the whole routine.
-	 */
-
-	pmulhuw	%mm7, %mm0
-	pmulhuw	%mm7, %mm2
-
-
-	/* Always set the alpha value to 0xff.
-	 */
-
- 	por %mm3, %mm0
- 	por %mm3, %mm2
-
-
-	/* Pack the 16-bit values to 8-bit values and store the converted
-	 * pixel data.
-	 */
-
-	packuswb	%mm2, %mm0
-	movq	%mm0, (%edx)
-	addl	$8, %edx
-
-	pshufw	$0xaa, %mm4, %mm0	/* third pixel of the group */
-	pshufw	$0xff, %mm4, %mm2	/* fourth pixel of the group */
-
-	pand	%mm5, %mm0
-	pand	%mm5, %mm2
-	pmullw	%mm6, %mm0
-	pmullw	%mm6, %mm2
-#if SCALE_ADJUST > 0
-	psrlw	$SCALE_ADJUST, %mm0
-	psrlw	$SCALE_ADJUST, %mm2
-#endif
-	pmulhuw	%mm7, %mm0
-	pmulhuw	%mm7, %mm2
-
- 	por %mm3, %mm0
- 	por %mm3, %mm2
-
-	packuswb	%mm2, %mm0
-
-	movq	%mm0, (%edx)
-	addl	$8, %edx
-
-	subl	$1, %ecx
-.L02:
-	jne	.L03		/* loop while the group counter is non-zero */
-
-
-	/* At this point there can be at most 3 pixels left to process.  If
-	 * there is either 2 or 3 left, process 2.
-         */
-
-	movl	12(%esp), %ecx	/* reload the original pixel count */
-	testl	$0x02, %ecx
-	je	.L04
-
-	movd	(%eax), %mm4
-	addl	$4, %eax
-
-	pshufw	$0x00, %mm4, %mm0
-	pshufw	$0x55, %mm4, %mm2
-
-	pand	%mm5, %mm0
-	pand	%mm5, %mm2
-	pmullw	%mm6, %mm0
-	pmullw	%mm6, %mm2
-#if SCALE_ADJUST > 0
-	psrlw	$SCALE_ADJUST, %mm0
-	psrlw	$SCALE_ADJUST, %mm2
-#endif
-	pmulhuw	%mm7, %mm0
-	pmulhuw	%mm7, %mm2
-
- 	por %mm3, %mm0
- 	por %mm3, %mm2
-
-	packuswb	%mm2, %mm0
-
-	movq	%mm0, (%edx)
-	addl	$8, %edx
-
-.L04:
-	/* At this point there can be at most 1 pixel left to process.
-	 * Process it if needed.
-         */
-
-	testl	$0x01, %ecx
-	je	.L01
-
-	movzwl	(%eax), %ecx	/* fetch the final 16-bit pixel */
-	movd	%ecx, %mm4
-
-	pshufw	$0x00, %mm4, %mm0
-
-	pand	%mm5, %mm0
-	pmullw	%mm6, %mm0
-#if SCALE_ADJUST > 0
-	psrlw	$SCALE_ADJUST, %mm0
-#endif
-	pmulhuw	%mm7, %mm0
-
- 	por %mm3, %mm0
-
-	packuswb	%mm0, %mm0
-
-	movd	%mm0, (%edx)	/* store one 4-byte pixel */
-
-.L01:
-#ifdef USE_INNER_EMMS
-	emms
-#endif
-	ret
-#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
-	
-#if defined (__ELF__) && defined (__linux__)
-	.section .note.GNU-stack,"",%progbits	/* empty note section; by convention tells the linker this object does not need an executable stack */
-#endif
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+ 
+/**
+ * \file read_rgba_span_x86.S
+ * Optimized routines to transfer pixel data from the framebuffer to a
+ * buffer in main memory.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+	.file	"read_rgba_span_x86.S"
+#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
+/* Kevin F. Quinn 2nd July 2006
+ * Replaced data segment constants with text-segment instructions.
+ *
+ * LOAD_MASK(mvins, m1, m2) builds two constants on the stack and loads
+ * them with the move instruction given as mvins:
+ *   m1 <- 0xff00ff00 in every dword (bytes 1 and 3 of each pixel)
+ *   m2 <- 0x00ff0000 in every dword (byte 2 of each pixel)
+ * Four dwords (16 bytes) are pushed per mask, so mvins may load either 8
+ * or 16 bytes.  The final addl releases all 32 bytes that were pushed, so
+ * %esp is unchanged once the macro has run.
+ */
+#define	LOAD_MASK(mvins,m1,m2) \
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+   	pushl	$0xff00ff00 ;\
+	mvins	(%esp), m1	;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+   	pushl	$0x00ff0000 ;\
+	mvins	(%esp), m2	;\
+	addl	$32, %esp
+
+/* I implemented these as macros because they appear in several places,
+ * and I've tweaked them a number of times.  I got tired of changing every
+ * place they appear. :)
+ *
+ * Both macros convert one 32-bit pixel: it is loaded from (%ebx), its bytes
+ * are reordered with bswap + rorl, and the result is stored to (%ecx).
+ * %eax is clobbered.  DO_ONE_PIXEL also advances %ebx and %ecx by 4;
+ * DO_ONE_LAST_PIXEL leaves both pointers untouched.
+ *
+ * The last line of each macro must not end in a line continuation,
+ * otherwise whatever follows the definition is spliced into the macro.
+ */
+
+#define DO_ONE_PIXEL() \
+	movl	(%ebx), %eax ; \
+	addl	$4, %ebx ; \
+	bswap	%eax          /* ARGB -> BGRA */ ; \
+	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
+	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
+	addl	$4, %ecx
+
+#define DO_ONE_LAST_PIXEL() \
+	movl	(%ebx), %eax ; \
+	bswap	%eax          /* ARGB -> BGRA */ ; \
+	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
+	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */
+
+
+/**
+ * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Arguments are read from the stack (offsets are relative to %esp after
+ * %ebx has been saved): 8(%esp) is the source pointer, 12(%esp) the
+ * destination pointer and 16(%esp) the number of pixels to copy.
+ *
+ * If bit 2 of the negated source address is set, one pixel is converted
+ * with integer code first; the main loop then converts two pixels per
+ * pass with MMX, and a final odd pixel is again converted with integer
+ * code.  Uses %eax, %ecx, %edx and %mm0-%mm4 as scratch.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places, unless USE_INNER_EMMS is defined.  In that case
+ * EMMS is issued on entry and on every exit path, including the early
+ * bail-out taken when the pixel count is not positive.
+ */
+
+.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
+#endif
+	.type	_generic_read_RGBA_span_BGRA8888_REV_MMX, @function
+_generic_read_RGBA_span_BGRA8888_REV_MMX:
+	pushl	%ebx
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	LOAD_MASK(movq,%mm1,%mm2)
+
+	movl	8(%esp), %ebx	/* source pointer */
+	movl	16(%esp), %edx	/* number of pixels to copy */
+	movl	12(%esp), %ecx	/* destination pointer */
+
+	testl	%edx, %edx
+	jle	.L20		/* Bail if there's nothing to do. */
+
+	movl	%ebx, %eax
+
+	/* %eax = 1 when bit 2 of the negated source address is set, i.e. one
+	 * pixel has to be copied before the 8-byte loads below.
+	 */
+	negl	%eax
+	sarl	$2, %eax
+	andl	$1, %eax
+	je	.L17
+
+	subl	%eax, %edx
+	DO_ONE_PIXEL()
+.L17:
+
+	/* Would it be faster to unroll this loop once and process 4 pixels
+	 * per pass, instead of just two?
+	 */
+
+	movl	%edx, %eax
+	shrl	%eax		/* %eax = number of 2-pixel pairs */
+	jmp	.L18		/* enter the loop at its test; flags come from shrl */
+.L19:
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	/* These 9 instructions do what PSHUFB (if there were such an
+	 * instruction) could do in 1. :(
+	 */
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+	subl	$1, %eax
+.L18:
+	jne	.L19
+
+	/* At this point there are either 1 or 0 pixels remaining to be
+	 * converted.  Convert the last pixel, if needed.
+	 */
+
+	testl	$1, %edx
+	je	.L20
+
+	DO_ONE_LAST_PIXEL()
+
+.L20:
+#ifdef USE_INNER_EMMS
+	/* Common exit: LOAD_MASK has already touched the MMX registers even
+	 * on the early bail-out, so the state is cleared here for every path.
+	 */
+	emms
+#endif
+	popl	%ebx
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
+
+
+/**
+ * SSE optimized version of the BGRA8888_REV to RGBA copy routine.  SSE
+ * instructions are only actually used to read data from the framebuffer.
+ * In practice, the speed-up is pretty small.
+ *
+ * Arguments are read from the stack (offsets are relative to %esp after
+ * %esi, %ebx and %ebp have been saved): 16(%esp) is the source pointer,
+ * 20(%esp) the destination pointer and 24(%esp) the number of pixels.
+ *
+ * Up to 3 leading pixels are converted first so that the source becomes
+ * 16-byte aligned, the main loop then converts 4 pixels per pass, and up
+ * to 3 trailing pixels are converted last.  A 16-byte aligned scratch slot
+ * on the stack is used to move data from %xmm0 into MMX registers; %ebp
+ * holds the stack pointer to restore afterwards.
+ *
+ * \todo
+ * Do some more testing and determine if there's any reason to have this
+ * function in addition to the MMX version.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places, unless USE_INNER_EMMS is defined.  In that case
+ * EMMS is issued on entry and on every exit path, after the last MMX
+ * instruction.
+ */
+
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
+#endif
+	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE:
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+
+	LOAD_MASK(movq,%mm1,%mm2)
+
+	movl	16(%esp), %ebx	/* source pointer */
+	movl	24(%esp), %edx	/* number of pixels to copy */
+	movl	20(%esp), %ecx	/* destination pointer */
+
+	testl	%edx, %edx
+	jle	.L35		/* Bail if there's nothing to do. */
+
+	movl	%esp, %ebp	/* remember %esp; restored after the main loop */
+	subl	$16, %esp
+	andl	$0xfffffff0, %esp	/* 16-byte aligned scratch slot for movaps */
+
+	movl	%ebx, %eax
+	movl	%edx, %esi
+
+	/* %eax = pixels (0-3) needed to make the source 16-byte aligned,
+	 * %esi = that value clamped to the pixel count.
+	 */
+	negl	%eax
+	andl	$15, %eax
+	sarl	$2, %eax
+	cmpl	%edx, %eax
+	cmovle	%eax, %esi
+
+	subl	%esi, %edx	/* %edx = pixels left after the prologue */
+
+	testl	$1, %esi
+	je	.L32
+
+	DO_ONE_PIXEL()
+.L32:
+
+	testl	$2, %esi
+	je	.L31
+
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+	
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+.L31:
+
+	movl	%edx, %eax
+	shrl	$2, %eax	/* %eax = number of 4-pixel groups */
+	jmp	.L33		/* enter the loop at its test; flags come from shrl */
+.L34:
+	movaps	(%ebx), %xmm0
+	addl	$16, %ebx
+
+	/* This would be so much better if we could just move directly from
+	 * an SSE register to an MMX register.  Unfortunately, that
+	 * functionality wasn't introduced until SSE2 with the MOVDQ2Q
+	 * instruction.
+	 */
+
+	movaps	%xmm0, (%esp)
+	movq	(%esp), %mm0
+	movq	8(%esp), %mm5
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+	movq	%mm5, %mm6
+	movq	%mm5, %mm7
+
+	pand	%mm2, %mm3
+	pand	%mm2, %mm6
+
+	psllq	$16, %mm4
+	psllq	$16, %mm7
+
+	psrlq	$16, %mm3
+	psrlq	$16, %mm6
+
+	pand	%mm2, %mm4
+	pand	%mm2, %mm7
+
+	pand	%mm1, %mm0
+	pand	%mm1, %mm5
+
+	por	%mm4, %mm3
+	por	%mm7, %mm6
+
+	por	%mm3, %mm0
+	por	%mm6, %mm5
+
+	movq	%mm0, (%ecx)
+	movq	%mm5, 8(%ecx)
+	addl	$16, %ecx
+
+	subl	$1, %eax
+.L33:
+	jne	.L34
+
+	movl	%ebp, %esp	/* release the aligned scratch slot */
+
+	/* At this point there are between 0 and 3 pixels remaining to be
+	 * converted.
+	 */
+
+	testl	$2, %edx
+	je	.L36
+
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+	
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+.L36:
+
+	testl	$1, %edx
+	je	.L35
+
+	DO_ONE_LAST_PIXEL()
+.L35:
+#ifdef USE_INNER_EMMS
+	/* Common exit: the 2-pixel tail above and LOAD_MASK both use MMX
+	 * registers, so the state is cleared only here, after the last MMX
+	 * instruction on every path.
+	 */
+	emms
+#endif
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
+
+
+/**
+ * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Arguments are read from the stack (offsets are relative to %esp after
+ * %esi and %ebx have been saved): 12(%esp) is the source pointer,
+ * 16(%esp) the destination pointer and 20(%esp) the number of pixels.
+ *
+ * Up to 3 leading pixels are converted first so that the source becomes
+ * 16-byte aligned, the main loop then converts 4 pixels per pass, and up
+ * to 3 trailing pixels are converted last.  Uses %eax, %ecx, %edx and
+ * %xmm0-%xmm4 as scratch; no MMX registers are touched.
+ */
+
+	.text
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#endif
+	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE2:
+	pushl	%esi
+	pushl	%ebx
+
+	LOAD_MASK(movdqu,%xmm1,%xmm2)
+
+	movl	12(%esp), %ebx	/* source pointer */
+	movl	20(%esp), %edx	/* number of pixels to copy */
+	movl	16(%esp), %ecx	/* destination pointer */
+
+	movl	%ebx, %eax
+	movl	%edx, %esi
+
+	testl	%edx, %edx
+	jle	.L46		/* Bail if there's nothing to do. */
+
+	/* If the source pointer isn't a multiple of 16 we have to process
+	 * a few pixels the "slow" way to get the address aligned for
+	 * the SSE fetch instructions.
+	 */
+
+	negl	%eax
+	andl	$15, %eax
+	sarl	$2, %eax	/* %eax = pixels (0-3) until the source is 16-byte aligned */
+
+	cmpl	%edx, %eax
+	cmovbe	%eax, %esi	/* %esi = min(alignment pixels, pixel count) */
+	subl	%esi, %edx	/* %edx = pixels left after the prologue */
+
+	testl	$1, %esi
+	je	.L41
+
+	DO_ONE_PIXEL()  
+.L41:
+	testl	$2, %esi
+	je	.L40
+
+	movq	(%ebx), %xmm0
+	addl	$8, %ebx
+
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movq	%xmm0, (%ecx)
+	addl	$8, %ecx
+.L40:
+
+	/* Would it be worth having a specialized version of this loop for
+	 * the case where the destination is 16-byte aligned?  That version
+	 * would be identical except that it could use movdqa instead of
+	 * movdqu.
+	 */
+
+	movl	%edx, %eax
+	shrl	$2, %eax	/* %eax = number of 4-pixel groups */
+	jmp	.L42		/* enter the loop at its test; flags come from shrl */
+.L43:
+	movdqa	(%ebx), %xmm0
+	addl	$16, %ebx
+
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movdqu	%xmm0, (%ecx)
+	addl	$16, %ecx
+	subl	$1, %eax
+.L42:
+	jne	.L43
+
+
+	/* There may be up to 3 pixels remaining to be copied.  Take care
+	 * of them now.  We do the 2 pixel case first because the data
+	 * will be aligned.
+	 */
+
+	testl	$2, %edx
+	je	.L47
+
+	movq	(%ebx), %xmm0
+	addl	$8, %ebx
+        
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movq	%xmm0, (%ecx)
+	addl	$8, %ecx        
+.L47:
+
+	testl	$1, %edx
+	je	.L46
+
+	DO_ONE_LAST_PIXEL()  
+.L46:
+
+	popl	%ebx
+	popl	%esi
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
+
+
+
+#define MASK_565_L	0x07e0f800	/* lanes 0-1: red mask 0xf800, green mask 0x07e0 */
+#define MASK_565_H	0x0000001f	/* lanes 2-3: blue mask 0x001f, lane 3 cleared */
+/* Each *_L / *_H pair in this group is pushed as the low / high dword of
+ * one 64-bit MMX constant, i.e. four 16-bit lanes with lane 0 in the low
+ * word of *_L.
+ *
+ * Setting SCALE_ADJUST to 5 gives a perfect match with the
+ * classic C implementation in Mesa.  Setting SCALE_ADJUST
+ * to 0 is slightly faster but at a small cost to accuracy.
+ */
+#define SCALE_ADJUST	5
+#if SCALE_ADJUST == 5
+#define PRESCALE_L 0x00100001
+#define PRESCALE_H 0x00000200
+#define SCALE_L 0x40C620E8
+#define SCALE_H 0x0000839d
+#elif SCALE_ADJUST == 0
+#define PRESCALE_L 0x00200001
+#define PRESCALE_H 0x00000800
+#define SCALE_L 0x01040108
+#define SCALE_H 0x00000108
+#else
+#error SCALE_ADJUST must either be 5 or 0.
+#endif
+#define ALPHA_L 0x00000000	/* lanes 0-1: nothing OR-ed into these lanes */
+#define ALPHA_H 0x00ff0000	/* lane 3: alpha forced to 0x00ff */
+
+/**
+ * MMX optimized version of the RGB565 to RGBA copy routine.
+ *
+ * Arguments are read from the stack: 4(%esp) is the source pointer,
+ * 8(%esp) the destination pointer and 12(%esp) the number of pixels.
+ * Four pixels are converted per pass of the main loop; a 2-pixel and a
+ * 1-pixel tail handle the remainder.  Every pixel written is 4 bytes with
+ * its last byte (alpha) set to 0xff.  A negative pixel count returns
+ * without touching memory.
+ *
+ * Uses %eax, %ecx, %edx and %mm0, %mm2-%mm7 as scratch.  EMMS is issued on
+ * entry and on exit only when USE_INNER_EMMS is defined.
+ *
+ * NOTE(review): pshufw and pmulhuw are, as far as I know, SSE-era additions
+ * to the MMX instruction set - confirm the caller checks for them.
+ */
+
+	.text
+	.globl	_generic_read_RGBA_span_RGB565_MMX
+#ifndef USE_DRICORE
+        .hidden _generic_read_RGBA_span_RGB565_MMX
+#endif
+	.type	_generic_read_RGBA_span_RGB565_MMX, @function
+
+_generic_read_RGBA_span_RGB565_MMX:
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+
+	movl	4(%esp), %eax	/* source pointer */
+	movl	8(%esp), %edx	/* destination pointer */
+	movl	12(%esp), %ecx	/* number of pixels to copy */
+
+	pushl	$MASK_565_H
+	pushl	$MASK_565_L
+	movq	(%esp), %mm5	/* %mm5 = per-lane 5:6:5 component masks */
+	pushl	$PRESCALE_H
+	pushl	$PRESCALE_L
+	movq	(%esp), %mm6	/* %mm6 = per-lane prescale multipliers */
+	pushl	$SCALE_H
+	pushl	$SCALE_L
+	movq	(%esp), %mm7	/* %mm7 = per-lane scale factors */
+	pushl	$ALPHA_H
+	pushl	$ALPHA_L
+	movq	(%esp), %mm3	/* %mm3 = alpha lane constant */
+	addl	$32,%esp	/* drop the eight dwords pushed above */
+
+	sarl	$2, %ecx	/* %ecx = number of 4-pixel groups */
+
+	/* Bail early if the count is negative.  The sign flag is tested
+	 * directly: OF is architecturally undefined after a shift by more
+	 * than one bit, so a signed "jl" (SF != OF) is not a defined test here.
+	 */
+	js	.L01
+	jmp	.L02		/* enter the loop at its test; ZF still comes from sarl */
+
+.L03:
+	/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
+	 * second pixels into the four words of %mm0 and %mm2.
+      	 */
+
+	movq	(%eax), %mm4
+	addl	$8, %eax
+
+	pshufw	$0x00, %mm4, %mm0
+	pshufw	$0x55, %mm4, %mm2
+
+
+	/* Mask the pixels so that each word of each register contains only
+	 * one color component.
+	 */
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+
+
+	/* Adjust the component values so that they are as small as possible,
+	 * but large enough so that we can multiply them by an unsigned 16-bit
+	 * number and get a value as large as 0x00ff0000.
+ 	 */
+
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+
+	/* Scale the input component values to be on the range
+	 * [0, 0x00ff0000].  This is the real magic of the whole routine.
+	 */
+
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+
+	/* Always set the alpha value to 0xff.
+	 */
+
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
+
+
+	/* Pack the 16-bit values to 8-bit values and store the converted
+	 * pixel data.
+	 */
+
+	packuswb	%mm2, %mm0
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+	pshufw	$0xaa, %mm4, %mm0	/* third pixel of the group */
+	pshufw	$0xff, %mm4, %mm2	/* fourth pixel of the group */
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
+
+	packuswb	%mm2, %mm0
+
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+	subl	$1, %ecx
+.L02:
+	jne	.L03		/* loop while the group counter is non-zero */
+
+
+	/* At this point there can be at most 3 pixels left to process.  If
+	 * there is either 2 or 3 left, process 2.
+         */
+
+	movl	12(%esp), %ecx	/* reload the original pixel count */
+	testl	$0x02, %ecx
+	je	.L04
+
+	movd	(%eax), %mm4
+	addl	$4, %eax
+
+	pshufw	$0x00, %mm4, %mm0
+	pshufw	$0x55, %mm4, %mm2
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+ 	por %mm3, %mm0
+ 	por %mm3, %mm2
+
+	packuswb	%mm2, %mm0
+
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+.L04:
+	/* At this point there can be at most 1 pixel left to process.
+	 * Process it if needed.
+         */
+
+	testl	$0x01, %ecx
+	je	.L01
+
+	movzwl	(%eax), %ecx	/* fetch the final 16-bit pixel */
+	movd	%ecx, %mm4
+
+	pshufw	$0x00, %mm4, %mm0
+
+	pand	%mm5, %mm0
+	pmullw	%mm6, %mm0
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+#endif
+	pmulhuw	%mm7, %mm0
+
+ 	por %mm3, %mm0
+
+	packuswb	%mm0, %mm0
+
+	movd	%mm0, (%edx)	/* store one 4-byte pixel */
+
+.L01:
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	ret
+	.size	_generic_read_RGBA_span_RGB565_MMX, .-_generic_read_RGBA_span_RGB565_MMX
+#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
+	
+#if defined (__ELF__) && defined (__linux__)
+	.section .note.GNU-stack,"",%progbits	/* empty note section; by convention tells the linker this object does not need an executable stack */
+#endif
author	marha <marha@users.sourceforge.net>	2011-02-14 14:46:43 +0000
committer	marha <marha@users.sourceforge.net>	2011-02-14 14:46:43 +0000
commit	0f63823d473bf956ec5bddf06da6e4b591e994f8 (patch)
tree	c8fa2b17686616a7443c583e09bbf6eceec883c2 /mesalib/src/mesa
parent	9acb2b3cd11b530debce5008074fa03587ac3331 (diff)
parent	026b85e62b3d8812afb5f04df29aeac28c52b331 (diff)
download	vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.tar.gz vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.tar.bz2 vcxsrv-0f63823d473bf956ec5bddf06da6e4b591e994f8.zip