aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/mesa')
-rw-r--r--mesalib/src/mesa/Makefile412
-rw-r--r--mesalib/src/mesa/drivers/dri/Makefile.template12
-rw-r--r--mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c1
-rw-r--r--mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c1
-rw-r--r--mesalib/src/mesa/main/mtypes.h2
-rw-r--r--mesalib/src/mesa/program/ir_to_mesa.cpp3
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_fbo.c29
-rw-r--r--mesalib/src/mesa/state_tracker/st_manager.c15
-rw-r--r--mesalib/src/mesa/swrast/s_trispan.h31
-rw-r--r--mesalib/src/mesa/x86/read_rgba_span_x86.S1364
10 files changed, 947 insertions, 923 deletions
diff --git a/mesalib/src/mesa/Makefile b/mesalib/src/mesa/Makefile
index a6025e990..36cbdd994 100644
--- a/mesalib/src/mesa/Makefile
+++ b/mesalib/src/mesa/Makefile
@@ -1,188 +1,224 @@
-# src/mesa/Makefile
-
-TOP = ../..
-include $(TOP)/configs/current
-
-MESA_LIBS := libmesa.a libmesagallium.a
-DEPENDS := depend
-
-MESA_OBJ_DIR := .
-
-
-include sources.mak
-
-# adjust object dirs
-MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
-MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
-
-# define preprocessor flags
-MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
-
-# append include dirs
-MESA_CPPFLAGS += $(INCLUDE_DIRS)
-
-# tidy compiler flags
-CFLAGS := $(filter-out $(DEFINES), $(CFLAGS))
-CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS))
-
-# LLVM is needed for the state tracker
-MESA_CFLAGS := $(LLVM_CFLAGS)
-
-define mesa-cc-c
- @mkdir -p $(dir $@)
- $(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $(CFLAGS)
-endef
-
-define mesa-cxx-c
- @mkdir -p $(dir $@)
- $(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $(CXXFLAGS)
-endef
-
-$(MESA_OBJ_DIR)/%.o: %.c
- $(call mesa-cc-c,MESA)
-
-$(MESA_OBJ_DIR)/%.o: %.cpp
- $(call mesa-cxx-c,MESA)
-
-$(MESA_OBJ_DIR)/%.o: %.S
- $(call mesa-cc-c,MESA)
-
-# Default: build dependencies, then asm_subdirs, GLSL built-in lib,
-# then convenience libs (.a) and finally the device drivers:
-default: $(DEPENDS) asm_subdirs $(MESA_LIBS) driver_subdirs
-
-main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
- $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
-
-main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
- $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@
-
-######################################################################
-# Helper libraries used by many drivers:
-
-# Make archive of core mesa object files
-libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS)
- @ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS)
-
-# Make archive of subset of core mesa object files for gallium
-libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
- @ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
-
-######################################################################
-# Device drivers
-driver_subdirs: $(MESA_LIBS)
- @ (cd drivers && $(MAKE))
-
-
-######################################################################
-# Assembly subdirs
-asm_subdirs:
- @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \
- (cd x86 && $(MAKE)) || exit 1 ; \
- fi
- @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \
- (cd x86 && $(MAKE)) || exit 1 ; \
- (cd x86-64 && $(MAKE)) || exit 1 ; \
- fi
-
-
-######################################################################
-# Dependency generation
-
-depend: $(ALL_SOURCES)
- @ echo "running $(MKDEP)"
- @ touch depend
- @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
- $(ALL_SOURCES) > /dev/null 2>/dev/null
-
-######################################################################
-# Installation rules
-
-# this isn't fleshed out yet but is probably the way to go in the future
-new_install:
- (cd drivers && $(MAKE) install)
-
-
-# XXX replace this with new_install above someday
-install: default
- @for driver in $(DRIVER_DIRS) ; do \
- case "$$driver" in \
- osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \
- $(MAKE) install-headers install-osmesa || exit 1 ; \
- else \
- $(MAKE) install-osmesa || exit 1 ; \
- fi ;; \
- dri) $(MAKE) install-libgl install-dri || exit 1 ;; \
- *) $(MAKE) install-libgl || exit 1 ;; \
- esac ; \
- done
-
-pcedit = \
- -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
- -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
- -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
- -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \
-
-
-gl_pcedit = sed \
- $(pcedit) \
- -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \
- -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \
- -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \
- -e 's,@GLX_TLS@,$(GLX_TLS),' \
- -e 's,@GL_LIB@,$(GL_LIB),'
-
-gl.pc: gl.pc.in
- $(gl_pcedit) $< > $@
-
-osmesa_pcedit = sed \
- $(pcedit) \
- -e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \
- -e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \
- -e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),'
-
-osmesa.pc: osmesa.pc.in
- $(osmesa_pcedit) $< > $@
-
-install-headers:
- $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
- $(INSTALL) -m 644 $(TOP)/include/GL/*.h \
- $(DESTDIR)$(INSTALL_INC_DIR)/GL
-
-install-libgl: default gl.pc install-headers
- $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
- $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
- $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \
- $(DESTDIR)$(INSTALL_LIB_DIR)
- $(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-install-osmesa: default osmesa.pc
- $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
- $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
- $(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \
- $(DESTDIR)$(INSTALL_LIB_DIR)
- $(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-install-dri: default
- cd drivers/dri && $(MAKE) install
-
-
-
-# Emacs tags
-tags:
- etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
-
-clean:
- -rm -f */*.o
- -rm -f */*/*.o
- -rm -f depend depend.bak libmesa.a libmesagallium.a
- -rm -f drivers/*/*.o
- -rm -f *.pc
- -@cd drivers/dri && $(MAKE) clean
- -@cd drivers/x11 && $(MAKE) clean
- -@cd drivers/osmesa && $(MAKE) clean
- -@cd x86 && $(MAKE) clean
- -@cd x86-64 && $(MAKE) clean
-
-
--include $(DEPENDS)
+# src/mesa/Makefile
+
+TOP = ../..
+include $(TOP)/configs/current
+
+MESA_LIBS := libmesa.a libmesagallium.a
+DEPENDS := depend
+
+MESA_OBJ_DIR := .
+DRICORE_OBJ_DIR := objs-dricore
+
+include sources.mak
+
+# adjust object dirs
+MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
+MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
+
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
+
+# define preprocessor flags
+MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
+
+# append include dirs
+MESA_CPPFLAGS += $(INCLUDE_DIRS)
+
+DRICORE_CPPFLAGS = $(MESA_CPPFLAGS)
+
+# tidy compiler flags
+CFLAGS := $(filter-out $(DEFINES), $(CFLAGS))
+CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS))
+
+# LLVM is needed for the state tracker
+MESA_CFLAGS := $(LLVM_CFLAGS) $(CFLAGS)
+DRICORE_CFLAGS := $(LLVM_CFLAGS) $(DRI_CFLAGS)
+
+MESA_CXXFLAGS := $(LLVM_CFLAGS) $(CXXFLAGS)
+DRICORE_CXXFLAGS := $(LLVM_CFLAGS) $(DRI_CXXFLAGS)
+
+define mesa-cc-c
+ @mkdir -p $(dir $@)
+ $(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS)
+endef
+
+define mesa-cxx-c
+ @mkdir -p $(dir $@)
+ $(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) $($(1)_CXXFLAGS)
+endef
+
+$(MESA_OBJ_DIR)/%.o: %.c
+ $(call mesa-cc-c,MESA)
+
+$(MESA_OBJ_DIR)/%.o: %.cpp
+ $(call mesa-cxx-c,MESA)
+
+$(MESA_OBJ_DIR)/%.o: %.S
+ $(call mesa-cc-c,MESA)
+
+$(DRICORE_OBJ_DIR)/%.o: %.c
+ $(call mesa-cc-c,DRICORE)
+
+$(DRICORE_OBJ_DIR)/%.o: %.cpp
+ $(call mesa-cxx-c,DRICORE)
+
+$(DRICORE_OBJ_DIR)/%.o: %.S
+ $(call mesa-cc-c,DRICORE)
+
+# Default: build dependencies, then asm_subdirs, GLSL built-in lib,
+# then convenience libs (.a) and finally the device drivers:
+default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
+
+main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
+ $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
+
+main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
+ $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@
+
+######################################################################
+# Helper libraries used by many drivers:
+
+# Make archive of core mesa object files
+libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS)
+ @ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS)
+
+# Shared dricore library for classic DRI drivers
+$(TOP)/$(LIB_DIR)/libdricore.so: $(DRICORE_OBJECTS) $(DRICORE_GLSL_LIBS)
+ @$(MKLIB) -o $@ -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
+ -cplusplus -noprefix \
+ -install $(TOP)/$(LIB_DIR) -id $(DRI_DRIVER_INSTALL_DIR)/$@.dylib \
+ $(DRICORE_LIB_DEPS) $(DRICORE_OBJECTS)
+
+# Make archive of subset of core mesa object files for gallium
+libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
+ @ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS)
+
+######################################################################
+# Device drivers
+driver_subdirs: $(MESA_LIBS) $(DRICORE_LIBS)
+ @ (cd drivers && $(MAKE))
+
+
+######################################################################
+# Assembly subdirs
+asm_subdirs:
+ @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \
+ (cd x86 && $(MAKE)) || exit 1 ; \
+ fi
+ @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \
+ (cd x86 && $(MAKE)) || exit 1 ; \
+ (cd x86-64 && $(MAKE)) || exit 1 ; \
+ fi
+
+
+######################################################################
+# Dependency generation
+
+depend: $(ALL_SOURCES)
+ @ echo "running $(MKDEP)"
+ @ touch depend
+ @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+ $(ALL_SOURCES) > /dev/null 2>/dev/null
+
+######################################################################
+# Installation rules
+
+# this isn't fleshed out yet but is probably the way to go in the future
+new_install:
+ (cd drivers && $(MAKE) install)
+
+ifneq (,$(DRICORE_LIBS))
+DRICORE_INSTALL_TARGET = install-dricore
+endif
+
+# XXX replace this with new_install above someday
+install: default $(DRICORE_INSTALL_TARGET)
+ @for driver in $(DRIVER_DIRS) ; do \
+ case "$$driver" in \
+ osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \
+ $(MAKE) install-headers install-osmesa || exit 1 ; \
+ else \
+ $(MAKE) install-osmesa || exit 1 ; \
+ fi ;; \
+ dri) $(MAKE) install-libgl install-dri || exit 1 ;; \
+ *) $(MAKE) install-libgl || exit 1 ;; \
+ esac ; \
+ done
+
+pcedit = \
+ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
+ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
+ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
+ -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \
+
+
+gl_pcedit = sed \
+ $(pcedit) \
+ -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \
+ -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \
+ -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \
+ -e 's,@GLX_TLS@,$(GLX_TLS),' \
+ -e 's,@GL_LIB@,$(GL_LIB),'
+
+gl.pc: gl.pc.in
+ $(gl_pcedit) $< > $@
+
+osmesa_pcedit = sed \
+ $(pcedit) \
+ -e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \
+ -e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \
+ -e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),'
+
+osmesa.pc: osmesa.pc.in
+ $(osmesa_pcedit) $< > $@
+
+install-headers:
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
+ $(INSTALL) -m 644 $(TOP)/include/GL/*.h \
+ $(DESTDIR)$(INSTALL_INC_DIR)/GL
+
+install-libgl: default gl.pc install-headers
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+ $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \
+ $(DESTDIR)$(INSTALL_LIB_DIR)
+ $(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+
+install-osmesa: default osmesa.pc
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
+ $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+ $(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \
+ $(DESTDIR)$(INSTALL_LIB_DIR)
+ $(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+
+install-dri: default
+ cd drivers/dri && $(MAKE) install
+
+# We don't need MINSTALL here because we're not installing symbolic links
+install-dricore: default
+ $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+ $(INSTALL) -m 755 $(DRICORE_LIBS) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
+
+clean-dricore:
+ -rm -f libdricore.so
+ -rm -f $(DRICORE_LIBS)
+ -rm -rf $(DRICORE_OBJ_DIR)
+
+clean: clean-dricore
+ -rm -f */*.o
+ -rm -f */*/*.o
+ -rm -f depend depend.bak libmesa.a libmesagallium.a
+ -rm -f drivers/*/*.o
+ -rm -f *.pc
+ -@cd drivers/dri && $(MAKE) clean
+ -@cd drivers/x11 && $(MAKE) clean
+ -@cd drivers/osmesa && $(MAKE) clean
+ -@cd x86 && $(MAKE) clean
+ -@cd x86-64 && $(MAKE) clean
+
+
+-include $(DEPENDS)
diff --git a/mesalib/src/mesa/drivers/dri/Makefile.template b/mesalib/src/mesa/drivers/dri/Makefile.template
index 6be554af7..588210f8a 100644
--- a/mesalib/src/mesa/drivers/dri/Makefile.template
+++ b/mesalib/src/mesa/drivers/dri/Makefile.template
@@ -1,7 +1,5 @@
# -*-makefile-*-
-MESA_MODULES = $(TOP)/src/mesa/libmesa.a
-
COMMON_GALLIUM_SOURCES = \
../common/utils.c \
../common/vblank.c \
@@ -39,13 +37,13 @@ CXXFLAGS += $(API_DEFINES)
##### RULES #####
.c.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+ $(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@
.cpp.o:
- $(CC) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
+ $(CC) -c $(INCLUDES) $(DRI_CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+ $(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@
##### TARGETS #####
@@ -57,10 +55,10 @@ default: subdirs lib
lib: symlinks subdirs depend
@$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME)
-$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \
+$(LIBNAME): $(OBJECTS) $(EXTRA_MODULES) $(MESA_MODULES) Makefile \
$(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
$(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
- $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
+ $(OBJECTS) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
$(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS)
@rm -f $@.test
mv -f $@.tmp $@
diff --git a/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c b/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
index aaed87028..256ffcc38 100644
--- a/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
+++ b/mesalib/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
@@ -62,7 +62,6 @@
#include "swrast/s_depth.h"
#include "swrast/s_lines.h"
#include "swrast/s_triangle.h"
-#include "swrast/s_trispan.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
diff --git a/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
index 36ebe9f31..02f0dfacd 100644
--- a/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
+++ b/mesalib/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
@@ -61,7 +61,6 @@
#include "swrast/s_depth.h"
#include "swrast/s_lines.h"
#include "swrast/s_triangle.h"
-#include "swrast/s_trispan.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index 7199a05fb..020595bd0 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -2103,8 +2103,6 @@ struct gl_shader
GLint RefCount; /**< Reference count */
GLboolean DeletePending;
GLboolean CompileStatus;
- GLboolean Main; /**< shader defines main() */
- GLboolean UnresolvedRefs;
const GLchar *Source; /**< Source code string */
GLuint SourceChecksum; /**< for debug/logging purposes */
struct gl_program *Program; /**< Post-compile assembly code */
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 77946ad13..aa5f007e9 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -2754,6 +2754,9 @@ ir_to_mesa_visitor::copy_propagate(void)
*/
for (int r = 0; r < this->next_temp; r++) {
for (int c = 0; c < 4; c++) {
+ if (!acp[4 * r + c])
+ continue;
+
if (acp[4 * r + c]->src_reg[0].file == PROGRAM_OUTPUT)
acp[4 * r + c] = NULL;
}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
index b778ecf0b..2934f9ffb 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
@@ -448,11 +448,14 @@ st_finish_render_texture(struct gl_context *ctx,
* Validate a renderbuffer attachment for a particular set of bindings.
*/
static GLboolean
-st_validate_attachment(struct pipe_screen *screen,
+st_validate_attachment(struct gl_context *ctx,
+ struct pipe_screen *screen,
const struct gl_renderbuffer_attachment *att,
unsigned bindings)
{
const struct st_texture_object *stObj = st_texture_object(att->Texture);
+ enum pipe_format format;
+ gl_format texFormat;
/* Only validate texture attachments for now, since
* st_renderbuffer_alloc_storage makes sure that
@@ -464,7 +467,20 @@ st_validate_attachment(struct pipe_screen *screen,
if (!stObj)
return GL_FALSE;
- return screen->is_format_supported(screen, stObj->pt->format,
+ format = stObj->pt->format;
+ texFormat =
+ stObj->base.Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+
+ /* If the encoding is sRGB and sRGB rendering cannot be enabled,
+ * check for linear format support instead.
+ * Later when we create a surface, we change the format to a linear one. */
+ if (!ctx->Const.sRGBCapable &&
+ _mesa_get_format_color_encoding(texFormat) == GL_SRGB) {
+ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat);
+ format = st_mesa_format_to_pipe_format(linearFormat);
+ }
+
+ return screen->is_format_supported(screen, format,
PIPE_TEXTURE_2D,
stObj->pt->nr_samples, bindings, 0);
}
@@ -528,20 +544,23 @@ st_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
return;
}
- if (!st_validate_attachment(screen,
+ if (!st_validate_attachment(ctx,
+ screen,
depth,
PIPE_BIND_DEPTH_STENCIL)) {
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
return;
}
- if (!st_validate_attachment(screen,
+ if (!st_validate_attachment(ctx,
+ screen,
stencil,
PIPE_BIND_DEPTH_STENCIL)) {
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
return;
}
for (i = 0; i < ctx->Const.MaxColorAttachments; i++) {
- if (!st_validate_attachment(screen,
+ if (!st_validate_attachment(ctx,
+ screen,
&fb->Attachment[BUFFER_COLOR0 + i],
PIPE_BIND_RENDER_TARGET)) {
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
diff --git a/mesalib/src/mesa/state_tracker/st_manager.c b/mesalib/src/mesa/state_tracker/st_manager.c
index 3b54eb7b8..5684e9768 100644
--- a/mesalib/src/mesa/state_tracker/st_manager.c
+++ b/mesalib/src/mesa/state_tracker/st_manager.c
@@ -426,19 +426,13 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi)
struct gl_config mode;
gl_buffer_index idx;
+ if (!stfbi)
+ return NULL;
+
stfb = CALLOC_STRUCT(st_framebuffer);
if (!stfb)
return NULL;
- /* for FBO-only context */
- if (!stfbi) {
- struct gl_framebuffer *base = _mesa_get_incomplete_framebuffer();
-
- stfb->Base = *base;
-
- return stfb;
- }
-
st_visual_to_context_mode(stfbi->visual, &mode);
_mesa_initialize_window_framebuffer(&stfb->Base, &mode);
@@ -764,7 +758,8 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
ret = _mesa_make_current(st->ctx, &stdraw->Base, &stread->Base);
}
else {
- ret = FALSE;
+ struct gl_framebuffer *incomplete = _mesa_get_incomplete_framebuffer();
+ ret = _mesa_make_current(st->ctx, incomplete, incomplete);
}
st_framebuffer_reference(&stdraw, NULL);
diff --git a/mesalib/src/mesa/swrast/s_trispan.h b/mesalib/src/mesa/swrast/s_trispan.h
deleted file mode 100644
index 15207e863..000000000
--- a/mesalib/src/mesa/swrast/s_trispan.h
+++ /dev/null
@@ -1,31 +0,0 @@
-
-/*
- * Mesa 3-D graphics library
- * Version: 3.5
- *
- * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef S_TRISPAN_H
-#define S_TRISPAN_H
-
-
-#endif /* S_TRISPAN_H */
diff --git a/mesalib/src/mesa/x86/read_rgba_span_x86.S b/mesalib/src/mesa/x86/read_rgba_span_x86.S
index 817729973..04571afb7 100644
--- a/mesalib/src/mesa/x86/read_rgba_span_x86.S
+++ b/mesalib/src/mesa/x86/read_rgba_span_x86.S
@@ -1,678 +1,686 @@
-/*
- * (C) Copyright IBM Corporation 2004
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file read_rgba_span_x86.S
- * Optimized routines to transfer pixel data from the framebuffer to a
- * buffer in main memory.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
- .file "read_rgba_span_x86.S"
-#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
-/* Kevin F. Quinn 2nd July 2006
- * Replaced data segment constants with text-segment instructions.
- */
-#define LOAD_MASK(mvins,m1,m2) \
- pushl $0xff00ff00 ;\
- pushl $0xff00ff00 ;\
- pushl $0xff00ff00 ;\
- pushl $0xff00ff00 ;\
- mvins (%esp), m1 ;\
- pushl $0x00ff0000 ;\
- pushl $0x00ff0000 ;\
- pushl $0x00ff0000 ;\
- pushl $0x00ff0000 ;\
- mvins (%esp), m2 ;\
- addl $32, %esp
-
-/* I implemented these as macros because they appear in several places,
- * and I've tweaked them a number of times. I got tired of changing every
- * place they appear. :)
- */
-
-#define DO_ONE_PIXEL() \
- movl (%ebx), %eax ; \
- addl $4, %ebx ; \
- bswap %eax /* ARGB -> BGRA */ ; \
- rorl $8, %eax /* BGRA -> ABGR */ ; \
- movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
- addl $4, %ecx
-
-#define DO_ONE_LAST_PIXEL() \
- movl (%ebx), %eax ; \
- bswap %eax /* ARGB -> BGRA */ ; \
- rorl $8, %eax /* BGRA -> ABGR */ ; \
- movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
-
-
-/**
- * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
- *
- * \warning
- * This function assumes that the caller will issue the EMMS instruction
- * at the correct places.
- */
-
-.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
-.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
- .type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
-_generic_read_RGBA_span_BGRA8888_REV_MMX:
- pushl %ebx
-
-#ifdef USE_INNER_EMMS
- emms
-#endif
- LOAD_MASK(movq,%mm1,%mm2)
-
- movl 8(%esp), %ebx /* source pointer */
- movl 16(%esp), %edx /* number of pixels to copy */
- movl 12(%esp), %ecx /* destination pointer */
-
- testl %edx, %edx
- jle .L20 /* Bail if there's nothing to do. */
-
- movl %ebx, %eax
-
- negl %eax
- sarl $2, %eax
- andl $1, %eax
- je .L17
-
- subl %eax, %edx
- DO_ONE_PIXEL()
-.L17:
-
- /* Would it be faster to unroll this loop once and process 4 pixels
- * per pass, instead of just two?
- */
-
- movl %edx, %eax
- shrl %eax
- jmp .L18
-.L19:
- movq (%ebx), %mm0
- addl $8, %ebx
-
- /* These 9 instructions do what PSHUFB (if there were such an
- * instruction) could do in 1. :(
- */
-
- movq %mm0, %mm3
- movq %mm0, %mm4
-
- pand %mm2, %mm3
- psllq $16, %mm4
- psrlq $16, %mm3
- pand %mm2, %mm4
-
- pand %mm1, %mm0
- por %mm4, %mm3
- por %mm3, %mm0
-
- movq %mm0, (%ecx)
- addl $8, %ecx
- subl $1, %eax
-.L18:
- jne .L19
-
-#ifdef USE_INNER_EMMS
- emms
-#endif
-
- /* At this point there are either 1 or 0 pixels remaining to be
- * converted. Convert the last pixel, if needed.
- */
-
- testl $1, %edx
- je .L20
-
- DO_ONE_LAST_PIXEL()
-
-.L20:
- popl %ebx
- ret
- .size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
-
-
-/**
- * SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE
- * instructions are only actually used to read data from the framebuffer.
- * In practice, the speed-up is pretty small.
- *
- * \todo
- * Do some more testing and determine if there's any reason to have this
- * function in addition to the MMX version.
- *
- * \warning
- * This function assumes that the caller will issue the EMMS instruction
- * at the correct places.
- */
-
-.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
-.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
- .type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
-_generic_read_RGBA_span_BGRA8888_REV_SSE:
- pushl %esi
- pushl %ebx
- pushl %ebp
-
-#ifdef USE_INNER_EMMS
- emms
-#endif
-
- LOAD_MASK(movq,%mm1,%mm2)
-
- movl 16(%esp), %ebx /* source pointer */
- movl 24(%esp), %edx /* number of pixels to copy */
- movl 20(%esp), %ecx /* destination pointer */
-
- testl %edx, %edx
- jle .L35 /* Bail if there's nothing to do. */
-
- movl %esp, %ebp
- subl $16, %esp
- andl $0xfffffff0, %esp
-
- movl %ebx, %eax
- movl %edx, %esi
-
- negl %eax
- andl $15, %eax
- sarl $2, %eax
- cmpl %edx, %eax
- cmovle %eax, %esi
-
- subl %esi, %edx
-
- testl $1, %esi
- je .L32
-
- DO_ONE_PIXEL()
-.L32:
-
- testl $2, %esi
- je .L31
-
- movq (%ebx), %mm0
- addl $8, %ebx
-
- movq %mm0, %mm3
- movq %mm0, %mm4
-
- pand %mm2, %mm3
- psllq $16, %mm4
- psrlq $16, %mm3
- pand %mm2, %mm4
-
- pand %mm1, %mm0
- por %mm4, %mm3
- por %mm3, %mm0
-
- movq %mm0, (%ecx)
- addl $8, %ecx
-.L31:
-
- movl %edx, %eax
- shrl $2, %eax
- jmp .L33
-.L34:
- movaps (%ebx), %xmm0
- addl $16, %ebx
-
- /* This would be so much better if we could just move directly from
- * an SSE register to an MMX register. Unfortunately, that
- * functionality wasn't introduced until SSE2 with the MOVDQ2Q
- * instruction.
- */
-
- movaps %xmm0, (%esp)
- movq (%esp), %mm0
- movq 8(%esp), %mm5
-
- movq %mm0, %mm3
- movq %mm0, %mm4
- movq %mm5, %mm6
- movq %mm5, %mm7
-
- pand %mm2, %mm3
- pand %mm2, %mm6
-
- psllq $16, %mm4
- psllq $16, %mm7
-
- psrlq $16, %mm3
- psrlq $16, %mm6
-
- pand %mm2, %mm4
- pand %mm2, %mm7
-
- pand %mm1, %mm0
- pand %mm1, %mm5
-
- por %mm4, %mm3
- por %mm7, %mm6
-
- por %mm3, %mm0
- por %mm6, %mm5
-
- movq %mm0, (%ecx)
- movq %mm5, 8(%ecx)
- addl $16, %ecx
-
- subl $1, %eax
-.L33:
- jne .L34
-
-#ifdef USE_INNER_EMMS
- emms
-#endif
- movl %ebp, %esp
-
- /* At this point there are either [0, 3] pixels remaining to be
- * converted.
- */
-
- testl $2, %edx
- je .L36
-
- movq (%ebx), %mm0
- addl $8, %ebx
-
- movq %mm0, %mm3
- movq %mm0, %mm4
-
- pand %mm2, %mm3
- psllq $16, %mm4
- psrlq $16, %mm3
- pand %mm2, %mm4
-
- pand %mm1, %mm0
- por %mm4, %mm3
- por %mm3, %mm0
-
- movq %mm0, (%ecx)
- addl $8, %ecx
-.L36:
-
- testl $1, %edx
- je .L35
-
- DO_ONE_LAST_PIXEL()
-.L35:
- popl %ebp
- popl %ebx
- popl %esi
- ret
- .size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
-
-
-/**
- * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
- */
-
- .text
-.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
-.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
- .type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
-_generic_read_RGBA_span_BGRA8888_REV_SSE2:
- pushl %esi
- pushl %ebx
-
- LOAD_MASK(movdqu,%xmm1,%xmm2)
-
- movl 12(%esp), %ebx /* source pointer */
- movl 20(%esp), %edx /* number of pixels to copy */
- movl 16(%esp), %ecx /* destination pointer */
-
- movl %ebx, %eax
- movl %edx, %esi
-
- testl %edx, %edx
- jle .L46 /* Bail if there's nothing to do. */
-
- /* If the source pointer isn't a multiple of 16 we have to process
- * a few pixels the "slow" way to get the address aligned for
- * the SSE fetch intsructions.
- */
-
- negl %eax
- andl $15, %eax
- sarl $2, %eax
-
- cmpl %edx, %eax
- cmovbe %eax, %esi
- subl %esi, %edx
-
- testl $1, %esi
- je .L41
-
- DO_ONE_PIXEL()
-.L41:
- testl $2, %esi
- je .L40
-
- movq (%ebx), %xmm0
- addl $8, %ebx
-
- movdqa %xmm0, %xmm3
- movdqa %xmm0, %xmm4
- andps %xmm1, %xmm0
-
- andps %xmm2, %xmm3
- pslldq $2, %xmm4
- psrldq $2, %xmm3
- andps %xmm2, %xmm4
-
- orps %xmm4, %xmm3
- orps %xmm3, %xmm0
-
- movq %xmm0, (%ecx)
- addl $8, %ecx
-.L40:
-
- /* Would it be worth having a specialized version of this loop for
- * the case where the destination is 16-byte aligned? That version
- * would be identical except that it could use movedqa instead of
- * movdqu.
- */
-
- movl %edx, %eax
- shrl $2, %eax
- jmp .L42
-.L43:
- movdqa (%ebx), %xmm0
- addl $16, %ebx
-
- movdqa %xmm0, %xmm3
- movdqa %xmm0, %xmm4
- andps %xmm1, %xmm0
-
- andps %xmm2, %xmm3
- pslldq $2, %xmm4
- psrldq $2, %xmm3
- andps %xmm2, %xmm4
-
- orps %xmm4, %xmm3
- orps %xmm3, %xmm0
-
- movdqu %xmm0, (%ecx)
- addl $16, %ecx
- subl $1, %eax
-.L42:
- jne .L43
-
-
- /* There may be upto 3 pixels remaining to be copied. Take care
- * of them now. We do the 2 pixel case first because the data
- * will be aligned.
- */
-
- testl $2, %edx
- je .L47
-
- movq (%ebx), %xmm0
- addl $8, %ebx
-
- movdqa %xmm0, %xmm3
- movdqa %xmm0, %xmm4
- andps %xmm1, %xmm0
-
- andps %xmm2, %xmm3
- pslldq $2, %xmm4
- psrldq $2, %xmm3
- andps %xmm2, %xmm4
-
- orps %xmm4, %xmm3
- orps %xmm3, %xmm0
-
- movq %xmm0, (%ecx)
- addl $8, %ecx
-.L47:
-
- testl $1, %edx
- je .L46
-
- DO_ONE_LAST_PIXEL()
-.L46:
-
- popl %ebx
- popl %esi
- ret
- .size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
-
-
-
-#define MASK_565_L 0x07e0f800
-#define MASK_565_H 0x0000001f
-/* Setting SCALE_ADJUST to 5 gives a perfect match with the
- * classic C implementation in Mesa. Setting SCALE_ADJUST
- * to 0 is slightly faster but at a small cost to accuracy.
- */
-#define SCALE_ADJUST 5
-#if SCALE_ADJUST == 5
-#define PRESCALE_L 0x00100001
-#define PRESCALE_H 0x00000200
-#define SCALE_L 0x40C620E8
-#define SCALE_H 0x0000839d
-#elif SCALE_ADJUST == 0
-#define PRESCALE_L 0x00200001
-#define PRESCALE_H 0x00000800
-#define SCALE_L 0x01040108
-#define SCALE_H 0x00000108
-#else
-#error SCALE_ADJUST must either be 5 or 0.
-#endif
-#define ALPHA_L 0x00000000
-#define ALPHA_H 0x00ff0000
-
-/**
- * MMX optimized version of the RGB565 to RGBA copy routine.
- */
-
- .text
- .globl _generic_read_RGBA_span_RGB565_MMX
- .hidden _generic_read_RGBA_span_RGB565_MMX
- .type _generic_read_RGBA_span_RGB565_MMX, @function
-
-_generic_read_RGBA_span_RGB565_MMX:
-
-#ifdef USE_INNER_EMMS
- emms
-#endif
-
- movl 4(%esp), %eax /* source pointer */
- movl 8(%esp), %edx /* destination pointer */
- movl 12(%esp), %ecx /* number of pixels to copy */
-
- pushl $MASK_565_H
- pushl $MASK_565_L
- movq (%esp), %mm5
- pushl $PRESCALE_H
- pushl $PRESCALE_L
- movq (%esp), %mm6
- pushl $SCALE_H
- pushl $SCALE_L
- movq (%esp), %mm7
- pushl $ALPHA_H
- pushl $ALPHA_L
- movq (%esp), %mm3
- addl $32,%esp
-
- sarl $2, %ecx
- jl .L01 /* Bail early if the count is negative. */
- jmp .L02
-
-.L03:
- /* Fetch 4 RGB565 pixels into %mm4. Distribute the first and
- * second pixels into the four words of %mm0 and %mm2.
- */
-
- movq (%eax), %mm4
- addl $8, %eax
-
- pshufw $0x00, %mm4, %mm0
- pshufw $0x55, %mm4, %mm2
-
-
- /* Mask the pixels so that each word of each register contains only
- * one color component.
- */
-
- pand %mm5, %mm0
- pand %mm5, %mm2
-
-
- /* Adjust the component values so that they are as small as possible,
- * but large enough so that we can multiply them by an unsigned 16-bit
- * number and get a value as large as 0x00ff0000.
- */
-
- pmullw %mm6, %mm0
- pmullw %mm6, %mm2
-#if SCALE_ADJUST > 0
- psrlw $SCALE_ADJUST, %mm0
- psrlw $SCALE_ADJUST, %mm2
-#endif
-
- /* Scale the input component values to be on the range
- * [0, 0x00ff0000]. This it the real magic of the whole routine.
- */
-
- pmulhuw %mm7, %mm0
- pmulhuw %mm7, %mm2
-
-
- /* Always set the alpha value to 0xff.
- */
-
- por %mm3, %mm0
- por %mm3, %mm2
-
-
- /* Pack the 16-bit values to 8-bit values and store the converted
- * pixel data.
- */
-
- packuswb %mm2, %mm0
- movq %mm0, (%edx)
- addl $8, %edx
-
- pshufw $0xaa, %mm4, %mm0
- pshufw $0xff, %mm4, %mm2
-
- pand %mm5, %mm0
- pand %mm5, %mm2
- pmullw %mm6, %mm0
- pmullw %mm6, %mm2
-#if SCALE_ADJUST > 0
- psrlw $SCALE_ADJUST, %mm0
- psrlw $SCALE_ADJUST, %mm2
-#endif
- pmulhuw %mm7, %mm0
- pmulhuw %mm7, %mm2
-
- por %mm3, %mm0
- por %mm3, %mm2
-
- packuswb %mm2, %mm0
-
- movq %mm0, (%edx)
- addl $8, %edx
-
- subl $1, %ecx
-.L02:
- jne .L03
-
-
- /* At this point there can be at most 3 pixels left to process. If
- * there is either 2 or 3 left, process 2.
- */
-
- movl 12(%esp), %ecx
- testl $0x02, %ecx
- je .L04
-
- movd (%eax), %mm4
- addl $4, %eax
-
- pshufw $0x00, %mm4, %mm0
- pshufw $0x55, %mm4, %mm2
-
- pand %mm5, %mm0
- pand %mm5, %mm2
- pmullw %mm6, %mm0
- pmullw %mm6, %mm2
-#if SCALE_ADJUST > 0
- psrlw $SCALE_ADJUST, %mm0
- psrlw $SCALE_ADJUST, %mm2
-#endif
- pmulhuw %mm7, %mm0
- pmulhuw %mm7, %mm2
-
- por %mm3, %mm0
- por %mm3, %mm2
-
- packuswb %mm2, %mm0
-
- movq %mm0, (%edx)
- addl $8, %edx
-
-.L04:
- /* At this point there can be at most 1 pixel left to process.
- * Process it if needed.
- */
-
- testl $0x01, %ecx
- je .L01
-
- movzwl (%eax), %ecx
- movd %ecx, %mm4
-
- pshufw $0x00, %mm4, %mm0
-
- pand %mm5, %mm0
- pmullw %mm6, %mm0
-#if SCALE_ADJUST > 0
- psrlw $SCALE_ADJUST, %mm0
-#endif
- pmulhuw %mm7, %mm0
-
- por %mm3, %mm0
-
- packuswb %mm0, %mm0
-
- movd %mm0, (%edx)
-
-.L01:
-#ifdef USE_INNER_EMMS
- emms
-#endif
- ret
-#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
-
-#if defined (__ELF__) && defined (__linux__)
- .section .note.GNU-stack,"",%progbits
-#endif
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file read_rgba_span_x86.S
+ * Optimized routines to transfer pixel data from the framebuffer to a
+ * buffer in main memory.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+ .file "read_rgba_span_x86.S"
+#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
+/* Kevin F. Quinn 2nd July 2006
+ * Replaced data segment constants with text-segment instructions.
+ */
+#define LOAD_MASK(mvins,m1,m2) \
+ pushl $0xff00ff00 ;\
+ pushl $0xff00ff00 ;\
+ pushl $0xff00ff00 ;\
+ pushl $0xff00ff00 ;\
+ mvins (%esp), m1 ;\
+ pushl $0x00ff0000 ;\
+ pushl $0x00ff0000 ;\
+ pushl $0x00ff0000 ;\
+ pushl $0x00ff0000 ;\
+ mvins (%esp), m2 ;\
+ addl $32, %esp
+
+/* I implemented these as macros because they appear in several places,
+ * and I've tweaked them a number of times. I got tired of changing every
+ * place they appear. :)
+ */
+
+#define DO_ONE_PIXEL() \
+ movl (%ebx), %eax ; \
+ addl $4, %ebx ; \
+ bswap %eax /* ARGB -> BGRA */ ; \
+ rorl $8, %eax /* BGRA -> ABGR */ ; \
+ movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
+ addl $4, %ecx
+
+#define DO_ONE_LAST_PIXEL() \
+ movl (%ebx), %eax ; \
+ bswap %eax /* ARGB -> BGRA */ ; \
+ rorl $8, %eax /* BGRA -> ABGR */ ; \
+ movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
+
+
+/**
+ * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places.
+ */
+
+.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
+#endif
+ .type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
+_generic_read_RGBA_span_BGRA8888_REV_MMX:
+ pushl %ebx
+
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+ LOAD_MASK(movq,%mm1,%mm2)
+
+ movl 8(%esp), %ebx /* source pointer */
+ movl 16(%esp), %edx /* number of pixels to copy */
+ movl 12(%esp), %ecx /* destination pointer */
+
+ testl %edx, %edx
+ jle .L20 /* Bail if there's nothing to do. */
+
+ movl %ebx, %eax
+
+ negl %eax
+ sarl $2, %eax
+ andl $1, %eax
+ je .L17
+
+ subl %eax, %edx
+ DO_ONE_PIXEL()
+.L17:
+
+ /* Would it be faster to unroll this loop once and process 4 pixels
+ * per pass, instead of just two?
+ */
+
+ movl %edx, %eax
+ shrl %eax
+ jmp .L18
+.L19:
+ movq (%ebx), %mm0
+ addl $8, %ebx
+
+ /* These 9 instructions do what PSHUFB (if there were such an
+ * instruction) could do in 1. :(
+ */
+
+ movq %mm0, %mm3
+ movq %mm0, %mm4
+
+ pand %mm2, %mm3
+ psllq $16, %mm4
+ psrlq $16, %mm3
+ pand %mm2, %mm4
+
+ pand %mm1, %mm0
+ por %mm4, %mm3
+ por %mm3, %mm0
+
+ movq %mm0, (%ecx)
+ addl $8, %ecx
+ subl $1, %eax
+.L18:
+ jne .L19
+
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+
+ /* At this point there are either 1 or 0 pixels remaining to be
+ * converted. Convert the last pixel, if needed.
+ */
+
+ testl $1, %edx
+ je .L20
+
+ DO_ONE_LAST_PIXEL()
+
+.L20:
+ popl %ebx
+ ret
+ .size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
+
+
+/**
+ * SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE
+ * instructions are only actually used to read data from the framebuffer.
+ * In practice, the speed-up is pretty small.
+ *
+ * \todo
+ * Do some more testing and determine if there's any reason to have this
+ * function in addition to the MMX version.
+ *
+ * \warning
+ * This function assumes that the caller will issue the EMMS instruction
+ * at the correct places.
+ */
+
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
+#endif
+ .type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE:
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+
+ LOAD_MASK(movq,%mm1,%mm2)
+
+ movl 16(%esp), %ebx /* source pointer */
+ movl 24(%esp), %edx /* number of pixels to copy */
+ movl 20(%esp), %ecx /* destination pointer */
+
+ testl %edx, %edx
+ jle .L35 /* Bail if there's nothing to do. */
+
+ movl %esp, %ebp
+ subl $16, %esp
+ andl $0xfffffff0, %esp
+
+ movl %ebx, %eax
+ movl %edx, %esi
+
+ negl %eax
+ andl $15, %eax
+ sarl $2, %eax
+ cmpl %edx, %eax
+ cmovle %eax, %esi
+
+ subl %esi, %edx
+
+ testl $1, %esi
+ je .L32
+
+ DO_ONE_PIXEL()
+.L32:
+
+ testl $2, %esi
+ je .L31
+
+ movq (%ebx), %mm0
+ addl $8, %ebx
+
+ movq %mm0, %mm3
+ movq %mm0, %mm4
+
+ pand %mm2, %mm3
+ psllq $16, %mm4
+ psrlq $16, %mm3
+ pand %mm2, %mm4
+
+ pand %mm1, %mm0
+ por %mm4, %mm3
+ por %mm3, %mm0
+
+ movq %mm0, (%ecx)
+ addl $8, %ecx
+.L31:
+
+ movl %edx, %eax
+ shrl $2, %eax
+ jmp .L33
+.L34:
+ movaps (%ebx), %xmm0
+ addl $16, %ebx
+
+ /* This would be so much better if we could just move directly from
+ * an SSE register to an MMX register. Unfortunately, that
+ * functionality wasn't introduced until SSE2 with the MOVDQ2Q
+ * instruction.
+ */
+
+ movaps %xmm0, (%esp)
+ movq (%esp), %mm0
+ movq 8(%esp), %mm5
+
+ movq %mm0, %mm3
+ movq %mm0, %mm4
+ movq %mm5, %mm6
+ movq %mm5, %mm7
+
+ pand %mm2, %mm3
+ pand %mm2, %mm6
+
+ psllq $16, %mm4
+ psllq $16, %mm7
+
+ psrlq $16, %mm3
+ psrlq $16, %mm6
+
+ pand %mm2, %mm4
+ pand %mm2, %mm7
+
+ pand %mm1, %mm0
+ pand %mm1, %mm5
+
+ por %mm4, %mm3
+ por %mm7, %mm6
+
+ por %mm3, %mm0
+ por %mm6, %mm5
+
+ movq %mm0, (%ecx)
+ movq %mm5, 8(%ecx)
+ addl $16, %ecx
+
+ subl $1, %eax
+.L33:
+ jne .L34
+
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+ movl %ebp, %esp
+
+ /* At this point there are either [0, 3] pixels remaining to be
+ * converted.
+ */
+
+ testl $2, %edx
+ je .L36
+
+ movq (%ebx), %mm0
+ addl $8, %ebx
+
+ movq %mm0, %mm3
+ movq %mm0, %mm4
+
+ pand %mm2, %mm3
+ psllq $16, %mm4
+ psrlq $16, %mm3
+ pand %mm2, %mm4
+
+ pand %mm1, %mm0
+ por %mm4, %mm3
+ por %mm3, %mm0
+
+ movq %mm0, (%ecx)
+ addl $8, %ecx
+.L36:
+
+ testl $1, %edx
+ je .L35
+
+ DO_ONE_LAST_PIXEL()
+.L35:
+ popl %ebp
+ popl %ebx
+ popl %esi
+ ret
+ .size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
+
+
+/**
+ * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
+ */
+
+ .text
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#ifndef USE_DRICORE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
+#endif
+ .type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE2:
+ pushl %esi
+ pushl %ebx
+
+ LOAD_MASK(movdqu,%xmm1,%xmm2)
+
+ movl 12(%esp), %ebx /* source pointer */
+ movl 20(%esp), %edx /* number of pixels to copy */
+ movl 16(%esp), %ecx /* destination pointer */
+
+ movl %ebx, %eax
+ movl %edx, %esi
+
+ testl %edx, %edx
+ jle .L46 /* Bail if there's nothing to do. */
+
+ /* If the source pointer isn't a multiple of 16 we have to process
+ * a few pixels the "slow" way to get the address aligned for
+ * the SSE fetch intsructions.
+ */
+
+ negl %eax
+ andl $15, %eax
+ sarl $2, %eax
+
+ cmpl %edx, %eax
+ cmovbe %eax, %esi
+ subl %esi, %edx
+
+ testl $1, %esi
+ je .L41
+
+ DO_ONE_PIXEL()
+.L41:
+ testl $2, %esi
+ je .L40
+
+ movq (%ebx), %xmm0
+ addl $8, %ebx
+
+ movdqa %xmm0, %xmm3
+ movdqa %xmm0, %xmm4
+ andps %xmm1, %xmm0
+
+ andps %xmm2, %xmm3
+ pslldq $2, %xmm4
+ psrldq $2, %xmm3
+ andps %xmm2, %xmm4
+
+ orps %xmm4, %xmm3
+ orps %xmm3, %xmm0
+
+ movq %xmm0, (%ecx)
+ addl $8, %ecx
+.L40:
+
+ /* Would it be worth having a specialized version of this loop for
+ * the case where the destination is 16-byte aligned? That version
+ * would be identical except that it could use movedqa instead of
+ * movdqu.
+ */
+
+ movl %edx, %eax
+ shrl $2, %eax
+ jmp .L42
+.L43:
+ movdqa (%ebx), %xmm0
+ addl $16, %ebx
+
+ movdqa %xmm0, %xmm3
+ movdqa %xmm0, %xmm4
+ andps %xmm1, %xmm0
+
+ andps %xmm2, %xmm3
+ pslldq $2, %xmm4
+ psrldq $2, %xmm3
+ andps %xmm2, %xmm4
+
+ orps %xmm4, %xmm3
+ orps %xmm3, %xmm0
+
+ movdqu %xmm0, (%ecx)
+ addl $16, %ecx
+ subl $1, %eax
+.L42:
+ jne .L43
+
+
+ /* There may be upto 3 pixels remaining to be copied. Take care
+ * of them now. We do the 2 pixel case first because the data
+ * will be aligned.
+ */
+
+ testl $2, %edx
+ je .L47
+
+ movq (%ebx), %xmm0
+ addl $8, %ebx
+
+ movdqa %xmm0, %xmm3
+ movdqa %xmm0, %xmm4
+ andps %xmm1, %xmm0
+
+ andps %xmm2, %xmm3
+ pslldq $2, %xmm4
+ psrldq $2, %xmm3
+ andps %xmm2, %xmm4
+
+ orps %xmm4, %xmm3
+ orps %xmm3, %xmm0
+
+ movq %xmm0, (%ecx)
+ addl $8, %ecx
+.L47:
+
+ testl $1, %edx
+ je .L46
+
+ DO_ONE_LAST_PIXEL()
+.L46:
+
+ popl %ebx
+ popl %esi
+ ret
+ .size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
+
+
+
+#define MASK_565_L 0x07e0f800
+#define MASK_565_H 0x0000001f
+/* Setting SCALE_ADJUST to 5 gives a perfect match with the
+ * classic C implementation in Mesa. Setting SCALE_ADJUST
+ * to 0 is slightly faster but at a small cost to accuracy.
+ */
+#define SCALE_ADJUST 5
+#if SCALE_ADJUST == 5
+#define PRESCALE_L 0x00100001
+#define PRESCALE_H 0x00000200
+#define SCALE_L 0x40C620E8
+#define SCALE_H 0x0000839d
+#elif SCALE_ADJUST == 0
+#define PRESCALE_L 0x00200001
+#define PRESCALE_H 0x00000800
+#define SCALE_L 0x01040108
+#define SCALE_H 0x00000108
+#else
+#error SCALE_ADJUST must either be 5 or 0.
+#endif
+#define ALPHA_L 0x00000000
+#define ALPHA_H 0x00ff0000
+
+/**
+ * MMX optimized version of the RGB565 to RGBA copy routine.
+ */
+
+ .text
+ .globl _generic_read_RGBA_span_RGB565_MMX
+#ifndef USE_DRICORE
+ .hidden _generic_read_RGBA_span_RGB565_MMX
+#endif
+ .type _generic_read_RGBA_span_RGB565_MMX, @function
+
+_generic_read_RGBA_span_RGB565_MMX:
+
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+
+ movl 4(%esp), %eax /* source pointer */
+ movl 8(%esp), %edx /* destination pointer */
+ movl 12(%esp), %ecx /* number of pixels to copy */
+
+ pushl $MASK_565_H
+ pushl $MASK_565_L
+ movq (%esp), %mm5
+ pushl $PRESCALE_H
+ pushl $PRESCALE_L
+ movq (%esp), %mm6
+ pushl $SCALE_H
+ pushl $SCALE_L
+ movq (%esp), %mm7
+ pushl $ALPHA_H
+ pushl $ALPHA_L
+ movq (%esp), %mm3
+ addl $32,%esp
+
+ sarl $2, %ecx
+ jl .L01 /* Bail early if the count is negative. */
+ jmp .L02
+
+.L03:
+ /* Fetch 4 RGB565 pixels into %mm4. Distribute the first and
+ * second pixels into the four words of %mm0 and %mm2.
+ */
+
+ movq (%eax), %mm4
+ addl $8, %eax
+
+ pshufw $0x00, %mm4, %mm0
+ pshufw $0x55, %mm4, %mm2
+
+
+ /* Mask the pixels so that each word of each register contains only
+ * one color component.
+ */
+
+ pand %mm5, %mm0
+ pand %mm5, %mm2
+
+
+ /* Adjust the component values so that they are as small as possible,
+ * but large enough so that we can multiply them by an unsigned 16-bit
+ * number and get a value as large as 0x00ff0000.
+ */
+
+ pmullw %mm6, %mm0
+ pmullw %mm6, %mm2
+#if SCALE_ADJUST > 0
+ psrlw $SCALE_ADJUST, %mm0
+ psrlw $SCALE_ADJUST, %mm2
+#endif
+
+ /* Scale the input component values to be on the range
+ * [0, 0x00ff0000]. This it the real magic of the whole routine.
+ */
+
+ pmulhuw %mm7, %mm0
+ pmulhuw %mm7, %mm2
+
+
+ /* Always set the alpha value to 0xff.
+ */
+
+ por %mm3, %mm0
+ por %mm3, %mm2
+
+
+ /* Pack the 16-bit values to 8-bit values and store the converted
+ * pixel data.
+ */
+
+ packuswb %mm2, %mm0
+ movq %mm0, (%edx)
+ addl $8, %edx
+
+ pshufw $0xaa, %mm4, %mm0
+ pshufw $0xff, %mm4, %mm2
+
+ pand %mm5, %mm0
+ pand %mm5, %mm2
+ pmullw %mm6, %mm0
+ pmullw %mm6, %mm2
+#if SCALE_ADJUST > 0
+ psrlw $SCALE_ADJUST, %mm0
+ psrlw $SCALE_ADJUST, %mm2
+#endif
+ pmulhuw %mm7, %mm0
+ pmulhuw %mm7, %mm2
+
+ por %mm3, %mm0
+ por %mm3, %mm2
+
+ packuswb %mm2, %mm0
+
+ movq %mm0, (%edx)
+ addl $8, %edx
+
+ subl $1, %ecx
+.L02:
+ jne .L03
+
+
+ /* At this point there can be at most 3 pixels left to process. If
+ * there is either 2 or 3 left, process 2.
+ */
+
+ movl 12(%esp), %ecx
+ testl $0x02, %ecx
+ je .L04
+
+ movd (%eax), %mm4
+ addl $4, %eax
+
+ pshufw $0x00, %mm4, %mm0
+ pshufw $0x55, %mm4, %mm2
+
+ pand %mm5, %mm0
+ pand %mm5, %mm2
+ pmullw %mm6, %mm0
+ pmullw %mm6, %mm2
+#if SCALE_ADJUST > 0
+ psrlw $SCALE_ADJUST, %mm0
+ psrlw $SCALE_ADJUST, %mm2
+#endif
+ pmulhuw %mm7, %mm0
+ pmulhuw %mm7, %mm2
+
+ por %mm3, %mm0
+ por %mm3, %mm2
+
+ packuswb %mm2, %mm0
+
+ movq %mm0, (%edx)
+ addl $8, %edx
+
+.L04:
+ /* At this point there can be at most 1 pixel left to process.
+ * Process it if needed.
+ */
+
+ testl $0x01, %ecx
+ je .L01
+
+ movzwl (%eax), %ecx
+ movd %ecx, %mm4
+
+ pshufw $0x00, %mm4, %mm0
+
+ pand %mm5, %mm0
+ pmullw %mm6, %mm0
+#if SCALE_ADJUST > 0
+ psrlw $SCALE_ADJUST, %mm0
+#endif
+ pmulhuw %mm7, %mm0
+
+ por %mm3, %mm0
+
+ packuswb %mm0, %mm0
+
+ movd %mm0, (%edx)
+
+.L01:
+#ifdef USE_INNER_EMMS
+ emms
+#endif
+ ret
+#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
+
+#if defined (__ELF__) && defined (__linux__)
+ .section .note.GNU-stack,"",%progbits
+#endif