From 28257038c4f13ac26127d536c14c922aa036efed Mon Sep 17 00:00:00 2001 From: marha Date: Fri, 29 Jul 2011 09:02:30 +0200 Subject: libX11 mesa pixman git update 29 jul 2011 --- libX11/configure.ac | 2 +- mesalib/configure.ac | 14 +- mesalib/src/glsl/Makefile | 1 + mesalib/src/glsl/TODO | 63 +- mesalib/src/mesa/Makefile | 461 ++++++------ mesalib/src/mesa/drivers/common/meta.c | 10 + mesalib/src/mesa/main/ff_fragment_shader.cpp | 3 +- mesalib/src/mesa/main/framebuffer.c | 1 + mesalib/src/mesa/main/texparam.c | 2 +- mesalib/src/mesa/state_tracker/st_atom_texture.c | 6 +- pixman/pixman/pixman-arm-neon-asm-bilinear.S | 1 + pixman/pixman/pixman-arm-neon-asm.S | 1 + pixman/pixman/pixman-arm-simd-asm.S | 877 ++++++++++++----------- 13 files changed, 722 insertions(+), 720 deletions(-) diff --git a/libX11/configure.ac b/libX11/configure.ac index a449ba8ba..6a4878e8d 100644 --- a/libX11/configure.ac +++ b/libX11/configure.ac @@ -1,7 +1,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) -AC_INIT([libX11], [1.4.3], +AC_INIT([libX11], [1.4.4], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [libX11]) AC_CONFIG_SRCDIR([Makefile.am]) AC_CONFIG_HEADERS([src/config.h include/X11/XlibConf.h]) diff --git a/mesalib/configure.ac b/mesalib/configure.ac index 5c832e646..1b1823a21 100644 --- a/mesalib/configure.ac +++ b/mesalib/configure.ac @@ -951,7 +951,7 @@ xyesyes) GL_PC_LIB_PRIV="$GL_LIB_DEPS" GL_PC_CFLAGS="$X11_INCLUDES" fi - GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread" + GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $DLOPEN_LIBS" GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread" # if static, move the external libraries to the programs @@ -1936,11 +1936,12 @@ if test "x$with_gallium_drivers" != x; then gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau" ;; xswrast) + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" + if test "x$MESA_LLVM" = x1; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" + fi + if test 
"x$HAVE_ST_DRI" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" - if test "x$MESA_LLVM" = x1; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" - fi GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast" fi if test "x$HAVE_ST_VDPAU" = xyes; then @@ -1958,9 +1959,6 @@ if test "x$with_gallium_drivers" != x; then if test "x$HAVE_WINSYS_XLIB" != xyes; then GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib" fi - if test "x$HAVE_ST_DRI" != xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" - fi fi ;; *) diff --git a/mesalib/src/glsl/Makefile b/mesalib/src/glsl/Makefile index 005b51d72..c20a6c9ed 100644 --- a/mesalib/src/glsl/Makefile +++ b/mesalib/src/glsl/Makefile @@ -164,6 +164,7 @@ depend: $(ALL_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + $(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null # Remove .o and backup files clean: clean-dricore diff --git a/mesalib/src/glsl/TODO b/mesalib/src/glsl/TODO index 6eed986bf..c99d7e152 100644 --- a/mesalib/src/glsl/TODO +++ b/mesalib/src/glsl/TODO @@ -1,38 +1,27 @@ -- Detect code paths in non-void functions that don't reach a return statement - -- Improve handling of constants and their initializers. Constant initializers - should never generate any code. This is trival for scalar constants. It is - also trivial for arrays, matrices, and vectors that are accessed with - constant index values. For others it is more complicated. Perhaps these - cases should be silently converted to uniforms? - -- Implement support for ir_binop_dot in ir_algebraic.cpp. Perform - transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y. - -1.30 features: - -- Implement AST-to-HIR conversion of bit-shift operators. - -- Implement AST-to-HIR conversion of bit-wise {&,|,^,!} operators. 
- -- Implement AST-to-HIR conversion of switch-statements - - switch - - case - - Update break to correcly handle mixed nexting of switch-statements - and loops. - -- Handle currently unsupported constant expression types - - ir_unop_bit_not - - ir_binop_mod - - ir_binop_lshift - - ir_binop_rshift - - ir_binop_bit_and - - ir_binop_bit_xor - - ir_binop_bit_or - -- Implement support for 1.30 style shadow compares which only return a float - instead of a vec4. - -- Implement support for gl_ClipDistance. This is non-trivial because - gl_ClipDistance is exposed as a float[8], but all hardware actually +- Detect code paths in non-void functions that don't reach a return statement + +- Improve handling of constants and their initializers. Constant initializers + should never generate any code. This is trival for scalar constants. It is + also trivial for arrays, matrices, and vectors that are accessed with + constant index values. For others it is more complicated. Perhaps these + cases should be silently converted to uniforms? + +- Implement support for ir_binop_dot in ir_algebraic.cpp. Perform + transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y. + +- Track source locations throughout the IR. There are currently several + places where we cannot emit line numbers for errors (and currently emit 0:0) + because we've "lost" the line number information. This is particularly + noticeable at link time. + +1.30 features: + +- Implement AST-to-HIR conversion of switch-statements + - switch + - case + - Update break to correcly handle mixed nexting of switch-statements + and loops. + +- Implement support for gl_ClipDistance. This is non-trivial because + gl_ClipDistance is exposed as a float[8], but all hardware actually implements it as vec4[2]. 
\ No newline at end of file diff --git a/mesalib/src/mesa/Makefile b/mesalib/src/mesa/Makefile index e17ee3b58..88f31b686 100644 --- a/mesalib/src/mesa/Makefile +++ b/mesalib/src/mesa/Makefile @@ -1,230 +1,231 @@ -# src/mesa/Makefile - -TOP = ../.. -include $(TOP)/configs/current - -MESA_LIBS := libmesa.a libmesagallium.a -DEPENDS := depend - -MESA_OBJ_DIR := . -DRICORE_OBJ_DIR := objs-dricore - -include sources.mak - -# adjust object dirs -MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) -MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) - -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - -# define preprocessor flags -MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) - -# append include dirs -MESA_CPPFLAGS += $(INCLUDE_DIRS) - -DRICORE_CPPFLAGS = $(MESA_CPPFLAGS) - -# tidy compiler flags -CFLAGS := $(filter-out $(DEFINES), $(CFLAGS)) -CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS)) - -# LLVM is needed for the state tracker -MESA_CFLAGS := $(LLVM_CFLAGS) $(CFLAGS) -DRICORE_CFLAGS := $(LLVM_CFLAGS) $(DRI_CFLAGS) - -MESA_CXXFLAGS := $(LLVM_CFLAGS) $(CXXFLAGS) -DRICORE_CXXFLAGS := $(LLVM_CFLAGS) $(DRI_CXXFLAGS) - -define mesa-cc-c - @mkdir -p $(dir $@) - $(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) -endef - -define mesa-cxx-c - @mkdir -p $(dir $@) - $(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CXXFLAGS) -endef - -$(MESA_OBJ_DIR)/%.o: %.c - $(call mesa-cc-c,MESA) - -$(MESA_OBJ_DIR)/%.o: %.cpp - $(call mesa-cxx-c,MESA) - -$(MESA_OBJ_DIR)/%.o: %.S - $(call mesa-cc-c,MESA) - -$(DRICORE_OBJ_DIR)/%.o: %.c - $(call mesa-cc-c,DRICORE) - -$(DRICORE_OBJ_DIR)/%.o: %.cpp - $(call mesa-cxx-c,DRICORE) - -$(DRICORE_OBJ_DIR)/%.o: %.S - $(call mesa-cc-c,DRICORE) - -# Default: build dependencies, then asm_subdirs, GLSL built-in lib, -# then convenience libs (.a) and finally the device drivers: -default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs - -main/api_exec_es1.c: main/APIspec.xml 
main/es_generator.py main/APIspecutil.py main/APIspec.py - $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ - -main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py - $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@ - -program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y - $(BISON) -v -d --output=program/program_parse.tab.c $< - -program/lex.yy.c: program/program_lexer.l - $(FLEX) --never-interactive --outfile=$@ $< - -###################################################################### -# Helper libraries used by many drivers: - -# Make archive of core mesa object files -libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS) - @ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS) - -# Shared dricore library for classic DRI drivers -$(TOP)/$(LIB_DIR)/libdricore.so: $(DRICORE_OBJECTS) $(DRICORE_GLSL_LIBS) - @$(MKLIB) -o $@ -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ - -cplusplus -noprefix \ - -install $(TOP)/$(LIB_DIR) -id $(DRI_DRIVER_INSTALL_DIR)/$@.dylib \ - $(DRICORE_LIB_DEPS) $(DRICORE_OBJECTS) - -# Make archive of subset of core mesa object files for gallium -libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS) - @ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS) - -###################################################################### -# Device drivers -driver_subdirs: $(MESA_LIBS) $(DRICORE_LIBS) - @ (cd drivers && $(MAKE)) - - -###################################################################### -# Assembly subdirs -asm_subdirs: - @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \ - (cd x86 && $(MAKE)) || exit 1 ; \ - fi - @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \ - (cd x86 && $(MAKE)) || exit 1 ; \ - (cd x86-64 && $(MAKE)) || exit 1 ; \ - fi - - -###################################################################### -# Dependency generation - -depend: $(ALL_SOURCES) - @ echo "running 
$(MKDEP)" - @ touch depend - @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ - $(ALL_SOURCES) > /dev/null 2>/dev/null - -###################################################################### -# Installation rules - -# this isn't fleshed out yet but is probably the way to go in the future -new_install: - (cd drivers && $(MAKE) install) - -ifneq (,$(DRICORE_LIBS)) -DRICORE_INSTALL_TARGET = install-dricore -endif - -# XXX replace this with new_install above someday -install: default $(DRICORE_INSTALL_TARGET) - @for driver in $(DRIVER_DIRS) ; do \ - case "$$driver" in \ - osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \ - $(MAKE) install-headers install-osmesa || exit 1 ; \ - else \ - $(MAKE) install-osmesa || exit 1 ; \ - fi ;; \ - dri) $(MAKE) install-libgl install-dri || exit 1 ;; \ - *) $(MAKE) install-libgl || exit 1 ;; \ - esac ; \ - done - -pcedit = \ - -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ - -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ - -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ - - -gl_pcedit = sed \ - $(pcedit) \ - -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \ - -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \ - -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \ - -e 's,@GLX_TLS@,$(GLX_TLS),' \ - -e 's,@GL_LIB@,$(GL_LIB),' - -gl.pc: gl.pc.in - $(gl_pcedit) $< > $@ - -osmesa_pcedit = sed \ - $(pcedit) \ - -e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \ - -e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \ - -e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),' - -osmesa.pc: osmesa.pc.in - $(osmesa_pcedit) $< > $@ - -install-headers: - $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL - $(INSTALL) -m 644 $(TOP)/include/GL/*.h \ - $(DESTDIR)$(INSTALL_INC_DIR)/GL - -install-libgl: default gl.pc install-headers - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \ - $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -m 644 gl.pc 
$(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - -install-osmesa: default osmesa.pc - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - $(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \ - $(DESTDIR)$(INSTALL_LIB_DIR) - $(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig - -install-dri: default - cd drivers/dri && $(MAKE) install - -# We don't need MINSTALL here because we're not installing symbolic links -install-dricore: default - $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - $(INSTALL) -m 755 $(DRICORE_LIBS) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h - -clean-dricore: - -rm -f libdricore.so - -rm -f $(DRICORE_LIBS) - -rm -rf $(DRICORE_OBJ_DIR) - -clean: clean-dricore - -rm -f */*.o - -rm -f */*/*.o - -rm -f depend depend.bak libmesa.a libmesagallium.a - -rm -f drivers/*/*.o - -rm -f *.pc - -@cd drivers/dri && $(MAKE) clean - -@cd drivers/x11 && $(MAKE) clean - -@cd drivers/osmesa && $(MAKE) clean - -@cd x86 && $(MAKE) clean - -@cd x86-64 && $(MAKE) clean - - --include $(DEPENDS) +# src/mesa/Makefile + +TOP = ../.. +include $(TOP)/configs/current + +MESA_LIBS := libmesa.a libmesagallium.a +DEPENDS := depend + +MESA_OBJ_DIR := . 
+DRICORE_OBJ_DIR := objs-dricore + +include sources.mak + +# adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) +MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) +MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) + +# define preprocessor flags +MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) + +# append include dirs +MESA_CPPFLAGS += $(INCLUDE_DIRS) + +DRICORE_CPPFLAGS = $(MESA_CPPFLAGS) + +# tidy compiler flags +CFLAGS := $(filter-out $(DEFINES), $(CFLAGS)) +CXXFLAGS := $(filter-out $(DEFINES), $(CXXFLAGS)) + +# LLVM is needed for the state tracker +MESA_CFLAGS := $(LLVM_CFLAGS) $(CFLAGS) +DRICORE_CFLAGS := $(LLVM_CFLAGS) $(DRI_CFLAGS) + +MESA_CXXFLAGS := $(LLVM_CFLAGS) $(CXXFLAGS) +DRICORE_CXXFLAGS := $(LLVM_CFLAGS) $(DRI_CXXFLAGS) + +define mesa-cc-c + @mkdir -p $(dir $@) + $(CC) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CFLAGS) +endef + +define mesa-cxx-c + @mkdir -p $(dir $@) + $(CXX) -c -o $@ $< $($(1)_CPPFLAGS) $($(1)_CXXFLAGS) +endef + +$(MESA_OBJ_DIR)/%.o: %.c + $(call mesa-cc-c,MESA) + +$(MESA_OBJ_DIR)/%.o: %.cpp + $(call mesa-cxx-c,MESA) + +$(MESA_OBJ_DIR)/%.o: %.S + $(call mesa-cc-c,MESA) + +$(DRICORE_OBJ_DIR)/%.o: %.c + $(call mesa-cc-c,DRICORE) + +$(DRICORE_OBJ_DIR)/%.o: %.cpp + $(call mesa-cxx-c,DRICORE) + +$(DRICORE_OBJ_DIR)/%.o: %.S + $(call mesa-cc-c,DRICORE) + +# Default: build dependencies, then asm_subdirs, GLSL built-in lib, +# then convenience libs (.a) and finally the device drivers: +default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs + +main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py + $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ + +main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py + $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@ + +program/program_parse.tab.c 
program/program_parse.tab.h: program/program_parse.y + $(BISON) -v -d --output=program/program_parse.tab.c $< + +program/lex.yy.c: program/program_lexer.l + $(FLEX) --never-interactive --outfile=$@ $< + +###################################################################### +# Helper libraries used by many drivers: + +# Make archive of core mesa object files +libmesa.a: $(MESA_OBJECTS) $(GLSL_LIBS) + @ $(MKLIB) -o mesa -static $(MESA_OBJECTS) $(GLSL_LIBS) + +# Shared dricore library for classic DRI drivers +$(TOP)/$(LIB_DIR)/libdricore.so: $(DRICORE_OBJECTS) $(DRICORE_GLSL_LIBS) + @$(MKLIB) -o $@ -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ + -cplusplus -noprefix \ + -install $(TOP)/$(LIB_DIR) -id $(DRI_DRIVER_INSTALL_DIR)/$@.dylib \ + $(DRICORE_LIB_DEPS) $(DRICORE_OBJECTS) + +# Make archive of subset of core mesa object files for gallium +libmesagallium.a: $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS) + @ $(MKLIB) -o mesagallium -static $(MESA_GALLIUM_OBJECTS) $(GLSL_LIBS) + +###################################################################### +# Device drivers +driver_subdirs: $(MESA_LIBS) $(DRICORE_LIBS) + @ (cd drivers && $(MAKE)) + + +###################################################################### +# Assembly subdirs +asm_subdirs: + @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \ + (cd x86 && $(MAKE)) || exit 1 ; \ + fi + @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_64_ASM ; then \ + (cd x86 && $(MAKE)) || exit 1 ; \ + (cd x86-64 && $(MAKE)) || exit 1 ; \ + fi + + +###################################################################### +# Dependency generation + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ touch depend + @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null + +###################################################################### +# Installation rules + +# this isn't 
fleshed out yet but is probably the way to go in the future +new_install: + (cd drivers && $(MAKE) install) + +ifneq (,$(DRICORE_LIBS)) +DRICORE_INSTALL_TARGET = install-dricore +endif + +# XXX replace this with new_install above someday +install: default $(DRICORE_INSTALL_TARGET) + @for driver in $(DRIVER_DIRS) ; do \ + case "$$driver" in \ + osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \ + $(MAKE) install-headers install-osmesa || exit 1 ; \ + else \ + $(MAKE) install-osmesa || exit 1 ; \ + fi ;; \ + dri) $(MAKE) install-libgl install-dri || exit 1 ;; \ + *) $(MAKE) install-libgl || exit 1 ;; \ + esac ; \ + done + +pcedit = \ + -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ + -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ + -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ + -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ + + +gl_pcedit = sed \ + $(pcedit) \ + -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \ + -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \ + -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' \ + -e 's,@GLX_TLS@,$(GLX_TLS),' \ + -e 's,@GL_LIB@,$(GL_LIB),' + +gl.pc: gl.pc.in + $(gl_pcedit) $< > $@ + +osmesa_pcedit = sed \ + $(pcedit) \ + -e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \ + -e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \ + -e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),' + +osmesa.pc: osmesa.pc.in + $(osmesa_pcedit) $< > $@ + +install-headers: + $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL + $(INSTALL) -m 644 $(TOP)/include/GL/*.h \ + $(DESTDIR)$(INSTALL_INC_DIR)/GL + +install-libgl: default gl.pc install-headers + $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig + $(MINSTALL) $(TOP)/$(LIB_DIR)/$(GL_LIB_GLOB) \ + $(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig + +install-osmesa: default osmesa.pc + $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig + $(MINSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \ + 
$(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig + +install-dri: default + cd drivers/dri && $(MAKE) install + +# We don't need MINSTALL here because we're not installing symbolic links +install-dricore: default + $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(DRICORE_LIBS) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean-dricore: + -rm -f libdricore.so + -rm -f $(DRICORE_LIBS) + -rm -rf $(DRICORE_OBJ_DIR) + +clean: clean-dricore + -rm -f */*.o + -rm -f */*/*.o + -rm -f depend depend.bak libmesa.a libmesagallium.a + -rm -f drivers/*/*.o + -rm -f *.pc + -@cd drivers/dri && $(MAKE) clean + -@cd drivers/x11 && $(MAKE) clean + -@cd drivers/osmesa && $(MAKE) clean + -@cd x86 && $(MAKE) clean + -@cd x86-64 && $(MAKE) clean + + +-include $(DEPENDS) diff --git a/mesalib/src/mesa/drivers/common/meta.c b/mesalib/src/mesa/drivers/common/meta.c index 26c895196..f9b475598 100644 --- a/mesalib/src/mesa/drivers/common/meta.c +++ b/mesalib/src/mesa/drivers/common/meta.c @@ -2869,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). 
+ */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { diff --git a/mesalib/src/mesa/main/ff_fragment_shader.cpp b/mesalib/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7..dbfa6b57d 100644 --- a/mesalib/src/mesa/main/ff_fragment_shader.cpp +++ b/mesalib/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; diff --git a/mesalib/src/mesa/main/framebuffer.c b/mesalib/src/mesa/main/framebuffer.c index e27569a6f..23fa1b2c1 100644 --- a/mesalib/src/mesa/main/framebuffer.c +++ b/mesalib/src/mesa/main/framebuffer.c @@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 
1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c index c4ec29533..3f771f08b 100644 --- a/mesalib/src/mesa/main/texparam.c +++ b/mesalib/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; diff --git a/mesalib/src/mesa/state_tracker/st_atom_texture.c b/mesalib/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0..3115a2511 100644 --- a/mesalib/src/mesa/state_tracker/st_atom_texture.c +++ b/mesalib/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. 
+ */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S index 9a4a1ffba..3c7fe0fea 100644 --- a/pixman/pixman/pixman-arm-neon-asm-bilinear.S +++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -66,6 +66,7 @@ .eabi_attribute 12, 0 .arm .altmacro +.p2align 2 #include "pixman-arm-neon-asm.h" diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 7cddf7e4f..e32bfa1ae 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -47,6 +47,7 @@ .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ .arm .altmacro + .p2align 2 #include "pixman-arm-neon-asm.h" diff --git a/pixman/pixman/pixman-arm-simd-asm.S b/pixman/pixman/pixman-arm-simd-asm.S index e00836b6c..8fe1b5038 100644 --- a/pixman/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman/pixman-arm-simd-asm.S @@ -1,438 +1,439 @@ -/* - * Copyright © 2008 Mozilla Corporation - * Copyright © 2010 Nokia Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. 
- * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ - -/* Prevent the stack from becoming executable */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .arch armv6 - .object_arch armv4 - .arm - .altmacro - -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - -/* - * The code below was generated by gcc 4.3.4 from the commented out - * functions in 'pixman-arm-simd.c' file with the following optimization - * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer" - * - * TODO: replace gcc generated code with hand tuned versions because - * the code quality is not very good, introduce symbolic register - * aliases for better readability and maintainability. 
- */ - -pixman_asm_function pixman_composite_add_8_8_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - mov r10, r1 - sub sp, sp, #4 - subs r10, r10, #1 - mov r11, r0 - mov r8, r2 - str r3, [sp] - ldr r7, [sp, #36] - bcc 0f -6: cmp r11, #0 - beq 1f - orr r3, r8, r7 - tst r3, #3 - beq 2f - mov r1, r8 - mov r0, r7 - mov r12, r11 - b 3f -5: tst r3, #3 - beq 4f -3: ldrb r2, [r0], #1 - subs r12, r12, #1 - ldrb r3, [r1] - uqadd8 r3, r2, r3 - strb r3, [r1], #1 - orr r3, r1, r0 - bne 5b -1: ldr r3, [sp] - add r8, r8, r3 - ldr r3, [sp, #40] - add r7, r7, r3 -10: subs r10, r10, #1 - bcs 6b -0: add sp, sp, #4 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -2: mov r12, r11 - mov r1, r8 - mov r0, r7 -4: cmp r12, #3 - subgt r6, r12, #4 - movgt r9, r12 - lsrgt r5, r6, #2 - addgt r3, r5, #1 - movgt r12, #0 - lslgt r4, r3, #2 - ble 7f -8: ldr r3, [r0, r12] - ldr r2, [r1, r12] - uqadd8 r3, r3, r2 - str r3, [r1, r12] - add r12, r12, #4 - cmp r12, r4 - bne 8b - sub r3, r9, #4 - bic r3, r3, #3 - add r3, r3, #4 - subs r12, r6, r5, lsl #2 - add r1, r1, r3 - add r0, r0, r3 - beq 1b -7: mov r4, #0 -9: ldrb r3, [r1, r4] - ldrb r2, [r0, r4] - uqadd8 r3, r2, r3 - strb r3, [r1, r4] - add r4, r4, #1 - cmp r4, r12 - bne 9b - ldr r3, [sp] - add r8, r8, r3 - ldr r3, [sp, #40] - add r7, r7, r3 - b 10b -.endfunc - -pixman_asm_function pixman_composite_over_8888_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #20 - cmp r1, #0 - mov r12, r2 - str r1, [sp, #12] - str r0, [sp, #16] - ldr r2, [sp, #52] - beq 0f - lsl r3, r3, #2 - str r3, [sp] - ldr r3, [sp, #56] - mov r10, #0 - lsl r3, r3, #2 - str r3, [sp, #8] - mov r11, r3 - b 1f -6: ldr r11, [sp, #8] -1: ldr r9, [sp] - mov r0, r12 - add r12, r12, r9 - mov r1, r2 - str r12, [sp, #4] - add r2, r2, r11 - ldr r12, [sp, #16] - ldr r3, =0x00800080 - ldr r9, =0xff00ff00 - mov r11, #255 - cmp r12, #0 - beq 4f -5: ldr r5, [r1], #4 - ldr r4, [r0] - sub r8, r11, r5, lsr #24 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - mla r6, r6, r8, r3 
- mla r7, r7, r8, r3 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - and r7, r7, r9 - uxtab16 r6, r7, r6, ror #8 - uqadd8 r5, r6, r5 - str r5, [r0], #4 - subs r12, r12, #1 - bne 5b -4: ldr r3, [sp, #12] - add r10, r10, #1 - cmp r10, r3 - ldr r12, [sp, #4] - bne 6b -0: add sp, sp, #20 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc - -pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #28 - cmp r1, #0 - str r1, [sp, #12] - ldrb r1, [sp, #71] - mov r12, r2 - str r0, [sp, #16] - ldr r2, [sp, #60] - str r1, [sp, #24] - beq 0f - lsl r3, r3, #2 - str r3, [sp, #20] - ldr r3, [sp, #64] - mov r10, #0 - lsl r3, r3, #2 - str r3, [sp, #8] - mov r11, r3 - b 1f -5: ldr r11, [sp, #8] -1: ldr r4, [sp, #20] - mov r0, r12 - mov r1, r2 - add r12, r12, r4 - add r2, r2, r11 - str r12, [sp] - str r2, [sp, #4] - ldr r12, [sp, #16] - ldr r2, =0x00800080 - ldr r3, [sp, #24] - mov r11, #255 - cmp r12, #0 - beq 3f -4: ldr r5, [r1], #4 - ldr r4, [r0] - uxtb16 r6, r5 - uxtb16 r7, r5, ror #8 - mla r6, r6, r3, r2 - mla r7, r7, r3, r2 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r5, r6, r7, lsl #8 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - sub r8, r11, r5, lsr #24 - mla r6, r6, r8, r2 - mla r7, r7, r8, r2 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r6, r6, r7, lsl #8 - uqadd8 r5, r6, r5 - str r5, [r0], #4 - subs r12, r12, #1 - bne 4b -3: ldr r1, [sp, #12] - add r10, r10, #1 - cmp r10, r1 - ldr r12, [sp] - ldr r2, [sp, #4] - bne 5b -0: add sp, sp, #28 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc - -pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #28 - cmp r1, #0 - ldr r9, [sp, #60] - str r1, [sp, #12] - bic r1, r9, #-16777216 - str r1, [sp, #20] - mov r12, r2 - lsr r1, r9, #8 - ldr r2, [sp, 
#20] - bic r1, r1, #-16777216 - bic r2, r2, #65280 - bic r1, r1, #65280 - str r2, [sp, #20] - str r0, [sp, #16] - str r1, [sp, #4] - ldr r2, [sp, #68] - beq 0f - lsl r3, r3, #2 - str r3, [sp, #24] - mov r0, #0 - b 1f -5: ldr r3, [sp, #24] -1: ldr r4, [sp, #72] - mov r10, r12 - mov r1, r2 - add r12, r12, r3 - add r2, r2, r4 - str r12, [sp, #8] - str r2, [sp] - ldr r12, [sp, #16] - ldr r11, =0x00800080 - ldr r2, [sp, #4] - ldr r3, [sp, #20] - cmp r12, #0 - beq 3f -4: ldrb r5, [r1], #1 - ldr r4, [r10] - mla r6, r3, r5, r11 - mla r7, r2, r5, r11 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r5, r6, r7, lsl #8 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - mvn r8, r5 - lsr r8, r8, #24 - mla r6, r6, r8, r11 - mla r7, r7, r8, r11 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r6, r6, r7, lsl #8 - uqadd8 r5, r6, r5 - str r5, [r10], #4 - subs r12, r12, #1 - bne 4b -3: ldr r4, [sp, #12] - add r0, r0, #1 - cmp r0, r4 - ldr r12, [sp, #8] - ldr r2, [sp] - bne 5b -0: add sp, sp, #28 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc - -/* - * Note: This code is only using armv5te instructions (not even armv6), - * but is scheduled for ARM Cortex-A8 pipeline. So it might need to - * be split into a few variants, tuned for each microarchitecture. - * - * TODO: In order to get good performance on ARM9/ARM11 cores (which don't - * have efficient write combining), it needs to be changed to use 16-byte - * aligned writes using STM instruction. 
- * - * Nearest scanline scaler macro template uses the following arguments: - * fname - name of the function to generate - * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes - * t - type suffix for LDR/STR instructions - * prefetch_distance - prefetch in the source image by that many - * pixels ahead - * prefetch_braking_distance - stop prefetching when that many pixels are - * remaining before the end of scanline - */ - -.macro generate_nearest_scanline_func fname, bpp_shift, t, \ - prefetch_distance, \ - prefetch_braking_distance - -pixman_asm_function fname - W .req r0 - DST .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - TMP1 .req r4 - TMP2 .req r5 - VXMASK .req r6 - PF_OFFS .req r7 - - ldr UNIT_X, [sp] - push {r4, r5, r6, r7} - mvn VXMASK, #((1 << bpp_shift) - 1) - - /* define helper macro */ - .macro scale_2_pixels - ldr&t TMP1, [SRC, TMP1] - and TMP2, VXMASK, VX, lsr #(16 - bpp_shift) - add VX, VX, UNIT_X - str&t TMP1, [DST], #(1 << bpp_shift) - - ldr&t TMP2, [SRC, TMP2] - and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) - add VX, VX, UNIT_X - str&t TMP2, [DST], #(1 << bpp_shift) - .endm - - /* now do the scaling */ - and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) - add VX, VX, UNIT_X - subs W, W, #(8 + prefetch_braking_distance) - blt 2f - /* calculate prefetch offset */ - mov PF_OFFS, #prefetch_distance - mla PF_OFFS, UNIT_X, PF_OFFS, VX -1: /* main loop, process 8 pixels per iteration with prefetch */ - subs W, W, #8 - add PF_OFFS, UNIT_X, lsl #3 - scale_2_pixels - scale_2_pixels - scale_2_pixels - scale_2_pixels - pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)] - bge 1b -2: - subs W, W, #(4 - 8 - prefetch_braking_distance) - blt 2f -1: /* process the remaining pixels */ - scale_2_pixels - scale_2_pixels - subs W, W, #4 - bge 1b -2: - tst W, #2 - beq 2f - scale_2_pixels -2: - tst W, #1 - ldrne&t TMP1, [SRC, TMP1] - strne&t TMP1, [DST] - /* cleanup helper macro */ - .purgem scale_2_pixels - .unreq DST - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X 
- .unreq TMP1 - .unreq TMP2 - .unreq VXMASK - .unreq PF_OFFS - /* return */ - pop {r4, r5, r6, r7} - bx lr -.endfunc -.endm - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 +/* + * Copyright © 2008 Mozilla Corporation + * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +/* Prevent the stack from becoming executable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .arch armv6 /* accept ARMv6 instructions; uqadd8, uxtb16 and uxtab16 are used below */ + .object_arch armv4 /* record armv4, not armv6, as the architecture attribute of the object file */ + .arm + .altmacro + .p2align 2 /* align to 1 << 2 = 4 bytes */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname /* closed by the .endfunc that ends each function below */ + .global fname +#ifdef __ELF__ + .hidden fname /* ELF only: hidden symbol visibility */ + .type fname, %function /* ELF only: mark the symbol as a function */ +#endif +fname: +.endm + +/* + * The code below was generated by gcc 4.3.4 from the commented out + * functions in 'pixman-arm-simd.c' file with the following optimization + * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer" + * + * TODO: replace gcc generated code with hand tuned versions because + * the code quality is not very good, introduce symbolic register + * aliases for better readability and maintainability. + */ + +pixman_asm_function pixman_composite_add_8_8_asm_armv6 /* Saturating per-byte add of source bytes into destination bytes, row by row. As used below: r0 = bytes per row, r1 = number of rows, r2 = destination, r3 = destination row advance in bytes, first stack word = source, second stack word = source row advance in bytes. */ + push {r4, r5, r6, r7, r8, r9, r10, r11} /* 8 registers = 32 bytes */ + mov r10, r1 /* r10 = rows left */ + sub sp, sp, #4 /* one local word */ + subs r10, r10, #1 + mov r11, r0 /* r11 = bytes per row */ + mov r8, r2 /* r8 = destination row */ + str r3, [sp] /* local word = destination row advance */ + ldr r7, [sp, #36] /* r7 = source row: first stack argument, above the 32 pushed bytes and the local word */ + bcc 0f /* flags are still those of the subs above: a borrow means r1 was 0 */ +6: cmp r11, #0 /* start of one row */ + beq 1f + orr r3, r8, r7 + tst r3, #3 + beq 2f /* both pointers 4-byte aligned: use the word loop */ + mov r1, r8 + mov r0, r7 + mov r12, r11 + b 3f +5: tst r3, #3 + beq 4f /* both cursors became aligned: switch to the word loop */ +3: ldrb r2, [r0], #1 /* byte loop: one saturating add per iteration */ + subs r12, r12, #1 + ldrb r3, [r1] + uqadd8 r3, r2, r3 + strb r3, [r1], #1 + orr r3, r1, r0 /* combined low bits of both cursors, tested at 5: */ + bne 5b /* flags from the subs above: bytes remain in this row */ +1: ldr r3, [sp] /* row finished: advance both row pointers */ + add r8, r8, r3 + ldr r3, [sp, #40] /* second stack argument: source row advance */ + add r7, r7, r3 +10: subs r10, r10, #1 + bcs 6b /* no borrow: another row to do */ +0: add sp, sp, #4 + pop {r4, r5, r6, r7, r8, r9, r10, r11} + bx lr +2: mov r12, r11 + mov r1, r8 + mov r0, r7 +4: cmp r12, #3 /* r12 = bytes left in this row */ + subgt r6, r12, #4 + movgt r9, r12 + lsrgt r5, r6, #2 + addgt r3, r5, #1 /* r3 = number of whole words */ + movgt r12, #0 + lslgt r4, r3, #2 /* r4 = byte length of the word part */ + ble 7f /* 3 or fewer bytes: byte tail only */ +8: ldr r3, [r0, r12] /* word loop: four saturating byte adds per iteration */ + ldr r2, [r1, r12] + uqadd8 r3, r3, r2 + str r3, [r1, r12] + add r12, r12, #4 + cmp r12, r4 + bne 8b + sub r3, r9, #4 + bic r3, r3, #3 + add r3, r3, #4 + subs r12, r6, r5, lsl #2 /* r12 = bytes left after the word loop */ + add r1, r1, r3 + add r0, r0, r3 + beq 1b /* nothing left: next row */ +7: mov r4, #0 /* byte tail: r4 indexes the last r12 bytes */ +9: ldrb r3, 
[r1, r4] + ldrb r2, [r0, r4] + uqadd8 r3, r2, r3 + strb r3, [r1, r4] + add r4, r4, #1 + cmp r4, r12 + bne 9b + ldr r3, [sp] /* row finished: advance both row pointers */ + add r8, r8, r3 + ldr r3, [sp, #40] + add r7, r7, r3 + b 10b +.endfunc + +pixman_asm_function pixman_composite_over_8888_8888_asm_armv6 /* For each 32-bit pixel: scale every destination byte by (255 - top byte of the source pixel) / 255 with rounding, then add the source pixel with per-byte saturation. As used below: r0 = pixels per row, r1 = number of rows, r2 = destination, r3 = destination row advance in 32-bit words, first stack word = source, second stack word = source row advance in 32-bit words. */ + push {r4, r5, r6, r7, r8, r9, r10, r11} + sub sp, sp, #20 /* five local words; stack arguments now start at sp plus 52 */ + cmp r1, #0 + mov r12, r2 /* r12 = destination row */ + str r1, [sp, #12] + str r0, [sp, #16] + ldr r2, [sp, #52] /* r2 = source row (first stack argument) */ + beq 0f /* flags from the cmp above: no rows */ + lsl r3, r3, #2 /* row advance: words to bytes */ + str r3, [sp] + ldr r3, [sp, #56] /* second stack argument: source row advance */ + mov r10, #0 /* r10 = rows done */ + lsl r3, r3, #2 + str r3, [sp, #8] + mov r11, r3 + b 1f +6: ldr r11, [sp, #8] +1: ldr r9, [sp] /* per-row setup */ + mov r0, r12 /* r0 = destination cursor */ + add r12, r12, r9 + mov r1, r2 /* r1 = source cursor */ + str r12, [sp, #4] + add r2, r2, r11 + ldr r12, [sp, #16] /* r12 = pixels left in this row */ + ldr r3, =0x00800080 /* rounding bias 0x80 for each 16-bit lane */ + ldr r9, =0xff00ff00 /* selects the high byte of each 16-bit lane */ + mov r11, #255 + cmp r12, #0 + beq 4f +5: ldr r5, [r1], #4 /* r5 = source pixel */ + ldr r4, [r0] /* r4 = destination pixel */ + sub r8, r11, r5, lsr #24 /* r8 = 255 - top byte of the source pixel */ + uxtb16 r6, r4 /* r6 = destination bytes 0 and 2, one per 16-bit lane */ + uxtb16 r7, r4, ror #8 /* r7 = destination bytes 1 and 3 */ + mla r6, r6, r8, r3 /* each lane times r8, plus the bias */ + mla r7, r7, r8, r3 + uxtab16 r6, r6, r6, ror #8 /* add each lane's own high byte: second step of the rounded divide by 255 */ + uxtab16 r7, r7, r7, ror #8 + and r7, r7, r9 /* results for bytes 1 and 3, already in place */ + uxtab16 r6, r7, r6, ror #8 /* merge in the results for bytes 0 and 2 */ + uqadd8 r5, r6, r5 /* per-byte saturating add of the source pixel */ + str r5, [r0], #4 + subs r12, r12, #1 + bne 5b +4: ldr r3, [sp, #12] + add r10, r10, #1 + cmp r10, r3 + ldr r12, [sp, #4] + bne 6b +0: add sp, sp, #20 + pop {r4, r5, r6, r7, r8, r9, r10, r11} + bx lr +.endfunc + +pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6 /* For each 32-bit pixel: scale every source byte by the mask byte / 255 with rounding, then add that with per-byte saturation to the destination scaled by (255 - top byte of the scaled source) / 255. As used below: r0 = pixels per row, r1 = number of rows, r2 = destination, r3 = destination row advance in 32-bit words, first stack word = source, second stack word = source row advance in 32-bit words, third stack word = mask, of which only the byte at offset 3 is read. */ + push {r4, r5, r6, r7, r8, r9, r10, r11} + sub sp, sp, #28 /* seven local words; stack arguments now start at sp plus 60 */ + cmp r1, #0 + str r1, [sp, #12] + ldrb r1, [sp, #71] /* byte 3 of the third stack word (its top byte on a little-endian target) */ + mov r12, r2 /* r12 = destination row */ + str r0, [sp, #16] + ldr r2, [sp, #60] /* r2 = source row (first stack argument) */ + str r1, [sp, #24] /* keep the mask byte */ + beq 0f /* flags from the cmp above: no rows */ + lsl r3, r3, #2 + str r3, [sp, #20] + ldr r3, [sp, #64] /* second stack argument: source row advance */ + mov r10, #0 /* r10 = rows done */ + lsl r3, r3, #2 + str r3, [sp, #8] + mov r11, r3 + b 1f +5: ldr r11, [sp, #8] +1: ldr r4, [sp, #20] /* per-row setup */ + mov r0, r12 /* r0 = destination cursor */ + mov r1, r2 /* r1 = source cursor */ + add r12, r12, r4 + add r2, r2, r11 + str r12, [sp] + str r2, [sp, #4] + ldr r12, [sp, #16] /* r12 = pixels left in this row */ + ldr r2, =0x00800080 /* rounding bias 0x80 for each 16-bit lane */ + ldr r3, [sp, #24] /* r3 = mask byte */ + mov r11, #255 + cmp r12, #0 + beq 3f +4: ldr r5, [r1], #4 /* r5 = source pixel */ + ldr r4, [r0] /* r4 = destination pixel */ + uxtb16 r6, r5 + uxtb16 r7, r5, ror #8 + mla r6, r6, r3, r2 /* source bytes 0 and 2 times the mask byte, plus the bias */ + mla r7, r7, r3, 
r2 + uxtab16 r6, r6, r6, ror #8 + uxtab16 r7, r7, r7, ror #8 + uxtb16 r6, r6, ror #8 /* keep the high byte of each lane */ + uxtb16 r7, r7, ror #8 + orr r5, r6, r7, lsl #8 /* r5 = source pixel scaled by the mask byte */ + uxtb16 r6, r4 + uxtb16 r7, r4, ror #8 + sub r8, r11, r5, lsr #24 /* r8 = 255 - top byte of the scaled source */ + mla r6, r6, r8, r2 + mla r7, r7, r8, r2 + uxtab16 r6, r6, r6, ror #8 + uxtab16 r7, r7, r7, ror #8 + uxtb16 r6, r6, ror #8 + uxtb16 r7, r7, ror #8 + orr r6, r6, r7, lsl #8 /* r6 = scaled destination pixel */ + uqadd8 r5, r6, r5 /* per-byte saturating add */ + str r5, [r0], #4 + subs r12, r12, #1 + bne 4b +3: ldr r1, [sp, #12] + add r10, r10, #1 + cmp r10, r1 + ldr r12, [sp] + ldr r2, [sp, #4] + bne 5b +0: add sp, sp, #28 + pop {r4, r5, r6, r7, r8, r9, r10, r11} + bx lr +.endfunc + +pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 /* For each pixel: scale one fixed 32-bit colour by a per-pixel mask byte / 255 with rounding, then add that with per-byte saturation to the destination scaled by (255 - top byte of the scaled colour) / 255. As used below: r0 = pixels per row, r1 = number of rows, r2 = destination, r3 = destination row advance in 32-bit words, first stack word = the colour, third stack word = mask pointer, fourth stack word = mask row advance in bytes; the second stack word is never read. */ + push {r4, r5, r6, r7, r8, r9, r10, r11} + sub sp, sp, #28 /* seven local words; stack arguments now start at sp plus 60 */ + cmp r1, #0 + ldr r9, [sp, #60] /* r9 = colour (first stack argument) */ + str r1, [sp, #12] + bic r1, r9, #-16777216 /* clear bits 24 to 31 */ + str r1, [sp, #20] + mov r12, r2 /* r12 = destination row */ + lsr r1, r9, #8 + ldr r2, [sp, #20] + bic r1, r1, #-16777216 + bic r2, r2, #65280 /* r2 = colour & 0x00ff00ff: bytes 0 and 2 */ + bic r1, r1, #65280 /* r1 = (colour >> 8) & 0x00ff00ff: bytes 1 and 3 */ + str r2, [sp, #20] + str r0, [sp, #16] + str r1, [sp, #4] + ldr r2, [sp, #68] /* r2 = mask row (third stack argument) */ + beq 0f /* flags from the cmp above: no rows */ + lsl r3, r3, #2 /* row advance: words to bytes */ + str r3, [sp, #24] + mov r0, #0 /* r0 = rows done */ + b 1f +5: ldr r3, [sp, #24] +1: ldr r4, [sp, #72] /* mask row advance (fourth stack argument) */ + mov r10, r12 /* r10 = destination cursor */ + mov r1, r2 /* r1 = mask cursor */ + add r12, r12, r3 + add r2, r2, r4 + str r12, [sp, #8] + str r2, [sp] + ldr r12, [sp, #16] /* r12 = pixels left in this row */ + ldr r11, =0x00800080 /* rounding bias 0x80 for each 16-bit lane */ + ldr r2, [sp, #4] + ldr r3, [sp, #20] + cmp r12, #0 + beq 3f +4: ldrb r5, [r1], #1 /* r5 = mask byte */ + ldr r4, [r10] /* r4 = destination pixel */ + mla r6, r3, r5, r11 + mla r7, r2, r5, r11 + uxtab16 r6, r6, r6, ror #8 + uxtab16 r7, r7, r7, ror #8 + uxtb16 r6, r6, ror #8 + uxtb16 r7, r7, ror #8 + orr r5, r6, r7, lsl #8 /* r5 = colour scaled by the mask byte */ + uxtb16 r6, r4 + uxtb16 r7, r4, ror #8 + mvn r8, r5 + lsr r8, r8, #24 /* r8 = 255 - top byte of r5 */ + mla r6, r6, r8, r11 + mla r7, r7, r8, r11 + uxtab16 r6, r6, r6, ror #8 + uxtab16 r7, r7, r7, ror #8 + uxtb16 r6, r6, ror #8 + uxtb16 r7, r7, ror #8 + orr r6, r6, r7, lsl #8 /* r6 = scaled destination pixel */ + uqadd8 r5, r6, r5 /* per-byte saturating add */ + str r5, [r10], #4 + subs r12, r12, #1 + bne 4b +3: ldr r4, [sp, #12] + add r0, r0, #1 + cmp r0, r4 + ldr r12, [sp, #8] + ldr r2, [sp] + bne 5b +0: add sp, sp, #28 + 
pop {r4, r5, r6, r7, r8, r9, r10, r11} + bx lr +.endfunc + +/* + * Note: This code is only using armv5te instructions (not even armv6), + * but is scheduled for ARM Cortex-A8 pipeline. So it might need to + * be split into a few variants, tuned for each microarchitecture. + * + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't + * have efficient write combining), it needs to be changed to use 16-byte + * aligned writes using STM instruction. + * + * Nearest scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes + * t - type suffix for LDR/STR instructions + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + * prefetch_braking_distance - stop prefetching when that many pixels are + * remaining before the end of scanline + */ + +.macro generate_nearest_scanline_func fname, bpp_shift, t, \ + prefetch_distance, \ + prefetch_braking_distance + +pixman_asm_function fname + W .req r0 /* pixels still to produce */ + DST .req r1 /* output cursor, post-incremented by every store */ + SRC .req r2 /* base address for all source loads */ + VX .req r3 /* source position; bits 16 and up are the pixel index */ + UNIT_X .req ip /* amount added to VX per output pixel */ + TMP1 .req r4 /* TMP1 and TMP2 alternate between a byte offset into SRC and the pixel loaded from it */ + TMP2 .req r5 + VXMASK .req r6 /* clears the low bpp_shift bits of a byte offset */ + PF_OFFS .req r7 /* position used for prefetching, same format as VX */ + + ldr UNIT_X, [sp] /* first stack argument, read before the push below moves sp */ + push {r4, r5, r6, r7} + mvn VXMASK, #((1 << bpp_shift) - 1) + + /* define helper macro */ + .macro scale_2_pixels /* on entry and on exit TMP1 = byte offset of the next source pixel */ + ldr&t TMP1, [SRC, TMP1] + and TMP2, VXMASK, VX, lsr #(16 - bpp_shift) /* TMP2 = (VX >> 16) << bpp_shift */ + add VX, VX, UNIT_X + str&t TMP1, [DST], #(1 << bpp_shift) + + ldr&t TMP2, [SRC, TMP2] + and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) + add VX, VX, UNIT_X + str&t TMP2, [DST], #(1 << bpp_shift) + .endm + + /* now do the scaling */ + and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) /* byte offset of the first source pixel */ + add VX, VX, UNIT_X + subs W, W, #(8 + prefetch_braking_distance) /* bias W so the prefetching loop exits with at least prefetch_braking_distance pixels left */ + blt 2f /* too few pixels for the prefetching loop */ + /* calculate prefetch offset */ + mov PF_OFFS, #prefetch_distance + mla PF_OFFS, UNIT_X, PF_OFFS, VX /* PF_OFFS = VX plus prefetch_distance steps of UNIT_X */ +1: /* main loop, process 8 pixels per iteration with prefetch */ + subs W, W, #8 + add PF_OFFS, UNIT_X, lsl #3 /* advance the prefetch position by 8 pixels */ + scale_2_pixels + scale_2_pixels + 
scale_2_pixels + scale_2_pixels + pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)] + bge 1b /* flags from the subs at the top of the loop */ +2: + subs W, W, #(4 - 8 - prefetch_braking_distance) /* remove the bias applied before the main loop, then subtract 4 */ + blt 2f +1: /* process the remaining pixels */ + scale_2_pixels + scale_2_pixels + subs W, W, #4 + bge 1b +2: + tst W, #2 /* W is the remaining count minus 4 here, so its low two bits equal those of the remaining count */ + beq 2f + scale_2_pixels +2: + tst W, #1 + ldrne&t TMP1, [SRC, TMP1] /* last odd pixel */ + strne&t TMP1, [DST] + /* cleanup helper macro */ + .purgem scale_2_pixels + .unreq DST + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq VXMASK + .unreq PF_OFFS + /* return */ + pop {r4, r5, r6, r7} + bx lr +.endfunc +.endm + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 /* 2-byte pixels, moved with ldrh and strh */ + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 /* 4-byte pixels, moved with ldr and str */ -- cgit v1.2.3