diff options
Diffstat (limited to 'dxtn')
-rw-r--r-- | dxtn/Changelog | 12 | ||||
-rw-r--r-- | dxtn/Makefile | 18 | ||||
-rw-r--r-- | dxtn/Makefile.old | 14 | ||||
-rw-r--r-- | dxtn/base/basictypes.h | 348 | ||||
-rw-r--r-- | dxtn/base/port.h | 54 | ||||
-rw-r--r-- | dxtn/build/build_config.h | 69 | ||||
-rw-r--r-- | dxtn/getsrc.btm | 4 | ||||
-rw-r--r-- | dxtn/txc_compress_dxtn.c | 838 | ||||
-rw-r--r-- | dxtn/txc_dxtn.h | 53 | ||||
-rw-r--r-- | dxtn/txc_fetch_dxtn.cc | 243 |
10 files changed, 1653 insertions, 0 deletions
diff --git a/dxtn/Changelog b/dxtn/Changelog new file mode 100644 index 000000000..b4f5fa07e --- /dev/null +++ b/dxtn/Changelog @@ -0,0 +1,12 @@ +20050819: +- clean up / make faster the fetch functions, especially the 565 to 888 expand was slow +20050908: +- fix overflow/underflow of some values (avoids artifacts with some textures) in + fancybasecolorsearch when compressing (fix by Dave Airlie). +20060508: +- fix serious copy & paste errors in fetch functions introduced with 20050819 version +- new Makefile (provided by Tilman Sauerbeck) +- minor cosmetic fixes + +20070518: +- add the missing dstRowStride parameter Mesa uses diff --git a/dxtn/Makefile b/dxtn/Makefile new file mode 100644 index 000000000..975063f8b --- /dev/null +++ b/dxtn/Makefile @@ -0,0 +1,18 @@ +CFLAGS += -Wall -pedantic -fPIC +OPT_CFLAGS = -O3 +LDFLAGS += -shared -fPIC +OBJS = txc_compress_dxtn.o txc_fetch_dxtn.o +LIB = libtxc_dxtn.so + +$(LIB): $(OBJS) + $(CC) $(LDFLAGS) -o $@ $(OBJS) + +%.o: %.c txc_dxtn.h + $(CC) $(CFLAGS) $(OPT_CFLAGS) -c -o $@ $< + +clean: + rm -f $(OBJS) $(LIB) + +install: $(LIB) + install -d $(DESTDIR)/usr/lib + install -m 755 $(LIB) $(DESTDIR)/usr/lib diff --git a/dxtn/Makefile.old b/dxtn/Makefile.old new file mode 100644 index 000000000..21b502ee7 --- /dev/null +++ b/dxtn/Makefile.old @@ -0,0 +1,14 @@ +libtxc_dxtn.so: txc_compress_dxtn.o txc_fetch_dxtn.o + gcc -O3 -Wall -pedantic -fPIC -lpthread -shared -o libtxc_dxtn.so txc_compress_dxtn.o txc_fetch_dxtn.o + +txc_compress_dxtn.o : txc_compress_dxtn.c txc_dxtn.h + gcc -c -O3 -Wall -pedantic -fPIC -o txc_compress_dxtn.o txc_compress_dxtn.c + +txc_fetch_dxtn.o : txc_fetch_dxtn.c txc_dxtn.h + gcc -c -O3 -Wall -pedantic -fPIC -o txc_fetch_dxtn.o txc_fetch_dxtn.c + +clean: + rm txc_fetch_dxtn.o txc_compress_dxtn.o libtxc_dxtn.so + +install: libtxc_dxtn.so + install libtxc_dxtn.so /usr/lib/ diff --git a/dxtn/base/basictypes.h b/dxtn/base/basictypes.h new file mode 100644 index 000000000..1e4430380 --- /dev/null +++ 
b/dxtn/base/basictypes.h @@ -0,0 +1,348 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_BASICTYPES_H_ +#define BASE_BASICTYPES_H_ + +#include <limits.h> // So we can set the bounds of our types +#include <stddef.h> // For size_t +#include <string.h> // for memcpy + +#include "base/port.h" // Types that only need exist on certain systems + +#ifndef COMPILER_MSVC +// stdint.h is part of C99 but MSVC doesn't have it. +#include <stdint.h> // For intptr_t. +#endif + +typedef signed char schar; +typedef signed char int8; +typedef short int16; +// TODO(mbelshe) Remove these type guards. These are +// temporary to avoid conflicts with npapi.h. +#ifndef _INT32 +#define _INT32 +typedef int int32; +#endif + +// The NSPR system headers define 64-bit as |long| when possible. In order to +// not have typedef mismatches, we do the same on LP64. +#if __LP64__ +typedef long int64; +#else +typedef long long int64; +#endif + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. + +typedef unsigned char uint8; +typedef unsigned short uint16; +// TODO(mbelshe) Remove these type guards. These are +// temporary to avoid conflicts with npapi.h. +#ifndef _UINT32 +#define _UINT32 +typedef unsigned int uint32; +#endif + +// See the comment above about NSPR and 64-bit. +#if __LP64__ +typedef unsigned long uint64; +#else +typedef unsigned long long uint64; +#endif + +// A type to represent a Unicode code-point value. As of Unicode 4.0, +// such values require up to 21 bits. 
+// (For type-checking on pointers, make this explicitly signed, +// and it should always be the signed version of whatever int32 is.) +typedef signed int char32; + +const uint8 kuint8max = (( uint8) 0xFF); +const uint16 kuint16max = ((uint16) 0xFFFF); +const uint32 kuint32max = ((uint32) 0xFFFFFFFF); +const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF)); +const int8 kint8min = (( int8) 0x80); +const int8 kint8max = (( int8) 0x7F); +const int16 kint16min = (( int16) 0x8000); +const int16 kint16max = (( int16) 0x7FFF); +const int32 kint32min = (( int32) 0x80000000); +const int32 kint32max = (( int32) 0x7FFFFFFF); +const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000)); +const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF)); + +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +// An older, deprecated, politically incorrect name for the above. +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName) + +// A macro to disallow all the implicit constructors, namely the +// default constructor, copy constructor and operator= functions. +// +// This should be used in the private: declarations for a class +// that wants to prevent anyone from instantiating it. This is +// especially useful for classes containing only static methods. +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) + +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. 
+// +// One caveat is that arraysize() doesn't accept any array of an +// anonymous type or a type defined inside a function. In these rare +// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is +// due to a limitation in C++'s template system. The limitation might +// eventually be removed, but it hasn't happened yet. + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template <typename T, size_t N> +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). Matching of +// template overloads: the final frontier. +#ifndef _MSC_VER +template <typename T, size_t N> +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize, +// but can be used on anonymous types or types defined inside +// functions. It's less safe than arraysize as it accepts some +// (although not all) pointers. Therefore, you should use arraysize +// whenever possible. +// +// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type +// size_t. +// +// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error +// +// "warning: division by zero in ..." +// +// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer. +// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays. +// +// The following comments are on the implementation details, and can +// be ignored by the users. +// +// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in +// the array) and sizeof(*(arr)) (the # of bytes in one array +// element). 
If the former is divisible by the latter, perhaps arr is +// indeed an array, in which case the division result is the # of +// elements in the array. Otherwise, arr cannot possibly be an array, +// and we generate a compiler error to prevent the code from +// compiling. +// +// Since the size of bool is implementation-defined, we need to cast +// !(sizeof(a) & sizeof(*(a))) to size_t in order to ensure the final +// result has type size_t. +// +// This macro is not perfect as it wrongfully accepts certain +// pointers, namely where the pointer size is divisible by the pointee +// size. Since all our code has to go through a 32-bit compiler, +// where a pointer is 4 bytes, this means all pointers to a type whose +// size is 3 or greater than 4 will be (righteously) rejected. + +#define ARRAYSIZE_UNSAFE(a) \ + ((sizeof(a) / sizeof(*(a))) / \ + static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) + + +// Use implicit_cast as a safe version of static_cast or const_cast +// for upcasting in the type hierarchy (i.e. casting a pointer to Foo +// to a pointer to SuperclassOfFoo or casting a pointer to Foo to +// a const pointer to Foo). +// When you use implicit_cast, the compiler checks that the cast is safe. +// Such explicit implicit_casts are necessary in surprisingly many +// situations where C++ demands an exact type match instead of an +// argument type convertable to a target type. +// +// The From type can be inferred, so the preferred syntax for using +// implicit_cast is the same as for static_cast etc.: +// +// implicit_cast<ToType>(expr) +// +// implicit_cast would have been part of the C++ standard library, +// but the proposal was submitted too late. It will probably make +// its way into the language in the future. +template<typename To, typename From> +inline To implicit_cast(From const &f) { + return f; +} + +// The COMPILE_ASSERT macro can be used to verify that a compile time +// expression is true. 
For example, you could use it to verify the +// size of a static array: +// +// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES, +// content_type_names_incorrect_size); +// +// or to make sure a struct is smaller than a certain size: +// +// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large); +// +// The second argument to the macro is the name of the variable. If +// the expression is false, most compilers will issue a warning/error +// containing the name of the variable. + +template <bool> +struct CompileAssert { +}; + +#undef COMPILE_ASSERT +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] + +// Implementation details of COMPILE_ASSERT: +// +// - COMPILE_ASSERT works by defining an array type that has -1 +// elements (and thus is invalid) when the expression is false. +// +// - The simpler definition +// +// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] +// +// does not work, as gcc supports variable-length arrays whose sizes +// are determined at run-time (this is gcc's extension and not part +// of the C++ standard). As a result, gcc fails to reject the +// following code with the simple definition: +// +// int foo; +// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is +// // not a compile-time constant. +// +// - By using the type CompileAssert<(bool(expr))>, we ensures that +// expr is a compile-time constant. (Template arguments must be +// determined at compile-time.) +// +// - The outter parentheses in CompileAssert<(bool(expr))> are necessary +// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written +// +// CompileAssert<bool(expr)> +// +// instead, these compilers will refuse to compile +// +// COMPILE_ASSERT(5 > 0, some_message); +// +// (They seem to think the ">" in "5 > 0" marks the end of the +// template argument list.) +// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). 
+// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + + +// MetatagId refers to metatag-id that we assign to +// each metatag <name, value> pair.. +typedef uint32 MetatagId; + +// Argument type used in interfaces that can optionally take ownership +// of a passed in argument. If TAKE_OWNERSHIP is passed, the called +// object takes ownership of the argument. Otherwise it does not. +enum Ownership { + DO_NOT_TAKE_OWNERSHIP, + TAKE_OWNERSHIP +}; + +// bit_cast<Dest,Source> is a template function that implements the +// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in +// very low-level functions like the protobuf library and fast math +// support. +// +// float f = 3.14159265358979; +// int i = bit_cast<int32>(f); +// // i = 0x40490fdb +// +// The classical address-casting method is: +// +// // WRONG +// float f = 3.14159265358979; // WRONG +// int i = * reinterpret_cast<int*>(&f); // WRONG +// +// The address-casting method actually produces undefined behavior +// according to ISO C++ specification section 3.10 -15 -. Roughly, this +// section says: if an object in memory has one type, and a program +// accesses it with a different type, then the result is undefined +// behavior for most values of "different type". +// +// This is true for any cast syntax, either *(int*)&f or +// *reinterpret_cast<int*>(&f). And it is particularly true for +// conversions betweeen integral lvalues and floating-point lvalues. +// +// The purpose of 3.10 -15- is to allow optimizing compilers to assume +// that expressions with different types refer to different memory. gcc +// 4.0.1 has an optimizer that takes advantage of this. So a +// non-conforming program quietly produces wildly incorrect output. +// +// The problem is not the use of reinterpret_cast. The problem is type +// punning: holding an object in memory of one type and reading its bits +// back using a different type. 
+// +// The C++ standard is more subtle and complex than this, but that +// is the basic idea. +// +// Anyways ... +// +// bit_cast<> calls memcpy() which is blessed by the standard, +// especially by the example in section 3.9 . Also, of course, +// bit_cast<> wraps up the nasty logic in one place. +// +// Fortunately memcpy() is very fast. In optimized mode, with a +// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline +// code with the minimal amount of data movement. On a 32-bit system, +// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8) +// compiles to two loads and two stores. +// +// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1. +// +// WARNING: if Dest or Source is a non-POD type, the result of the memcpy +// is likely to surprise you. + +template <class Dest, class Source> +inline Dest bit_cast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. + typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1]; + + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static instance of the class, provided the constructor is given +// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. 
However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, AND there are no virtual methods, then a +// constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} // base + + +#endif // BASE_BASICTYPES_H_ diff --git a/dxtn/base/port.h b/dxtn/base/port.h new file mode 100644 index 000000000..18a936152 --- /dev/null +++ b/dxtn/base/port.h @@ -0,0 +1,54 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_PORT_H_ +#define BASE_PORT_H_ + +#include <stdarg.h> +#include "build/build_config.h" + +#ifdef COMPILER_MSVC +#define GG_LONGLONG(x) x##I64 +#define GG_ULONGLONG(x) x##UI64 +#else +#define GG_LONGLONG(x) x##LL +#define GG_ULONGLONG(x) x##ULL +#endif + +// Per C99 7.8.14, define __STDC_CONSTANT_MACROS before including <stdint.h> +// to get the INTn_C and UINTn_C macros for integer constants. It's difficult +// to guarantee any specific ordering of header includes, so it's difficult to +// guarantee that the INTn_C macros can be defined by including <stdint.h> at +// any specific point. Provide GG_INTn_C macros instead. + +#define GG_INT8_C(x) (x) +#define GG_INT16_C(x) (x) +#define GG_INT32_C(x) (x) +#define GG_INT64_C(x) GG_LONGLONG(x) + +#define GG_UINT8_C(x) (x ## U) +#define GG_UINT16_C(x) (x ## U) +#define GG_UINT32_C(x) (x ## U) +#define GG_UINT64_C(x) GG_ULONGLONG(x) + +// It's possible for functions that use a va_list, such as StringPrintf, to +// invalidate the data in it upon use. The fix is to make a copy of the +// structure before using it and use that copy instead. va_copy is provided +// for this purpose. 
MSVC does not provide va_copy, so define an +// implementation here. It is not guaranteed that assignment is a copy, so the +// StringUtil.VariableArgsFunc unit test tests this capability. +#if defined(COMPILER_GCC) +#define GG_VA_COPY(a, b) (va_copy(a, b)) +#elif defined(COMPILER_MSVC) +#define GG_VA_COPY(a, b) (a = b) +#endif + +// Define an OS-neutral wrapper for shared library entry points +#if defined(OS_WIN) +#define API_CALL __stdcall +#elif defined(OS_LINUX) || defined(OS_MACOSX) +#define API_CALL +#endif + +#endif // BASE_PORT_H_ diff --git a/dxtn/build/build_config.h b/dxtn/build/build_config.h new file mode 100644 index 000000000..2b9efda15 --- /dev/null +++ b/dxtn/build/build_config.h @@ -0,0 +1,69 @@ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This file adds defines about the platform we're currently building on. +// Operating System: +// OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) +// Compiler: +// COMPILER_MSVC / COMPILER_GCC +// Processor: +// ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64) +// ARCH_CPU_32_BITS / ARCH_CPU_64_BITS + +#ifndef BUILD_BUILD_CONFIG_H_ +#define BUILD_BUILD_CONFIG_H_ + +// A set of macros to use for platform detection. +#if defined(__APPLE__) +#define OS_MACOSX 1 +#elif defined(__linux__) +#define OS_LINUX 1 +#elif defined(_WIN32) +#define OS_WIN 1 +#else +#error Please add support for your platform in build/build_config.h +#endif + +// For access to standard POSIX features, use OS_POSIX instead of a more +// specific macro. +#if defined(OS_MACOSX) || defined(OS_LINUX) +#define OS_POSIX 1 +#endif + +// Compiler detection. +#if defined(__GNUC__) +#define COMPILER_GCC 1 +#elif defined(_MSC_VER) +#define COMPILER_MSVC 1 +#else +#error Please add support for your compiler in build/build_config.h +#endif + +// Processor architecture detection. 
For more info on what's defined, see: +// http://msdn.microsoft.com/en-us/library/b0084kay.aspx +// http://www.agner.org/optimize/calling_conventions.pdf +#if defined(_M_X64) || defined(__x86_64__) +#define ARCH_CPU_X86_FAMILY 1 +#define ARCH_CPU_X86_64 1 +#define ARCH_CPU_64_BITS 1 +#elif defined(_M_IX86) || defined(__i386__) +#define ARCH_CPU_X86_FAMILY 1 +#define ARCH_CPU_X86 1 +#define ARCH_CPU_32_BITS 1 +#else +#error Please add support for your architecture in build/build_config.h +#endif + +// Type detection for wchar_t. +#if defined(OS_WIN) +#define WCHAR_T_IS_UTF16 +#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \ + defined(__WCHAR_MAX__) && __WCHAR_MAX__ == 0x7fffffff +#define WCHAR_T_IS_UTF32 +#else +#error Please add support for your compiler in build/build_config.h +#endif + +#endif // BUILD_BUILD_CONFIG_H_ + diff --git a/dxtn/getsrc.btm b/dxtn/getsrc.btm new file mode 100644 index 000000000..dd213f6d2 --- /dev/null +++ b/dxtn/getsrc.btm @@ -0,0 +1,4 @@ +svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/libtxc_dxtn/files . --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/base/basictypes.h base --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/base/port.h base --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/build/build_config.h build --force
diff --git a/dxtn/txc_compress_dxtn.c b/dxtn/txc_compress_dxtn.c new file mode 100644 index 000000000..0be8d4618 --- /dev/null +++ b/dxtn/txc_compress_dxtn.c @@ -0,0 +1,838 @@ +/* + * libtxc_dxtn + * Version: 0.1 + * + * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include "txc_dxtn.h" + +/* weights used for error function, basically weights (unsquared 2/4/1) according to rgb->luminance conversion + not sure if this really reflects visual perception */ +#define REDWEIGHT 4 +#define GREENWEIGHT 16 +#define BLUEWEIGHT 1 + +#define ALPHACUT 127 + +static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2], + GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha) +{ + /* Refines the two base colors that bestcolor[0] and bestcolor[1] point to (r/g/b bytes), in place. + The pair is first ordered by packed 565 value; then every pixel of the numxpixels x numypixels block + is matched to the nearest of four palette entries (the two base colors and their 2:1 and 1:2 blends) + and its signed per-channel error is credited to the base colors. Each base color is moved by its + average error, clamped to 0..255, pushed apart from the other if the two are nearly identical, and + written back through bestcolor. blkaddr, type and haveAlpha are not read here. + Uses the same luminance-weighted distance metric to determine encoding as for finding the base colors */ + + /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done + if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black + due to their alpha value will influence the result */ + GLint i, j, colors, z; + GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest; + GLint colordist, blockerrlin[2][3]; + GLubyte nrcolor[2]; + GLint pixerrorcolorbest[3]; + GLubyte enc = 0; + GLubyte cv[4][4]; + GLubyte testcolor[2][3]; + +/* fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n", + bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/ + if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) < + ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) { + testcolor[0][0] = bestcolor[0][0]; + testcolor[0][1] = bestcolor[0][1]; + testcolor[0][2] = bestcolor[0][2]; + testcolor[1][0] = bestcolor[1][0]; + testcolor[1][1] = bestcolor[1][1]; + testcolor[1][2] = bestcolor[1][2]; + } + else { + testcolor[1][0] = bestcolor[0][0]; + testcolor[1][1] = bestcolor[0][1]; + testcolor[1][2] = bestcolor[0][2]; + testcolor[0][0] = bestcolor[1][0]; + testcolor[0][1] = bestcolor[1][1]; + testcolor[0][2] = bestcolor[1][2]; + } + + for (i = 0; i < 3; i 
++) { /* palette: entries 0 and 1 are the base colors, 2 and 3 their 2:1 and 1:2 blends */ + cv[0][i] = testcolor[0][i]; + cv[1][i] = testcolor[1][i]; + cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3; + cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3; + } + + blockerrlin[0][0] = 0; + blockerrlin[0][1] = 0; + blockerrlin[0][2] = 0; + blockerrlin[1][0] = 0; + blockerrlin[1][1] = 0; + blockerrlin[1][2] = 0; + + nrcolor[0] = 0; + nrcolor[1] = 0; + + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + pixerrorbest = 0xffffffff; /* find the palette entry with the smallest weighted squared distance */ + for (colors = 0; colors < 4; colors++) { + colordist = srccolors[j][i][0] - (cv[colors][0]); + pixerror = colordist * colordist * REDWEIGHT; + pixerrorred = colordist; + colordist = srccolors[j][i][1] - (cv[colors][1]); + pixerror += colordist * colordist * GREENWEIGHT; + pixerrorgreen = colordist; + colordist = srccolors[j][i][2] - (cv[colors][2]); + pixerror += colordist * colordist * BLUEWEIGHT; + pixerrorblue = colordist; + if (pixerror < pixerrorbest) { + enc = colors; + pixerrorbest = pixerror; + pixerrorcolorbest[0] = pixerrorred; + pixerrorcolorbest[1] = pixerrorgreen; + pixerrorcolorbest[2] = pixerrorblue; + } + } + if (enc == 0) { /* credit the signed error to each base color in proportion to its share (in thirds) of the chosen entry */ + for (z = 0; z < 3; z++) { + blockerrlin[0][z] += 3 * pixerrorcolorbest[z]; + } + nrcolor[0] += 3; + } + else if (enc == 2) { + for (z = 0; z < 3; z++) { + blockerrlin[0][z] += 2 * pixerrorcolorbest[z]; + } + nrcolor[0] += 2; + for (z = 0; z < 3; z++) { + blockerrlin[1][z] += 1 * pixerrorcolorbest[z]; + } + nrcolor[1] += 1; + } + else if (enc == 3) { + for (z = 0; z < 3; z++) { + blockerrlin[0][z] += 1 * pixerrorcolorbest[z]; + } + nrcolor[0] += 1; + for (z = 0; z < 3; z++) { + blockerrlin[1][z] += 2 * pixerrorcolorbest[z]; + } + nrcolor[1] += 2; + } + else if (enc == 1) { + for (z = 0; z < 3; z++) { + blockerrlin[1][z] += 3 * pixerrorcolorbest[z]; + } + nrcolor[1] += 3; + } + } + } + if (nrcolor[0] == 0) nrcolor[0] = 1; /* nrcolor[] is the divisor for the average error below; keep it non-zero */ + if (nrcolor[1] == 0) nrcolor[1] = 1; + for (j = 0; j < 2; j++) { + for (i = 0; i < 3; i++) { + GLint newvalue = testcolor[j][i] + 
blockerrlin[j][i] / nrcolor[j]; + /* clamp the corrected channel to the 0..255 byte range */ + if (newvalue <= 0) + testcolor[j][i] = 0; + else if (newvalue >= 255) + testcolor[j][i] = 255; + else testcolor[j][i] = newvalue; + } + } + + if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) && + (abs(testcolor[0][1] - testcolor[1][1]) < 4) && + (abs(testcolor[0][2] - testcolor[1][2]) < 8)) { + /* both colors are so close they might get encoded as the same 16bit values */ + GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1; + + coldiffred = abs(testcolor[0][0] - testcolor[1][0]); + coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]); + coldiffblue = abs(testcolor[0][2] - testcolor[1][2]); + coldiffmax = coldiffred; + if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen; + if (coldiffmax < coldiffblue) coldiffmax = coldiffblue; + if (coldiffmax > 0) { + if (coldiffmax > 4) factor = 2; + else if (coldiffmax > 2) factor = 3; + else factor = 4; + /* Won't do much if the color value is near 255... */ + /* argh so many ifs */ + /* ind1 is the color with the larger (or equal) green value, ind0 the other one */ + if (testcolor[1][1] >= testcolor[0][1]) { + ind1 = 1; ind0 = 0; + } + else { + ind1 = 0; ind0 = 1; + } + if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255) + testcolor[ind1][1] += factor * coldiffgreen; + else testcolor[ind1][1] = 255; + /* raise the red channel of whichever color already has the larger red value; + this compares red with red, mirroring the blue channel handling below */ + if ((testcolor[ind1][0] - testcolor[ind0][0]) > 0) { + if ((testcolor[ind1][0] + factor * coldiffred) <= 255) + testcolor[ind1][0] += factor * coldiffred; + else testcolor[ind1][0] = 255; + } + else { + if ((testcolor[ind0][0] + factor * coldiffred) <= 255) + testcolor[ind0][0] += factor * coldiffred; + else testcolor[ind0][0] = 255; + } + if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) { + if ((testcolor[ind1][2] + factor * coldiffblue) <= 255) + testcolor[ind1][2] += factor * coldiffblue; + else testcolor[ind1][2] = 255; + } + else { + if ((testcolor[ind0][2] + factor * coldiffblue) <= 255) + testcolor[ind0][2] += factor * coldiffblue; + else testcolor[ind0][2] = 255; + } + } + } + + if (((testcolor[0][0] & 0xf8) << 8 | 
(testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) < + ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2] >> 3)) { /* both sides are packed 565 values; the >> 3 belongs to the blue channel only */ + for (i = 0; i < 3; i++) { + bestcolor[0][i] = testcolor[0][i]; + bestcolor[1][i] = testcolor[1][i]; + } + } + else { + for (i = 0; i < 3; i++) { + bestcolor[0][i] = testcolor[1][i]; + bestcolor[1][i] = testcolor[0][i]; + } + } + +/* fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n", + bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/ +} + + + +static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2], + GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha) +{ + /* Picks the per-pixel 2-bit codes for the two base colors in bestcolor and writes the 8-byte color block + (two 16-bit 565 colors, low byte first, then 32 bits of codes) to blkaddr. Both the 4-color and the + 3-color interpretation are tried. The base colors are truncated to 565 precision in place and + bestcolor[0]/bestcolor[1] may be swapped. + Uses the same luminance-weighted distance metric to determine encoding as for finding the base colors */ + + GLint i, j, colors; + GLuint testerror, testerror2, pixerror, pixerrorbest; + GLint colordist; + GLushort color0, color1, tempcolor; + GLuint bits = 0, bits2 = 0; + GLubyte *colorptr; + GLubyte enc = 0; + GLubyte cv[4][4]; + + bestcolor[0][0] = bestcolor[0][0] & 0xf8; + bestcolor[0][1] = bestcolor[0][1] & 0xfc; + bestcolor[0][2] = bestcolor[0][2] & 0xf8; + bestcolor[1][0] = bestcolor[1][0] & 0xf8; + bestcolor[1][1] = bestcolor[1][1] & 0xfc; + bestcolor[1][2] = bestcolor[1][2] & 0xf8; + + color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3; + color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3; + /* make color0 the larger packed value */ + if (color0 < color1) { + tempcolor = color0; color0 = color1; color1 = tempcolor; + colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr; + } + + + for (i = 0; i < 3; i ++) { + cv[0][i] = bestcolor[0][i]; + cv[1][i] = bestcolor[1][i]; + cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3; + cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3; + } + + testerror = 0; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + pixerrorbest = 
0xffffffff; + for (colors = 0; colors < 4; colors++) { + colordist = srccolors[j][i][0] - cv[colors][0]; + pixerror = colordist * colordist * REDWEIGHT; + colordist = srccolors[j][i][1] - cv[colors][1]; + pixerror += colordist * colordist * GREENWEIGHT; + colordist = srccolors[j][i][2] - cv[colors][2]; + pixerror += colordist * colordist * BLUEWEIGHT; + if (pixerror < pixerrorbest) { + pixerrorbest = pixerror; + enc = colors; + } + } + testerror += pixerrorbest; + bits |= enc << (2 * (j * 4 + i)); + } + } + for (i = 0; i < 3; i ++) { + cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2; + /* this isn't used. Looks like the black color constant can only be used + with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees, + it will decode 3 to black even with DXT3/5), and due to how the color searching works + it won't get used even then */ + cv[3][i] = 0; + } + testerror2 = 0; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + pixerrorbest = 0xffffffff; + if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) { + enc = 3; + pixerrorbest = 0; /* don't calculate error */ + } + else { + /* we're calculating the same what we have done already for colors 0-1 above... 
*/ + for (colors = 0; colors < 3; colors++) { + colordist = srccolors[j][i][0] - cv[colors][0]; + pixerror = colordist * colordist * REDWEIGHT; + colordist = srccolors[j][i][1] - cv[colors][1]; + pixerror += colordist * colordist * GREENWEIGHT; + colordist = srccolors[j][i][2] - cv[colors][2]; + pixerror += colordist * colordist * BLUEWEIGHT; + if (pixerror < pixerrorbest) { + pixerrorbest = pixerror; + /* need to exchange colors later */ + if (colors > 1) enc = colors; + else enc = colors ^ 1; + } + } + } + testerror2 += pixerrorbest; + bits2 |= enc << (2 * (j * 4 + i)); + } + } + + + /* finally we're finished, write back colors and bits */ + if ((testerror > testerror2) || (haveAlpha)) { + *blkaddr++ = color1 & 0xff; + *blkaddr++ = color1 >> 8; + *blkaddr++ = color0 & 0xff; + *blkaddr++ = color0 >> 8; + *blkaddr++ = bits2 & 0xff; + *blkaddr++ = ( bits2 >> 8) & 0xff; + *blkaddr++ = ( bits2 >> 16) & 0xff; + *blkaddr = bits2 >> 24; + } + else { + *blkaddr++ = color0 & 0xff; + *blkaddr++ = color0 >> 8; + *blkaddr++ = color1 & 0xff; + *blkaddr++ = color1 >> 8; + *blkaddr++ = bits & 0xff; + *blkaddr++ = ( bits >> 8) & 0xff; + *blkaddr++ = ( bits >> 16) & 0xff; + *blkaddr = bits >> 24; + } +} + +static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4], + GLint numxpixels, GLint numypixels, GLuint type ) +{ +/* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color + present in the picture as base colors */ + + /* define lowest and highest color as shortest and longest vector to 0/0/0, though the + vectors are weighted similar to their importance in rgb-luminance conversion + doesn't work too well though... 
+ This seems to be a rather difficult problem */ + + GLubyte *bestcolor[2]; + GLubyte basecolors[2][3]; + GLubyte i, j; + GLuint lowcv, highcv, testcv; + GLboolean haveAlpha = GL_FALSE; + + lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT + + srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT + + srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT; + bestcolor[0] = bestcolor[1] = srccolors[0][0]; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* don't use this as a base color if the pixel will get black/transparent anyway */ + if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] <= ALPHACUT)) { + testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT + + srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT + + srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT; + if (testcv > highcv) { + highcv = testcv; + bestcolor[1] = srccolors[j][i]; + } + else if (testcv < lowcv) { + lowcv = testcv; + bestcolor[0] = srccolors[j][i]; + } + } + else haveAlpha = GL_TRUE; + } + } + /* make sure the original color values won't get touched... 
*/ + for (j = 0; j < 2; j++) { + for (i = 0; i < 3; i++) { + basecolors[j][i] = bestcolor[j][i]; + } + } + bestcolor[0] = basecolors[0]; + bestcolor[1] = basecolors[1]; + + /* try to find better base colors */ + fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha); + /* find the best encoding for these colors, and store the result */ + storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha); +} + +static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2, + GLubyte alphaenc[16]) +{ + *blkaddr++ = alphabase1; + *blkaddr++ = alphabase2; + *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6); + *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7); + *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5); + *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6); + *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7); + *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5); +} + +static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4], + GLint numxpixels, GLint numypixels) +{ + GLubyte alphabase[2], alphause[2]; + GLshort alphatest[2]; + GLuint alphablockerror1, alphablockerror2, alphablockerror3; + GLubyte i, j, aindex, acutValues[7]; + GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16]; + GLboolean alphaabsmin = GL_FALSE; + GLboolean alphaabsmax = GL_FALSE; + GLshort alphadist; + + /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */ + alphabase[0] = 0xff; alphabase[1] = 0x0; + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i][3] == 0) + alphaabsmin = GL_TRUE; + else if (srccolors[j][i][3] == 255) + alphaabsmax = GL_TRUE; + else { + if (srccolors[j][i][3] > alphabase[1]) + 
alphabase[1] = srccolors[j][i][3]; + if (srccolors[j][i][3] < alphabase[0]) + alphabase[0] = srccolors[j][i][3]; + } + } + } + + + if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */ + /* shortcut here since it is a very common case (and also avoids later problems) */ + /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */ + /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */ + + *blkaddr++ = srccolors[0][0][3]; + blkaddr++; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; + *blkaddr++ = 0; +/* fprintf(stderr, "enc0 used\n");*/ + return; + } + + /* find best encoding for alpha0 > alpha1 */ + /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */ + alphablockerror1 = 0x0; + alphablockerror2 = 0xffffffff; + alphablockerror3 = 0xffffffff; + if (alphaabsmin) alphause[0] = 0; + else alphause[0] = alphabase[0]; + if (alphaabsmax) alphause[1] = 255; + else alphause[1] = alphabase[1]; + /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */ + for (aindex = 0; aindex < 7; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14; + } + + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... 
*/ + if (srccolors[j][i][3] > acutValues[0]) { + alphaenc1[4*j + i] = 0; + alphadist = srccolors[j][i][3] - alphause[1]; + } + else if (srccolors[j][i][3] > acutValues[1]) { + alphaenc1[4*j + i] = 2; + alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7; + } + else if (srccolors[j][i][3] > acutValues[2]) { + alphaenc1[4*j + i] = 3; + alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7; + } + else if (srccolors[j][i][3] > acutValues[3]) { + alphaenc1[4*j + i] = 4; + alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7; + } + else if (srccolors[j][i][3] > acutValues[4]) { + alphaenc1[4*j + i] = 5; + alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7; + } + else if (srccolors[j][i][3] > acutValues[5]) { + alphaenc1[4*j + i] = 6; + alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7; + } + else if (srccolors[j][i][3] > acutValues[6]) { + alphaenc1[4*j + i] = 7; + alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7; + } + else { + alphaenc1[4*j + i] = 1; + alphadist = srccolors[j][i][3] - alphause[0]; + } + alphablockerror1 += alphadist * alphadist; + } + } +/* for (i = 0; i < 16; i++) { + fprintf(stderr, "%d ", alphaenc1[i]); + } + fprintf(stderr, "cutVals "); + for (i = 0; i < 8; i++) { + fprintf(stderr, "%d ", acutValues[i]); + } + fprintf(stderr, "srcVals "); + for (j = 0; j < numypixels; j++) + for (i = 0; i < numxpixels; i++) { + fprintf(stderr, "%d ", srccolors[j][i][3]); + } + + fprintf(stderr, "\n"); + }*/ + /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax + are false but try it anyway */ + if (alphablockerror1 >= 32) { + + /* don't bother if encoding is already very good, this condition should also imply + we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */ + alphablockerror2 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded 
down */ + acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i][3] == 0) { + alphaenc2[4*j + i] = 6; + alphadist = 0; + } + else if (srccolors[j][i][3] == 255) { + alphaenc2[4*j + i] = 7; + alphadist = 0; + } + else if (srccolors[j][i][3] <= acutValues[0]) { + alphaenc2[4*j + i] = 0; + alphadist = srccolors[j][i][3] - alphabase[0]; + } + else if (srccolors[j][i][3] <= acutValues[1]) { + alphaenc2[4*j + i] = 2; + alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5; + } + else if (srccolors[j][i][3] <= acutValues[2]) { + alphaenc2[4*j + i] = 3; + alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5; + } + else if (srccolors[j][i][3] <= acutValues[3]) { + alphaenc2[4*j + i] = 4; + alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5; + } + else if (srccolors[j][i][3] <= acutValues[4]) { + alphaenc2[4*j + i] = 5; + alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5; + } + else { + alphaenc2[4*j + i] = 1; + alphadist = srccolors[j][i][3] - alphabase[1]; + } + alphablockerror2 += alphadist * alphadist; + } + } + + + /* skip this if the error is already very small + this encoding is MUCH better on average than #2 though, but expensive! 
*/ + if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) { + GLshort blockerrlin1 = 0; + GLshort blockerrlin2 = 0; + GLubyte nralphainrangelow = 0; + GLubyte nralphainrangehigh = 0; + alphatest[0] = 0xff; + alphatest[1] = 0x0; + /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28))) + alphatest[1] = srccolors[j][i][3]; + if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28)) + alphatest[0] = srccolors[j][i][3]; + } + } + /* shouldn't happen too often, don't really care about those degenerated cases */ + if (alphatest[1] <= alphatest[0]) { + alphatest[0] = 1; + alphatest[1] = 254; +/* fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/ + } + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + + /* find the "average" difference between the alpha values and the next encoded value. + This is then used to calculate new base values. + Should there be some weighting, i.e. those values closer to alphatest[x] have more weight, + since they will see more improvement, and also because the values in the middle are somewhat + likely to get no improvement at all (because the base values might move in different directions)? 
+ OTOH it would mean the values in the middle are even less likely to get an improvement + */ + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + if (srccolors[j][i][3] <= alphatest[0] / 2) { + } + else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) { + } + else if (srccolors[j][i][3] <= acutValues[0]) { + blockerrlin1 += (srccolors[j][i][3] - alphatest[0]); + nralphainrangelow += 1; + } + else if (srccolors[j][i][3] <= acutValues[1]) { + blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i][3] <= acutValues[2]) { + blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i][3] <= acutValues[3]) { + blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else if (srccolors[j][i][3] <= acutValues[4]) { + blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); + nralphainrangelow += 1; + nralphainrangehigh += 1; + } + else { + blockerrlin2 += (srccolors[j][i][3] - alphatest[1]); + nralphainrangehigh += 1; + } + } + } + /* shouldn't happen often, needed to avoid div by zero */ + if (nralphainrangelow == 0) nralphainrangelow = 1; + if (nralphainrangehigh == 0) nralphainrangehigh = 1; + alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow); +/* fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow); + fprintf(stderr, "block err lin high %d, nr %d\n", 
blockerrlin2, nralphainrangehigh);*/ + /* again shouldn't really happen often... */ + if (alphatest[0] < 0) { + alphatest[0] = 0; +/* fprintf(stderr, "adj alpha base val to 0\n");*/ + } + alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh); + if (alphatest[1] > 255) { + alphatest[1] = 255; +/* fprintf(stderr, "adj alpha base val to 255\n");*/ + } + + alphablockerror3 = 0; + for (aindex = 0; aindex < 5; aindex++) { + /* don't forget here is always rounded down */ + acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; + } + for (j = 0; j < numypixels; j++) { + for (i = 0; i < numxpixels; i++) { + /* maybe it's overkill to have the most complicated calculation just for the error + calculation which we only need to figure out if encoding1 or encoding2 is better... */ + if (srccolors[j][i][3] <= alphatest[0] / 2) { + alphaenc3[4*j + i] = 6; + alphadist = srccolors[j][i][3]; + } + else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) { + alphaenc3[4*j + i] = 7; + alphadist = 255 - srccolors[j][i][3]; + } + else if (srccolors[j][i][3] <= acutValues[0]) { + alphaenc3[4*j + i] = 0; + alphadist = srccolors[j][i][3] - alphatest[0]; + } + else if (srccolors[j][i][3] <= acutValues[1]) { + alphaenc3[4*j + i] = 2; + alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5; + } + else if (srccolors[j][i][3] <= acutValues[2]) { + alphaenc3[4*j + i] = 3; + alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5; + } + else if (srccolors[j][i][3] <= acutValues[3]) { + alphaenc3[4*j + i] = 4; + alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5; + } + else if (srccolors[j][i][3] <= acutValues[4]) { + alphaenc3[4*j + i] = 5; + alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5; + } + else { + alphaenc3[4*j + i] = 1; + alphadist = srccolors[j][i][3] - alphatest[1]; + } + alphablockerror3 += alphadist * alphadist; + } + } + } + } + /* write 
the alpha values and encoding back. */ + if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) { +/* if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/ + writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 ); + } + else if (alphablockerror2 <= alphablockerror3) { +/* if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/ + writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 ); + } + else { +/* fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/ + writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 ); + } +} + +static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr, + GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps) +{ + GLubyte i, j, c; + const GLchan *curaddr; + for (j = 0; j < numypixels; j++) { + curaddr = srcaddr + j * srcRowStride * comps; + for (i = 0; i < numxpixels; i++) { + for (c = 0; c < comps; c++) { + srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255); + } + } + } +} + + +void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData, + GLenum destFormat, GLubyte *dest, GLint dstRowStride) +{ + GLubyte *blkaddr = dest; + GLubyte srcpixels[4][4][4]; + const GLchan *srcaddr = srcPixData; + GLint numxpixels, numypixels; + GLint i, j; + GLint dstRowDiff; + + switch (destFormat) { + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + /* hmm we used to get called without dstRowStride... */ + dstRowDiff = dstRowStride >= (width * 2) ? 
dstRowStride - (((width + 3) & ~3) * 2) : 0; +/* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n", + width, height, dstRowStride); */ + for (j = 0; j < height; j += 4) { + if (height > j + 3) numypixels = 4; + else numypixels = height - j; + srcaddr = srcPixData + j * width * srccomps; + for (i = 0; i < width; i += 4) { + if (width > i + 3) numxpixels = 4; + else numxpixels = width - i; + extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps); + encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat); + srcaddr += srccomps * numxpixels; + blkaddr += 8; + } + blkaddr += dstRowDiff; + } + break; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0; +/* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n", + width, height, dstRowStride); */ + for (j = 0; j < height; j += 4) { + if (height > j + 3) numypixels = 4; + else numypixels = height - j; + srcaddr = srcPixData + j * width * srccomps; + for (i = 0; i < width; i += 4) { + if (width > i + 3) numxpixels = 4; + else numxpixels = width - i; + extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps); + *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0); + *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0); + *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0); + *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0); + *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0); + *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0); + *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0); + *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0); + encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat); + srcaddr += srccomps * numxpixels; + blkaddr += 8; + } + blkaddr += dstRowDiff; + } + break; 
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0; +/* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n", + width, height, dstRowStride); */ + for (j = 0; j < height; j += 4) { + if (height > j + 3) numypixels = 4; + else numypixels = height - j; + srcaddr = srcPixData + j * width * srccomps; + for (i = 0; i < width; i += 4) { + if (width > i + 3) numxpixels = 4; + else numxpixels = width - i; + extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps); + encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels); + encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat); + srcaddr += srccomps * numxpixels; + blkaddr += 16; + } + blkaddr += dstRowDiff; + } + break; + default: + fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat); + return; + } +} + + diff --git a/dxtn/txc_dxtn.h b/dxtn/txc_dxtn.h new file mode 100644 index 000000000..bf6842c05 --- /dev/null +++ b/dxtn/txc_dxtn.h @@ -0,0 +1,53 @@ +/* + * libtxc_dxtn + * Version: 0.1 + * + * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "base/basictypes.h" + +typedef uint32 GLenum; +typedef int32 GLint; +typedef uint8 GLubyte; +typedef uint16 GLushort; +typedef uint32 GLuint; +typedef void GLvoid; + +typedef GLubyte GLchan; +#define UBYTE_TO_CHAN(b) (b) +#define CHAN_MAX 255 +#define RCOMP 0 +#define GCOMP 1 +#define BCOMP 2 +#define ACOMP 3 + +void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel); +void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel); +void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel); +void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel); + +void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, + const GLubyte *srcPixData, GLenum destformat, + GLubyte *dest, GLint dstRowStride); diff --git a/dxtn/txc_fetch_dxtn.cc b/dxtn/txc_fetch_dxtn.cc new file mode 100644 index 000000000..1004b7cec --- /dev/null +++ b/dxtn/txc_fetch_dxtn.cc @@ -0,0 +1,243 @@ +/* + * libtxc_dxtn + * Version: 0.1 + * + * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include "txc_dxtn.h" + +#define EXP5TO8R(packedcol) \ + ((((packedcol) >> 8) & 0xf8) | (((packedcol) >> 13) & 0x7)) + +#define EXP6TO8G(packedcol) \ + ((((packedcol) >> 3) & 0xfc) | (((packedcol) >> 9) & 0x3)) + +#define EXP5TO8B(packedcol) \ + ((((packedcol) << 3) & 0xf8) | (((packedcol) >> 2) & 0x7)) + +#define EXP4TO8(col) \ + ((col) | ((col) << 4)) + +/* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */ + +static void dxt135_decode_imageblock ( const GLubyte *img_block_src, + GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) { + GLchan *rgba = (GLchan *) texel; + const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8); + const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8); + const GLuint bits = img_block_src[4] | (img_block_src[5] << 8) | + (img_block_src[6] << 16) | (img_block_src[7] << 24); + /* What about big/little endian? 
*/ + GLubyte bit_pos = 2 * (j * 4 + i) ; + GLubyte code = (GLubyte) ((bits >> bit_pos) & 3); + + rgba[ACOMP] = CHAN_MAX; + switch (code) { + case 0: + rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) ); + rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) ); + rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) ); + break; + case 1: + rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) ); + rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) ); + rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) ); + break; + case 2: + if (color0 > color1) { + rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) ); + rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) ); + rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) ); + } + else { + rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) ); + rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) ); + rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) ); + } + break; + case 3: + if ((dxt_type > 1) || (color0 > color1)) { + rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) ); + rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) ); + rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) ); + } + else { + rgba[RCOMP] = 0; + rgba[GCOMP] = 0; + rgba[BCOMP] = 0; + if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0); + } + break; + default: + /* CANNOT happen (I hope) */ + break; + } +} + + +void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel) +{ + /* Extract the (i,j) pixel from pixdata and return it + * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. 
+ */ + + const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8); + dxt135_decode_imageblock(blksrc, (i&3), (j&3), 0, texel); +} + + +void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel) +{ + /* Extract the (i,j) pixel from pixdata and return it + * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. + */ + + const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8); + dxt135_decode_imageblock(blksrc, (i&3), (j&3), 1, texel); +} + +void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel) { + + /* Extract the (i,j) pixel from pixdata and return it + * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. + */ + + GLchan *rgba = (GLchan *) texel; + const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16); +#if 0 + /* Simple 32bit version. */ +/* that's pretty brain-dead for a single pixel, isn't it? */ + const GLubyte bit_pos = 4 * ((j&3) * 4 + (i&3)); + const GLuint alpha_low = blksrc[0] | (blksrc[1] << 8) | (blksrc[2] << 16) | (blksrc[3] << 24); + const GLuint alpha_high = blksrc[4] | (blksrc[5] << 8) | (blksrc[6] << 16) | (blksrc[7] << 24); + + dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel); + if (bit_pos < 32) + rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_low >> bit_pos) & 15)) ); + else + rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8((alpha_high >> (bit_pos - 32)) & 15)) ); +#endif +#if 1 +/* TODO test this! 
*/ + const GLubyte anibble = (blksrc[((j&3) * 4 + (i&3)) / 2] >> (4 * (i&1))) & 0xf; + dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel); + rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) ); +#endif + +} + +void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata, + GLint i, GLint j, GLvoid *texel) { + + /* Extract the (i,j) pixel from pixdata and return it + * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. + */ + + GLchan *rgba = (GLchan *) texel; + const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16); + const GLubyte alpha0 = blksrc[0]; + const GLubyte alpha1 = blksrc[1]; +#if 0 + const GLubyte bit_pos = 3 * ((j&3) * 4 + (i&3)); + /* simple 32bit version */ + const GLuint bits_low = blksrc[2] | (blksrc[3] << 8) | (blksrc[4] << 16) | (blksrc[5] << 24); + const GLuint bits_high = blksrc[6] | (blksrc[7] << 8); + GLubyte code; + + if (bit_pos < 30) + code = (GLubyte) ((bits_low >> bit_pos) & 7); + else if (bit_pos == 30) + code = (GLubyte) ((bits_low >> 30) & 3) | ((bits_high << 2) & 4); + else + code = (GLubyte) ((bits_high >> (bit_pos - 32)) & 7); +#endif +#if 1 +/* TODO test this! 
*/ + const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3; + const GLubyte acodelow = blksrc[2 + bit_pos / 8]; + const GLubyte acodehigh = blksrc[3 + bit_pos / 8]; + const GLubyte code = (acodelow >> (bit_pos & 0x7) | + (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7; +#endif + dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel); +#if 0 + if (alpha0 > alpha1) { + switch (code) { + case 0: + rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 ); + break; + case 1: + rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 ); + break; + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) ); + break; + } + } + else { + switch (code) { + case 0: + rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 ); + break; + case 1: + rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 ); + break; + case 2: + case 3: + case 4: + case 5: + rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) ); + break; + case 6: + rgba[ACOMP] = 0; + break; + case 7: + rgba[ACOMP] = CHAN_MAX; + break; + } + } +#endif +/* not sure. Which version is faster? */ +#if 1 +/* TODO test this */ + if (code == 0) + rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 ); + else if (code == 1) + rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 ); + else if (alpha0 > alpha1) + rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) ); + else if (code < 6) + rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) ); + else if (code == 6) + rgba[ACOMP] = 0; + else + rgba[ACOMP] = CHAN_MAX; +#endif +} |