aboutsummaryrefslogtreecommitdiff
path: root/dxtn
diff options
context:
space:
mode:
Diffstat (limited to 'dxtn')
-rw-r--r--dxtn/Changelog12
-rw-r--r--dxtn/Makefile18
-rw-r--r--dxtn/Makefile.old14
-rw-r--r--dxtn/base/basictypes.h348
-rw-r--r--dxtn/base/port.h54
-rw-r--r--dxtn/build/build_config.h69
-rw-r--r--dxtn/getsrc.btm4
-rw-r--r--dxtn/txc_compress_dxtn.c838
-rw-r--r--dxtn/txc_dxtn.h53
-rw-r--r--dxtn/txc_fetch_dxtn.cc243
10 files changed, 1653 insertions, 0 deletions
diff --git a/dxtn/Changelog b/dxtn/Changelog
new file mode 100644
index 000000000..b4f5fa07e
--- /dev/null
+++ b/dxtn/Changelog
@@ -0,0 +1,12 @@
+20050819:
+- clean up / make faster the fetch functions, especially the 565 to 888 expand was slow
+20050908:
+- fix overflow/underflow of some values (avoids artifacts with some textures) in
+ fancybasecolorsearch when compressing (fix by Dave Airlie).
+20060508:
+- fix serious copy & paste errors in fetch functions introduced with 20050819 version
+- new Makefile (provided by Tilman Sauerbeck)
+- minor cosmetic fixes
+
+20070518:
+- add the missing dstRowStride parameter Mesa uses
diff --git a/dxtn/Makefile b/dxtn/Makefile
new file mode 100644
index 000000000..975063f8b
--- /dev/null
+++ b/dxtn/Makefile
@@ -0,0 +1,18 @@
+# Builds the shared library libtxc_dxtn.so from the compress and fetch objects.
+# CFLAGS and LDFLAGS are appended to, so values inherited from the environment
+# are kept; the optimisation level lives in its own variable, OPT_CFLAGS.
+CFLAGS += -Wall -pedantic -fPIC
+OPT_CFLAGS = -O3
+LDFLAGS += -shared -fPIC
+OBJS = txc_compress_dxtn.o txc_fetch_dxtn.o
+LIB = libtxc_dxtn.so
+
+$(LIB): $(OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(OBJS)
+
+# NOTE(review): this pattern rule only covers .c sources, while the fetch
+# source appears to be added as txc_fetch_dxtn.cc -- confirm that
+# txc_fetch_dxtn.o is still compiled with $(CFLAGS) (in particular -fPIC).
+%.o: %.c txc_dxtn.h
+ $(CC) $(CFLAGS) $(OPT_CFLAGS) -c -o $@ $<
+
+clean:
+ rm -f $(OBJS) $(LIB)
+
+# DESTDIR is not set in this file; it is prepended to the install prefix so a
+# caller can stage the install into another root.
+install: $(LIB)
+ install -d $(DESTDIR)/usr/lib
+ install -m 755 $(LIB) $(DESTDIR)/usr/lib
diff --git a/dxtn/Makefile.old b/dxtn/Makefile.old
new file mode 100644
index 000000000..21b502ee7
--- /dev/null
+++ b/dxtn/Makefile.old
@@ -0,0 +1,14 @@
+# Older hand-written build rules: gcc and every flag are hard-coded per target.
+# NOTE(review): the rules below name txc_fetch_dxtn.c, while the fetch source
+# appears to be added as txc_fetch_dxtn.cc -- confirm before using this file.
+libtxc_dxtn.so: txc_compress_dxtn.o txc_fetch_dxtn.o
+ gcc -O3 -Wall -pedantic -fPIC -lpthread -shared -o libtxc_dxtn.so txc_compress_dxtn.o txc_fetch_dxtn.o
+
+txc_compress_dxtn.o : txc_compress_dxtn.c txc_dxtn.h
+ gcc -c -O3 -Wall -pedantic -fPIC -o txc_compress_dxtn.o txc_compress_dxtn.c
+
+txc_fetch_dxtn.o : txc_fetch_dxtn.c txc_dxtn.h
+ gcc -c -O3 -Wall -pedantic -fPIC -o txc_fetch_dxtn.o txc_fetch_dxtn.c
+
+# rm is called without -f here, so it reports an error for files that are missing.
+clean:
+ rm txc_fetch_dxtn.o txc_compress_dxtn.o libtxc_dxtn.so
+
+# Installs straight into /usr/lib (no DESTDIR support).
+install: libtxc_dxtn.so
+ install libtxc_dxtn.so /usr/lib/
diff --git a/dxtn/base/basictypes.h b/dxtn/base/basictypes.h
new file mode 100644
index 000000000..1e4430380
--- /dev/null
+++ b/dxtn/base/basictypes.h
@@ -0,0 +1,348 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_BASICTYPES_H_
+#define BASE_BASICTYPES_H_
+
+#include <limits.h> // So we can set the bounds of our types
+#include <stddef.h> // For size_t
+#include <string.h> // for memcpy
+
+#include "base/port.h" // Types that only need exist on certain systems
+
+#ifndef COMPILER_MSVC
+// stdint.h is part of C99 but MSVC doesn't have it.
+#include <stdint.h> // For intptr_t.
+#endif
+
+// Fixed-width integer aliases used throughout this header.
+typedef signed char schar;
+typedef signed char int8;
+typedef short int16;
+// TODO(mbelshe) Remove these type guards. These are
+// temporary to avoid conflicts with npapi.h.
+#ifndef _INT32
+#define _INT32
+typedef int int32;
+#endif
+
+// The NSPR system headers define 64-bit as |long| when possible. In order to
+// not have typedef mismatches, we do the same on LP64.
+// (__LP64__ is tested without defined(): an unset macro evaluates to 0 in #if,
+// so the |long long| branch is taken whenever the compiler does not set it.)
+#if __LP64__
+typedef long int64;
+#else
+typedef long long int64;
+#endif
+
+// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
+// places. Use the signed types unless your variable represents a bit
+// pattern (eg a hash value) or you really need the extra bit. Do NOT
+// use 'unsigned' to express "this value should always be positive";
+// use assertions for this.
+
+typedef unsigned char uint8;
+typedef unsigned short uint16;
+// TODO(mbelshe) Remove these type guards. These are
+// temporary to avoid conflicts with npapi.h.
+#ifndef _UINT32
+#define _UINT32
+typedef unsigned int uint32;
+#endif
+
+// See the comment above about NSPR and 64-bit.
+#if __LP64__
+typedef unsigned long uint64;
+#else
+typedef unsigned long long uint64;
+#endif
+
+// A type to represent a Unicode code-point value. As of Unicode 4.0,
+// such values require up to 21 bits.
+// (For type-checking on pointers, make this explicitly signed,
+// and it should always be the signed version of whatever int32 is.)
+typedef signed int char32;
+
+// Limits of the fixed-width types above. The 64-bit literals go through
+// GG_LONGLONG (base/port.h) so they get the compiler-specific suffix; the
+// signed minimums are written as bit patterns and cast to the signed type.
+const uint8 kuint8max = (( uint8) 0xFF);
+const uint16 kuint16max = ((uint16) 0xFFFF);
+const uint32 kuint32max = ((uint32) 0xFFFFFFFF);
+const uint64 kuint64max = ((uint64) GG_LONGLONG(0xFFFFFFFFFFFFFFFF));
+const int8 kint8min = (( int8) 0x80);
+const int8 kint8max = (( int8) 0x7F);
+const int16 kint16min = (( int16) 0x8000);
+const int16 kint16max = (( int16) 0x7FFF);
+const int32 kint32min = (( int32) 0x80000000);
+const int32 kint32max = (( int32) 0x7FFFFFFF);
+const int64 kint64min = (( int64) GG_LONGLONG(0x8000000000000000));
+const int64 kint64max = (( int64) GG_LONGLONG(0x7FFFFFFFFFFFFFFF));
+
+// A macro to disallow the copy constructor and operator= functions.
+// This should be used in the private: declarations for a class.
+// The expansion only declares the two members and ends without a semicolon,
+// so the use site supplies the trailing ';'.
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+// An older, deprecated, politically incorrect name for the above.
+#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
+
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+//
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
+// It declares the default constructor and then reuses
+// DISALLOW_COPY_AND_ASSIGN for the copy operations.
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+ TypeName(); \
+ DISALLOW_COPY_AND_ASSIGN(TypeName)
+
+// The arraysize(arr) macro returns the # of elements in an array arr.
+// The expression is a compile-time constant, and therefore can be
+// used in defining new arrays, for example. If you use arraysize on
+// a pointer by mistake, you will get a compile-time error.
+//
+// One caveat is that arraysize() doesn't accept any array of an
+// anonymous type or a type defined inside a function. In these rare
+// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
+// due to a limitation in C++'s template system. The limitation might
+// eventually be removed, but it hasn't happened yet.
+
+// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only
+// use its type.
+// Declared only: the return type is a reference to a char array with as many
+// elements as the argument array, so sizeof() of a call yields that count.
+template <typename Elem, size_t Count>
+char (&ArraySizeHelper(Elem (&arr)[Count]))[Count];
+
+// gcc additionally needs an overload for arrays of const elements, while VC
+// cannot choose between the two overloads, so VC only sees the one above.
+#ifndef _MSC_VER
+template <typename Elem, size_t Count>
+char (&ArraySizeHelper(const Elem (&arr)[Count]))[Count];
+#endif
+
+// Number of elements in the array |arr|, as a compile-time constant.
+#define arraysize(arr) (sizeof(ArraySizeHelper(arr)))
+
+// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
+// but can be used on anonymous types or types defined inside
+// functions. It's less safe than arraysize as it accepts some
+// (although not all) pointers. Therefore, you should use arraysize
+// whenever possible.
+//
+// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
+// size_t.
+//
+// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler
+// diagnostic such as
+//
+// "warning: division by zero in ..."
+//
+// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
+// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
+//
+// The following comments are on the implementation details, and can
+// be ignored by the users.
+//
+// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
+// the array) and sizeof(*(arr)) (the # of bytes in one array
+// element). If the former is divisible by the latter, perhaps arr is
+// indeed an array, in which case the division result is the # of
+// elements in the array. Otherwise, arr cannot possibly be an array,
+// and we divide by zero so that the compiler flags the expression.
+//
+// Since the size of bool is implementation-defined, we need to cast
+// !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
+// result has type size_t.
+//
+// This macro is not perfect as it wrongfully accepts certain
+// pointers, namely where the pointer size is divisible by the pointee
+// size. Since all our code has to go through a 32-bit compiler,
+// where a pointer is 4 bytes, this means all pointers to a type whose
+// size is 3 or greater than 4 will be (righteously) rejected.
+
+#define ARRAYSIZE_UNSAFE(a) \
+ ((sizeof(a) / sizeof(*(a))) / \
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+
+
+// Use implicit_cast as a safe version of static_cast or const_cast
+// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
+// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
+// a const pointer to Foo).
+// When you use implicit_cast, the compiler checks that the cast is safe.
+// Such explicit implicit_casts are necessary in surprisingly many
+// situations where C++ demands an exact type match instead of an
+// argument type convertible to a target type.
+//
+// The From type can be inferred, so the preferred syntax for using
+// implicit_cast is the same as for static_cast etc.:
+//
+// implicit_cast<ToType>(expr)
+//
+// implicit_cast would have been part of the C++ standard library,
+// but the proposal was submitted too late. It will probably make
+// its way into the language in the future.
+// Converts |value| to To using only the conversions the compiler would apply
+// implicitly; To is spelled by the caller, From is deduced from the argument.
+template <typename To, typename From>
+inline To implicit_cast(const From& value) {
+  return value;
+}
+
+// The COMPILE_ASSERT macro can be used to verify that a compile time
+// expression is true. For example, you could use it to verify the
+// size of a static array:
+//
+// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
+// content_type_names_incorrect_size);
+//
+// or to make sure a struct is smaller than a certain size:
+//
+// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
+//
+// The second argument to the macro is the name of the variable. If
+// the expression is false, most compilers will issue a warning/error
+// containing the name of the variable.
+
+// Empty helper type; it exists only so that |expr| has to be usable as a
+// template argument, i.e. a compile-time constant.
+template <bool>
+struct CompileAssert {
+};
+
+// Any earlier COMPILE_ASSERT definition is dropped before defining ours.
+// The macro declares a typedef named |msg| for an array of CompileAssert
+// whose size is 1 when |expr| is true and -1 (invalid) when it is false.
+#undef COMPILE_ASSERT
+#define COMPILE_ASSERT(expr, msg) \
+ typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+// Implementation details of COMPILE_ASSERT:
+//
+// - COMPILE_ASSERT works by defining an array type that has -1
+// elements (and thus is invalid) when the expression is false.
+//
+// - The simpler definition
+//
+// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
+//
+// does not work, as gcc supports variable-length arrays whose sizes
+// are determined at run-time (this is gcc's extension and not part
+// of the C++ standard). As a result, gcc fails to reject the
+// following code with the simple definition:
+//
+// int foo;
+// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
+// // not a compile-time constant.
+//
+// - By using the type CompileAssert<(bool(expr))>, we ensure that
+// expr is a compile-time constant. (Template arguments must be
+// determined at compile-time.)
+//
+// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
+// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
+//
+// CompileAssert<bool(expr)>
+//
+// instead, these compilers will refuse to compile
+//
+// COMPILE_ASSERT(5 > 0, some_message);
+//
+// (They seem to think the ">" in "5 > 0" marks the end of the
+// template argument list.)
+//
+// - The array size is (bool(expr) ? 1 : -1), instead of simply
+//
+// ((expr) ? 1 : -1).
+//
+// This is to avoid running into a bug in MS VC 7.1, which
+// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
+
+
+// MetatagId refers to the metatag-id that we assign to
+// each metatag <name, value> pair.
+typedef uint32 MetatagId;
+
+// Argument type used in interfaces that can optionally take ownership
+// of a passed in argument. If TAKE_OWNERSHIP is passed, the called
+// object takes ownership of the argument. Otherwise it does not.
+enum Ownership {
+ DO_NOT_TAKE_OWNERSHIP,  // the called object does not take ownership
+ TAKE_OWNERSHIP          // the called object takes ownership of the argument
+};
+
+// bit_cast<Dest,Source> is a template function that implements the
+// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
+// very low-level functions like the protobuf library and fast math
+// support.
+//
+// float f = 3.14159265358979;
+// int i = bit_cast<int32>(f);
+// // i = 0x40490fdb
+//
+// The classical address-casting method is:
+//
+// // WRONG
+// float f = 3.14159265358979; // WRONG
+// int i = * reinterpret_cast<int*>(&f); // WRONG
+//
+// The address-casting method actually produces undefined behavior
+// according to ISO C++ specification section 3.10 -15 -. Roughly, this
+// section says: if an object in memory has one type, and a program
+// accesses it with a different type, then the result is undefined
+// behavior for most values of "different type".
+//
+// This is true for any cast syntax, either *(int*)&f or
+// *reinterpret_cast<int*>(&f). And it is particularly true for
+// conversions between integral lvalues and floating-point lvalues.
+//
+// The purpose of 3.10 -15- is to allow optimizing compilers to assume
+// that expressions with different types refer to different memory. gcc
+// 4.0.1 has an optimizer that takes advantage of this. So a
+// non-conforming program quietly produces wildly incorrect output.
+//
+// The problem is not the use of reinterpret_cast. The problem is type
+// punning: holding an object in memory of one type and reading its bits
+// back using a different type.
+//
+// The C++ standard is more subtle and complex than this, but that
+// is the basic idea.
+//
+// Anyways ...
+//
+// bit_cast<> calls memcpy() which is blessed by the standard,
+// especially by the example in section 3.9 . Also, of course,
+// bit_cast<> wraps up the nasty logic in one place.
+//
+// Fortunately memcpy() is very fast. In optimized mode, with a
+// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
+// code with the minimal amount of data movement. On a 32-bit system,
+// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
+// compiles to two loads and two stores.
+//
+// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
+//
+// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
+// is likely to surprise you.
+
+// Returns a Dest whose bytes are a copy of the bytes of |source|.
+// Dest and Source must have the same size, otherwise compilation fails.
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  // The array size is -1 (a compile error) unless both types have equal size.
+  typedef char SizesMustMatch[(sizeof(Dest) == sizeof(Source)) ? 1 : -1];
+
+  Dest result;
+  // memcpy copies the object representation without type punning.
+  memcpy(&result, &source, sizeof(result));
+  return result;
+}
+
+// The following enum should be used only as a constructor argument to indicate
+// that the variable has static storage class, and that the constructor should
+// do nothing to its state. It indicates to the reader that it is legal to
+// declare a static instance of the class, provided the constructor is given
+// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
+// static variable that has a constructor or a destructor because invocation
+// order is undefined. However, IF the type can be initialized by filling with
+// zeroes (which the loader does for static variables), AND the destructor also
+// does nothing to the storage, AND there are no virtual methods, then a
+// constructor declared as
+// explicit MyClass(base::LinkerInitialized x) {}
+// and invoked as
+// static MyClass my_variable_name(base::LINKER_INITIALIZED);
+// is the intended way to declare such a static instance.
+namespace base {
+enum LinkerInitialized { LINKER_INITIALIZED };
+} // base
+
+
+#endif // BASE_BASICTYPES_H_
diff --git a/dxtn/base/port.h b/dxtn/base/port.h
new file mode 100644
index 000000000..18a936152
--- /dev/null
+++ b/dxtn/base/port.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_PORT_H_
+#define BASE_PORT_H_
+
+#include <stdarg.h>
+#include "build/build_config.h"
+
+// Suffix macros for 64-bit integer literals: MSVC spells the suffix
+// I64/UI64, every other compiler gets LL/ULL.
+#ifdef COMPILER_MSVC
+#define GG_LONGLONG(x) x##I64
+#define GG_ULONGLONG(x) x##UI64
+#else
+#define GG_LONGLONG(x) x##LL
+#define GG_ULONGLONG(x) x##ULL
+#endif
+
+// Per C99 7.18.4, define __STDC_CONSTANT_MACROS before including <stdint.h>
+// to get the INTn_C and UINTn_C macros for integer constants. It's difficult
+// to guarantee any specific ordering of header includes, so it's difficult to
+// guarantee that the INTn_C macros can be defined by including <stdint.h> at
+// any specific point. Provide GG_INTn_C macros instead.
+
+#define GG_INT8_C(x) (x)
+#define GG_INT16_C(x) (x)
+#define GG_INT32_C(x) (x)
+#define GG_INT64_C(x) GG_LONGLONG(x)
+
+#define GG_UINT8_C(x) (x ## U)
+#define GG_UINT16_C(x) (x ## U)
+#define GG_UINT32_C(x) (x ## U)
+#define GG_UINT64_C(x) GG_ULONGLONG(x)
+
+// It's possible for functions that use a va_list, such as StringPrintf, to
+// invalidate the data in it upon use. The fix is to make a copy of the
+// structure before using it and use that copy instead. va_copy is provided
+// for this purpose. MSVC does not provide va_copy, so define an
+// implementation here. It is not guaranteed that assignment is a copy, so the
+// StringUtil.VariableArgsFunc unit test tests this capability.
+// GG_VA_COPY stays undefined when neither compiler macro is set.
+#if defined(COMPILER_GCC)
+#define GG_VA_COPY(a, b) (va_copy(a, b))
+#elif defined(COMPILER_MSVC)
+#define GG_VA_COPY(a, b) (a = b)
+#endif
+
+// Define an OS-neutral wrapper for shared library entry points
+// (__stdcall on Windows, empty on Linux and Mac OS X).
+#if defined(OS_WIN)
+#define API_CALL __stdcall
+#elif defined(OS_LINUX) || defined(OS_MACOSX)
+#define API_CALL
+#endif
+
+#endif // BASE_PORT_H_
diff --git a/dxtn/build/build_config.h b/dxtn/build/build_config.h
new file mode 100644
index 000000000..2b9efda15
--- /dev/null
+++ b/dxtn/build/build_config.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file adds defines about the platform we're currently building on.
+// Operating System:
+// OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX)
+// Compiler:
+// COMPILER_MSVC / COMPILER_GCC
+// Processor:
+// ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
+// ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
+
+#ifndef BUILD_BUILD_CONFIG_H_
+#define BUILD_BUILD_CONFIG_H_
+
+// A set of macros to use for platform detection.
+// Exactly one OS_* macro is defined; any other platform stops the build.
+#if defined(__APPLE__)
+#define OS_MACOSX 1
+#elif defined(__linux__)
+#define OS_LINUX 1
+#elif defined(_WIN32)
+#define OS_WIN 1
+#else
+#error Please add support for your platform in build/build_config.h
+#endif
+
+// For access to standard POSIX features, use OS_POSIX instead of a more
+// specific macro.
+#if defined(OS_MACOSX) || defined(OS_LINUX)
+#define OS_POSIX 1
+#endif
+
+// Compiler detection.
+// __GNUC__ is checked first, so a compiler that defines both __GNUC__ and
+// _MSC_VER is classified as COMPILER_GCC.
+#if defined(__GNUC__)
+#define COMPILER_GCC 1
+#elif defined(_MSC_VER)
+#define COMPILER_MSVC 1
+#else
+#error Please add support for your compiler in build/build_config.h
+#endif
+
+// Processor architecture detection. For more info on what's defined, see:
+// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+// http://www.agner.org/optimize/calling_conventions.pdf
+// Only x86 and x86-64 are recognised; other architectures stop the build.
+#if defined(_M_X64) || defined(__x86_64__)
+#define ARCH_CPU_X86_FAMILY 1
+#define ARCH_CPU_X86_64 1
+#define ARCH_CPU_64_BITS 1
+#elif defined(_M_IX86) || defined(__i386__)
+#define ARCH_CPU_X86_FAMILY 1
+#define ARCH_CPU_X86 1
+#define ARCH_CPU_32_BITS 1
+#else
+#error Please add support for your architecture in build/build_config.h
+#endif
+
+// Type detection for wchar_t.
+// On POSIX with GCC only a wchar_t whose maximum is 0x7fffffff is accepted as
+// UTF-32; any other combination stops the build.
+#if defined(OS_WIN)
+#define WCHAR_T_IS_UTF16
+#elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
+ defined(__WCHAR_MAX__) && __WCHAR_MAX__ == 0x7fffffff
+#define WCHAR_T_IS_UTF32
+#else
+#error Please add support for your compiler in build/build_config.h
+#endif
+
+#endif // BUILD_BUILD_CONFIG_H_
+
diff --git a/dxtn/getsrc.btm b/dxtn/getsrc.btm
new file mode 100644
index 000000000..dd213f6d2
--- /dev/null
+++ b/dxtn/getsrc.btm
@@ -0,0 +1,4 @@
+rem Re-exports the vendored sources from the o3d Subversion repository:
+rem the libtxc_dxtn files into the current directory, basictypes.h and port.h
+rem into base, and build_config.h into build. --force is passed to every export.
+rem NOTE(review): assumes this .btm file is run as a batch script where "rem"
+rem starts a comment -- confirm with the shell that executes it.
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/libtxc_dxtn/files . --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/base/basictypes.h base --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/base/port.h base --force
+svn export http://o3d.googlecode.com/svn/trunk/googleclient/third_party/chrome/files/build/build_config.h build --force
diff --git a/dxtn/txc_compress_dxtn.c b/dxtn/txc_compress_dxtn.c
new file mode 100644
index 000000000..0be8d4618
--- /dev/null
+++ b/dxtn/txc_compress_dxtn.c
@@ -0,0 +1,838 @@
+/*
+ * libtxc_dxtn
+ * Version: 0.1
+ *
+ * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "txc_dxtn.h"
+
+/* weights used for error function, basically weights (unsquared 2/4/1) according to rgb->luminance conversion
+ not sure if this really reflects visual perception.
+ The values below are the squares (4/16/1) because they multiply squared channel differences. */
+#define REDWEIGHT 4
+#define GREENWEIGHT 16
+#define BLUEWEIGHT 1
+
+/* alpha threshold: for GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, storedxtencodedblock encodes source pixels
+ with alpha <= ALPHACUT as index 3 */
+#define ALPHACUT 127
+
+/*
+ * Refine the two base colors of one block.
+ *
+ * On entry bestcolor[0] and bestcolor[1] point at the initial base colors
+ * (r, g, b).  Every source pixel is matched against the four-entry palette
+ * built from them (both base colors plus the 2:1 and 1:2 interpolants) using
+ * the REDWEIGHT/GREENWEIGHT/BLUEWEIGHT metric.  The signed per-channel error
+ * of the best match is accumulated for each base color, weighted by how much
+ * that base color contributes to the chosen palette entry, and each base
+ * color is then moved by its average error (clamped to 0..255).  If the two
+ * results are close enough that they might quantize to the same 16 bit value
+ * they are pushed apart.  The refined colors are written back through
+ * bestcolor[], with the color of smaller 565 value in bestcolor[0].
+ *
+ * srccolors:   source pixels as [row][column][r, g, b, a]
+ * numxpixels,
+ * numypixels:  number of valid columns / rows in srccolors
+ * blkaddr, type, haveAlpha: not used by this function
+ */
+static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
+                                  GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
+{
+   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
+
+   /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
+      if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
+      due to their alpha value will influence the result */
+   GLint i, j, colors, z;
+   GLuint pixerror, pixerrorbest;
+   /* per-channel errors are signed differences, so keep them in signed variables */
+   GLint pixerrorred, pixerrorgreen, pixerrorblue;
+   GLint colordist, blockerrlin[2][3];
+   GLubyte nrcolor[2];
+   GLint pixerrorcolorbest[3] = { 0, 0, 0 };
+   GLubyte enc = 0;
+   GLubyte cv[4][4];
+   GLubyte testcolor[2][3];
+
+   (void) blkaddr;
+   (void) type;
+   (void) haveAlpha;
+
+   /* start with the base colors ordered so that testcolor[0] has the smaller 565 value */
+   if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
+       ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
+      testcolor[0][0] = bestcolor[0][0];
+      testcolor[0][1] = bestcolor[0][1];
+      testcolor[0][2] = bestcolor[0][2];
+      testcolor[1][0] = bestcolor[1][0];
+      testcolor[1][1] = bestcolor[1][1];
+      testcolor[1][2] = bestcolor[1][2];
+   }
+   else {
+      testcolor[1][0] = bestcolor[0][0];
+      testcolor[1][1] = bestcolor[0][1];
+      testcolor[1][2] = bestcolor[0][2];
+      testcolor[0][0] = bestcolor[1][0];
+      testcolor[0][1] = bestcolor[1][1];
+      testcolor[0][2] = bestcolor[1][2];
+   }
+
+   /* palette: 0 and 1 are the base colors, 2 and 3 the interpolated colors */
+   for (i = 0; i < 3; i ++) {
+      cv[0][i] = testcolor[0][i];
+      cv[1][i] = testcolor[1][i];
+      cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
+      cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
+   }
+
+   blockerrlin[0][0] = 0;
+   blockerrlin[0][1] = 0;
+   blockerrlin[0][2] = 0;
+   blockerrlin[1][0] = 0;
+   blockerrlin[1][1] = 0;
+   blockerrlin[1][2] = 0;
+
+   nrcolor[0] = 0;
+   nrcolor[1] = 0;
+
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         pixerrorbest = 0xffffffff;
+         for (colors = 0; colors < 4; colors++) {
+            colordist = srccolors[j][i][0] - (cv[colors][0]);
+            pixerror = colordist * colordist * REDWEIGHT;
+            pixerrorred = colordist;
+            colordist = srccolors[j][i][1] - (cv[colors][1]);
+            pixerror += colordist * colordist * GREENWEIGHT;
+            pixerrorgreen = colordist;
+            colordist = srccolors[j][i][2] - (cv[colors][2]);
+            pixerror += colordist * colordist * BLUEWEIGHT;
+            pixerrorblue = colordist;
+            if (pixerror < pixerrorbest) {
+               enc = colors;
+               pixerrorbest = pixerror;
+               pixerrorcolorbest[0] = pixerrorred;
+               pixerrorcolorbest[1] = pixerrorgreen;
+               pixerrorcolorbest[2] = pixerrorblue;
+            }
+         }
+         /* distribute the error onto the base colors in thirds, matching the
+            share each base color has in the chosen palette entry */
+         if (enc == 0) {
+            for (z = 0; z < 3; z++) {
+               blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
+            }
+            nrcolor[0] += 3;
+         }
+         else if (enc == 2) {
+            for (z = 0; z < 3; z++) {
+               blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
+            }
+            nrcolor[0] += 2;
+            for (z = 0; z < 3; z++) {
+               blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
+            }
+            nrcolor[1] += 1;
+         }
+         else if (enc == 3) {
+            for (z = 0; z < 3; z++) {
+               blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
+            }
+            nrcolor[0] += 1;
+            for (z = 0; z < 3; z++) {
+               blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
+            }
+            nrcolor[1] += 2;
+         }
+         else if (enc == 1) {
+            for (z = 0; z < 3; z++) {
+               blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
+            }
+            nrcolor[1] += 3;
+         }
+      }
+   }
+   /* avoid dividing by zero when a base color was never used */
+   if (nrcolor[0] == 0) nrcolor[0] = 1;
+   if (nrcolor[1] == 0) nrcolor[1] = 1;
+   for (j = 0; j < 2; j++) {
+      for (i = 0; i < 3; i++) {
+         GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
+         if (newvalue <= 0)
+            testcolor[j][i] = 0;
+         else if (newvalue >= 255)
+            testcolor[j][i] = 255;
+         else testcolor[j][i] = newvalue;
+      }
+   }
+
+   if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
+       (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
+       (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
+      /* both colors are so close they might get encoded as the same 16bit values */
+      GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
+
+      coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
+      coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
+      coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
+      coldiffmax = coldiffred;
+      if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
+      if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
+      if (coldiffmax > 0) {
+         if (coldiffmax > 4) factor = 2;
+         else if (coldiffmax > 2) factor = 3;
+         else factor = 4;
+         /* Won't do much if the color value is near 255... */
+         /* argh so many ifs */
+         if (testcolor[1][1] >= testcolor[0][1]) {
+            ind1 = 1; ind0 = 0;
+         }
+         else {
+            ind1 = 0; ind0 = 1;
+         }
+         if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
+            testcolor[ind1][1] += factor * coldiffgreen;
+         else testcolor[ind1][1] = 255;
+         /* red must be compared with red (index 0), as blue is with blue below */
+         if ((testcolor[ind1][0] - testcolor[ind0][0]) > 0) {
+            if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
+               testcolor[ind1][0] += factor * coldiffred;
+            else testcolor[ind1][0] = 255;
+         }
+         else {
+            if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
+               testcolor[ind0][0] += factor * coldiffred;
+            else testcolor[ind0][0] = 255;
+         }
+         if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
+            if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
+               testcolor[ind1][2] += factor * coldiffblue;
+            else testcolor[ind1][2] = 255;
+         }
+         else {
+            if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
+               testcolor[ind0][2] += factor * coldiffblue;
+            else testcolor[ind0][2] = 255;
+         }
+      }
+   }
+
+   /* same 565 packing on both sides: only the blue channel is shifted right by 3 */
+   if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
+       ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2] >> 3)) {
+      for (i = 0; i < 3; i++) {
+         bestcolor[0][i] = testcolor[0][i];
+         bestcolor[1][i] = testcolor[1][i];
+      }
+   }
+   else {
+      for (i = 0; i < 3; i++) {
+         bestcolor[0][i] = testcolor[1][i];
+         bestcolor[1][i] = testcolor[0][i];
+      }
+   }
+}
+
+
+
+/*
+ * Encode the color part of one block and write its 8 bytes to blkaddr.
+ *
+ * The two base colors are truncated to 5/6/5 bits (in place, through
+ * bestcolor[]) and ordered so that color0 >= color1.  The block is then
+ * encoded twice: once with the four-color palette (two base colors plus two
+ * interpolants) and once with the three-color palette (two base colors plus
+ * their average, index 3 reserved).  In the second pass, pixels with
+ * alpha <= ALPHACUT get index 3 when type is GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
+ * The three-color result is written (with the base colors exchanged) when its
+ * accumulated error is smaller or when haveAlpha is set; otherwise the
+ * four-color result is written.  Output layout: two little-endian 16 bit
+ * colors followed by the 32 index bits, least significant byte first.
+ *
+ * srccolors:   source pixels as [row][column][r, g, b, a]
+ * numxpixels,
+ * numypixels:  number of valid columns / rows in srccolors
+ */
+static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
+                                  GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
+{
+   /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
+
+   GLint i, j, colors;
+   GLuint testerror, testerror2, pixerror, pixerrorbest;
+   GLint colordist;
+   GLushort color0, color1, tempcolor;
+   GLuint bits = 0, bits2 = 0;
+   GLubyte *colorptr;
+   GLubyte enc = 0;
+   GLubyte cv[4][4];
+
+   /* drop the bits that do not survive 565 quantization */
+   bestcolor[0][0] = bestcolor[0][0] & 0xf8;
+   bestcolor[0][1] = bestcolor[0][1] & 0xfc;
+   bestcolor[0][2] = bestcolor[0][2] & 0xf8;
+   bestcolor[1][0] = bestcolor[1][0] & 0xf8;
+   bestcolor[1][1] = bestcolor[1][1] & 0xfc;
+   bestcolor[1][2] = bestcolor[1][2] & 0xf8;
+
+   color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
+   color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
+   if (color0 < color1) {
+      tempcolor = color0; color0 = color1; color1 = tempcolor;
+      colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
+   }
+
+
+   /* first pass: four-color palette */
+   for (i = 0; i < 3; i ++) {
+      cv[0][i] = bestcolor[0][i];
+      cv[1][i] = bestcolor[1][i];
+      cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
+      cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
+   }
+
+   testerror = 0;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         pixerrorbest = 0xffffffff;
+         for (colors = 0; colors < 4; colors++) {
+            colordist = srccolors[j][i][0] - cv[colors][0];
+            pixerror = colordist * colordist * REDWEIGHT;
+            colordist = srccolors[j][i][1] - cv[colors][1];
+            pixerror += colordist * colordist * GREENWEIGHT;
+            colordist = srccolors[j][i][2] - cv[colors][2];
+            pixerror += colordist * colordist * BLUEWEIGHT;
+            if (pixerror < pixerrorbest) {
+               pixerrorbest = pixerror;
+               enc = colors;
+            }
+         }
+         testerror += pixerrorbest;
+         /* shift as unsigned: enc promoted to int would overflow for the last pixel (3 << 30) */
+         bits |= (GLuint) enc << (2 * (j * 4 + i));
+      }
+   }
+   /* second pass: three-color palette */
+   for (i = 0; i < 3; i ++) {
+      cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
+      /* this isn't used. Looks like the black color constant can only be used
+         with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
+         it will decode 3 to black even with DXT3/5), and due to how the color searching works
+         it won't get used even then */
+      cv[3][i] = 0;
+   }
+   testerror2 = 0;
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         pixerrorbest = 0xffffffff;
+         if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
+            enc = 3;
+            pixerrorbest = 0; /* don't calculate error */
+         }
+         else {
+            /* we're calculating the same what we have done already for colors 0-1 above... */
+            for (colors = 0; colors < 3; colors++) {
+               colordist = srccolors[j][i][0] - cv[colors][0];
+               pixerror = colordist * colordist * REDWEIGHT;
+               colordist = srccolors[j][i][1] - cv[colors][1];
+               pixerror += colordist * colordist * GREENWEIGHT;
+               colordist = srccolors[j][i][2] - cv[colors][2];
+               pixerror += colordist * colordist * BLUEWEIGHT;
+               if (pixerror < pixerrorbest) {
+                  pixerrorbest = pixerror;
+                  /* need to exchange colors later */
+                  if (colors > 1) enc = colors;
+                  else enc = colors ^ 1;
+               }
+            }
+         }
+         testerror2 += pixerrorbest;
+         bits2 |= (GLuint) enc << (2 * (j * 4 + i));
+      }
+   }
+
+
+   /* finally we're finished, write back colors and bits */
+   if ((testerror > testerror2) || (haveAlpha)) {
+      /* three-color encoding: the smaller color is stored first */
+      *blkaddr++ = color1 & 0xff;
+      *blkaddr++ = color1 >> 8;
+      *blkaddr++ = color0 & 0xff;
+      *blkaddr++ = color0 >> 8;
+      *blkaddr++ = bits2 & 0xff;
+      *blkaddr++ = ( bits2 >> 8) & 0xff;
+      *blkaddr++ = ( bits2 >> 16) & 0xff;
+      *blkaddr = bits2 >> 24;
+   }
+   else {
+      /* four-color encoding: the larger color is stored first */
+      *blkaddr++ = color0 & 0xff;
+      *blkaddr++ = color0 >> 8;
+      *blkaddr++ = color1 & 0xff;
+      *blkaddr++ = color1 >> 8;
+      *blkaddr++ = bits & 0xff;
+      *blkaddr++ = ( bits >> 8) & 0xff;
+      *blkaddr++ = ( bits >> 16) & 0xff;
+      *blkaddr = bits >> 24;
+   }
+}
+
+/*
+ * Choose two starting base colors for one block, refine them and store the
+ * encoded color block.
+ *
+ *   blkaddr     destination of the encoded color block
+ *   srccolors   source pixels, indexed [row][column][component]
+ *   numxpixels  number of valid columns in srccolors
+ *   numypixels  number of valid rows in srccolors
+ *   type        destination format; only GL_COMPRESSED_RGBA_S3TC_DXT1_EXT
+ *               gets special treatment here
+ */
+static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
+                                       GLint numxpixels, GLint numypixels, GLuint type )
+{
+/* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
+   present in the picture as base colors */
+
+   /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
+      vectors are weighted similar to their importance in rgb-luminance conversion
+      doesn't work too well though...
+      This seems to be a rather difficult problem */
+
+   GLubyte *bestcolor[2];
+   GLubyte basecolors[2][3];
+   GLubyte i, j;
+   GLuint lowcv, highcv, testcv;
+   GLboolean haveAlpha = GL_FALSE;
+
+   /* seed both extremes with the first pixel */
+   lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
+                    srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
+                    srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
+   bestcolor[0] = bestcolor[1] = srccolors[0][0];
+   for (j = 0; j < numypixels; j++) {
+      for (i = 0; i < numxpixels; i++) {
+         /* don't use this as a base color if the pixel will get black/transparent anyway.
+            For RGBA DXT1 the block writer treats alpha <= ALPHACUT as transparent, so only
+            pixels with alpha > ALPHACUT may become base colors; the others just flag the
+            block as containing alpha. */
+         if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
+            testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
+                     srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
+                     srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
+            if (testcv > highcv) {
+               highcv = testcv;
+               bestcolor[1] = srccolors[j][i];
+            }
+            else if (testcv < lowcv) {
+               lowcv = testcv;
+               bestcolor[0] = srccolors[j][i];
+            }
+         }
+         else haveAlpha = GL_TRUE;
+      }
+   }
+   /* make sure the original color values won't get touched... */
+   for (j = 0; j < 2; j++) {
+      for (i = 0; i < 3; i++) {
+         basecolors[j][i] = bestcolor[j][i];
+      }
+   }
+   bestcolor[0] = basecolors[0];
+   bestcolor[1] = basecolors[1];
+
+   /* try to find better base colors */
+   fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
+   /* find the best encoding for these colors, and store the result */
+   storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
+}
+
+/*
+ * Store one 8-byte alpha block at blkaddr: the two base alpha values followed
+ * by the sixteen codes of alphaenc, 3 bits each, lowest bits first.
+ * Every group of eight codes fills exactly three bytes.
+ */
+static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
+                                        GLubyte alphaenc[16])
+{
+   GLubyte half;
+
+   blkaddr[0] = alphabase1;
+   blkaddr[1] = alphabase2;
+   for (half = 0; half < 2; half++) {
+      const GLubyte *code = alphaenc + 8 * half;
+      GLubyte *out = blkaddr + 2 + 3 * half;
+
+      /* codes 2 and 5 straddle a byte boundary */
+      out[0] = code[0] | (code[1] << 3) | ((code[2] & 3) << 6);
+      out[1] = (code[2] >> 2) | (code[3] << 1) | (code[4] << 4) | ((code[5] & 1) << 7);
+      out[2] = (code[5] >> 1) | (code[6] << 2) | (code[7] << 5);
+   }
+}
+
+static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
+ GLint numxpixels, GLint numypixels)
+{ /* Encodes the alpha values of one block (srccolors[j][i][3] for
+ * j < numypixels, i < numxpixels) into the 8 bytes at blkaddr: two base
+ * alpha bytes followed by sixteen 3-bit codes.
+ *
+ * Up to three candidate encodings are built and the one with the smallest
+ * summed squared error is written:
+ *   1. higher base value stored first, codes spread over the full range
+ *      alphause[0]..alphause[1] (which includes 0 / 255 when present);
+ *   2. lower base value stored first, codes 6 and 7 used for exactly 0 and
+ *      255, the remaining codes spread over alphabase[0]..alphabase[1];
+ *   3. like 2, but with base values (alphatest) shifted by the average
+ *      error, and values close to 0 / 255 also mapped to codes 6 / 7.
+ * Encodings 2 and 3 are only tried when the earlier errors are large enough.
+ */
+ GLubyte alphabase[2], alphause[2]; /* alphabase: min/max ignoring 0 and 255; alphause: range used by encoding 1 */
+ GLshort alphatest[2]; /* base values of encoding 3 */
+ GLuint alphablockerror1, alphablockerror2, alphablockerror3; /* summed squared error per encoding */
+ GLubyte i, j, aindex, acutValues[7]; /* acutValues: thresholds between neighbouring representable values */
+ GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16]; /* per-pixel codes of the three encodings */
+ GLboolean alphaabsmin = GL_FALSE; /* set when some pixel has alpha 0 */
+ GLboolean alphaabsmax = GL_FALSE; /* set when some pixel has alpha 255 */
+ GLshort alphadist;
+
+ /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest
+ (values 0 and 255 are only recorded in the flags, not in alphabase) */
+ alphabase[0] = 0xff; alphabase[1] = 0x0;
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i][3] == 0)
+ alphaabsmin = GL_TRUE;
+ else if (srccolors[j][i][3] == 255)
+ alphaabsmax = GL_TRUE;
+ else {
+ if (srccolors[j][i][3] > alphabase[1])
+ alphabase[1] = srccolors[j][i][3];
+ if (srccolors[j][i][3] < alphabase[0])
+ alphabase[0] = srccolors[j][i][3];
+ }
+ }
+ }
+
+
+ if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min: no value strictly between 0 and 255 was seen, and 0 and 255 do not both occur */
+ /* shortcut here since it is a very common case (and also avoids later problems) */
+ /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
+ /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
+
+ *blkaddr++ = srccolors[0][0][3];
+ blkaddr++; /* NOTE(review): the second base value is skipped, not written, so this byte keeps whatever the destination held; all codes written below are 0 */
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+ *blkaddr++ = 0;
+/* fprintf(stderr, "enc0 used\n");*/
+ return;
+ }
+
+ /* find best encoding for alpha0 > alpha1 */
+ /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
+ alphablockerror1 = 0x0;
+ alphablockerror2 = 0xffffffff; /* stays at maximum if encoding 2 is never tried */
+ alphablockerror3 = 0xffffffff; /* stays at maximum if encoding 3 is never tried */
+ if (alphaabsmin) alphause[0] = 0;
+ else alphause[0] = alphabase[0];
+ if (alphaabsmax) alphause[1] = 255;
+ else alphause[1] = alphabase[1];
+ /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
+ for (aindex = 0; aindex < 7; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
+ }
+
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i][3] > acutValues[0]) {
+ alphaenc1[4*j + i] = 0; /* code 0: the high base value */
+ alphadist = srccolors[j][i][3] - alphause[1];
+ }
+ else if (srccolors[j][i][3] > acutValues[1]) {
+ alphaenc1[4*j + i] = 2;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
+ }
+ else if (srccolors[j][i][3] > acutValues[2]) {
+ alphaenc1[4*j + i] = 3;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
+ }
+ else if (srccolors[j][i][3] > acutValues[3]) {
+ alphaenc1[4*j + i] = 4;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
+ }
+ else if (srccolors[j][i][3] > acutValues[4]) {
+ alphaenc1[4*j + i] = 5;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
+ }
+ else if (srccolors[j][i][3] > acutValues[5]) {
+ alphaenc1[4*j + i] = 6;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
+ }
+ else if (srccolors[j][i][3] > acutValues[6]) {
+ alphaenc1[4*j + i] = 7;
+ alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
+ }
+ else {
+ alphaenc1[4*j + i] = 1; /* code 1: the low base value */
+ alphadist = srccolors[j][i][3] - alphause[0];
+ }
+ alphablockerror1 += alphadist * alphadist;
+ }
+ }
+/* for (i = 0; i < 16; i++) {
+ fprintf(stderr, "%d ", alphaenc1[i]);
+ }
+ fprintf(stderr, "cutVals ");
+ for (i = 0; i < 8; i++) {
+ fprintf(stderr, "%d ", acutValues[i]);
+ }
+ fprintf(stderr, "srcVals ");
+ for (j = 0; j < numypixels; j++)
+ for (i = 0; i < numxpixels; i++) {
+ fprintf(stderr, "%d ", srccolors[j][i][3]);
+ }
+
+ fprintf(stderr, "\n");
+ }*/
+ /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
+ are false but try it anyway */
+ if (alphablockerror1 >= 32) {
+
+ /* don't bother if encoding is already very good, this condition should also imply
+ we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
+ alphablockerror2 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i][3] == 0) {
+ alphaenc2[4*j + i] = 6; /* code 6: exactly 0, no error */
+ alphadist = 0;
+ }
+ else if (srccolors[j][i][3] == 255) {
+ alphaenc2[4*j + i] = 7; /* code 7: exactly 255, no error */
+ alphadist = 0;
+ }
+ else if (srccolors[j][i][3] <= acutValues[0]) {
+ alphaenc2[4*j + i] = 0;
+ alphadist = srccolors[j][i][3] - alphabase[0];
+ }
+ else if (srccolors[j][i][3] <= acutValues[1]) {
+ alphaenc2[4*j + i] = 2;
+ alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[2]) {
+ alphaenc2[4*j + i] = 3;
+ alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[3]) {
+ alphaenc2[4*j + i] = 4;
+ alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[4]) {
+ alphaenc2[4*j + i] = 5;
+ alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
+ }
+ else {
+ alphaenc2[4*j + i] = 1;
+ alphadist = srccolors[j][i][3] - alphabase[1];
+ }
+ alphablockerror2 += alphadist * alphadist;
+ }
+ }
+
+
+ /* skip this if the error is already very small
+ this encoding is MUCH better on average than #2 though, but expensive! */
+ if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
+ GLshort blockerrlin1 = 0; /* summed signed error used to move the low base value */
+ GLshort blockerrlin2 = 0; /* summed signed error used to move the high base value */
+ GLubyte nralphainrangelow = 0; /* number of pixels counted into blockerrlin1 */
+ GLubyte nralphainrangehigh = 0; /* number of pixels counted into blockerrlin2 */
+ alphatest[0] = 0xff;
+ alphatest[1] = 0x0;
+ /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
+ alphatest[1] = srccolors[j][i][3];
+ if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
+ alphatest[0] = srccolors[j][i][3];
+ }
+ }
+ /* shouldn't happen too often, don't really care about those degenerated cases */
+ if (alphatest[1] <= alphatest[0]) {
+ alphatest[0] = 1;
+ alphatest[1] = 254;
+/* fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
+ }
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+
+ /* find the "average" difference between the alpha values and the next encoded value.
+ This is then used to calculate new base values.
+ Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
+ since they will see more improvement, and also because the values in the middle are somewhat
+ likely to get no improvement at all (because the base values might move in different directions)?
+ OTOH it would mean the values in the middle are even less likely to get an improvement
+ */
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ if (srccolors[j][i][3] <= alphatest[0] / 2) { /* will be mapped to 0, does not influence the base values */
+ }
+ else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) { /* will be mapped to 255, does not influence the base values */
+ }
+ else if (srccolors[j][i][3] <= acutValues[0]) {
+ blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
+ nralphainrangelow += 1;
+ }
+ else if (srccolors[j][i][3] <= acutValues[1]) {
+ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i][3] <= acutValues[2]) {
+ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i][3] <= acutValues[3]) {
+ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else if (srccolors[j][i][3] <= acutValues[4]) {
+ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
+ nralphainrangelow += 1;
+ nralphainrangehigh += 1;
+ }
+ else {
+ blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
+ nralphainrangehigh += 1;
+ }
+ }
+ }
+ /* shouldn't happen often, needed to avoid div by zero */
+ if (nralphainrangelow == 0) nralphainrangelow = 1;
+ if (nralphainrangehigh == 0) nralphainrangehigh = 1;
+ alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
+/* fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
+ fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
+ /* again shouldn't really happen often... */
+ if (alphatest[0] < 0) {
+ alphatest[0] = 0;
+/* fprintf(stderr, "adj alpha base val to 0\n");*/
+ }
+ alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
+ if (alphatest[1] > 255) {
+ alphatest[1] = 255;
+/* fprintf(stderr, "adj alpha base val to 255\n");*/
+ }
+
+ alphablockerror3 = 0;
+ for (aindex = 0; aindex < 5; aindex++) {
+ /* don't forget here is always rounded down */
+ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
+ }
+ for (j = 0; j < numypixels; j++) {
+ for (i = 0; i < numxpixels; i++) {
+ /* maybe it's overkill to have the most complicated calculation just for the error
+ calculation which we only need to figure out if encoding1 or encoding2 is better... */
+ if (srccolors[j][i][3] <= alphatest[0] / 2) {
+ alphaenc3[4*j + i] = 6; /* code 6: mapped to 0, error is the value itself */
+ alphadist = srccolors[j][i][3];
+ }
+ else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
+ alphaenc3[4*j + i] = 7; /* code 7: mapped to 255 */
+ alphadist = 255 - srccolors[j][i][3];
+ }
+ else if (srccolors[j][i][3] <= acutValues[0]) {
+ alphaenc3[4*j + i] = 0;
+ alphadist = srccolors[j][i][3] - alphatest[0];
+ }
+ else if (srccolors[j][i][3] <= acutValues[1]) {
+ alphaenc3[4*j + i] = 2;
+ alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[2]) {
+ alphaenc3[4*j + i] = 3;
+ alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[3]) {
+ alphaenc3[4*j + i] = 4;
+ alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
+ }
+ else if (srccolors[j][i][3] <= acutValues[4]) {
+ alphaenc3[4*j + i] = 5;
+ alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
+ }
+ else {
+ alphaenc3[4*j + i] = 1;
+ alphadist = srccolors[j][i][3] - alphatest[1];
+ }
+ alphablockerror3 += alphadist * alphadist;
+ }
+ }
+ }
+ }
+ /* write the alpha values and encoding back. Ties go to the earlier encoding. */
+ if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
+/* if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
+ writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 ); /* high value first */
+ }
+ else if (alphablockerror2 <= alphablockerror3) {
+/* if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
+ writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 ); /* low value first */
+ }
+ else {
+/* fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
+ writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
+ }
+}
+
+/*
+ * Copy one block of source pixels into srcpixels[row][column][component],
+ * scaling each channel by CHAN_MAX / 255.
+ *
+ *   srcpixels     receives the block
+ *   srcaddr       first channel of the block's top-left pixel
+ *   srcRowStride  distance between source rows, in pixels
+ *   numxpixels    columns to copy
+ *   numypixels    rows to copy
+ *   comps         channels per source pixel
+ *
+ * Entries outside numxpixels x numypixels are left untouched.  When the
+ * source has fewer than four channels the alpha slot is set to 255 (opaque)
+ * so that later stages reading component 3 never see an unwritten value.
+ */
+static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
+                              GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
+{
+   GLubyte i, j, c;
+   const GLchan *curaddr;
+   for (j = 0; j < numypixels; j++) {
+      curaddr = srcaddr + j * srcRowStride * comps;
+      for (i = 0; i < numxpixels; i++) {
+         for (c = 0; c < comps; c++) {
+            srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
+         }
+         if (comps < 4) {
+            srcpixels[j][i][3] = 255;
+         }
+      }
+   }
+}
+
+
+/*
+ * Compress a width x height image into DXT1, DXT3 or DXT5 blocks.
+ *
+ *   srccomps      channels per source pixel
+ *   width/height  image size in pixels; partial blocks at the right and
+ *                 bottom edges are encoded with fewer valid pixels
+ *   srcPixData    tightly packed source rows (width * srccomps per row)
+ *   destFormat    one of the GL_COMPRESSED_*_S3TC_DXT*_EXT formats
+ *   dest          receives 8 (DXT1) or 16 (DXT3/5) bytes per block
+ *   dstRowStride  if at least width * 2 (DXT1) or width * 4 (DXT3/5), the
+ *                 difference to the packed size of one row of blocks is
+ *                 skipped after each row; otherwise rows are packed back to
+ *                 back (callers used to omit this parameter)
+ *
+ * An unknown destFormat is reported on stderr and nothing is written.
+ */
+void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
+                      GLenum destFormat, GLubyte *dest, GLint dstRowStride)
+{
+   GLubyte texels[4][4][4];
+   GLubyte *out = dest;
+   const GLchan *src;
+   GLint bw, bh;      /* valid columns / rows of the current block */
+   GLint x, y, row;
+   GLint rowpad = 0;  /* bytes skipped after each row of blocks */
+
+   switch (destFormat) {
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      if (dstRowStride >= (width * 2))
+         rowpad = dstRowStride - (((width + 3) & ~3) * 2);
+      for (y = 0; y < height; y += 4) {
+         bh = (height > y + 3) ? 4 : height - y;
+         src = srcPixData + y * width * srccomps;
+         for (x = 0; x < width; x += 4) {
+            bw = (width > x + 3) ? 4 : width - x;
+            extractsrccolors(texels, src, width, bw, bh, srccomps);
+            encodedxtcolorblockfaster(out, texels, bw, bh, destFormat);
+            src += srccomps * bw;
+            out += 8;
+         }
+         out += rowpad;
+      }
+      break;
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      if (dstRowStride >= (width * 4))
+         rowpad = dstRowStride - (((width + 3) & ~3) * 4);
+      for (y = 0; y < height; y += 4) {
+         bh = (height > y + 3) ? 4 : height - y;
+         src = srcPixData + y * width * srccomps;
+         for (x = 0; x < width; x += 4) {
+            bw = (width > x + 3) ? 4 : width - x;
+            extractsrccolors(texels, src, width, bw, bh, srccomps);
+            /* explicit alpha: top four bits of each pixel, two pixels per byte */
+            for (row = 0; row < 4; row++) {
+               *out++ = (texels[row][0][3] >> 4) | (texels[row][1][3] & 0xf0);
+               *out++ = (texels[row][2][3] >> 4) | (texels[row][3][3] & 0xf0);
+            }
+            encodedxtcolorblockfaster(out, texels, bw, bh, destFormat);
+            src += srccomps * bw;
+            out += 8;
+         }
+         out += rowpad;
+      }
+      break;
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      if (dstRowStride >= (width * 4))
+         rowpad = dstRowStride - (((width + 3) & ~3) * 4);
+      for (y = 0; y < height; y += 4) {
+         bh = (height > y + 3) ? 4 : height - y;
+         src = srcPixData + y * width * srccomps;
+         for (x = 0; x < width; x += 4) {
+            bw = (width > x + 3) ? 4 : width - x;
+            extractsrccolors(texels, src, width, bw, bh, srccomps);
+            /* alpha block first, color block in the second half */
+            encodedxt5alpha(out, texels, bw, bh);
+            encodedxtcolorblockfaster(out + 8, texels, bw, bh, destFormat);
+            src += srccomps * bw;
+            out += 16;
+         }
+         out += rowpad;
+      }
+      break;
+   default:
+      fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat);
+      return;
+   }
+}
+
+
diff --git a/dxtn/txc_dxtn.h b/dxtn/txc_dxtn.h
new file mode 100644
index 000000000..bf6842c05
--- /dev/null
+++ b/dxtn/txc_dxtn.h
@@ -0,0 +1,53 @@
+/*
+ * libtxc_dxtn
+ * Version: 0.1
+ *
+ * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TXC_DXTN_H
+#define TXC_DXTN_H
+
+#include "base/basictypes.h"
+
+/* GL-style type names used by the compressor and the fetch functions
+ * (uint8/uint16/uint32/int32 presumably come from base/basictypes.h). */
+typedef uint32 GLenum;
+typedef int32 GLint;
+typedef uint8 GLubyte;
+typedef uint16 GLushort;
+typedef uint32 GLuint;
+typedef void GLvoid;
+
+/* One color channel is stored as an 8-bit value. */
+typedef GLubyte GLchan;
+#define UBYTE_TO_CHAN(b) (b)
+#define CHAN_MAX 255
+/* Component indices within a decoded texel. */
+#define RCOMP 0
+#define GCOMP 1
+#define BCOMP 2
+#define ACOMP 3
+
+/* Decode the single texel (i, j) of a compressed image into
+ * texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. */
+void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
+                             GLint i, GLint j, GLvoid *texel);
+void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel);
+void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel);
+void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel);
+
+/* Compress a width x height image with srccomps channels per pixel into the
+ * DXTn format selected by destformat. */
+void tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
+                      const GLubyte *srcPixData, GLenum destformat,
+                      GLubyte *dest, GLint dstRowStride);
+
+#endif /* TXC_DXTN_H */
diff --git a/dxtn/txc_fetch_dxtn.cc b/dxtn/txc_fetch_dxtn.cc
new file mode 100644
index 000000000..1004b7cec
--- /dev/null
+++ b/dxtn/txc_fetch_dxtn.cc
@@ -0,0 +1,243 @@
+/*
+ * libtxc_dxtn
+ * Version: 0.1
+ *
+ * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include "txc_dxtn.h"
+
+/* Expand the 5-bit red field (bits 11..15) of a packed 565 color to 8 bits;
+ * the top three bits of the field are repeated in the low bits. */
+#define EXP5TO8R(packedcol) \
+   ((((((packedcol) >> 11) & 0x1f) << 3)) | ((((packedcol) >> 11) & 0x1f) >> 2))
+
+/* Expand the 6-bit green field (bits 5..10) to 8 bits; the top two bits of
+ * the field are repeated in the low bits. */
+#define EXP6TO8G(packedcol) \
+   ((((((packedcol) >> 5) & 0x3f) << 2)) | ((((packedcol) >> 5) & 0x3f) >> 4))
+
+/* Expand the 5-bit blue field (bits 0..4) to 8 bits; the top three bits of
+ * the field are repeated in the low bits. */
+#define EXP5TO8B(packedcol) \
+   (((((packedcol) & 0x1f) << 3)) | (((packedcol) & 0x1f) >> 2))
+
+/* Expand a 4-bit value to 8 bits by repeating the nibble. */
+#define EXP4TO8(col) \
+   (((col) << 4) | (col))
+
+/* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
+
+/*
+ * Decode the color of one texel from an 8-byte color block.
+ *
+ *   img_block_src  the color block: two packed 565 colors (low byte first)
+ *                  followed by sixteen 2-bit codes
+ *   i, j           column and row of the texel inside the block (0..3)
+ *   dxt_type       0 for RGB DXT1, 1 for RGBA DXT1, 2 for the color part of
+ *                  DXT3 / DXT5 (the values the fetch functions pass)
+ *   texel          receives RCOMP, GCOMP, BCOMP and ACOMP as GLchan values
+ *
+ * Alpha is CHAN_MAX except for RGBA DXT1 code 3 in a block with
+ * color0 <= color1, which decodes to transparent black.
+ */
+static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
+                         GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
+   GLchan *rgba = (GLchan *) texel;
+   const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8);
+   const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8);
+   /* assemble in unsigned arithmetic: shifting an int-promoted byte >= 0x80
+      left by 24 would overflow a signed int */
+   const GLuint bits = (GLuint) img_block_src[4] | ((GLuint) img_block_src[5] << 8) |
+                       ((GLuint) img_block_src[6] << 16) | ((GLuint) img_block_src[7] << 24);
+   /* What about big/little endian? */
+   GLubyte bit_pos = 2 * (j * 4 + i) ;
+   GLubyte code = (GLubyte) ((bits >> bit_pos) & 3);
+
+   rgba[ACOMP] = CHAN_MAX;
+   switch (code) {
+   case 0:
+      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) );
+      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) );
+      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) );
+      break;
+   case 1:
+      rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) );
+      rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) );
+      rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) );
+      break;
+   case 2:
+      if (color0 > color1) {
+         /* four-color block: 2/3 color0 + 1/3 color1 */
+         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) );
+         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) );
+         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) );
+      }
+      else {
+         /* three-color block: average of both colors */
+         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) );
+         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) );
+         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) );
+      }
+      break;
+   case 3:
+      if ((dxt_type > 1) || (color0 > color1)) {
+         /* 1/3 color0 + 2/3 color1; always taken for DXT3 / DXT5 */
+         rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) );
+         rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) );
+         rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) );
+      }
+      else {
+         /* black; additionally transparent for RGBA DXT1 */
+         rgba[RCOMP] = 0;
+         rgba[GCOMP] = 0;
+         rgba[BCOMP] = 0;
+         if (dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0);
+      }
+      break;
+   default:
+   /* CANNOT happen (I hope) */
+      break;
+   }
+}
+
+
+/*
+ * Fetch texel (i, j) of an RGB DXT1 image into texel[RCOMP], texel[GCOMP],
+ * texel[BCOMP], texel[ACOMP].  srcRowStride is the image width in pixels;
+ * each 4x4 block occupies 8 bytes.
+ */
+void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
+                             GLint i, GLint j, GLvoid *texel)
+{
+   const GLint blocks_per_row = (srcRowStride + 3) / 4;
+   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
+
+   dxt135_decode_imageblock(pixdata + block_index * 8, (i & 3), (j & 3), 0, texel);
+}
+
+
+/*
+ * Fetch texel (i, j) of an RGBA DXT1 image into texel[RCOMP], texel[GCOMP],
+ * texel[BCOMP], texel[ACOMP].  srcRowStride is the image width in pixels;
+ * each 4x4 block occupies 8 bytes.
+ */
+void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel)
+{
+   const GLint blocks_per_row = (srcRowStride + 3) / 4;
+   const GLint block_index = blocks_per_row * (j / 4) + (i / 4);
+
+   dxt135_decode_imageblock(pixdata + block_index * 8, (i & 3), (j & 3), 1, texel);
+}
+
+/*
+ * Fetch texel (i, j) of a DXT3 image into texel[RCOMP], texel[GCOMP],
+ * texel[BCOMP], texel[ACOMP].  Each 16-byte block holds sixteen 4-bit alpha
+ * values (two per byte, even column in the low nibble) followed by an
+ * 8-byte color block.
+ */
+void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel) {
+   GLchan *rgba = (GLchan *) texel;
+   const GLint blocks_per_row = (srcRowStride + 3) / 4;
+   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;
+   const GLint texel_index = (j & 3) * 4 + (i & 3);
+   const GLubyte packed = block[texel_index / 2];
+   /* TODO test this! */
+   const GLubyte nibble = (i & 1) ? (packed >> 4) : (packed & 0xf);
+
+   /* the color decode sets alpha to opaque; overwrite it afterwards */
+   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);
+   rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(nibble)) );
+}
+
+/*
+ * Fetch texel (i, j) of a DXT5 image into texel[RCOMP], texel[GCOMP],
+ * texel[BCOMP], texel[ACOMP].  Each 16-byte block holds two base alpha
+ * values, sixteen 3-bit alpha codes and an 8-byte color block.
+ *
+ * Codes 0 and 1 select the base values directly.  With alpha0 > alpha1 the
+ * codes 2..7 interpolate in sevenths; otherwise 2..5 interpolate in fifths,
+ * 6 is 0 and 7 is CHAN_MAX.
+ */
+void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
+                              GLint i, GLint j, GLvoid *texel) {
+   GLchan *rgba = (GLchan *) texel;
+   const GLint blocks_per_row = (srcRowStride + 3) / 4;
+   const GLubyte *block = pixdata + (blocks_per_row * (j / 4) + (i / 4)) * 16;
+   const GLubyte alpha0 = block[0];
+   const GLubyte alpha1 = block[1];
+   /* TODO test this! */
+   /* a 3-bit code may straddle two bytes, so combine the byte holding its
+      first bit with the following one before masking */
+   const GLubyte first_bit = ((j & 3) * 4 + (i & 3)) * 3;
+   const GLubyte shift = first_bit & 0x7;
+   const GLubyte lo = block[2 + first_bit / 8];
+   const GLubyte hi = block[3 + first_bit / 8];
+   const GLubyte code = ((lo >> shift) | (hi << (8 - shift))) & 0x7;
+
+   /* the color decode sets alpha to opaque; overwrite it afterwards */
+   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);
+
+   /* TODO test this */
+   switch (code) {
+   case 0:
+      rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 );
+      break;
+   case 1:
+      rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 );
+      break;
+   default:
+      if (alpha0 > alpha1) {
+         rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) );
+      }
+      else if (code == 6) {
+         rgba[ACOMP] = 0;
+      }
+      else if (code == 7) {
+         rgba[ACOMP] = CHAN_MAX;
+      }
+      else {
+         rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) );
+      }
+      break;
+   }
+}