/** * \file imports.c * Standard C library function wrappers. * * Imports are services which the device driver or window system or * operating system provides to the core renderer. The core renderer (Mesa) * will call these functions in order to do memory allocation, simple I/O, * etc. * * Some drivers will want to override/replace this file with something * specialized, but that'll be rare. * * Eventually, I want to move roll the glheader.h file into this. * * \todo Functions still needed: * - scanf * - qsort * - rand and RAND_MAX */ /* * Mesa 3-D graphics library * Version: 7.1 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "imports.h" #include "context.h" #include "mtypes.h" #include "version.h" #ifdef _GNU_SOURCE #include <locale.h> #ifdef __APPLE__ #include <xlocale.h> #endif #endif #ifdef WIN32 #define vsnprintf _vsnprintf #elif defined(__IBMC__) || defined(__IBMCPP__) || ( defined(__VMS) && __CRTL_VER < 70312000 ) extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg); #ifdef __VMS #include "vsnprintf.c" #endif #endif /**********************************************************************/ /** \name Memory */ /*@{*/ /** * Allocate aligned memory. * * \param bytes number of bytes to allocate. * \param alignment alignment (must be greater than zero). * * Allocates extra memory to accommodate rounding up the address for * alignment and to record the real malloc address. * * \sa _mesa_align_free(). */ void * _mesa_align_malloc(size_t bytes, unsigned long alignment) { #if defined(HAVE_POSIX_MEMALIGN) void *mem; int err = posix_memalign(& mem, alignment, bytes); if (err) return NULL; return mem; #elif defined(_WIN32) && defined(_MSC_VER) return _aligned_malloc(bytes, alignment); #else uintptr_t ptr, buf; ASSERT( alignment > 0 ); ptr = (uintptr_t) malloc(bytes + alignment + sizeof(void *)); if (!ptr) return NULL; buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1); *(uintptr_t *)(buf - sizeof(void *)) = ptr; #ifdef DEBUG /* mark the non-aligned area */ while ( ptr < buf - sizeof(void *) ) { *(unsigned long *)ptr = 0xcdcdcdcd; ptr += sizeof(unsigned long); } #endif return (void *) buf; #endif /* defined(HAVE_POSIX_MEMALIGN) */ } /** * Same as _mesa_align_malloc(), but using calloc(1, ) instead of * malloc() */ void * _mesa_align_calloc(size_t bytes, unsigned long alignment) { #if defined(HAVE_POSIX_MEMALIGN) void *mem; mem = _mesa_align_malloc(bytes, alignment); if (mem != NULL) { (void) memset(mem, 0, bytes); } return mem; #elif defined(_WIN32) && defined(_MSC_VER) void *mem; mem = _aligned_malloc(bytes, alignment); if (mem != NULL) { (void) memset(mem, 0, bytes); } return mem; #else uintptr_t ptr, buf; ASSERT( alignment > 0 ); ptr = (uintptr_t) calloc(1, bytes + alignment + sizeof(void *)); if (!ptr) return NULL; buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1); *(uintptr_t *)(buf - sizeof(void *)) = ptr; #ifdef DEBUG /* mark the non-aligned area */ while ( ptr < buf - sizeof(void *) ) { *(unsigned long *)ptr = 0xcdcdcdcd; ptr += sizeof(unsigned long); } #endif return (void *)buf; #endif /* defined(HAVE_POSIX_MEMALIGN) */ } /** * Free memory which was allocated with either _mesa_align_malloc() * or _mesa_align_calloc(). * \param ptr pointer to the memory to be freed. * The actual address to free is stored in the word immediately before the * address the client sees. */ void _mesa_align_free(void *ptr) { #if defined(HAVE_POSIX_MEMALIGN) free(ptr); #elif defined(_WIN32) && defined(_MSC_VER) _aligned_free(ptr); #else void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); void *realAddr = *cubbyHole; free(realAddr); #endif /* defined(HAVE_POSIX_MEMALIGN) */ } /** * Reallocate memory, with alignment. */ void * _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, unsigned long alignment) { #if defined(_WIN32) && defined(_MSC_VER) (void) oldSize; return _aligned_realloc(oldBuffer, newSize, alignment); #else const size_t copySize = (oldSize < newSize) ? oldSize : newSize; void *newBuf = _mesa_align_malloc(newSize, alignment); if (newBuf && oldBuffer && copySize > 0) { memcpy(newBuf, oldBuffer, copySize); } if (oldBuffer) _mesa_align_free(oldBuffer); return newBuf; #endif } /** Reallocate memory */ void * _mesa_realloc(void *oldBuffer, size_t oldSize, size_t newSize) { const size_t copySize = (oldSize < newSize) ? oldSize : newSize; void *newBuffer = malloc(newSize); if (newBuffer && oldBuffer && copySize > 0) memcpy(newBuffer, oldBuffer, copySize); if (oldBuffer) free(oldBuffer); return newBuffer; } /** * Fill memory with a constant 16bit word. * \param dst destination pointer. * \param val value. * \param n number of words. */ void _mesa_memset16( unsigned short *dst, unsigned short val, size_t n ) { while (n-- > 0) *dst++ = val; } /*@}*/ /**********************************************************************/ /** \name Math */ /*@{*/ /** Wrapper around sqrt() */ double _mesa_sqrtd(double x) { return sqrt(x); } /* * A High Speed, Low Precision Square Root * by Paul Lalonde and Robert Dawson * from "Graphics Gems", Academic Press, 1990 * * SPARC implementation of a fast square root by table * lookup. * SPARC floating point format is as follows: * * BIT 31 30 23 22 0 * sign exponent mantissa */ static short sqrttab[0x100]; /* declare table of square roots */ void _mesa_init_sqrt_table(void) { #if defined(USE_IEEE) && !defined(DEBUG) unsigned short i; fi_type fi; /* to access the bits of a float in C quickly */ /* we use a union defined in glheader.h */ for(i=0; i<= 0x7f; i++) { fi.i = 0; /* * Build a float with the bit pattern i as mantissa * and an exponent of 0, stored as 127 */ fi.i = (i << 16) | (127 << 23); fi.f = _mesa_sqrtd(fi.f); /* * Take the square root then strip the first 7 bits of * the mantissa into the table */ sqrttab[i] = (fi.i & 0x7fffff) >> 16; /* * Repeat the process, this time with an exponent of * 1, stored as 128 */ fi.i = 0; fi.i = (i << 16) | (128 << 23); fi.f = sqrt(fi.f); sqrttab[i+0x80] = (fi.i & 0x7fffff) >> 16; } #else (void) sqrttab; /* silence compiler warnings */ #endif /*HAVE_FAST_MATH*/ } /** * Single precision square root. */ float _mesa_sqrtf( float x ) { #if defined(USE_IEEE) && !defined(DEBUG) fi_type num; /* to access the bits of a float in C * we use a union from glheader.h */ short e; /* the exponent */ if (x == 0.0F) return 0.0F; /* check for square root of 0 */ num.f = x; e = (num.i >> 23) - 127; /* get the exponent - on a SPARC the */ /* exponent is stored with 127 added */ num.i &= 0x7fffff; /* leave only the mantissa */ if (e & 0x01) num.i |= 0x800000; /* the exponent is odd so we have to */ /* look it up in the second half of */ /* the lookup table, so we set the */ /* high bit */ e >>= 1; /* divide the exponent by two */ /* note that in C the shift */ /* operators are sign preserving */ /* for signed operands */ /* Do the table lookup, based on the quaternary mantissa, * then reconstruct the result back into a float */ num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23); return num.f; #else return (float) _mesa_sqrtd((double) x); #endif } /** inv_sqrt - A single precision 1/sqrt routine for IEEE format floats. written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk and Vesa Karvonen. */ float _mesa_inv_sqrtf(float n) { #if defined(USE_IEEE) && !defined(DEBUG) float r0, x0, y0; float r1, x1, y1; float r2, x2, y2; #if 0 /* not used, see below -BP */ float r3, x3, y3; #endif fi_type u; unsigned int magic; /* Exponent part of the magic number - We want to: 1. subtract the bias from the exponent, 2. negate it 3. divide by two (rounding towards -inf) 4. add the bias back Which is the same as subtracting the exponent from 381 and dividing by 2. floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2) */ magic = 381 << 23; /* Significand part of magic number - With the current magic number, "(magic - u.i) >> 1" will give you: for 1 <= u.f <= 2: 1.25 - u.f / 4 for 2 <= u.f <= 4: 1.00 - u.f / 8 This isn't a bad approximation of 1/sqrt. The maximum difference from 1/sqrt will be around .06. After three Newton-Raphson iterations, the maximum difference is less than 4.5e-8. (Which is actually close enough to make the following bias academic...) To get a better approximation you can add a bias to the magic number. For example, if you subtract 1/2 of the maximum difference in the first approximation (.03), you will get the following function: for 1 <= u.f <= 2: 1.22 - u.f / 4 for 2 <= u.f <= 3.76: 0.97 - u.f / 8 for 3.76 <= u.f <= 4: 0.72 - u.f / 16 (The 3.76 to 4 range is where the result is < .5.) This is the closest possible initial approximation, but with a maximum error of 8e-11 after three NR iterations, it is still not perfect. If you subtract 0.0332281 instead of .03, the maximum error will be 2.5e-11 after three NR iterations, which should be about as close as is possible. for 1 <= u.f <= 2: 1.2167719 - u.f / 4 for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8 for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16 */ magic -= (int)(0.0332281 * (1 << 25)); u.f = n; u.i = (magic - u.i) >> 1; /* Instead of Newton-Raphson, we use Goldschmidt's algorithm, which allows more parallelism. From what I understand, the parallelism comes at the cost of less precision, because it lets error accumulate across iterations. */ x0 = 1.0f; y0 = 0.5f * n; r0 = u.f; x1 = x0 * r0; y1 = y0 * r0 * r0; r1 = 1.5f - y1; x2 = x1 * r1; y2 = y1 * r1 * r1; r2 = 1.5f - y2; #if 1 return x2 * r2; /* we can stop here, and be conformant -BP */ #else x3 = x2 * r2; y3 = y2 * r2 * r2; r3 = 1.5f - y3; return x3 * r3; #endif #else return (float) (1.0 / sqrt(n)); #endif } #ifndef __GNUC__ /** * Find the first bit set in a word. */ int ffs(int i) { register int bit = 0; if (i != 0) { if ((i & 0xffff) == 0) { bit += 16; i >>= 16; } if ((i & 0xff) == 0) { bit += 8; i >>= 8; } if ((i & 0xf) == 0) { bit += 4; i >>= 4; } while ((i & 1) == 0) { bit++; i >>= 1; } bit++; } return bit; } /** * Find position of first bit set in given value. * XXX Warning: this function can only be used on 64-bit systems! * \return position of least-significant bit set, starting at 1, return zero * if no bits set. */ int ffsll(long long int val) { int bit; assert(sizeof(val) == 8); bit = ffs((int) val); if (bit != 0) return bit; bit = ffs((int) (val >> 32)); if (bit != 0) return 32 + bit; return 0; } #endif /* __GNUC__ */ #if !defined(__GNUC__) ||\ ((__GNUC__ * 100 + __GNUC_MINOR__) < 304) /* Not gcc 3.4 or later */ /** * Return number of bits set in given GLuint. */ unsigned int _mesa_bitcount(unsigned int n) { unsigned int bits; for (bits = 0; n > 0; n = n >> 1) { bits += (n & 1); } return bits; } /** * Return number of bits set in given 64-bit uint. */ unsigned int _mesa_bitcount_64(uint64_t n) { unsigned int bits; for (bits = 0; n > 0; n = n >> 1) { bits += (n & 1); } return bits; } #endif /** * Convert a 4-byte float to a 2-byte half float. * Based on code from: * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html */ GLhalfARB _mesa_float_to_half(float val) { const fi_type fi = {val}; const int flt_m = fi.i & 0x7fffff; const int flt_e = (fi.i >> 23) & 0xff; const int flt_s = (fi.i >> 31) & 0x1; int s, e, m = 0; GLhalfARB result; /* sign bit */ s = flt_s; /* handle special cases */ if ((flt_e == 0) && (flt_m == 0)) { /* zero */ /* m = 0; - already set */ e = 0; } else if ((flt_e == 0) && (flt_m != 0)) { /* denorm -- denorm float maps to 0 half */ /* m = 0; - already set */ e = 0; } else if ((flt_e == 0xff) && (flt_m == 0)) { /* infinity */ /* m = 0; - already set */ e = 31; } else if ((flt_e == 0xff) && (flt_m != 0)) { /* NaN */ m = 1; e = 31; } else { /* regular number */ const int new_exp = flt_e - 127; if (new_exp < -24) { /* this maps to 0 */ /* m = 0; - already set */ e = 0; } else if (new_exp < -14) { /* this maps to a denorm */ unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/ e = 0; switch (exp_val) { case 0: _mesa_warning(NULL, "float_to_half: logical error in denorm creation!\n"); /* m = 0; - already set */ break; case 1: m = 512 + (flt_m >> 14); break; case 2: m = 256 + (flt_m >> 15); break; case 3: m = 128 + (flt_m >> 16); break; case 4: m = 64 + (flt_m >> 17); break; case 5: m = 32 + (flt_m >> 18); break; case 6: m = 16 + (flt_m >> 19); break; case 7: m = 8 + (flt_m >> 20); break; case 8: m = 4 + (flt_m >> 21); break; case 9: m = 2 + (flt_m >> 22); break; case 10: m = 1; break; } } else if (new_exp > 15) { /* map this value to infinity */ /* m = 0; - already set */ e = 31; } else { /* regular */ e = new_exp + 15; m = flt_m >> 13; } } result = (s << 15) | (e << 10) | m; return result; } /** * Convert a 2-byte half float to a 4-byte float. * Based on code from: * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html */ float _mesa_half_to_float(GLhalfARB val) { /* XXX could also use a 64K-entry lookup table */ const int m = val & 0x3ff; const int e = (val >> 10) & 0x1f; const int s = (val >> 15) & 0x1; int flt_m, flt_e, flt_s; fi_type fi; float result; /* sign bit */ flt_s = s; /* handle special cases */ if ((e == 0) && (m == 0)) { /* zero */ flt_m = 0; flt_e = 0; } else if ((e == 0) && (m != 0)) { /* denorm -- denorm half will fit in non-denorm single */ const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */ float mantissa = ((float) (m)) / 1024.0f; float sign = s ? -1.0f : 1.0f; return sign * mantissa * half_denorm; } else if ((e == 31) && (m == 0)) { /* infinity */ flt_e = 0xff; flt_m = 0; } else if ((e == 31) && (m != 0)) { /* NaN */ flt_e = 0xff; flt_m = 1; } else { /* regular */ flt_e = e + 112; flt_m = m << 13; } fi.i = (flt_s << 31) | (flt_e << 23) | flt_m; result = fi.f; return result; } /*@}*/ /**********************************************************************/ /** \name Sort & Search */ /*@{*/ /** * Wrapper for bsearch(). */ void * _mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *) ) { #if defined(_WIN32_WCE) void *mid; int cmp; while (nmemb) { nmemb >>= 1; mid = (char *)base + nmemb * size; cmp = (*compar)(key, mid); if (cmp == 0) return mid; if (cmp > 0) { base = (char *)mid + size; --nmemb; } } return NULL; #else return bsearch(key, base, nmemb, size, compar); #endif } /*@}*/ /**********************************************************************/ /** \name Environment vars */ /*@{*/ /** * Wrapper for getenv(). */ char * _mesa_getenv( const char *var ) { #if defined(_XBOX) || defined(_WIN32_WCE) return NULL; #else return getenv(var); #endif } /*@}*/ /**********************************************************************/ /** \name String */ /*@{*/ /** * Implemented using malloc() and strcpy. * Note that NULL is handled accordingly. */ char * _mesa_strdup( const char *s ) { if (s) { size_t l = strlen(s); char *s2 = (char *) malloc(l + 1); if (s2) strcpy(s2, s); return s2; } else { return NULL; } } /** Wrapper around strtof() */ float _mesa_strtof( const char *s, char **end ) { #if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ !defined(ANDROID) && !defined(__HAIKU__) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); } return strtof_l(s, end, loc); #elif defined(_ISOC99_SOURCE) || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600) return strtof(s, end); #else return (float)strtod(s, end); #endif } /** Compute simple checksum/hash for a string */ unsigned int _mesa_str_checksum(const char *str) { /* This could probably be much better */ unsigned int sum, i; const char *c; sum = i = 1; for (c = str; *c; c++, i++) sum += *c * (i % 100); return sum + i; } /*@}*/ /** Needed due to #ifdef's, above. */ int _mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list args) { return vsnprintf( str, size, fmt, args); } /** Wrapper around vsnprintf() */ int _mesa_snprintf( char *str, size_t size, const char *fmt, ... ) { int r; va_list args; va_start( args, fmt ); r = vsnprintf( str, size, fmt, args ); va_end( args ); return r; }