diff options
Diffstat (limited to 'freetype/src/base/ftcalc.c')
-rw-r--r-- | freetype/src/base/ftcalc.c | 475 |
1 files changed, 188 insertions, 287 deletions
diff --git a/freetype/src/base/ftcalc.c b/freetype/src/base/ftcalc.c index fb0492721..7b6425056 100644 --- a/freetype/src/base/ftcalc.c +++ b/freetype/src/base/ftcalc.c @@ -39,7 +39,8 @@ #include <internal/ftdebug.h> #include <internal/ftobjs.h> -#ifdef FT_MULFIX_INLINED + +#ifdef FT_MULFIX_ASSEMBLER #undef FT_MulFix #endif @@ -67,6 +68,16 @@ #define FT_COMPONENT trace_calc + /* transfer sign leaving a positive number */ +#define FT_MOVE_SIGN( x, s ) \ + FT_BEGIN_STMNT \ + if ( x < 0 ) \ + { \ + x = -x; \ + s = -s; \ + } \ + FT_END_STMNT + /* The following three functions are available regardless of whether */ /* FT_LONG64 is defined. */ @@ -75,8 +86,8 @@ FT_EXPORT_DEF( FT_Fixed ) FT_RoundFix( FT_Fixed a ) { - return ( a >= 0 ) ? ( a + 0x8000L ) & ~0xFFFFL - : -((-a + 0x8000L ) & ~0xFFFFL ); + return a >= 0 ? ( a + 0x8000L ) & ~0xFFFFL + : -((-a + 0x8000L ) & ~0xFFFFL ); } @@ -85,8 +96,8 @@ FT_EXPORT_DEF( FT_Fixed ) FT_CeilFix( FT_Fixed a ) { - return ( a >= 0 ) ? ( a + 0xFFFFL ) & ~0xFFFFL - : -((-a + 0xFFFFL ) & ~0xFFFFL ); + return a >= 0 ? ( a + 0xFFFFL ) & ~0xFFFFL + : -((-a + 0xFFFFL ) & ~0xFFFFL ); } @@ -95,38 +106,40 @@ FT_EXPORT_DEF( FT_Fixed ) FT_FloorFix( FT_Fixed a ) { - return ( a >= 0 ) ? a & ~0xFFFFL - : -((-a) & ~0xFFFFL ); + return a >= 0 ? a & ~0xFFFFL + : -((-a) & ~0xFFFFL ); } +#ifndef FT_MSB FT_BASE_DEF ( FT_Int ) FT_MSB( FT_UInt32 z ) { - FT_Int shift = 0; + FT_Int shift = 0; + /* determine msb bit index in `shift' */ - if ( z >= ( 1L << 16 ) ) + if ( z & 0xFFFF0000UL ) { z >>= 16; shift += 16; } - if ( z >= ( 1L << 8 ) ) + if ( z & 0x0000FF00UL ) { z >>= 8; shift += 8; } - if ( z >= ( 1L << 4 ) ) + if ( z & 0x000000F0UL ) { z >>= 4; shift += 4; } - if ( z >= ( 1L << 2 ) ) + if ( z & 0x0000000CUL ) { z >>= 2; shift += 2; } - if ( z >= ( 1L << 1 ) ) + if ( z & 0x00000002UL ) { /* z >>= 1; */ shift += 1; @@ -135,6 +148,8 @@ return shift; } +#endif /* !FT_MSB */ + /* documentation is in ftcalc.h */ @@ -162,19 +177,18 @@ FT_Long b, FT_Long c ) { - FT_Int s; + FT_Int s = 1; FT_Long d; - s = 1; - if ( a < 0 ) { a = -a; s = -1; } - if ( b < 0 ) { b = -b; s = -s; } - if ( c < 0 ) { c = -c; s = -s; } + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); + FT_MOVE_SIGN( c, s ); d = (FT_Long)( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c : 0x7FFFFFFFL ); - return ( s > 0 ) ? d : -d; + return s < 0 ? -d : d; } @@ -185,19 +199,18 @@ FT_Long b, FT_Long c ) { - FT_Int s; + FT_Int s = 1; FT_Long d; - s = 1; - if ( a < 0 ) { a = -a; s = -1; } - if ( b < 0 ) { b = -b; s = -s; } - if ( c < 0 ) { c = -c; s = -s; } + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); + FT_MOVE_SIGN( c, s ); d = (FT_Long)( c > 0 ? (FT_Int64)a * b / c : 0x7FFFFFFFL ); - return ( s > 0 ) ? d : -d; + return s < 0 ? -d : d; } @@ -217,21 +230,12 @@ FT_Long c; - if ( a < 0 ) - { - a = -a; - s = -1; - } - - if ( b < 0 ) - { - b = -b; - s = -s; - } + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 ); - return ( s > 0 ) ? c : -c; + return s < 0 ? -c : c; #endif /* FT_MULFIX_ASSEMBLER */ } @@ -243,30 +247,17 @@ FT_DivFix( FT_Long a, FT_Long b ) { - FT_Int32 s; - FT_UInt32 q; + FT_Int s = 1; + FT_Long q; - s = 1; - if ( a < 0 ) - { - a = -a; - s = -1; - } - if ( b < 0 ) - { - b = -b; - s = -s; - } + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); - if ( b == 0 ) - /* check for division by 0 */ - q = 0x7FFFFFFFL; - else - /* compute result directly */ - q = (FT_UInt32)( ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b ); + q = (FT_Long)( b > 0 ? ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b + : 0x7FFFFFFFL ); - return ( s < 0 ? -(FT_Long)q : (FT_Long)q ); + return s < 0 ? -q : q; } @@ -314,25 +305,30 @@ FT_Int i; - q = 0; - r = hi; - - if ( r >= y ) + if ( hi >= y ) return (FT_UInt32)0x7FFFFFFFL; - i = 32; + /* We shift as many bits as we can into the high register, perform */ + /* 32-bit division with modulo there, then work through the remaining */ + /* bits with long division. This optimization is especially noticeable */ + /* for smaller dividends that barely use the high register. */ + + i = 31 - FT_MSB( hi ); + r = ( hi << i ) | ( lo >> ( 32 - i ) ); lo <<= i; /* left 64-bit shift */ + q = r / y; + r -= q * y; /* remainder */ + + i = 32 - i; /* bits remaining in low register */ do { - r <<= 1; q <<= 1; - r |= lo >> 31; + r = ( r << 1 ) | ( lo >> 31 ); lo <<= 1; if ( r >= y ) { r -= y; q |= 1; } - lo <<= 1; } while ( --i ); return q; @@ -344,7 +340,7 @@ FT_Int64* y, FT_Int64 *z ) { - register FT_UInt32 lo, hi; + FT_UInt32 lo, hi; lo = x->lo + y->lo; @@ -355,60 +351,95 @@ } - /* documentation is in freetype.h */ - - /* The FT_MulDiv function has been optimized thanks to ideas from */ - /* Graham Asher. The trick is to optimize computation when everything */ - /* fits within 32-bits (a rather common case). */ + /* The FT_MulDiv function has been optimized thanks to ideas from */ + /* Graham Asher and Alexei Podtelezhnikov. The trick is to optimize */ + /* a rather common case when everything fits within 32-bits. */ + /* */ + /* We compute 'a*b+c/2', then divide it by 'c' (all positive values). */ + /* */ + /* The product of two positive numbers never exceeds the square of */ + /* its mean values. Therefore, we always avoid the overflow by */ + /* imposing */ /* */ - /* we compute 'a*b+c/2', then divide it by 'c'. (positive values) */ + /* (a + b) / 2 <= sqrt(X - c/2) , */ /* */ - /* 46340 is FLOOR(SQRT(2^31-1)). */ + /* where X = 2^32 - 1, the maximum unsigned 32-bit value, and using */ + /* unsigned arithmetic. Now we replace `sqrt' with a linear function */ + /* that is smaller or equal for all values of c in the interval */ + /* [0;X/2]; it should be equal to sqrt(X) and sqrt(3X/4) at the */ + /* endpoints. Substituting the linear solution and explicit numbers */ + /* we get */ /* */ - /* if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 ) */ + /* a + b <= 131071.99 - c / 122291.84 . */ /* */ - /* 0x7FFFFFFF - 0x7FFEA810 = 0x157F0 */ + /* In practice, we should use a faster and even stronger inequality */ /* */ - /* if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF ) */ + /* a + b <= 131071 - (c >> 16) */ /* */ - /* and 2*0x157F0 = 176096 */ + /* or, alternatively, */ /* */ + /* a + b <= 129894 - (c >> 17) . */ + /* */ + /* FT_MulFix, on the other hand, is optimized for a small value of */ + /* the first argument, when the second argument can be much larger. */ + /* This can be achieved by scaling the second argument and the limit */ + /* in the above inequalities. For example, */ + /* */ + /* a + (b >> 8) <= (131071 >> 4) */ + /* */ + /* covers the practical range of use. The actual test below is a bit */ + /* tighter to avoid the border case overflows. */ + /* */ + /* In the case of FT_DivFix, the exact overflow check */ + /* */ + /* a << 16 <= X - c/2 */ + /* */ + /* is scaled down by 2^16 and we use */ + /* */ + /* a <= 65535 - (c >> 17) . */ + + /* documentation is in freetype.h */ FT_EXPORT_DEF( FT_Long ) FT_MulDiv( FT_Long a, FT_Long b, FT_Long c ) { - long s; + FT_Int s = 1; /* XXX: this function does not allow 64-bit arguments */ if ( a == 0 || b == c ) return a; - s = a; a = FT_ABS( a ); - s ^= b; b = FT_ABS( b ); - s ^= c; c = FT_ABS( c ); + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); + FT_MOVE_SIGN( c, s ); + + if ( c == 0 ) + a = 0x7FFFFFFFL; - if ( a <= 46340L && b <= 46340L && c <= 176095L && c > 0 ) - a = ( a * b + ( c >> 1 ) ) / c; + else if ( (FT_ULong)a + b <= 129894UL - ( c >> 17 ) ) + a = ( (FT_ULong)a * b + ( c >> 1 ) ) / c; - else if ( (FT_Int32)c > 0 ) + else { FT_Int64 temp, temp2; - ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp ); + ft_multo64( a, b, &temp ); temp2.hi = 0; - temp2.lo = (FT_UInt32)(c >> 1); + temp2.lo = c >> 1; + FT_Add64( &temp, &temp2, &temp ); - a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c ); + + /* last attempt to ditch long division */ + a = temp.hi == 0 ? temp.lo / c + : ft_div64by32( temp.hi, temp.lo, c ); } - else - a = 0x7FFFFFFFL; - return ( s < 0 ? -a : a ); + return s < 0 ? -a : a; } @@ -417,31 +448,35 @@ FT_Long b, FT_Long c ) { - long s; + FT_Int s = 1; if ( a == 0 || b == c ) return a; - s = a; a = FT_ABS( a ); - s ^= b; b = FT_ABS( b ); - s ^= c; c = FT_ABS( c ); + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); + FT_MOVE_SIGN( c, s ); + + if ( c == 0 ) + a = 0x7FFFFFFFL; - if ( a <= 46340L && b <= 46340L && c > 0 ) - a = a * b / c; + else if ( (FT_ULong)a + b <= 131071UL ) + a = (FT_ULong)a * b / c; - else if ( (FT_Int32)c > 0 ) + else { FT_Int64 temp; - ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp ); - a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c ); + ft_multo64( a, b, &temp ); + + /* last attempt to ditch long division */ + a = temp.hi == 0 ? temp.lo / c + : ft_div64by32( temp.hi, temp.lo, c ); } - else - a = 0x7FFFFFFFL; - return ( s < 0 ? -a : a ); + return s < 0 ? -a : a; } @@ -497,7 +532,7 @@ ua = (FT_ULong)a; ub = (FT_ULong)b; - if ( ua <= 2048 && ub <= 1048576L ) + if ( ua + ( ub >> 8 ) <= 8190UL ) ua = ( ua * ub + 0x8000U ) >> 16; else { @@ -515,20 +550,20 @@ #else /* 0 */ - FT_Long s; + FT_Int s = 1; FT_ULong ua, ub; if ( a == 0 || b == 0x10000L ) return a; - s = a; a = FT_ABS( a ); - s ^= b; b = FT_ABS( b ); + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); ua = (FT_ULong)a; ub = (FT_ULong)b; - if ( ua <= 2048 && ub <= 1048576L ) + if ( ua + ( ub >> 8 ) <= 8190UL ) ua = ( ua * ub + 0x8000UL ) >> 16; else { @@ -539,7 +574,7 @@ ( ( al * ( ub & 0xFFFFUL ) + 0x8000UL ) >> 16 ); } - return ( s < 0 ? -(FT_Long)ua : (FT_Long)ua ); + return s < 0 ? -(FT_Long)ua : (FT_Long)ua; #endif /* 0 */ @@ -552,23 +587,24 @@ FT_DivFix( FT_Long a, FT_Long b ) { - FT_Int32 s; - FT_UInt32 q; + FT_Int s = 1; + FT_Long q; /* XXX: this function does not allow 64-bit arguments */ - s = (FT_Int32)a; a = FT_ABS( a ); - s ^= (FT_Int32)b; b = FT_ABS( b ); - if ( (FT_UInt32)b == 0 ) + FT_MOVE_SIGN( a, s ); + FT_MOVE_SIGN( b, s ); + + if ( b == 0 ) { /* check for division by 0 */ - q = (FT_UInt32)0x7FFFFFFFL; + q = 0x7FFFFFFFL; } - else if ( ( a >> 16 ) == 0 ) + else if ( a <= 65535L - ( b >> 17 ) ) { /* compute result directly */ - q = (FT_UInt32)( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / (FT_UInt32)b; + q = (FT_Long)( ( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / b ); } else { @@ -576,138 +612,18 @@ FT_Int64 temp, temp2; - temp.hi = (FT_Int32)( a >> 16 ); - temp.lo = (FT_UInt32)a << 16; + temp.hi = a >> 16; + temp.lo = a << 16; temp2.hi = 0; - temp2.lo = (FT_UInt32)( b >> 1 ); - FT_Add64( &temp, &temp2, &temp ); - q = ft_div64by32( temp.hi, temp.lo, (FT_Int32)b ); - } - - return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); - } + temp2.lo = b >> 1; - -#if 0 - - /* documentation is in ftcalc.h */ - - FT_EXPORT_DEF( void ) - FT_MulTo64( FT_Int32 x, - FT_Int32 y, - FT_Int64 *z ) - { - FT_Int32 s; - - - s = x; x = FT_ABS( x ); - s ^= y; y = FT_ABS( y ); - - ft_multo64( x, y, z ); - - if ( s < 0 ) - { - z->lo = (FT_UInt32)-(FT_Int32)z->lo; - z->hi = ~z->hi + !( z->lo ); - } - } - - - /* apparently, the second version of this code is not compiled correctly */ - /* on Mac machines with the MPW C compiler.. tsk, tsk, tsk... */ - -#if 1 - - FT_EXPORT_DEF( FT_Int32 ) - FT_Div64by32( FT_Int64* x, - FT_Int32 y ) - { - FT_Int32 s; - FT_UInt32 q, r, i, lo; - - - s = x->hi; - if ( s < 0 ) - { - x->lo = (FT_UInt32)-(FT_Int32)x->lo; - x->hi = ~x->hi + !x->lo; - } - s ^= y; y = FT_ABS( y ); - - /* Shortcut */ - if ( x->hi == 0 ) - { - if ( y > 0 ) - q = x->lo / y; - else - q = 0x7FFFFFFFL; - - return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); - } - - r = x->hi; - lo = x->lo; - - if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */ - return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL ); - /* Return Max/Min Int32 if division overflow. */ - /* This includes division by zero! */ - q = 0; - for ( i = 0; i < 32; i++ ) - { - r <<= 1; - q <<= 1; - r |= lo >> 31; - - if ( r >= (FT_UInt32)y ) - { - r -= y; - q |= 1; - } - lo <<= 1; - } - - return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); - } - -#else /* 0 */ - - FT_EXPORT_DEF( FT_Int32 ) - FT_Div64by32( FT_Int64* x, - FT_Int32 y ) - { - FT_Int32 s; - FT_UInt32 q; - - - s = x->hi; - if ( s < 0 ) - { - x->lo = (FT_UInt32)-(FT_Int32)x->lo; - x->hi = ~x->hi + !x->lo; - } - s ^= y; y = FT_ABS( y ); - - /* Shortcut */ - if ( x->hi == 0 ) - { - if ( y > 0 ) - q = ( x->lo + ( y >> 1 ) ) / y; - else - q = 0x7FFFFFFFL; - - return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); + FT_Add64( &temp, &temp2, &temp ); + q = (FT_Long)ft_div64by32( temp.hi, temp.lo, b ); } - q = ft_div64by32( x->hi, x->lo, y ); - - return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q ); + return s < 0 ? -q : q; } -#endif /* 0 */ - -#endif /* 0 */ - #endif /* FT_LONG64 */ @@ -943,55 +859,40 @@ FT_Pos out_x, FT_Pos out_y ) { - FT_Pos ax = in_x; - FT_Pos ay = in_y; - - FT_Pos d_in, d_out, d_corner; - - - /* We approximate the Euclidean metric (sqrt(x^2 + y^2)) with */ - /* the Taxicab metric (|x| + |y|), which can be computed much */ - /* faster. If one of the two vectors is much longer than the */ - /* other one, the direction of the shorter vector doesn't */ - /* influence the result any more. */ - /* */ - /* corner */ - /* x---------------------------x */ - /* \ / */ - /* \ / */ - /* in \ / out */ - /* \ / */ - /* o */ - /* Point */ - /* */ - - if ( ax < 0 ) - ax = -ax; - if ( ay < 0 ) - ay = -ay; - d_in = ax + ay; /* d_in = || in || */ - - ax = out_x; - if ( ax < 0 ) - ax = -ax; - ay = out_y; - if ( ay < 0 ) - ay = -ay; - d_out = ax + ay; /* d_out = || out || */ - - ax = out_x + in_x; - if ( ax < 0 ) - ax = -ax; - ay = out_y + in_y; - if ( ay < 0 ) - ay = -ay; - d_corner = ax + ay; /* d_corner = || in + out || */ + FT_Pos ax = in_x + out_x; + FT_Pos ay = in_y + out_y; + + FT_Pos d_in, d_out, d_hypot; + + + /* The idea of this function is to compare the length of the */ + /* hypotenuse with the `in' and `out' length. The `corner' */ + /* represented by `in' and `out' is flat if the hypotenuse's */ + /* length isn't too large. */ + /* */ + /* This approach has the advantage that the angle between */ + /* `in' and `out' is not checked. In case one of the two */ + /* vectors is `dominant', this is, much larger than the */ + /* other vector, we thus always have a flat corner. */ + /* */ + /* hypotenuse */ + /* x---------------------------x */ + /* \ / */ + /* \ / */ + /* in \ / out */ + /* \ / */ + /* o */ + /* Point */ + + d_in = FT_HYPOT( in_x, in_y ); + d_out = FT_HYPOT( out_x, out_y ); + d_hypot = FT_HYPOT( ax, ay ); /* now do a simple length comparison: */ /* */ - /* d_in + d_out < 17/16 d_corner */ + /* d_in + d_out < 17/16 d_hypot */ - return ( d_in + d_out - d_corner ) < ( d_corner >> 4 ); + return ( d_in + d_out - d_hypot ) < ( d_hypot >> 4 ); } |