aboutsummaryrefslogtreecommitdiff
path: root/freetype/src/base/ftcalc.c
diff options
context:
space:
mode:
Diffstat (limited to 'freetype/src/base/ftcalc.c')
-rw-r--r--freetype/src/base/ftcalc.c475
1 files changed, 188 insertions, 287 deletions
diff --git a/freetype/src/base/ftcalc.c b/freetype/src/base/ftcalc.c
index fb0492721..7b6425056 100644
--- a/freetype/src/base/ftcalc.c
+++ b/freetype/src/base/ftcalc.c
@@ -39,7 +39,8 @@
#include <internal/ftdebug.h>
#include <internal/ftobjs.h>
-#ifdef FT_MULFIX_INLINED
+
+#ifdef FT_MULFIX_ASSEMBLER
#undef FT_MulFix
#endif
@@ -67,6 +68,16 @@
#define FT_COMPONENT trace_calc
+ /* transfer sign leaving a positive number */
+#define FT_MOVE_SIGN( x, s ) \
+ FT_BEGIN_STMNT \
+ if ( x < 0 ) \
+ { \
+ x = -x; \
+ s = -s; \
+ } \
+ FT_END_STMNT
+
/* The following three functions are available regardless of whether */
/* FT_LONG64 is defined. */
@@ -75,8 +86,8 @@
FT_EXPORT_DEF( FT_Fixed )
FT_RoundFix( FT_Fixed a )
{
- return ( a >= 0 ) ? ( a + 0x8000L ) & ~0xFFFFL
- : -((-a + 0x8000L ) & ~0xFFFFL );
+ return a >= 0 ? ( a + 0x8000L ) & ~0xFFFFL
+ : -((-a + 0x8000L ) & ~0xFFFFL );
}
@@ -85,8 +96,8 @@
FT_EXPORT_DEF( FT_Fixed )
FT_CeilFix( FT_Fixed a )
{
- return ( a >= 0 ) ? ( a + 0xFFFFL ) & ~0xFFFFL
- : -((-a + 0xFFFFL ) & ~0xFFFFL );
+ return a >= 0 ? ( a + 0xFFFFL ) & ~0xFFFFL
+ : -((-a + 0xFFFFL ) & ~0xFFFFL );
}
@@ -95,38 +106,40 @@
FT_EXPORT_DEF( FT_Fixed )
FT_FloorFix( FT_Fixed a )
{
- return ( a >= 0 ) ? a & ~0xFFFFL
- : -((-a) & ~0xFFFFL );
+ return a >= 0 ? a & ~0xFFFFL
+ : -((-a) & ~0xFFFFL );
}
+#ifndef FT_MSB
FT_BASE_DEF ( FT_Int )
FT_MSB( FT_UInt32 z )
{
- FT_Int shift = 0;
+ FT_Int shift = 0;
+
/* determine msb bit index in `shift' */
- if ( z >= ( 1L << 16 ) )
+ if ( z & 0xFFFF0000UL )
{
z >>= 16;
shift += 16;
}
- if ( z >= ( 1L << 8 ) )
+ if ( z & 0x0000FF00UL )
{
z >>= 8;
shift += 8;
}
- if ( z >= ( 1L << 4 ) )
+ if ( z & 0x000000F0UL )
{
z >>= 4;
shift += 4;
}
- if ( z >= ( 1L << 2 ) )
+ if ( z & 0x0000000CUL )
{
z >>= 2;
shift += 2;
}
- if ( z >= ( 1L << 1 ) )
+ if ( z & 0x00000002UL )
{
/* z >>= 1; */
shift += 1;
@@ -135,6 +148,8 @@
return shift;
}
+#endif /* !FT_MSB */
+
/* documentation is in ftcalc.h */
@@ -162,19 +177,18 @@
FT_Long b,
FT_Long c )
{
- FT_Int s;
+ FT_Int s = 1;
FT_Long d;
- s = 1;
- if ( a < 0 ) { a = -a; s = -1; }
- if ( b < 0 ) { b = -b; s = -s; }
- if ( c < 0 ) { c = -c; s = -s; }
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
+ FT_MOVE_SIGN( c, s );
d = (FT_Long)( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c
: 0x7FFFFFFFL );
- return ( s > 0 ) ? d : -d;
+ return s < 0 ? -d : d;
}
@@ -185,19 +199,18 @@
FT_Long b,
FT_Long c )
{
- FT_Int s;
+ FT_Int s = 1;
FT_Long d;
- s = 1;
- if ( a < 0 ) { a = -a; s = -1; }
- if ( b < 0 ) { b = -b; s = -s; }
- if ( c < 0 ) { c = -c; s = -s; }
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
+ FT_MOVE_SIGN( c, s );
d = (FT_Long)( c > 0 ? (FT_Int64)a * b / c
: 0x7FFFFFFFL );
- return ( s > 0 ) ? d : -d;
+ return s < 0 ? -d : d;
}
@@ -217,21 +230,12 @@
FT_Long c;
- if ( a < 0 )
- {
- a = -a;
- s = -1;
- }
-
- if ( b < 0 )
- {
- b = -b;
- s = -s;
- }
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
- return ( s > 0 ) ? c : -c;
+ return s < 0 ? -c : c;
#endif /* FT_MULFIX_ASSEMBLER */
}
@@ -243,30 +247,17 @@
FT_DivFix( FT_Long a,
FT_Long b )
{
- FT_Int32 s;
- FT_UInt32 q;
+ FT_Int s = 1;
+ FT_Long q;
- s = 1;
- if ( a < 0 )
- {
- a = -a;
- s = -1;
- }
- if ( b < 0 )
- {
- b = -b;
- s = -s;
- }
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
- if ( b == 0 )
- /* check for division by 0 */
- q = 0x7FFFFFFFL;
- else
- /* compute result directly */
- q = (FT_UInt32)( ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b );
+ q = (FT_Long)( b > 0 ? ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b
+ : 0x7FFFFFFFL );
- return ( s < 0 ? -(FT_Long)q : (FT_Long)q );
+ return s < 0 ? -q : q;
}
@@ -314,25 +305,30 @@
FT_Int i;
- q = 0;
- r = hi;
-
- if ( r >= y )
+ if ( hi >= y )
return (FT_UInt32)0x7FFFFFFFL;
- i = 32;
+ /* We shift as many bits as we can into the high register, perform */
+ /* 32-bit division with modulo there, then work through the remaining */
+ /* bits with long division. This optimization is especially noticeable */
+ /* for smaller dividends that barely use the high register. */
+
+ i = 31 - FT_MSB( hi );
+ r = ( hi << i ) | ( lo >> ( 32 - i ) ); lo <<= i; /* left 64-bit shift */
+ q = r / y;
+ r -= q * y; /* remainder */
+
+ i = 32 - i; /* bits remaining in low register */
do
{
- r <<= 1;
q <<= 1;
- r |= lo >> 31;
+ r = ( r << 1 ) | ( lo >> 31 ); lo <<= 1;
if ( r >= y )
{
r -= y;
q |= 1;
}
- lo <<= 1;
} while ( --i );
return q;
@@ -344,7 +340,7 @@
FT_Int64* y,
FT_Int64 *z )
{
- register FT_UInt32 lo, hi;
+ FT_UInt32 lo, hi;
lo = x->lo + y->lo;
@@ -355,60 +351,95 @@
}
- /* documentation is in freetype.h */
-
- /* The FT_MulDiv function has been optimized thanks to ideas from */
- /* Graham Asher. The trick is to optimize computation when everything */
- /* fits within 32-bits (a rather common case). */
+ /* The FT_MulDiv function has been optimized thanks to ideas from */
+ /* Graham Asher and Alexei Podtelezhnikov. The trick is to optimize */
+ /* a rather common case when everything fits within 32-bits. */
+ /* */
+ /* We compute 'a*b+c/2', then divide it by 'c' (all positive values). */
+ /* */
+ /* The product of two positive numbers never exceeds the square of */
+ /* its mean values. Therefore, we always avoid the overflow by */
+ /* imposing */
/* */
- /* we compute 'a*b+c/2', then divide it by 'c'. (positive values) */
+ /* (a + b) / 2 <= sqrt(X - c/2) , */
/* */
- /* 46340 is FLOOR(SQRT(2^31-1)). */
+ /* where X = 2^32 - 1, the maximum unsigned 32-bit value, and using */
+ /* unsigned arithmetic. Now we replace `sqrt' with a linear function */
+ /* that is smaller or equal for all values of c in the interval */
+ /* [0;X/2]; it should be equal to sqrt(X) and sqrt(3X/4) at the */
+ /* endpoints. Substituting the linear solution and explicit numbers */
+ /* we get */
/* */
- /* if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 ) */
+ /* a + b <= 131071.99 - c / 122291.84 . */
/* */
- /* 0x7FFFFFFF - 0x7FFEA810 = 0x157F0 */
+ /* In practice, we should use a faster and even stronger inequality */
/* */
- /* if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF ) */
+ /* a + b <= 131071 - (c >> 16) */
/* */
- /* and 2*0x157F0 = 176096 */
+ /* or, alternatively, */
/* */
+ /* a + b <= 129894 - (c >> 17) . */
+ /* */
+ /* FT_MulFix, on the other hand, is optimized for a small value of */
+ /* the first argument, when the second argument can be much larger. */
+ /* This can be achieved by scaling the second argument and the limit */
+ /* in the above inequalities. For example, */
+ /* */
+ /* a + (b >> 8) <= (131071 >> 4) */
+ /* */
+ /* covers the practical range of use. The actual test below is a bit */
+ /* tighter to avoid the border case overflows. */
+ /* */
+ /* In the case of FT_DivFix, the exact overflow check */
+ /* */
+ /* a << 16 <= X - c/2 */
+ /* */
+ /* is scaled down by 2^16 and we use */
+ /* */
+ /* a <= 65535 - (c >> 17) . */
+
+ /* documentation is in freetype.h */
FT_EXPORT_DEF( FT_Long )
FT_MulDiv( FT_Long a,
FT_Long b,
FT_Long c )
{
- long s;
+ FT_Int s = 1;
/* XXX: this function does not allow 64-bit arguments */
if ( a == 0 || b == c )
return a;
- s = a; a = FT_ABS( a );
- s ^= b; b = FT_ABS( b );
- s ^= c; c = FT_ABS( c );
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
+ FT_MOVE_SIGN( c, s );
+
+ if ( c == 0 )
+ a = 0x7FFFFFFFL;
- if ( a <= 46340L && b <= 46340L && c <= 176095L && c > 0 )
- a = ( a * b + ( c >> 1 ) ) / c;
+ else if ( (FT_ULong)a + b <= 129894UL - ( c >> 17 ) )
+ a = ( (FT_ULong)a * b + ( c >> 1 ) ) / c;
- else if ( (FT_Int32)c > 0 )
+ else
{
FT_Int64 temp, temp2;
- ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp );
+ ft_multo64( a, b, &temp );
temp2.hi = 0;
- temp2.lo = (FT_UInt32)(c >> 1);
+ temp2.lo = c >> 1;
+
FT_Add64( &temp, &temp2, &temp );
- a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c );
+
+ /* last attempt to ditch long division */
+ a = temp.hi == 0 ? temp.lo / c
+ : ft_div64by32( temp.hi, temp.lo, c );
}
- else
- a = 0x7FFFFFFFL;
- return ( s < 0 ? -a : a );
+ return s < 0 ? -a : a;
}
@@ -417,31 +448,35 @@
FT_Long b,
FT_Long c )
{
- long s;
+ FT_Int s = 1;
if ( a == 0 || b == c )
return a;
- s = a; a = FT_ABS( a );
- s ^= b; b = FT_ABS( b );
- s ^= c; c = FT_ABS( c );
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
+ FT_MOVE_SIGN( c, s );
+
+ if ( c == 0 )
+ a = 0x7FFFFFFFL;
- if ( a <= 46340L && b <= 46340L && c > 0 )
- a = a * b / c;
+ else if ( (FT_ULong)a + b <= 131071UL )
+ a = (FT_ULong)a * b / c;
- else if ( (FT_Int32)c > 0 )
+ else
{
FT_Int64 temp;
- ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp );
- a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c );
+ ft_multo64( a, b, &temp );
+
+ /* last attempt to ditch long division */
+ a = temp.hi == 0 ? temp.lo / c
+ : ft_div64by32( temp.hi, temp.lo, c );
}
- else
- a = 0x7FFFFFFFL;
- return ( s < 0 ? -a : a );
+ return s < 0 ? -a : a;
}
@@ -497,7 +532,7 @@
ua = (FT_ULong)a;
ub = (FT_ULong)b;
- if ( ua <= 2048 && ub <= 1048576L )
+ if ( ua + ( ub >> 8 ) <= 8190UL )
ua = ( ua * ub + 0x8000U ) >> 16;
else
{
@@ -515,20 +550,20 @@
#else /* 0 */
- FT_Long s;
+ FT_Int s = 1;
FT_ULong ua, ub;
if ( a == 0 || b == 0x10000L )
return a;
- s = a; a = FT_ABS( a );
- s ^= b; b = FT_ABS( b );
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
ua = (FT_ULong)a;
ub = (FT_ULong)b;
- if ( ua <= 2048 && ub <= 1048576L )
+ if ( ua + ( ub >> 8 ) <= 8190UL )
ua = ( ua * ub + 0x8000UL ) >> 16;
else
{
@@ -539,7 +574,7 @@
( ( al * ( ub & 0xFFFFUL ) + 0x8000UL ) >> 16 );
}
- return ( s < 0 ? -(FT_Long)ua : (FT_Long)ua );
+ return s < 0 ? -(FT_Long)ua : (FT_Long)ua;
#endif /* 0 */
@@ -552,23 +587,24 @@
FT_DivFix( FT_Long a,
FT_Long b )
{
- FT_Int32 s;
- FT_UInt32 q;
+ FT_Int s = 1;
+ FT_Long q;
/* XXX: this function does not allow 64-bit arguments */
- s = (FT_Int32)a; a = FT_ABS( a );
- s ^= (FT_Int32)b; b = FT_ABS( b );
- if ( (FT_UInt32)b == 0 )
+ FT_MOVE_SIGN( a, s );
+ FT_MOVE_SIGN( b, s );
+
+ if ( b == 0 )
{
/* check for division by 0 */
- q = (FT_UInt32)0x7FFFFFFFL;
+ q = 0x7FFFFFFFL;
}
- else if ( ( a >> 16 ) == 0 )
+ else if ( a <= 65535L - ( b >> 17 ) )
{
/* compute result directly */
- q = (FT_UInt32)( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / (FT_UInt32)b;
+ q = (FT_Long)( ( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / b );
}
else
{
@@ -576,138 +612,18 @@
FT_Int64 temp, temp2;
- temp.hi = (FT_Int32)( a >> 16 );
- temp.lo = (FT_UInt32)a << 16;
+ temp.hi = a >> 16;
+ temp.lo = a << 16;
temp2.hi = 0;
- temp2.lo = (FT_UInt32)( b >> 1 );
- FT_Add64( &temp, &temp2, &temp );
- q = ft_div64by32( temp.hi, temp.lo, (FT_Int32)b );
- }
-
- return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
- }
+ temp2.lo = b >> 1;
-
-#if 0
-
- /* documentation is in ftcalc.h */
-
- FT_EXPORT_DEF( void )
- FT_MulTo64( FT_Int32 x,
- FT_Int32 y,
- FT_Int64 *z )
- {
- FT_Int32 s;
-
-
- s = x; x = FT_ABS( x );
- s ^= y; y = FT_ABS( y );
-
- ft_multo64( x, y, z );
-
- if ( s < 0 )
- {
- z->lo = (FT_UInt32)-(FT_Int32)z->lo;
- z->hi = ~z->hi + !( z->lo );
- }
- }
-
-
- /* apparently, the second version of this code is not compiled correctly */
- /* on Mac machines with the MPW C compiler.. tsk, tsk, tsk... */
-
-#if 1
-
- FT_EXPORT_DEF( FT_Int32 )
- FT_Div64by32( FT_Int64* x,
- FT_Int32 y )
- {
- FT_Int32 s;
- FT_UInt32 q, r, i, lo;
-
-
- s = x->hi;
- if ( s < 0 )
- {
- x->lo = (FT_UInt32)-(FT_Int32)x->lo;
- x->hi = ~x->hi + !x->lo;
- }
- s ^= y; y = FT_ABS( y );
-
- /* Shortcut */
- if ( x->hi == 0 )
- {
- if ( y > 0 )
- q = x->lo / y;
- else
- q = 0x7FFFFFFFL;
-
- return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
- }
-
- r = x->hi;
- lo = x->lo;
-
- if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */
- return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL );
- /* Return Max/Min Int32 if division overflow. */
- /* This includes division by zero! */
- q = 0;
- for ( i = 0; i < 32; i++ )
- {
- r <<= 1;
- q <<= 1;
- r |= lo >> 31;
-
- if ( r >= (FT_UInt32)y )
- {
- r -= y;
- q |= 1;
- }
- lo <<= 1;
- }
-
- return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
- }
-
-#else /* 0 */
-
- FT_EXPORT_DEF( FT_Int32 )
- FT_Div64by32( FT_Int64* x,
- FT_Int32 y )
- {
- FT_Int32 s;
- FT_UInt32 q;
-
-
- s = x->hi;
- if ( s < 0 )
- {
- x->lo = (FT_UInt32)-(FT_Int32)x->lo;
- x->hi = ~x->hi + !x->lo;
- }
- s ^= y; y = FT_ABS( y );
-
- /* Shortcut */
- if ( x->hi == 0 )
- {
- if ( y > 0 )
- q = ( x->lo + ( y >> 1 ) ) / y;
- else
- q = 0x7FFFFFFFL;
-
- return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
+ FT_Add64( &temp, &temp2, &temp );
+ q = (FT_Long)ft_div64by32( temp.hi, temp.lo, b );
}
- q = ft_div64by32( x->hi, x->lo, y );
-
- return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
+ return s < 0 ? -q : q;
}
-#endif /* 0 */
-
-#endif /* 0 */
-
#endif /* FT_LONG64 */
@@ -943,55 +859,40 @@
FT_Pos out_x,
FT_Pos out_y )
{
- FT_Pos ax = in_x;
- FT_Pos ay = in_y;
-
- FT_Pos d_in, d_out, d_corner;
-
-
- /* We approximate the Euclidean metric (sqrt(x^2 + y^2)) with */
- /* the Taxicab metric (|x| + |y|), which can be computed much */
- /* faster. If one of the two vectors is much longer than the */
- /* other one, the direction of the shorter vector doesn't */
- /* influence the result any more. */
- /* */
- /* corner */
- /* x---------------------------x */
- /* \ / */
- /* \ / */
- /* in \ / out */
- /* \ / */
- /* o */
- /* Point */
- /* */
-
- if ( ax < 0 )
- ax = -ax;
- if ( ay < 0 )
- ay = -ay;
- d_in = ax + ay; /* d_in = || in || */
-
- ax = out_x;
- if ( ax < 0 )
- ax = -ax;
- ay = out_y;
- if ( ay < 0 )
- ay = -ay;
- d_out = ax + ay; /* d_out = || out || */
-
- ax = out_x + in_x;
- if ( ax < 0 )
- ax = -ax;
- ay = out_y + in_y;
- if ( ay < 0 )
- ay = -ay;
- d_corner = ax + ay; /* d_corner = || in + out || */
+ FT_Pos ax = in_x + out_x;
+ FT_Pos ay = in_y + out_y;
+
+ FT_Pos d_in, d_out, d_hypot;
+
+
+ /* The idea of this function is to compare the length of the */
+ /* hypotenuse with the `in' and `out' length. The `corner' */
+ /* represented by `in' and `out' is flat if the hypotenuse's */
+ /* length isn't too large. */
+ /* */
+ /* This approach has the advantage that the angle between */
+ /* `in' and `out' is not checked. In case one of the two */
+ /* vectors is `dominant', this is, much larger than the */
+ /* other vector, we thus always have a flat corner. */
+ /* */
+ /* hypotenuse */
+ /* x---------------------------x */
+ /* \ / */
+ /* \ / */
+ /* in \ / out */
+ /* \ / */
+ /* o */
+ /* Point */
+
+ d_in = FT_HYPOT( in_x, in_y );
+ d_out = FT_HYPOT( out_x, out_y );
+ d_hypot = FT_HYPOT( ax, ay );
/* now do a simple length comparison: */
/* */
- /* d_in + d_out < 17/16 d_corner */
+ /* d_in + d_out < 17/16 d_hypot */
- return ( d_in + d_out - d_corner ) < ( d_corner >> 4 );
+ return ( d_in + d_out - d_hypot ) < ( d_hypot >> 4 );
}