1 files changed, 188 insertions, 287 deletions
diff --git a/freetype/src/base/ftcalc.c b/freetype/src/base/ftcalc.c
index fb0492721..7b6425056 100644
--- a/freetype/src/base/ftcalc.c
+++ b/freetype/src/base/ftcalc.c
@@ -39,7 +39,8 @@
 #include <internal/ftdebug.h>
 #include <internal/ftobjs.h>
 
-#ifdef FT_MULFIX_INLINED
+
+#ifdef FT_MULFIX_ASSEMBLER
 #undef FT_MulFix
 #endif
 
@@ -67,6 +68,16 @@
 #define FT_COMPONENT  trace_calc
 
 
+  /* transfer sign leaving a positive number */
+#define FT_MOVE_SIGN( x, s ) \
+  FT_BEGIN_STMNT             \
+    if ( x < 0 )             \
+    {                        \
+      x = -x;                \
+      s = -s;                \
+    }                        \
+  FT_END_STMNT
+
   /* The following three functions are available regardless of whether */
   /* FT_LONG64 is defined.                                             */
 
@@ -75,8 +86,8 @@
   FT_EXPORT_DEF( FT_Fixed )
   FT_RoundFix( FT_Fixed  a )
   {
-    return ( a >= 0 ) ?   ( a + 0x8000L ) & ~0xFFFFL
-                      : -((-a + 0x8000L ) & ~0xFFFFL );
+    return a >= 0 ?   ( a + 0x8000L ) & ~0xFFFFL
+                  : -((-a + 0x8000L ) & ~0xFFFFL );
   }
 
 
@@ -85,8 +96,8 @@
   FT_EXPORT_DEF( FT_Fixed )
   FT_CeilFix( FT_Fixed  a )
   {
-    return ( a >= 0 ) ?   ( a + 0xFFFFL ) & ~0xFFFFL
-                      : -((-a + 0xFFFFL ) & ~0xFFFFL );
+    return a >= 0 ?   ( a + 0xFFFFL ) & ~0xFFFFL
+                  : -((-a + 0xFFFFL ) & ~0xFFFFL );
   }
 
 
@@ -95,38 +106,40 @@
   FT_EXPORT_DEF( FT_Fixed )
   FT_FloorFix( FT_Fixed  a )
   {
-    return ( a >= 0 ) ?   a & ~0xFFFFL
-                      : -((-a) & ~0xFFFFL );
+    return a >= 0 ?   a & ~0xFFFFL
+                  : -((-a) & ~0xFFFFL );
   }
 
+#ifndef FT_MSB
 
   FT_BASE_DEF ( FT_Int )
   FT_MSB( FT_UInt32 z )
   {
-    FT_Int shift = 0;
+    FT_Int  shift = 0;
+
 
     /* determine msb bit index in `shift' */
-    if ( z >= ( 1L << 16 ) )
+    if ( z & 0xFFFF0000UL )
     {
       z     >>= 16;
       shift  += 16;
     }
-    if ( z >= ( 1L << 8 ) )
+    if ( z & 0x0000FF00UL )
     {
       z     >>= 8;
       shift  += 8;
     }
-    if ( z >= ( 1L << 4 ) )
+    if ( z & 0x000000F0UL )
     {
       z     >>= 4;
       shift  += 4;
     }
-    if ( z >= ( 1L << 2 ) )
+    if ( z & 0x0000000CUL )
     {
       z     >>= 2;
       shift  += 2;
     }
-    if ( z >= ( 1L << 1 ) )
+    if ( z & 0x00000002UL )
     {
    /* z     >>= 1; */
       shift  += 1;
@@ -135,6 +148,8 @@
     return shift;
   }
 
+#endif /* !FT_MSB */
+
 
   /* documentation is in ftcalc.h */
 
@@ -162,19 +177,18 @@
              FT_Long  b,
              FT_Long  c )
   {
-    FT_Int   s;
+    FT_Int   s = 1;
     FT_Long  d;
 
 
-    s = 1;
-    if ( a < 0 ) { a = -a; s = -1; }
-    if ( b < 0 ) { b = -b; s = -s; }
-    if ( c < 0 ) { c = -c; s = -s; }
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
+    FT_MOVE_SIGN( c, s );
 
     d = (FT_Long)( c > 0 ? ( (FT_Int64)a * b + ( c >> 1 ) ) / c
                          : 0x7FFFFFFFL );
 
-    return ( s > 0 ) ? d : -d;
+    return s < 0 ? -d : d;
   }
 
 
@@ -185,19 +199,18 @@
                       FT_Long  b,
                       FT_Long  c )
   {
-    FT_Int   s;
+    FT_Int   s = 1;
     FT_Long  d;
 
 
-    s = 1;
-    if ( a < 0 ) { a = -a; s = -1; }
-    if ( b < 0 ) { b = -b; s = -s; }
-    if ( c < 0 ) { c = -c; s = -s; }
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
+    FT_MOVE_SIGN( c, s );
 
     d = (FT_Long)( c > 0 ? (FT_Int64)a * b / c
                          : 0x7FFFFFFFL );
 
-    return ( s > 0 ) ? d : -d;
+    return s < 0 ? -d : d;
   }
 
 
@@ -217,21 +230,12 @@
     FT_Long  c;
 
 
-    if ( a < 0 )
-    {
-      a = -a;
-      s = -1;
-    }
-
-    if ( b < 0 )
-    {
-      b = -b;
-      s = -s;
-    }
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
 
     c = (FT_Long)( ( (FT_Int64)a * b + 0x8000L ) >> 16 );
 
-    return ( s > 0 ) ? c : -c;
+    return s < 0 ? -c : c;
 
 #endif /* FT_MULFIX_ASSEMBLER */
   }
@@ -243,30 +247,17 @@
   FT_DivFix( FT_Long  a,
              FT_Long  b )
   {
-    FT_Int32   s;
-    FT_UInt32  q;
+    FT_Int   s = 1;
+    FT_Long  q;
 
 
-    s = 1;
-    if ( a < 0 )
-    {
-      a = -a;
-      s = -1;
-    }
-    if ( b < 0 )
-    {
-      b = -b;
-      s = -s;
-    }
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
 
-    if ( b == 0 )
-      /* check for division by 0 */
-      q = 0x7FFFFFFFL;
-    else
-      /* compute result directly */
-      q = (FT_UInt32)( ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b );
+    q = (FT_Long)( b > 0 ? ( ( (FT_UInt64)a << 16 ) + ( b >> 1 ) ) / b
+                         : 0x7FFFFFFFL );
 
-    return ( s < 0 ? -(FT_Long)q : (FT_Long)q );
+    return s < 0 ? -q : q;
   }
 
 
@@ -314,25 +305,30 @@
     FT_Int     i;
 
 
-    q = 0;
-    r = hi;
-
-    if ( r >= y )
+    if ( hi >= y )
       return (FT_UInt32)0x7FFFFFFFL;
 
-    i = 32;
+    /* We shift as many bits as we can into the high register, perform     */
+    /* 32-bit division with modulo there, then work through the remaining  */
+    /* bits with long division. This optimization is especially noticeable */
+    /* for smaller dividends that barely use the high register.            */
+
+    i = 31 - FT_MSB( hi );
+    r = ( hi << i ) | ( lo >> ( 32 - i ) ); lo <<= i; /* left 64-bit shift */
+    q = r / y;
+    r -= q * y;   /* remainder */
+
+    i = 32 - i;   /* bits remaining in low register */
     do
     {
-      r <<= 1;
       q <<= 1;
-      r  |= lo >> 31;
+      r   = ( r << 1 ) | ( lo >> 31 ); lo <<= 1;
 
       if ( r >= y )
       {
         r -= y;
         q |= 1;
       }
-      lo <<= 1;
     } while ( --i );
 
     return q;
@@ -344,7 +340,7 @@
             FT_Int64*  y,
             FT_Int64  *z )
   {
-    register FT_UInt32  lo, hi;
+    FT_UInt32  lo, hi;
 
 
     lo = x->lo + y->lo;
@@ -355,60 +351,95 @@
   }
 
 
-  /* documentation is in freetype.h */
-
-  /* The FT_MulDiv function has been optimized thanks to ideas from      */
-  /* Graham Asher.  The trick is to optimize computation when everything */
-  /* fits within 32-bits (a rather common case).                         */
+  /*  The FT_MulDiv function has been optimized thanks to ideas from     */
+  /*  Graham Asher and Alexei Podtelezhnikov.  The trick is to optimize  */
+  /*  a rather common case when everything fits within 32-bits.          */
+  /*                                                                     */
+  /*  We compute 'a*b+c/2', then divide it by 'c' (all positive values). */
+  /*                                                                     */
+  /*  The product of two positive numbers never exceeds the square of    */
+  /*  its mean values.  Therefore, we always avoid the overflow by       */
+  /*  imposing                                                           */
   /*                                                                     */
-  /*  we compute 'a*b+c/2', then divide it by 'c'. (positive values)     */
+  /*    (a + b) / 2 <= sqrt(X - c/2)    ,                                */
   /*                                                                     */
-  /*  46340 is FLOOR(SQRT(2^31-1)).                                      */
+  /*  where X = 2^32 - 1, the maximum unsigned 32-bit value, and using   */
+  /*  unsigned arithmetic.  Now we replace `sqrt' with a linear function */
+  /*  that is smaller or equal for all values of c in the interval       */
+  /*  [0;X/2]; it should be equal to sqrt(X) and sqrt(3X/4) at the       */
+  /*  endpoints.  Substituting the linear solution and explicit numbers  */
+  /*  we get                                                             */
   /*                                                                     */
-  /*  if ( a <= 46340 && b <= 46340 ) then ( a*b <= 0x7FFEA810 )         */
+  /*    a + b <= 131071.99 - c / 122291.84    .                          */
   /*                                                                     */
-  /*  0x7FFFFFFF - 0x7FFEA810 = 0x157F0                                  */
+  /*  In practice, we should use a faster and even stronger inequality   */
   /*                                                                     */
-  /*  if ( c < 0x157F0*2 ) then ( a*b+c/2 <= 0x7FFFFFFF )                */
+  /*    a + b <= 131071 - (c >> 16)                                      */
   /*                                                                     */
-  /*  and 2*0x157F0 = 176096                                             */
+  /*  or, alternatively,                                                 */
   /*                                                                     */
+  /*    a + b <= 129894 - (c >> 17)    .                                 */
+  /*                                                                     */
+  /*  FT_MulFix, on the other hand, is optimized for a small value of    */
+  /*  the first argument, when the second argument can be much larger.   */
+  /*  This can be achieved by scaling the second argument and the limit  */
+  /*  in the above inequalities.  For example,                           */
+  /*                                                                     */
+  /*    a + (b >> 8) <= (131071 >> 4)                                    */
+  /*                                                                     */
+  /*  covers the practical range of use. The actual test below is a bit  */
+  /*  tighter to avoid the border case overflows.                        */
+  /*                                                                     */
+  /*  In the case of FT_DivFix, the exact overflow check                 */
+  /*                                                                     */
+  /*    a << 16 <= X - c/2                                               */
+  /*                                                                     */
+  /*  is scaled down by 2^16 and we use                                  */
+  /*                                                                     */
+  /*    a <= 65535 - (c >> 17)    .                                      */
+
+  /* documentation is in freetype.h */
 
   FT_EXPORT_DEF( FT_Long )
   FT_MulDiv( FT_Long  a,
              FT_Long  b,
              FT_Long  c )
   {
-    long  s;
+    FT_Int  s = 1;
 
 
     /* XXX: this function does not allow 64-bit arguments */
     if ( a == 0 || b == c )
       return a;
 
-    s  = a; a = FT_ABS( a );
-    s ^= b; b = FT_ABS( b );
-    s ^= c; c = FT_ABS( c );
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
+    FT_MOVE_SIGN( c, s );
+
+    if ( c == 0 )
+      a = 0x7FFFFFFFL;
 
-    if ( a <= 46340L && b <= 46340L && c <= 176095L && c > 0 )
-      a = ( a * b + ( c >> 1 ) ) / c;
+    else if ( (FT_ULong)a + b <= 129894UL - ( c >> 17 ) )
+      a = ( (FT_ULong)a * b + ( c >> 1 ) ) / c;
 
-    else if ( (FT_Int32)c > 0 )
+    else
     {
       FT_Int64  temp, temp2;
 
 
-      ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp );
+      ft_multo64( a, b, &temp );
 
       temp2.hi = 0;
-      temp2.lo = (FT_UInt32)(c >> 1);
+      temp2.lo = c >> 1;
+
       FT_Add64( &temp, &temp2, &temp );
-      a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c );
+
+      /* last attempt to ditch long division */
+      a = temp.hi == 0 ? temp.lo / c
+                       : ft_div64by32( temp.hi, temp.lo, c );
     }
-    else
-      a = 0x7FFFFFFFL;
 
-    return ( s < 0 ? -a : a );
+    return s < 0 ? -a : a;
   }
 
 
@@ -417,31 +448,35 @@
                       FT_Long  b,
                       FT_Long  c )
   {
-    long  s;
+    FT_Int  s = 1;
 
 
     if ( a == 0 || b == c )
       return a;
 
-    s  = a; a = FT_ABS( a );
-    s ^= b; b = FT_ABS( b );
-    s ^= c; c = FT_ABS( c );
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
+    FT_MOVE_SIGN( c, s );
+
+    if ( c == 0 )
+      a = 0x7FFFFFFFL;
 
-    if ( a <= 46340L && b <= 46340L && c > 0 )
-      a = a * b / c;
+    else if ( (FT_ULong)a + b <= 131071UL )
+      a = (FT_ULong)a * b / c;
 
-    else if ( (FT_Int32)c > 0 )
+    else
     {
       FT_Int64  temp;
 
 
-      ft_multo64( (FT_Int32)a, (FT_Int32)b, &temp );
-      a = ft_div64by32( temp.hi, temp.lo, (FT_Int32)c );
+      ft_multo64( a, b, &temp );
+
+      /* last attempt to ditch long division */
+      a = temp.hi == 0 ? temp.lo / c
+                       : ft_div64by32( temp.hi, temp.lo, c );
     }
-    else
-      a = 0x7FFFFFFFL;
 
-    return ( s < 0 ? -a : a );
+    return s < 0 ? -a : a;
   }
 
 
@@ -497,7 +532,7 @@
     ua = (FT_ULong)a;
     ub = (FT_ULong)b;
 
-    if ( ua <= 2048 && ub <= 1048576L )
+    if ( ua + ( ub >> 8 ) <= 8190UL )
       ua = ( ua * ub + 0x8000U ) >> 16;
     else
     {
@@ -515,20 +550,20 @@
 
 #else /* 0 */
 
-    FT_Long   s;
+    FT_Int    s = 1;
     FT_ULong  ua, ub;
 
 
     if ( a == 0 || b == 0x10000L )
       return a;
 
-    s  = a; a = FT_ABS( a );
-    s ^= b; b = FT_ABS( b );
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
 
     ua = (FT_ULong)a;
     ub = (FT_ULong)b;
 
-    if ( ua <= 2048 && ub <= 1048576L )
+    if ( ua + ( ub >> 8 ) <= 8190UL )
       ua = ( ua * ub + 0x8000UL ) >> 16;
     else
     {
@@ -539,7 +574,7 @@
            ( ( al * ( ub & 0xFFFFUL ) + 0x8000UL ) >> 16 );
     }
 
-    return ( s < 0 ? -(FT_Long)ua : (FT_Long)ua );
+    return s < 0 ? -(FT_Long)ua : (FT_Long)ua;
 
 #endif /* 0 */
 
@@ -552,23 +587,24 @@
   FT_DivFix( FT_Long  a,
              FT_Long  b )
   {
-    FT_Int32   s;
-    FT_UInt32  q;
+    FT_Int   s = 1;
+    FT_Long  q;
 
 
     /* XXX: this function does not allow 64-bit arguments */
-    s  = (FT_Int32)a; a = FT_ABS( a );
-    s ^= (FT_Int32)b; b = FT_ABS( b );
 
-    if ( (FT_UInt32)b == 0 )
+    FT_MOVE_SIGN( a, s );
+    FT_MOVE_SIGN( b, s );
+
+    if ( b == 0 )
     {
       /* check for division by 0 */
-      q = (FT_UInt32)0x7FFFFFFFL;
+      q = 0x7FFFFFFFL;
     }
-    else if ( ( a >> 16 ) == 0 )
+    else if ( a <= 65535L - ( b >> 17 ) )
     {
       /* compute result directly */
-      q = (FT_UInt32)( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / (FT_UInt32)b;
+      q = (FT_Long)( ( ( (FT_ULong)a << 16 ) + ( b >> 1 ) ) / b );
     }
     else
     {
@@ -576,138 +612,18 @@
       FT_Int64  temp, temp2;
 
 
-      temp.hi  = (FT_Int32)( a >> 16 );
-      temp.lo  = (FT_UInt32)a << 16;
+      temp.hi  = a >> 16;
+      temp.lo  = a << 16;
       temp2.hi = 0;
-      temp2.lo = (FT_UInt32)( b >> 1 );
-      FT_Add64( &temp, &temp2, &temp );
-      q = ft_div64by32( temp.hi, temp.lo, (FT_Int32)b );
-    }
-
-    return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
-  }
+      temp2.lo = b >> 1;
 
-
-#if 0
-
-  /* documentation is in ftcalc.h */
-
-  FT_EXPORT_DEF( void )
-  FT_MulTo64( FT_Int32   x,
-              FT_Int32   y,
-              FT_Int64  *z )
-  {
-    FT_Int32  s;
-
-
-    s  = x; x = FT_ABS( x );
-    s ^= y; y = FT_ABS( y );
-
-    ft_multo64( x, y, z );
-
-    if ( s < 0 )
-    {
-      z->lo = (FT_UInt32)-(FT_Int32)z->lo;
-      z->hi = ~z->hi + !( z->lo );
-    }
-  }
-
-
-  /* apparently, the second version of this code is not compiled correctly */
-  /* on Mac machines with the MPW C compiler..  tsk, tsk, tsk...           */
-
-#if 1
-
-  FT_EXPORT_DEF( FT_Int32 )
-  FT_Div64by32( FT_Int64*  x,
-                FT_Int32   y )
-  {
-    FT_Int32   s;
-    FT_UInt32  q, r, i, lo;
-
-
-    s  = x->hi;
-    if ( s < 0 )
-    {
-      x->lo = (FT_UInt32)-(FT_Int32)x->lo;
-      x->hi = ~x->hi + !x->lo;
-    }
-    s ^= y;  y = FT_ABS( y );
-
-    /* Shortcut */
-    if ( x->hi == 0 )
-    {
-      if ( y > 0 )
-        q = x->lo / y;
-      else
-        q = 0x7FFFFFFFL;
-
-      return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
-    }
-
-    r  = x->hi;
-    lo = x->lo;
-
-    if ( r >= (FT_UInt32)y ) /* we know y is to be treated as unsigned here */
-      return ( s < 0 ? 0x80000001UL : 0x7FFFFFFFUL );
-                             /* Return Max/Min Int32 if division overflow. */
-                             /* This includes division by zero!            */
-    q = 0;
-    for ( i = 0; i < 32; i++ )
-    {
-      r <<= 1;
-      q <<= 1;
-      r  |= lo >> 31;
-
-      if ( r >= (FT_UInt32)y )
-      {
-        r -= y;
-        q |= 1;
-      }
-      lo <<= 1;
-    }
-
-    return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
-  }
-
-#else /* 0 */
-
-  FT_EXPORT_DEF( FT_Int32 )
-  FT_Div64by32( FT_Int64*  x,
-                FT_Int32   y )
-  {
-    FT_Int32   s;
-    FT_UInt32  q;
-
-
-    s  = x->hi;
-    if ( s < 0 )
-    {
-      x->lo = (FT_UInt32)-(FT_Int32)x->lo;
-      x->hi = ~x->hi + !x->lo;
-    }
-    s ^= y;  y = FT_ABS( y );
-
-    /* Shortcut */
-    if ( x->hi == 0 )
-    {
-      if ( y > 0 )
-        q = ( x->lo + ( y >> 1 ) ) / y;
-      else
-        q = 0x7FFFFFFFL;
-
-      return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
+      FT_Add64( &temp, &temp2, &temp );
+      q = (FT_Long)ft_div64by32( temp.hi, temp.lo, b );
     }
 
-    q = ft_div64by32( x->hi, x->lo, y );
-
-    return ( s < 0 ? -(FT_Int32)q : (FT_Int32)q );
+    return s < 0 ? -q : q;
   }
 
-#endif /* 0 */
-
-#endif /* 0 */
-
 
 #endif /* FT_LONG64 */
 
@@ -943,55 +859,40 @@
                      FT_Pos  out_x,
                      FT_Pos  out_y )
   {
-    FT_Pos  ax = in_x;
-    FT_Pos  ay = in_y;
-
-    FT_Pos  d_in, d_out, d_corner;
-
-
-    /* We approximate the Euclidean metric (sqrt(x^2 + y^2)) with */
-    /* the Taxicab metric (|x| + |y|), which can be computed much */
-    /* faster.  If one of the two vectors is much longer than the */
-    /* other one, the direction of the shorter vector doesn't     */
-    /* influence the result any more.                             */
-    /*                                                            */
-    /*                 corner                                     */
-    /*       x---------------------------x                        */
-    /*        \                      /                            */
-    /*         \                /                                 */
-    /*      in  \          /  out                                 */
-    /*           \    /                                           */
-    /*            o                                               */
-    /*              Point                                         */
-    /*                                                            */
-
-    if ( ax < 0 )
-      ax = -ax;
-    if ( ay < 0 )
-      ay = -ay;
-    d_in = ax + ay;  /* d_in = || in || */
-
-    ax = out_x;
-    if ( ax < 0 )
-      ax = -ax;
-    ay = out_y;
-    if ( ay < 0 )
-      ay = -ay;
-    d_out = ax + ay;  /* d_out = || out || */
-
-    ax = out_x + in_x;
-    if ( ax < 0 )
-      ax = -ax;
-    ay = out_y + in_y;
-    if ( ay < 0 )
-      ay = -ay;
-    d_corner = ax + ay;  /* d_corner = || in + out || */
+    FT_Pos  ax = in_x + out_x;
+    FT_Pos  ay = in_y + out_y;
+
+    FT_Pos  d_in, d_out, d_hypot;
+
+
+    /* The idea of this function is to compare the length of the */
+    /* hypotenuse with the `in' and `out' length.  The `corner'  */
+    /* represented by `in' and `out' is flat if the hypotenuse's */
+    /* length isn't too large.                                   */
+    /*                                                           */
+    /* This approach has the advantage that the angle between    */
+    /* `in' and `out' is not checked.  In case one of the two    */
+    /* vectors is `dominant', this is, much larger than the      */
+    /* other vector, we thus always have a flat corner.          */
+    /*                                                           */
+    /*                hypotenuse                                 */
+    /*       x---------------------------x                       */
+    /*        \                      /                           */
+    /*         \                /                                */
+    /*      in  \          /  out                                */
+    /*           \    /                                          */
+    /*            o                                              */
+    /*              Point                                        */
+
+    d_in    = FT_HYPOT(  in_x,  in_y );
+    d_out   = FT_HYPOT( out_x, out_y );
+    d_hypot = FT_HYPOT(    ax,    ay );
 
     /* now do a simple length comparison: */
     /*                                    */
-    /*   d_in + d_out < 17/16 d_corner    */
+    /*   d_in + d_out < 17/16 d_hypot     */
 
-    return ( d_in + d_out - d_corner ) < ( d_corner >> 4 );
+    return ( d_in + d_out - d_hypot ) < ( d_hypot >> 4 );
   }