diff options
Diffstat (limited to 'mesalib/src/mesa')
| -rw-r--r-- | mesalib/src/mesa/drivers/common/meta.c | 3 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/extensions.c | 1 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/fbobject.c | 13 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/formats.c | 30 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/formats.h | 3 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/image.c | 22 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/mipmap.c | 96 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/pack.c | 153 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texfetch.c | 16 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texfetch_tmp.h | 42 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texformat.c | 20 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texparam.c | 8 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texrender.c | 7 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texstore.c | 110 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/transformfeedback.c | 18 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/varray.c | 23 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/register_allocate.c | 972 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_drawpixels.c | 8 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_extensions.c | 19 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_format.c | 24 | 
20 files changed, 1094 insertions, 494 deletions
| diff --git a/mesalib/src/mesa/drivers/common/meta.c b/mesalib/src/mesa/drivers/common/meta.c index 2b8b45bce..06d4337f8 100644 --- a/mesalib/src/mesa/drivers/common/meta.c +++ b/mesalib/src/mesa/drivers/common/meta.c @@ -1871,7 +1871,8 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,         * just going for the matching set of channels, in floating
         * point.
         */
 -      if (ctx->Color.ClampFragmentColor != GL_TRUE)
 +      if (ctx->Color.ClampFragmentColor != GL_TRUE &&
 +	  ctx->Extensions.ARB_texture_float)
  	 texIntFormat = GL_RGBA32F;
     }
     else if (_mesa_is_stencil_format(format)) {
 diff --git a/mesalib/src/mesa/main/extensions.c b/mesalib/src/mesa/main/extensions.c index 109cadd95..d31477731 100644 --- a/mesalib/src/mesa/main/extensions.c +++ b/mesalib/src/mesa/main/extensions.c @@ -502,6 +502,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)     ctx->Extensions.EXT_texture_env_dot3 = GL_TRUE;
     ctx->Extensions.EXT_texture_mirror_clamp = GL_TRUE;
     ctx->Extensions.EXT_texture_lod_bias = GL_TRUE;
 +   ctx->Extensions.EXT_texture_shared_exponent = GL_TRUE;
  #if FEATURE_EXT_texture_sRGB
     ctx->Extensions.EXT_texture_sRGB = GL_TRUE;
     ctx->Extensions.EXT_texture_sRGB_decode = GL_TRUE;
 diff --git a/mesalib/src/mesa/main/fbobject.c b/mesalib/src/mesa/main/fbobject.c index a961e68cb..68a141d29 100644 --- a/mesalib/src/mesa/main/fbobject.c +++ b/mesalib/src/mesa/main/fbobject.c @@ -418,6 +418,15 @@ _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)           case GL_RG:
              fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
              return;
 +         case GL_RGB:
 +            switch (rb->Format) {
 +            case MESA_FORMAT_RGB9_E5_FLOAT:
 +               fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
 +               return;
 +            default:;
 +            }
 +            break;
 +
           default:
              /* render buffer format is supported by software rendering */
              ;
 @@ -1175,6 +1184,10 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat)     case GL_INTENSITY32F_ARB:
        return ctx->Extensions.ARB_texture_float &&
               ctx->Extensions.ARB_framebuffer_object ? GL_INTENSITY : 0;
 +   case GL_RGB9_E5:
 +      return ctx->Extensions.EXT_texture_shared_exponent ? GL_RGB : 0;
 +   case GL_R11F_G11F_B10F:
 +      return ctx->Extensions.EXT_packed_float ? GL_RGB : 0;
     /* XXX add integer formats eventually */
     default:
        return 0;
 diff --git a/mesalib/src/mesa/main/formats.c b/mesalib/src/mesa/main/formats.c index 5ae8e2e29..f0f30fe0a 100644 --- a/mesalib/src/mesa/main/formats.c +++ b/mesalib/src/mesa/main/formats.c @@ -1072,7 +1072,25 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] =        0, 0, 0, 0,
        0, 16, 0, 0, 0,
        1, 1, 2
 -   }
 +   },
 +   {
 +      MESA_FORMAT_RGB9_E5_FLOAT,
 +      "MESA_FORMAT_RGB9_E5",
 +      GL_RGB,
 +      GL_FLOAT,
 +      9, 9, 9, 0,
 +      0, 0, 0, 0, 0,
 +      1, 1, 4
 +   },
 +   {
 +      MESA_FORMAT_R11_G11_B10_FLOAT,
 +      "MESA_FORMAT_R11_G11_B10_FLOAT",
 +      GL_RGB,
 +      GL_FLOAT,
 +      11, 11, 10, 0,
 +      0, 0, 0, 0, 0,
 +      1, 1, 4
 +   },
  };
 @@ -1803,6 +1821,16 @@ _mesa_format_to_type_and_comps(gl_format format,        *comps = 4;
        return;
 +   case MESA_FORMAT_RGB9_E5_FLOAT:
 +      *datatype = GL_UNSIGNED_INT_5_9_9_9_REV;
 +      *comps = 3;
 +      return;
 +
 +   case MESA_FORMAT_R11_G11_B10_FLOAT:
 +      *datatype = GL_UNSIGNED_INT_10F_11F_11F_REV;
 +      *comps = 3;
 +      return;
 +
     case MESA_FORMAT_COUNT:
        assert(0);
        return;
 diff --git a/mesalib/src/mesa/main/formats.h b/mesalib/src/mesa/main/formats.h index 492eef917..c9fd0baab 100644 --- a/mesalib/src/mesa/main/formats.h +++ b/mesalib/src/mesa/main/formats.h @@ -206,6 +206,9 @@ typedef enum     MESA_FORMAT_SIGNED_AL1616,     /* AAAA AAAA AAAA AAAA LLLL LLLL LLLL LLLL */
     MESA_FORMAT_SIGNED_I16,        /*                     IIII IIII IIII IIII */
 +   MESA_FORMAT_RGB9_E5_FLOAT,
 +   MESA_FORMAT_R11_G11_B10_FLOAT,
 +
     MESA_FORMAT_COUNT
  } gl_format;
 diff --git a/mesalib/src/mesa/main/image.c b/mesalib/src/mesa/main/image.c index 4bbbbdb37..a5cd5ac03 100644 --- a/mesalib/src/mesa/main/image.c +++ b/mesalib/src/mesa/main/image.c @@ -82,6 +82,8 @@ _mesa_type_is_packed(GLenum type)     case GL_UNSIGNED_SHORT_8_8_MESA:
     case GL_UNSIGNED_SHORT_8_8_REV_MESA:
     case GL_UNSIGNED_INT_24_8_EXT:
 +   case GL_UNSIGNED_INT_5_9_9_9_REV:
 +   case GL_UNSIGNED_INT_10F_11F_11F_REV:
        return GL_TRUE;
     }
 @@ -222,6 +224,10 @@ _mesa_sizeof_packed_type( GLenum type )           return sizeof(GLushort);      
        case GL_UNSIGNED_INT_24_8_EXT:
           return sizeof(GLuint);
 +      case GL_UNSIGNED_INT_5_9_9_9_REV:
 +         return sizeof(GLuint);
 +      case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +         return sizeof(GLuint);
        default:
           return -1;
     }
 @@ -363,6 +369,16 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type )              return sizeof(GLuint);
           else
              return -1;
 +      case GL_UNSIGNED_INT_5_9_9_9_REV:
 +         if (format == GL_RGB)
 +            return sizeof(GLuint);
 +         else
 +            return -1;
 +      case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +         if (format == GL_RGB)
 +            return sizeof(GLuint);
 +         else
 +            return -1;
        default:
           return -1;
     }
 @@ -458,6 +474,10 @@ _mesa_is_legal_format_and_type(const struct gl_context *ctx,                 return GL_TRUE;
              case GL_HALF_FLOAT_ARB:
                 return ctx->Extensions.ARB_half_float_pixel;
 +            case GL_UNSIGNED_INT_5_9_9_9_REV:
 +               return ctx->Extensions.EXT_texture_shared_exponent;
 +            case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +               return ctx->Extensions.EXT_packed_float;
              default:
                 return GL_FALSE;
           }
 @@ -821,6 +841,8 @@ _mesa_is_color_format(GLenum format)        case GL_INTENSITY_SNORM:
        case GL_INTENSITY8_SNORM:
        case GL_INTENSITY16_SNORM:
 +      case GL_RGB9_E5:
 +      case GL_R11F_G11F_B10F:
           return GL_TRUE;
        case GL_YCBCR_MESA:  /* not considered to be RGB */
           /* fall-through */
 diff --git a/mesalib/src/mesa/main/mipmap.c b/mesalib/src/mesa/main/mipmap.c index b0b0c0c13..ed93cbc6c 100644 --- a/mesalib/src/mesa/main/mipmap.c +++ b/mesalib/src/mesa/main/mipmap.c @@ -34,6 +34,9 @@  #include "teximage.h"
  #include "texstore.h"
  #include "image.h"
 +#include "macros.h"
 +#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 +#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
 @@ -665,6 +668,44 @@ do_row(GLenum datatype, GLuint comps, GLint srcWidth,        }
     }
 +   else if (datatype == GL_UNSIGNED_INT_5_9_9_9_REV && comps == 3) {
 +      GLuint i, j, k;
 +      const GLuint *rowA = (const GLuint*) srcRowA;
 +      const GLuint *rowB = (const GLuint*) srcRowB;
 +      GLuint *dst = (GLuint*)dstRow;
 +      GLfloat res[3], rowAj[3], rowBj[3], rowAk[3], rowBk[3];
 +      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
 +           i++, j += colStride, k += colStride) {
 +         rgb9e5_to_float3(rowA[j], rowAj);
 +         rgb9e5_to_float3(rowB[j], rowBj);
 +         rgb9e5_to_float3(rowA[k], rowAk);
 +         rgb9e5_to_float3(rowB[k], rowBk);
 +         res[0] = (rowAj[0] + rowAk[0] + rowBj[0] + rowBk[0]) * 0.25F;
 +         res[1] = (rowAj[1] + rowAk[1] + rowBj[1] + rowBk[1]) * 0.25F;
 +         res[2] = (rowAj[2] + rowAk[2] + rowBj[2] + rowBk[2]) * 0.25F;
 +         dst[i] = float3_to_rgb9e5(res);
 +      }
 +   }
 +
 +   else if (datatype == GL_UNSIGNED_INT_10F_11F_11F_REV && comps == 3) {
 +      GLuint i, j, k;
 +      const GLuint *rowA = (const GLuint*) srcRowA;
 +      const GLuint *rowB = (const GLuint*) srcRowB;
 +      GLuint *dst = (GLuint*)dstRow;
 +      GLfloat res[3], rowAj[3], rowBj[3], rowAk[3], rowBk[3];
 +      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
 +           i++, j += colStride, k += colStride) {
 +         r11g11b10f_to_float3(rowA[j], rowAj);
 +         r11g11b10f_to_float3(rowB[j], rowBj);
 +         r11g11b10f_to_float3(rowA[k], rowAk);
 +         r11g11b10f_to_float3(rowB[k], rowBk);
 +         res[0] = (rowAj[0] + rowAk[0] + rowBj[0] + rowBk[0]) * 0.25F;
 +         res[1] = (rowAj[1] + rowAk[1] + rowBj[1] + rowBk[1]) * 0.25F;
 +         res[2] = (rowAj[2] + rowAk[2] + rowBj[2] + rowBk[2]) * 0.25F;
 +         dst[i] = float3_to_r11g11b10f(res);
 +      }
 +   }
 +
     else {
        _mesa_problem(NULL, "bad format in do_row()");
     }
 @@ -1245,6 +1286,61 @@ do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth,           dst[i] = (a << 30) | (b << 20) | (g << 10) | r;
        }
     }
 +
 +   else if (datatype == GL_UNSIGNED_INT_5_9_9_9_REV && comps == 3) {
 +      DECLARE_ROW_POINTERS0(GLuint);
 +
 +      GLfloat res[3];
 +      GLfloat rowAj[3], rowBj[3], rowCj[3], rowDj[3];
 +      GLfloat rowAk[3], rowBk[3], rowCk[3], rowDk[3];
 +
 +      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
 +           i++, j += colStride, k += colStride) {
 +         rgb9e5_to_float3(rowA[j], rowAj);
 +         rgb9e5_to_float3(rowB[j], rowBj);
 +         rgb9e5_to_float3(rowC[j], rowCj);
 +         rgb9e5_to_float3(rowD[j], rowDj);
 +         rgb9e5_to_float3(rowA[k], rowAk);
 +         rgb9e5_to_float3(rowB[k], rowBk);
 +         rgb9e5_to_float3(rowC[k], rowCk);
 +         rgb9e5_to_float3(rowD[k], rowDk);
 +         res[0] = (rowAj[0] + rowAk[0] + rowBj[0] + rowBk[0] +
 +                   rowCj[0] + rowCk[0] + rowDj[0] + rowDk[0]) * 0.125F;
 +         res[1] = (rowAj[1] + rowAk[1] + rowBj[1] + rowBk[1] +
 +                   rowCj[1] + rowCk[1] + rowDj[1] + rowDk[1]) * 0.125F;
 +         res[2] = (rowAj[2] + rowAk[2] + rowBj[2] + rowBk[2] +
 +                   rowCj[2] + rowCk[2] + rowDj[2] + rowDk[2]) * 0.125F;
 +         dst[i] = float3_to_rgb9e5(res);
 +      }
 +   }
 +
 +   else if (datatype == GL_UNSIGNED_INT_10F_11F_11F_REV && comps == 3) {
 +      DECLARE_ROW_POINTERS0(GLuint);
 +
 +      GLfloat res[3];
 +      GLfloat rowAj[3], rowBj[3], rowCj[3], rowDj[3];
 +      GLfloat rowAk[3], rowBk[3], rowCk[3], rowDk[3];
 +
 +      for (i = j = 0, k = k0; i < (GLuint) dstWidth;
 +           i++, j += colStride, k += colStride) {
 +         r11g11b10f_to_float3(rowA[j], rowAj);
 +         r11g11b10f_to_float3(rowB[j], rowBj);
 +         r11g11b10f_to_float3(rowC[j], rowCj);
 +         r11g11b10f_to_float3(rowD[j], rowDj);
 +         r11g11b10f_to_float3(rowA[k], rowAk);
 +         r11g11b10f_to_float3(rowB[k], rowBk);
 +         r11g11b10f_to_float3(rowC[k], rowCk);
 +         r11g11b10f_to_float3(rowD[k], rowDk);
 +         res[0] = (rowAj[0] + rowAk[0] + rowBj[0] + rowBk[0] +
 +                   rowCj[0] + rowCk[0] + rowDj[0] + rowDk[0]) * 0.125F;
 +         res[1] = (rowAj[1] + rowAk[1] + rowBj[1] + rowBk[1] +
 +                   rowCj[1] + rowCk[1] + rowDj[1] + rowDk[1]) * 0.125F;
 +         res[2] = (rowAj[2] + rowAk[2] + rowBj[2] + rowBk[2] +
 +                   rowCj[2] + rowCk[2] + rowDj[2] + rowDk[2]) * 0.125F;
 +         dst[i] = float3_to_r11g11b10f(res);
 +      }
 +   }
 +
     else {
        _mesa_problem(NULL, "bad format in do_row()");
     }
 diff --git a/mesalib/src/mesa/main/pack.c b/mesalib/src/mesa/main/pack.c index 620c8a5cd..e6734bbbc 100644 --- a/mesalib/src/mesa/main/pack.c +++ b/mesalib/src/mesa/main/pack.c @@ -37,6 +37,9 @@  #include "mtypes.h"
  #include "pack.h"
  #include "pixeltransfer.h"
 +#include "imports.h"
 +#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 +#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
  /**
 @@ -1892,6 +1895,22 @@ _mesa_pack_rgba_span_float(struct gl_context *ctx, GLuint n, GLfloat rgba[][4],              }
           }
           break;
 +      case GL_UNSIGNED_INT_5_9_9_9_REV:
 +         {
 +            GLuint *dst = (GLuint *) dstAddr;
 +            for (i = 0; i < n; i++) {
 +               dst[i] = float3_to_rgb9e5(rgba[i]);
 +            }
 +         }
 +         break;
 +      case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +         {
 +            GLuint *dst = (GLuint *) dstAddr;
 +            for (i = 0; i < n; i++) {
 +               dst[i] = float3_to_r11g11b10f(rgba[i]);
 +            }
 +         }
 +         break;
        default:
           _mesa_problem(ctx, "bad type in _mesa_pack_rgba_span_float");
           return;
 @@ -2330,7 +2349,9 @@ extract_float_rgba(GLuint n, GLfloat rgba[][4],            srcType == GL_UNSIGNED_INT_8_8_8_8 ||
            srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
            srcType == GL_UNSIGNED_INT_10_10_10_2 ||
 -          srcType == GL_UNSIGNED_INT_2_10_10_10_REV);
 +          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
 +          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
 +          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
     get_component_mapping(srcFormat,
                           &rSrc, &gSrc, &bSrc, &aSrc,
 @@ -2800,6 +2821,62 @@ extract_float_rgba(GLuint n, GLfloat rgba[][4],              }
           }
           break;
 +      case GL_UNSIGNED_INT_5_9_9_9_REV:
 +         if (swapBytes) {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            GLfloat f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               SWAP4BYTE(p);
 +               rgb9e5_to_float3(p, f);
 +               rgba[i][rDst] = f[0];
 +               rgba[i][gDst] = f[1];
 +               rgba[i][bDst] = f[2];
 +               rgba[i][aDst] = 1.0F;
 +            }
 +         }
 +         else {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            GLfloat f[3];
 +            for (i = 0; i < n; i ++) {
 +               rgb9e5_to_float3(uisrc[i], f);
 +               rgba[i][rDst] = f[0];
 +               rgba[i][gDst] = f[1];
 +               rgba[i][bDst] = f[2];
 +               rgba[i][aDst] = 1.0F;
 +            }
 +         }
 +         break;
 +      case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +         if (swapBytes) {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            GLfloat f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               SWAP4BYTE(p);
 +               r11g11b10f_to_float3(p, f);
 +               rgba[i][rDst] = f[0];
 +               rgba[i][gDst] = f[1];
 +               rgba[i][bDst] = f[2];
 +               rgba[i][aDst] = 1.0F;
 +            }
 +         }
 +         else {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            GLfloat f[3];
 +            for (i = 0; i < n; i ++) {
 +               r11g11b10f_to_float3(uisrc[i], f);
 +               rgba[i][rDst] = f[0];
 +               rgba[i][gDst] = f[1];
 +               rgba[i][bDst] = f[2];
 +               rgba[i][aDst] = 1.0F;
 +            }
 +         }
 +         break;
        default:
           _mesa_problem(NULL, "bad srcType in extract float data");
           break;
 @@ -2902,7 +2979,9 @@ extract_uint_rgba(GLuint n, GLuint rgba[][4],            srcType == GL_UNSIGNED_INT_8_8_8_8 ||
            srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
            srcType == GL_UNSIGNED_INT_10_10_10_2 ||
 -          srcType == GL_UNSIGNED_INT_2_10_10_10_REV);
 +          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
 +          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
 +          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
     get_component_mapping(srcFormat,
                           &rSrc, &gSrc, &bSrc, &aSrc,
 @@ -3266,6 +3345,64 @@ extract_uint_rgba(GLuint n, GLuint rgba[][4],              }
           }
           break;
 +      case GL_UNSIGNED_INT_5_9_9_9_REV:
 +         if (swapBytes) {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            float f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               SWAP4BYTE(p);
 +               rgb9e5_to_float3(p, f);
 +               rgba[i][rDst] = clamp_float_to_uint(f[0]);
 +               rgba[i][gDst] = clamp_float_to_uint(f[1]);
 +               rgba[i][bDst] = clamp_float_to_uint(f[2]);
 +               rgba[i][aDst] = 1;
 +            }
 +         }
 +         else {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            float f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               rgb9e5_to_float3(p, f);
 +               rgba[i][rDst] = clamp_float_to_uint(f[0]);
 +               rgba[i][gDst] = clamp_float_to_uint(f[1]);
 +               rgba[i][bDst] = clamp_float_to_uint(f[2]);
 +               rgba[i][aDst] = 1;
 +            }
 +         }
 +         break;
 +      case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +         if (swapBytes) {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            float f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               SWAP4BYTE(p);
 +               r11g11b10f_to_float3(p, f);
 +               rgba[i][rDst] = clamp_float_to_uint(f[0]);
 +               rgba[i][gDst] = clamp_float_to_uint(f[1]);
 +               rgba[i][bDst] = clamp_float_to_uint(f[2]);
 +               rgba[i][aDst] = 1;
 +            }
 +         }
 +         else {
 +            const GLuint *uisrc = (const GLuint *) src;
 +            GLuint i;
 +            float f[3];
 +            for (i = 0; i < n; i ++) {
 +               GLuint p = uisrc[i];
 +               r11g11b10f_to_float3(p, f);
 +               rgba[i][rDst] = clamp_float_to_uint(f[0]);
 +               rgba[i][gDst] = clamp_float_to_uint(f[1]);
 +               rgba[i][bDst] = clamp_float_to_uint(f[2]);
 +               rgba[i][aDst] = 1;
 +            }
 +         }
 +         break;
        default:
           _mesa_problem(NULL, "bad srcType in extract uint data");
           break;
 @@ -3345,7 +3482,9 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx,            srcType == GL_UNSIGNED_INT_8_8_8_8 ||
            srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
            srcType == GL_UNSIGNED_INT_10_10_10_2 ||
 -          srcType == GL_UNSIGNED_INT_2_10_10_10_REV);
 +          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
 +          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
 +          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
     /* Try simple cases first */
     if (transferOps == 0) {
 @@ -3667,7 +3806,9 @@ _mesa_unpack_color_span_float( struct gl_context *ctx,            srcType == GL_UNSIGNED_INT_8_8_8_8 ||
            srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
            srcType == GL_UNSIGNED_INT_10_10_10_2 ||
 -          srcType == GL_UNSIGNED_INT_2_10_10_10_REV);
 +          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
 +          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
 +          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
     /* general solution, no special cases, yet */
     {
 @@ -3873,7 +4014,9 @@ _mesa_unpack_color_span_uint(struct gl_context *ctx,            srcType == GL_UNSIGNED_INT_8_8_8_8 ||
            srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
            srcType == GL_UNSIGNED_INT_10_10_10_2 ||
 -          srcType == GL_UNSIGNED_INT_2_10_10_10_REV);
 +          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
 +          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
 +          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
     /* Extract image data as uint[4] pixels */
 diff --git a/mesalib/src/mesa/main/texfetch.c b/mesalib/src/mesa/main/texfetch.c index cb2716619..ab3abf657 100644 --- a/mesalib/src/mesa/main/texfetch.c +++ b/mesalib/src/mesa/main/texfetch.c @@ -41,6 +41,8 @@  #include "texcompress_rgtc.h"
  #include "texfetch.h"
  #include "teximage.h"
 +#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 +#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
  /**
 @@ -898,6 +900,20 @@ texfetch_funcs[MESA_FORMAT_COUNT] =        fetch_texel_3d_signed_i16,
        store_texel_signed_i16
     },
 +   {
 +      MESA_FORMAT_RGB9_E5_FLOAT,
 +      fetch_texel_1d_rgb9_e5,
 +      fetch_texel_2d_rgb9_e5,
 +      fetch_texel_3d_rgb9_e5,
 +      store_texel_rgb9_e5
 +   },
 +   {
 +      MESA_FORMAT_R11_G11_B10_FLOAT,
 +      fetch_texel_1d_r11_g11_b10f,
 +      fetch_texel_2d_r11_g11_b10f,
 +      fetch_texel_3d_r11_g11_b10f,
 +      store_texel_r11_g11_b10f
 +   }
  };
 diff --git a/mesalib/src/mesa/main/texfetch_tmp.h b/mesalib/src/mesa/main/texfetch_tmp.h index 57bb94c68..278becc11 100644 --- a/mesalib/src/mesa/main/texfetch_tmp.h +++ b/mesalib/src/mesa/main/texfetch_tmp.h @@ -2332,6 +2332,48 @@ static void store_texel_s8_z24(struct gl_texture_image *texImage,  #endif
 +/* MESA_FORMAT_RGB9_E5 ******************************************************/
 +/* Fetch one texel: read the GLuint that TEXEL_ADDR yields for (i, j, k), convert it with rgb9e5_to_float3() into texel[], then set texel[ACOMP] to 1.0. */
 +static void FETCH(rgb9_e5)( const struct gl_texture_image *texImage,
 +                            GLint i, GLint j, GLint k, GLfloat *texel )
 +{
 +   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
 +   rgb9e5_to_float3(*src, texel); /* fills the colour components of texel[] */
 +   texel[ACOMP] = 1.0F; /* the format has no alpha bits, so alpha is a constant */
 +}
 +/* Store one texel: treat texel as GLfloat data, pack it with float3_to_rgb9e5() and write the result as a single GLuint at (i, j, k). Only defined when DIM == 3. */
 +#if DIM == 3
 +static void store_texel_rgb9_e5(struct gl_texture_image *texImage,
 +                                GLint i, GLint j, GLint k, const void *texel)
 +{
 +   const GLfloat *src = (const GLfloat *) texel; /* caller's data is read as floats */
 +   GLuint *dst = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
 +   *dst = float3_to_rgb9e5(src);
 +}
 +#endif /* DIM == 3 */
 +
 +
 +/* MESA_FORMAT_R11_G11_B10_FLOAT *********************************************/
 +/* Fetch one texel: read the GLuint that TEXEL_ADDR yields for (i, j, k), convert it with r11g11b10f_to_float3() into texel[], then set texel[ACOMP] to 1.0. */
 +static void FETCH(r11_g11_b10f)( const struct gl_texture_image *texImage,
 +                                 GLint i, GLint j, GLint k, GLfloat *texel )
 +{
 +   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
 +   r11g11b10f_to_float3(*src, texel); /* fills the colour components of texel[] */
 +   texel[ACOMP] = 1.0F; /* the format has no alpha bits, so alpha is a constant */
 +}
 +/* Store one texel: treat texel as GLfloat data, pack it with float3_to_r11g11b10f() and write the result as a single GLuint at (i, j, k). Only defined when DIM == 3. */
 +#if DIM == 3
 +static void store_texel_r11_g11_b10f(struct gl_texture_image *texImage,
 +                                     GLint i, GLint j, GLint k, const void *texel)
 +{
 +   const GLfloat *src = (const GLfloat *) texel; /* caller's data is read as floats */
 +   GLuint *dst = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
 +   *dst = float3_to_r11g11b10f(src);
 +}
 +#endif /* DIM == 3 */
 +
 +
  #undef TEXEL_ADDR
  #undef DIM
  #undef FETCH
 diff --git a/mesalib/src/mesa/main/texformat.c b/mesalib/src/mesa/main/texformat.c index 24c4f1a92..41d9e9599 100644 --- a/mesalib/src/mesa/main/texformat.c +++ b/mesalib/src/mesa/main/texformat.c @@ -382,6 +382,26 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat,        }
     }
 +   if (ctx->Extensions.EXT_texture_shared_exponent) {
 +      switch (internalFormat) {
 +         case GL_RGB9_E5:
 +            ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_RGB9_E5_FLOAT]);
 +            return MESA_FORMAT_RGB9_E5_FLOAT;
 +         default:
 +            ; /* fallthrough */
 +      }
 +   }
 +
 +   if (ctx->Extensions.EXT_packed_float) {
 +      switch (internalFormat) {
 +         case GL_R11F_G11F_B10F:
 +            ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_R11_G11_B10_FLOAT]);
 +            return MESA_FORMAT_R11_G11_B10_FLOAT;
 +         default:
 +            ; /* fallthrough */
 +      }
 +   }
 +
     if (ctx->Extensions.EXT_packed_depth_stencil) {
        switch (internalFormat) {
           case GL_DEPTH_STENCIL_EXT:
 diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c index c6683980a..74e1f4a1b 100644 --- a/mesalib/src/mesa/main/texparam.c +++ b/mesalib/src/mesa/main/texparam.c @@ -979,11 +979,9 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,           }
           break;
        case GL_TEXTURE_SHARED_SIZE:
 -         if (ctx->VersionMajor >= 3) {
 -            /* XXX return number of exponent bits for shared exponent texture
 -             * formats, like GL_RGB9_E5.
 -             */
 -            *params = 0;
 +         if (ctx->VersionMajor >= 3 ||
 +             ctx->Extensions.EXT_texture_shared_exponent) {
 +            *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
           }
           else {
              goto invalid_pname;
 diff --git a/mesalib/src/mesa/main/texrender.c b/mesalib/src/mesa/main/texrender.c index 2766bd6c5..4bcef9899 100644 --- a/mesalib/src/mesa/main/texrender.c +++ b/mesalib/src/mesa/main/texrender.c @@ -530,6 +530,7 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att)  {
     struct texture_renderbuffer *trb
        = (struct texture_renderbuffer *) att->Renderbuffer;
 +   GLuint unused;
     (void) ctx;
     ASSERT(trb);
 @@ -602,8 +603,10 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att)        trb->Base._BaseFormat = GL_RGBA;
        break;
     default:
 -      trb->Base.DataType = CHAN_TYPE;
 -      trb->Base._BaseFormat = GL_RGBA;
 +      _mesa_format_to_type_and_comps(trb->TexImage->TexFormat,
 +                                     &trb->Base.DataType, &unused);
 +      trb->Base._BaseFormat =
 +         _mesa_base_fbo_format(ctx, trb->TexImage->InternalFormat);
     }
     trb->Base.Data = trb->TexImage->Data;
  }
 diff --git a/mesalib/src/mesa/main/texstore.c b/mesalib/src/mesa/main/texstore.c index bf2b6b6e4..ea116a011 100644 --- a/mesalib/src/mesa/main/texstore.c +++ b/mesalib/src/mesa/main/texstore.c @@ -70,6 +70,8 @@  #include "teximage.h"
  #include "texstore.h"
  #include "enums.h"
 +#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 +#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
  enum {
 @@ -4176,6 +4178,111 @@ _mesa_texstore_sla8(TEXSTORE_PARAMS)  #endif /* FEATURE_EXT_texture_sRGB */
 +static GLboolean
 +_mesa_texstore_rgb9_e5(TEXSTORE_PARAMS)
 +{
 +   const GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
 +   /* Texstore for MESA_FORMAT_RGB9_E5_FLOAT: writes the source image into dstAddr as packed GLuint texels. Returns GL_FALSE only when the temporary float image cannot be created, GL_TRUE otherwise. */
 +   ASSERT(dstFormat == MESA_FORMAT_RGB9_E5_FLOAT);
 +   ASSERT(baseInternalFormat == GL_RGB);
 +
 +   if (!ctx->_ImageTransferState &&
 +       !srcPacking->SwapBytes &&
 +       srcFormat == GL_RGB &&
 +       srcType == GL_UNSIGNED_INT_5_9_9_9_REV) {
 +      /* simple memcpy path: no transfer state, no byte swapping, and the source is already GL_RGB / GL_UNSIGNED_INT_5_9_9_9_REV */
 +      memcpy_texture(ctx, dims,
 +                     dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
 +                     dstRowStride,
 +                     dstImageOffsets,
 +                     srcWidth, srcHeight, srcDepth, srcFormat, srcType,
 +                     srcAddr, srcPacking);
 +   }
 +   else {
 +      /* general path: build a temporary float image (indexed below as 3 floats per texel), then pack each texel with float3_to_rgb9e5() */
 +      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
 +                                                 baseInternalFormat,
 +                                                 baseFormat,
 +                                                 srcWidth, srcHeight, srcDepth,
 +                                                 srcFormat, srcType, srcAddr,
 +                                                 srcPacking,
 +                                                 ctx->_ImageTransferState);
 +      const GLfloat *srcRow = tempImage;
 +      GLint img, row, col;
 +      if (!tempImage)
 +         return GL_FALSE; /* nothing has been written to the destination yet */
 +      for (img = 0; img < srcDepth; img++) {
 +         GLubyte *dstRow = (GLubyte *) dstAddr
 +            + dstImageOffsets[dstZoffset + img] * 4 /* NOTE(review): presumably offsets are in texels, scaled by 4 bytes per texel - confirm */
 +            + dstYoffset * dstRowStride
 +            + dstXoffset * 4;
 +         for (row = 0; row < srcHeight; row++) {
 +            GLuint *dstUI = (GLuint*)dstRow;
 +            for (col = 0; col < srcWidth; col++) {
 +               dstUI[col] = float3_to_rgb9e5(&srcRow[col * 3]);
 +            }
 +            dstRow += dstRowStride; /* destination advances by the row stride in bytes */
 +            srcRow += srcWidth * 3; /* source rows are consecutive in the temp image, which is walked linearly across images too */
 +         }
 +      }
 +
 +      free((void *) tempImage); /* cast drops the const qualifier for free() */
 +   }
 +   return GL_TRUE;
 +}
 +
 +static GLboolean
 +_mesa_texstore_r11_g11_b10f(TEXSTORE_PARAMS)
 +{
 +   const GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
 +   /* Texstore for MESA_FORMAT_R11_G11_B10_FLOAT: writes the source image into dstAddr as packed GLuint texels. Returns GL_FALSE only when the temporary float image cannot be created, GL_TRUE otherwise. */
 +   ASSERT(dstFormat == MESA_FORMAT_R11_G11_B10_FLOAT);
 +   ASSERT(baseInternalFormat == GL_RGB);
 +
 +   if (!ctx->_ImageTransferState &&
 +       !srcPacking->SwapBytes &&
 +       srcFormat == GL_RGB &&
 +       srcType == GL_UNSIGNED_INT_10F_11F_11F_REV) {
 +      /* simple memcpy path: no transfer state, no byte swapping, and the source is already GL_RGB / GL_UNSIGNED_INT_10F_11F_11F_REV */
 +      memcpy_texture(ctx, dims,
 +                     dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset,
 +                     dstRowStride,
 +                     dstImageOffsets,
 +                     srcWidth, srcHeight, srcDepth, srcFormat, srcType,
 +                     srcAddr, srcPacking);
 +   }
 +   else {
 +      /* general path: build a temporary float image (indexed below as 3 floats per texel), then pack each texel with float3_to_r11g11b10f() */
 +      const GLfloat *tempImage = _mesa_make_temp_float_image(ctx, dims,
 +                                                 baseInternalFormat,
 +                                                 baseFormat,
 +                                                 srcWidth, srcHeight, srcDepth,
 +                                                 srcFormat, srcType, srcAddr,
 +                                                 srcPacking,
 +                                                 ctx->_ImageTransferState);
 +      const GLfloat *srcRow = tempImage;
 +      GLint img, row, col;
 +      if (!tempImage)
 +         return GL_FALSE; /* nothing has been written to the destination yet */
 +      for (img = 0; img < srcDepth; img++) {
 +         GLubyte *dstRow = (GLubyte *) dstAddr
 +            + dstImageOffsets[dstZoffset + img] * 4 /* NOTE(review): presumably offsets are in texels, scaled by 4 bytes per texel - confirm */
 +            + dstYoffset * dstRowStride
 +            + dstXoffset * 4;
 +         for (row = 0; row < srcHeight; row++) {
 +            GLuint *dstUI = (GLuint*)dstRow;
 +            for (col = 0; col < srcWidth; col++) {
 +               dstUI[col] = float3_to_r11g11b10f(&srcRow[col * 3]);
 +            }
 +            dstRow += dstRowStride; /* destination advances by the row stride in bytes */
 +            srcRow += srcWidth * 3; /* source rows are consecutive in the temp image, which is walked linearly across images too */
 +         }
 +      }
 +
 +      free((void *) tempImage); /* cast drops the const qualifier for free() */
 +   }
 +   return GL_TRUE;
 +}
 @@ -4309,6 +4416,9 @@ texstore_funcs[MESA_FORMAT_COUNT] =     { MESA_FORMAT_SIGNED_L16, _mesa_texstore_snorm16 },
     { MESA_FORMAT_SIGNED_AL1616, _mesa_texstore_snorm1616 },
     { MESA_FORMAT_SIGNED_I16, _mesa_texstore_snorm16 },
 +
 +   { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 },
 +   { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f },
  };
 diff --git a/mesalib/src/mesa/main/transformfeedback.c b/mesalib/src/mesa/main/transformfeedback.c index 2ff262fc6..85c213552 100644 --- a/mesalib/src/mesa/main/transformfeedback.c +++ b/mesalib/src/mesa/main/transformfeedback.c @@ -42,7 +42,6 @@  #include "main/dispatch.h"
  #include "program/prog_parameter.h"
 -//#include "program/shader_api.h"
  #if FEATURE_EXT_transform_feedback
 @@ -507,7 +506,7 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer)     if (obj->Active) {
        _mesa_error(ctx, GL_INVALID_OPERATION,
 -                  "glBindBufferRange(transform feedback active)");
 +                  "glBindBufferBase(transform feedback active)");
        return;
     }
 @@ -555,7 +554,7 @@ _mesa_BindBufferOffsetEXT(GLenum target, GLuint index, GLuint buffer,     if (obj->Active) {
        _mesa_error(ctx, GL_INVALID_OPERATION,
 -                  "glBindBufferRange(transform feedback active)");
 +                  "glBindBufferOffsetEXT(transform feedback active)");
        return;
     }
 @@ -885,7 +884,7 @@ _mesa_ResumeTransformFeedback(void)     if (!obj->Active || !obj->Paused) {
        _mesa_error(ctx, GL_INVALID_OPERATION,
 -               "glPauseTransformFeedback(feedback not active or not paused)");
 +               "glResumeTransformFeedback(feedback not active or not paused)");
        return;
     }
 @@ -932,15 +931,4 @@ _mesa_DrawTransformFeedback(GLenum mode, GLuint name)  }
 -/*
 -XXX misc to do:
 -
 -glGet*() for
 -
 -GL_TRANSFORM_FEEDBACK_BUFFER_PAUSED
 -GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE
 -GL_TRANSFORM_FEEDBACK_BINDING
 -*/
 -
 -
  #endif /* FEATURE_EXT_transform_feedback */
 diff --git a/mesalib/src/mesa/main/varray.c b/mesalib/src/mesa/main/varray.c index 1f771a585..6b888b485 100644 --- a/mesalib/src/mesa/main/varray.c +++ b/mesalib/src/mesa/main/varray.c @@ -57,8 +57,8 @@  #define HALF_BIT             0x80
  #define FLOAT_BIT            0x100
  #define DOUBLE_BIT           0x200
 -#define FIXED_BIT            0x400
 -
 +#define FIXED_ES_BIT         0x400
 +#define FIXED_GL_BIT         0x800
  /** Convert GL datatype enum into a <type>_BIT value seen above */
 @@ -90,7 +90,7 @@ type_to_bit(const struct gl_context *ctx, GLenum type)     case GL_DOUBLE:
        return DOUBLE_BIT;
     case GL_FIXED:
 -      return FIXED_BIT;
 +      return ctx->API == API_OPENGL ? FIXED_GL_BIT : FIXED_ES_BIT;
     default:
        return 0;
     }
 @@ -130,7 +130,10 @@ update_array(struct gl_context *ctx,     if (ctx->API != API_OPENGLES && ctx->API != API_OPENGLES2) {
        /* fixed point arrays / data is only allowed with OpenGL ES 1.x/2.0 */
 -      legalTypesMask &= ~FIXED_BIT;
 +      legalTypesMask &= ~FIXED_ES_BIT;
 +   }
 +   if (!ctx->Extensions.ARB_ES2_compatibility) {
 +      legalTypesMask &= ~FIXED_GL_BIT;
     }
     typeBit = type_to_bit(ctx, type);
 @@ -198,7 +201,7 @@ void GLAPIENTRY  _mesa_VertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *ptr)
  {
     GLbitfield legalTypes = (SHORT_BIT | INT_BIT | FLOAT_BIT |
 -                            DOUBLE_BIT | HALF_BIT | FIXED_BIT);
 +                            DOUBLE_BIT | HALF_BIT | FIXED_ES_BIT);
     GET_CURRENT_CONTEXT(ctx);
     ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 @@ -217,7 +220,7 @@ _mesa_NormalPointer(GLenum type, GLsizei stride, const GLvoid *ptr )  {
     const GLbitfield legalTypes = (BYTE_BIT | SHORT_BIT | INT_BIT |
                                    HALF_BIT | FLOAT_BIT | DOUBLE_BIT |
 -                                  FIXED_BIT);
 +                                  FIXED_ES_BIT);
     GET_CURRENT_CONTEXT(ctx);
     ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 @@ -235,7 +238,7 @@ _mesa_ColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *ptr)                                    SHORT_BIT | UNSIGNED_SHORT_BIT |
                                    INT_BIT | UNSIGNED_INT_BIT |
                                    HALF_BIT | FLOAT_BIT | DOUBLE_BIT |
 -                                  FIXED_BIT);
 +                                  FIXED_ES_BIT);
     GET_CURRENT_CONTEXT(ctx);
     ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 @@ -299,7 +302,7 @@ _mesa_TexCoordPointer(GLint size, GLenum type, GLsizei stride,  {
     GLbitfield legalTypes = (SHORT_BIT | INT_BIT |
                              HALF_BIT | FLOAT_BIT | DOUBLE_BIT |
 -                            FIXED_BIT);
 +                            FIXED_ES_BIT);
     GET_CURRENT_CONTEXT(ctx);
     const GLuint unit = ctx->Array.ActiveTexture;
     ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 @@ -337,7 +340,7 @@ _mesa_EdgeFlagPointer(GLsizei stride, const GLvoid *ptr)  void GLAPIENTRY
  _mesa_PointSizePointer(GLenum type, GLsizei stride, const GLvoid *ptr)
  {
 -   const GLbitfield legalTypes = (FLOAT_BIT | FIXED_BIT);
 +   const GLbitfield legalTypes = (FLOAT_BIT | FIXED_ES_BIT);
     GET_CURRENT_CONTEXT(ctx);
     ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
 @@ -405,7 +408,7 @@ _mesa_VertexAttribPointerARB(GLuint index, GLint size, GLenum type,                                    SHORT_BIT | UNSIGNED_SHORT_BIT |
                                    INT_BIT | UNSIGNED_INT_BIT |
                                    HALF_BIT | FLOAT_BIT | DOUBLE_BIT |
 -                                  FIXED_BIT);
 +                                  FIXED_ES_BIT | FIXED_GL_BIT);
     GET_CURRENT_CONTEXT(ctx);
     ASSERT_OUTSIDE_BEGIN_END(ctx);
 diff --git a/mesalib/src/mesa/program/register_allocate.c b/mesalib/src/mesa/program/register_allocate.c index 95a9bde40..e78db24a4 100644 --- a/mesalib/src/mesa/program/register_allocate.c +++ b/mesalib/src/mesa/program/register_allocate.c @@ -1,455 +1,517 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - *    Eric Anholt <eric@anholt.net> - * - */ - -/** @file register_allocate.c - * - * Graph-coloring register allocator. 
- */ - -#include <ralloc.h> - -#include "main/imports.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "register_allocate.h" - -struct ra_reg { -   GLboolean *conflicts; -   unsigned int *conflict_list; -   unsigned int conflict_list_size; -   unsigned int num_conflicts; -}; - -struct ra_regs { -   struct ra_reg *regs; -   unsigned int count; - -   struct ra_class **classes; -   unsigned int class_count; -}; - -struct ra_class { -   GLboolean *regs; - -   /** -    * p_B in Runeson/Nyström paper. -    * -    * This is "how many regs are in the set." -    */ -   unsigned int p; - -   /** -    * q_B,C in Runeson/Nyström paper. -    */ -   unsigned int *q; -}; - -struct ra_node { -   GLboolean *adjacency; -   unsigned int *adjacency_list; -   unsigned int class; -   unsigned int adjacency_count; -   unsigned int reg; -   GLboolean in_stack; -   float spill_cost; -}; - -struct ra_graph { -   struct ra_regs *regs; -   /** -    * the variables that need register allocation. -    */ -   struct ra_node *nodes; -   unsigned int count; /**< count of nodes. 
*/ - -   unsigned int *stack; -   unsigned int stack_count; -}; - -struct ra_regs * -ra_alloc_reg_set(unsigned int count) -{ -   unsigned int i; -   struct ra_regs *regs; - -   regs = rzalloc(NULL, struct ra_regs); -   regs->count = count; -   regs->regs = rzalloc_array(regs, struct ra_reg, count); - -   for (i = 0; i < count; i++) { -      regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count); -      regs->regs[i].conflicts[i] = GL_TRUE; - -      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); -      regs->regs[i].conflict_list_size = 4; -      regs->regs[i].conflict_list[0] = i; -      regs->regs[i].num_conflicts = 1; -   } - -   return regs; -} - -static void -ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) -{ -   struct ra_reg *reg1 = ®s->regs[r1]; - -   if (reg1->conflict_list_size == reg1->num_conflicts) { -      reg1->conflict_list_size *= 2; -      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, -				     unsigned int, reg1->conflict_list_size); -   } -   reg1->conflict_list[reg1->num_conflicts++] = r2; -   reg1->conflicts[r2] = GL_TRUE; -} - -void -ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) -{ -   if (!regs->regs[r1].conflicts[r2]) { -      ra_add_conflict_list(regs, r1, r2); -      ra_add_conflict_list(regs, r2, r1); -   } -} - -unsigned int -ra_alloc_reg_class(struct ra_regs *regs) -{ -   struct ra_class *class; - -   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, -			    regs->class_count + 1); - -   class = rzalloc(regs, struct ra_class); -   regs->classes[regs->class_count] = class; - -   class->regs = rzalloc_array(class, GLboolean, regs->count); - -   return regs->class_count++; -} - -void -ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) -{ -   struct ra_class *class = regs->classes[c]; - -   class->regs[r] = GL_TRUE; -   class->p++; -} - -/** - * Must be called after all conflicts and 
register classes have been - * set up and before the register set is used for allocation. - */ -void -ra_set_finalize(struct ra_regs *regs) -{ -   unsigned int b, c; - -   for (b = 0; b < regs->class_count; b++) { -      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count); -   } - -   /* Compute, for each class B and C, how many regs of B an -    * allocation to C could conflict with. -    */ -   for (b = 0; b < regs->class_count; b++) { -      for (c = 0; c < regs->class_count; c++) { -	 unsigned int rc; -	 int max_conflicts = 0; - -	 for (rc = 0; rc < regs->count; rc++) { -	    int conflicts = 0; -	    int i; - -	    if (!regs->classes[c]->regs[rc]) -	       continue; - -	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) { -	       unsigned int rb = regs->regs[rc].conflict_list[i]; -	       if (regs->classes[b]->regs[rb]) -		  conflicts++; -	    } -	    max_conflicts = MAX2(max_conflicts, conflicts); -	 } -	 regs->classes[b]->q[c] = max_conflicts; -      } -   } -} - -static void -ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) -{ -   g->nodes[n1].adjacency[n2] = GL_TRUE; -   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; -   g->nodes[n1].adjacency_count++; -} - -struct ra_graph * -ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) -{ -   struct ra_graph *g; -   unsigned int i; - -   g = rzalloc(regs, struct ra_graph); -   g->regs = regs; -   g->nodes = rzalloc_array(g, struct ra_node, count); -   g->count = count; - -   g->stack = rzalloc_array(g, unsigned int, count); - -   for (i = 0; i < count; i++) { -      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count); -      g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count); -      g->nodes[i].adjacency_count = 0; -      ra_add_node_adjacency(g, i, i); -      g->nodes[i].reg = ~0; -   } - -   return g; -} - -void -ra_set_node_class(struct ra_graph *g, -		  unsigned int n, unsigned int class) -{ -   
g->nodes[n].class = class; -} - -void -ra_add_node_interference(struct ra_graph *g, -			 unsigned int n1, unsigned int n2) -{ -   if (!g->nodes[n1].adjacency[n2]) { -      ra_add_node_adjacency(g, n1, n2); -      ra_add_node_adjacency(g, n2, n1); -   } -} - -static GLboolean pq_test(struct ra_graph *g, unsigned int n) -{ -   unsigned int j; -   unsigned int q = 0; -   int n_class = g->nodes[n].class; - -   for (j = 0; j < g->nodes[n].adjacency_count; j++) { -      unsigned int n2 = g->nodes[n].adjacency_list[j]; -      unsigned int n2_class = g->nodes[n2].class; - -      if (n != n2 && !g->nodes[n2].in_stack) { -	 q += g->regs->classes[n_class]->q[n2_class]; -      } -   } - -   return q < g->regs->classes[n_class]->p; -} - -/** - * Simplifies the interference graph by pushing all - * trivially-colorable nodes into a stack of nodes to be colored, - * removing them from the graph, and rinsing and repeating. - * - * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE - * means that either spilling will be required, or optimistic coloring - * should be applied. - */ -GLboolean -ra_simplify(struct ra_graph *g) -{ -   GLboolean progress = GL_TRUE; -   int i; - -   while (progress) { -      progress = GL_FALSE; - -      for (i = g->count - 1; i >= 0; i--) { -	 if (g->nodes[i].in_stack) -	    continue; - -	 if (pq_test(g, i)) { -	    g->stack[g->stack_count] = i; -	    g->stack_count++; -	    g->nodes[i].in_stack = GL_TRUE; -	    progress = GL_TRUE; -	 } -      } -   } - -   for (i = 0; i < g->count; i++) { -      if (!g->nodes[i].in_stack) -	 return GL_FALSE; -   } - -   return GL_TRUE; -} - -/** - * Pops nodes from the stack back into the graph, coloring them with - * registers as they go. - * - * If all nodes were trivially colorable, then this must succeed.  
If - * not (optimistic coloring), then it may return GL_FALSE; - */ -GLboolean -ra_select(struct ra_graph *g) -{ -   int i; - -   while (g->stack_count != 0) { -      unsigned int r; -      int n = g->stack[g->stack_count - 1]; -      struct ra_class *c = g->regs->classes[g->nodes[n].class]; - -      /* Find the lowest-numbered reg which is not used by a member -       * of the graph adjacent to us. -       */ -      for (r = 0; r < g->regs->count; r++) { -	 if (!c->regs[r]) -	    continue; - -	 /* Check if any of our neighbors conflict with this register choice. */ -	 for (i = 0; i < g->nodes[n].adjacency_count; i++) { -	    unsigned int n2 = g->nodes[n].adjacency_list[i]; - -	    if (!g->nodes[n2].in_stack && -		g->regs->regs[r].conflicts[g->nodes[n2].reg]) { -	       break; -	    } -	 } -	 if (i == g->nodes[n].adjacency_count) -	    break; -      } -      if (r == g->regs->count) -	 return GL_FALSE; - -      g->nodes[n].reg = r; -      g->nodes[n].in_stack = GL_FALSE; -      g->stack_count--; -   } - -   return GL_TRUE; -} - -/** - * Optimistic register coloring: Just push the remaining nodes - * on the stack.  They'll be colored first in ra_select(), and - * if they succeed then the locally-colorable nodes are still - * locally-colorable and the rest of the register allocation - * will succeed. 
- */ -void -ra_optimistic_color(struct ra_graph *g) -{ -   unsigned int i; - -   for (i = 0; i < g->count; i++) { -      if (g->nodes[i].in_stack) -	 continue; - -      g->stack[g->stack_count] = i; -      g->stack_count++; -      g->nodes[i].in_stack = GL_TRUE; -   } -} - -GLboolean -ra_allocate_no_spills(struct ra_graph *g) -{ -   if (!ra_simplify(g)) { -      ra_optimistic_color(g); -   } -   return ra_select(g); -} - -unsigned int -ra_get_node_reg(struct ra_graph *g, unsigned int n) -{ -   return g->nodes[n].reg; -} - -static float -ra_get_spill_benefit(struct ra_graph *g, unsigned int n) -{ -   int j; -   float benefit = 0; -   int n_class = g->nodes[n].class; - -   /* Define the benefit of eliminating an interference between n, n2 -    * through spilling as q(C, B) / p(C).  This is similar to the -    * "count number of edges" approach of traditional graph coloring, -    * but takes classes into account. -    */ -   for (j = 0; j < g->nodes[n].adjacency_count; j++) { -      unsigned int n2 = g->nodes[n].adjacency_list[j]; -      if (n != n2) { -	 unsigned int n2_class = g->nodes[n2].class; -	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] / -		     g->regs->classes[n_class]->p); -      } -   } - -   return benefit; -} - -/** - * Returns a node number to be spilled according to the cost/benefit using - * the pq test, or -1 if there are no spillable nodes. - */ -int -ra_get_best_spill_node(struct ra_graph *g) -{ -   unsigned int best_node = -1; -   unsigned int best_benefit = 0.0; -   unsigned int n; - -   for (n = 0; n < g->count; n++) { -      float cost = g->nodes[n].spill_cost; -      float benefit; - -      if (cost <= 0.0) -	 continue; - -      benefit = ra_get_spill_benefit(g, n); - -      if (benefit / cost > best_benefit) { -	 best_benefit = benefit / cost; -	 best_node = n; -      } -   } - -   return best_node; -} - -/** - * Only nodes with a spill cost set (cost != 0.0) will be considered - * for register spilling. 
- */ -void -ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) -{ -   g->nodes[n].spill_cost = cost; -} +/*
 + * Copyright © 2010 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *    Eric Anholt <eric@anholt.net>
 + *
 + */
 +
 +/** @file register_allocate.c
 + *
 + * Graph-coloring register allocator.
 + *
 + * The basic idea of graph coloring is to make a node in a graph for
 + * every thing that needs a register (color) number assigned, and make
 + * edges in the graph between nodes that interfere (can't be allocated
 + * to the same register at the same time).
 + *
 + * During the "simplify" process, any node with fewer edges than
 + * there are registers means that that node can get assigned a
 + * register regardless of what its neighbors choose, so that node is
 + * pushed on a stack and removed (with its edges) from the graph.
 + * That likely causes other nodes to become trivially colorable as well.
 + *
 + * Then during the "select" process, nodes are popped off of that
 + * stack, their edges restored, and assigned a color different from
 + * their neighbors.  Because they were pushed on the stack only when
 + * they were trivially colorable, any color chosen won't interfere
 + * with the registers to be popped later.
 + *
 + * The downside to most graph coloring is that real hardware often has
 + * limitations, like registers that need to be allocated to a node in
 + * pairs, or aligned on some boundary.  This implementation follows
 + * the paper "Retargetable Graph-Coloring Register Allocation for
 + * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström.
 + *
 + * In this system, there are register classes each containing various
 + * registers, and registers may interfere with other registers.  For
 + * example, one might have a class of base registers, and a class of
 + * aligned register pairs that would each interfere with their pair of
 + * the base registers.  Each node has a register class it needs to be
 + * assigned to.  Define p(B) to be the size of register class B, and
 + * q(B,C) to be the number of registers in B that the worst choice
 + * register in C could conflict with.  Then, this system replaces the
 + * basic graph coloring test of "fewer edges from this node than there
 + * are registers" with "For this node of class B, the sum of q(B,C)
 + * for each neighbor node of class C is less than p(B)".
 + *
 + * A nice feature of the pq test is that q(B,C) can be computed once
 + * up front and stored in a 2-dimensional array, so that the cost of
 + * coloring a node is constant with the number of registers.  We do
 + * this during ra_set_finalize().
 + */
 +
 +#include <ralloc.h>
 +
 +#include "main/imports.h"
 +#include "main/macros.h"
 +#include "main/mtypes.h"
 +#include "register_allocate.h"
 +
 +struct ra_reg {
 +   GLboolean *conflicts; /**< conflicts[r] is GL_TRUE when this reg conflicts with reg r. */
 +   unsigned int *conflict_list; /**< Indices of the conflicting regs; includes this reg itself. */
 +   unsigned int conflict_list_size; /**< Allocated capacity of conflict_list, in entries. */
 +   unsigned int num_conflicts; /**< Number of conflict_list entries in use. */
 +};
 +
 +struct ra_regs {
 +   struct ra_reg *regs; /**< Array of 'count' registers. */
 +   unsigned int count; /**< Number of registers in the set. */
 +
 +   struct ra_class **classes; /**< Array of 'class_count' class pointers. */
 +   unsigned int class_count; /**< Number of classes allocated so far. */
 +};
 +
 +struct ra_class {
 +   GLboolean *regs; /**< regs[r] is GL_TRUE when register r belongs to this class. */
 +
 +   /**
 +    * p(B) in Runeson/Nyström paper.
 +    *
 +    * This is "how many regs are in the set."  It is incremented by
 +    * every ra_class_add_reg() call on this class.
 +    */
 +   unsigned int p;
 +
 +   /**
 +    * q(B,C) (indexed by C, B is this register class) in
 +    * Runeson/Nyström paper.  This is "how many registers of B could
 +    * the worst choice register from C conflict with".
 +    *
 +    * Allocated and filled in by ra_set_finalize().
 +    */
 +   unsigned int *q;
 +};
 +
 +struct ra_node {
 +   /** @{
 +    *
 +    * List of which nodes this node interferes with.  This should be
 +    * symmetric with the other node.
 +    *
 +    * adjacency is a per-node flag table, adjacency_list holds the
 +    * indices of the adjacent nodes and adjacency_count is the number
 +    * of list entries in use.  ra_alloc_interference_graph() records
 +    * every node as adjacent to itself, so users of the list skip the
 +    * entry naming the node itself where that matters.
 +    */
 +   GLboolean *adjacency;
 +   unsigned int *adjacency_list;
 +   unsigned int adjacency_count;
 +   /** @} */
 +
 +   unsigned int class; /**< Index into ra_regs::classes of the class this node needs. */
 +
 +   /* Register, if assigned, or ~0. */
 +   unsigned int reg;
 +
 +   /**
 +    * Set when the node is in the trivially colorable stack.  When
 +    * set, the adjacency to this node is ignored, to implement the
 +    * "remove the edge from the graph" in simplification without
 +    * having to actually modify the adjacency_list.
 +    */
 +   GLboolean in_stack;
 +
 +   /* For an implementation that needs register spilling, this is the
 +    * approximate cost of spilling this node.  Nodes whose cost is
 +    * <= 0.0 are never picked by ra_get_best_spill_node().
 +    */
 +   float spill_cost;
 +};
 +
 +struct ra_graph {
 +   struct ra_regs *regs; /**< Register set the nodes are allocated from. */
 +   /**
 +    * the variables that need register allocation.
 +    */
 +   struct ra_node *nodes;
 +   unsigned int count; /**< count of nodes. */
 +
 +   unsigned int *stack; /**< Node indices pushed by ra_simplify()/ra_optimistic_color(). */
 +   unsigned int stack_count; /**< Number of entries currently on the stack. */
 +};
 +
 +/**
 + * Creates a register set holding the given number of registers.
 + *
 + * The set is allocated with a NULL ralloc parent.  Every register
 + * starts out conflicting with itself only.
 + */
 +struct ra_regs *
 +ra_alloc_reg_set(unsigned int count)
 +{
 +   struct ra_regs *set = rzalloc(NULL, struct ra_regs);
 +   unsigned int r;
 +
 +   set->regs = rzalloc_array(set, struct ra_reg, count);
 +   set->count = count;
 +
 +   for (r = 0; r < count; r++) {
 +      struct ra_reg *reg = &set->regs[r];
 +
 +      /* Flag the self-conflict in the lookup table... */
 +      reg->conflicts = rzalloc_array(set->regs, GLboolean, count);
 +      reg->conflicts[r] = GL_TRUE;
 +
 +      /* ...and make it the first entry of the (growable) list. */
 +      reg->conflict_list = ralloc_array(set->regs, unsigned int, 4);
 +      reg->conflict_list_size = 4;
 +      reg->num_conflicts = 1;
 +      reg->conflict_list[0] = r;
 +   }
 +
 +   return set;
 +}
 +
 +/**
 + * Records, in one direction only, that register r1 conflicts with
 + * register r2: r2 is appended to r1's conflict list and flagged in
 + * r1's conflict table.
 + *
 + * The list is doubled in capacity whenever it is full.  No check for
 + * an already-recorded conflict is made here; ra_add_reg_conflict()
 + * does that before calling this for both directions.
 + */
 +static void
 +ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 +{
 +   struct ra_reg *reg1 = &regs->regs[r1];
 +
 +   if (reg1->conflict_list_size == reg1->num_conflicts) {
 +      /* Out of room: grow the list geometrically. */
 +      reg1->conflict_list_size *= 2;
 +      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
 +				     unsigned int, reg1->conflict_list_size);
 +   }
 +   reg1->conflict_list[reg1->num_conflicts++] = r2;
 +   reg1->conflicts[r2] = GL_TRUE;
 +}
 +
 +/**
 + * Marks registers r1 and r2 as conflicting with each other.
 + *
 + * Nothing happens when r1 is already flagged as conflicting with r2.
 + */
 +void
 +ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 +{
 +   if (regs->regs[r1].conflicts[r2])
 +      return;
 +
 +   /* Record both directions so the relation stays symmetric. */
 +   ra_add_conflict_list(regs, r1, r2);
 +   ra_add_conflict_list(regs, r2, r1);
 +}
 +
 +/**
 + * Adds a new, empty register class to the set and returns its index.
 + */
 +unsigned int
 +ra_alloc_reg_class(struct ra_regs *regs)
 +{
 +   const unsigned int index = regs->class_count;
 +   struct ra_class *new_class;
 +
 +   /* Make room for one more class pointer. */
 +   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
 +			    index + 1);
 +
 +   new_class = rzalloc(regs, struct ra_class);
 +   new_class->regs = rzalloc_array(new_class, GLboolean, regs->count);
 +   regs->classes[index] = new_class;
 +
 +   regs->class_count = index + 1;
 +   return index;
 +}
 +
 +/**
 + * Adds register r to class c and bumps the class's register count p.
 + */
 +void
 +ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
 +{
 +   struct ra_class *target = regs->classes[c];
 +
 +   target->p += 1;
 +   target->regs[r] = GL_TRUE;
 +}
 +
 +/**
 + * Must be called after all conflicts and register classes have been
 + * set up and before the register set is used for allocation.
 + *
 + * Allocates the q array of every class and fills it in: q[c] of
 + * class b becomes the largest number of class-b registers that any
 + * single register belonging to class c conflicts with.
 + */
 +void
 +ra_set_finalize(struct ra_regs *regs)
 +{
 +   unsigned int b, c;
 +
 +   for (b = 0; b < regs->class_count; b++) {
 +      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
 +   }
 +
 +   /* Compute, for each class B and C, how many regs of B an
 +    * allocation to C could conflict with.  The worst case over all
 +    * registers of C is what gets stored.
 +    */
 +   for (b = 0; b < regs->class_count; b++) {
 +      for (c = 0; c < regs->class_count; c++) {
 +	 unsigned int rc;
 +	 int max_conflicts = 0;
 +
 +	 for (rc = 0; rc < regs->count; rc++) {
 +	    int conflicts = 0;
 +	    int i;
 +
 +	    if (!regs->classes[c]->regs[rc]) /* rc is not a member of class C */
 +	       continue;
 +
 +	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
 +	       unsigned int rb = regs->regs[rc].conflict_list[i];
 +	       if (regs->classes[b]->regs[rb]) /* conflicting reg is in class B */
 +		  conflicts++;
 +	    }
 +	    max_conflicts = MAX2(max_conflicts, conflicts);
 +	 }
 +	 regs->classes[b]->q[c] = max_conflicts;
 +      }
 +   }
 +}
 +
 +/**
 + * Records, in one direction only, that node n1 is adjacent to node
 + * n2: n2 is appended to n1's adjacency list and flagged in n1's
 + * adjacency table.
 + */
 +static void
 +ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
 +{
 +   struct ra_node *node = &g->nodes[n1];
 +
 +   node->adjacency_list[node->adjacency_count++] = n2;
 +   node->adjacency[n2] = GL_TRUE;
 +}
 +
 +/**
 + * Creates an interference graph of 'count' nodes that allocates from
 + * the given register set.
 + *
 + * The graph is a ralloc child of regs.  Every node starts out
 + * adjacent to itself only, with no register assigned (reg == ~0).
 + */
 +struct ra_graph *
 +ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
 +{
 +   struct ra_graph *graph = rzalloc(regs, struct ra_graph);
 +   unsigned int n;
 +
 +   graph->regs = regs;
 +   graph->count = count;
 +   graph->nodes = rzalloc_array(graph, struct ra_node, count);
 +   graph->stack = rzalloc_array(graph, unsigned int, count);
 +
 +   for (n = 0; n < count; n++) {
 +      struct ra_node *node = &graph->nodes[n];
 +
 +      node->adjacency = rzalloc_array(graph, GLboolean, count);
 +      node->adjacency_list = ralloc_array(graph, unsigned int, count);
 +      node->adjacency_count = 0;
 +      ra_add_node_adjacency(graph, n, n);
 +      node->reg = ~0;
 +   }
 +
 +   return graph;
 +}
 +
 +/**
 + * Sets the register class that node n must be allocated from.
 + */
 +void
 +ra_set_node_class(struct ra_graph *g,
 +		  unsigned int n, unsigned int class)
 +{
 +   struct ra_node *node = &g->nodes[n];
 +
 +   node->class = class;
 +}
 +
 +/**
 + * Marks nodes n1 and n2 as interfering with each other.
 + *
 + * Nothing happens when n1 is already flagged as adjacent to n2.
 + */
 +void
 +ra_add_node_interference(struct ra_graph *g,
 +			 unsigned int n1, unsigned int n2)
 +{
 +   if (g->nodes[n1].adjacency[n2])
 +      return;
 +
 +   /* Record both directions so the adjacency stays symmetric. */
 +   ra_add_node_adjacency(g, n1, n2);
 +   ra_add_node_adjacency(g, n2, n1);
 +}
 +
 +/**
 + * The p/q test: sums q[] of node n's class over the classes of all
 + * neighbors that are not on the stack (and are not n itself), and
 + * reports whether that sum is below p of n's class.
 + */
 +static GLboolean pq_test(struct ra_graph *g, unsigned int n)
 +{
 +   const struct ra_node *node = &g->nodes[n];
 +   const struct ra_class *node_class = g->regs->classes[node->class];
 +   unsigned int q_sum = 0;
 +   unsigned int k;
 +
 +   for (k = 0; k < node->adjacency_count; k++) {
 +      const unsigned int other = node->adjacency_list[k];
 +
 +      /* Skip the self-adjacency entry and neighbors already removed. */
 +      if (other == n || g->nodes[other].in_stack)
 +         continue;
 +
 +      q_sum += node_class->q[g->nodes[other].class];
 +   }
 +
 +   return q_sum < node_class->p;
 +}
 +
 +/**
 + * Simplifies the interference graph by pushing all
 + * trivially-colorable nodes into a stack of nodes to be colored,
 + * removing them from the graph, and rinsing and repeating.
 + *
 + * A node counts as trivially colorable when pq_test() passes for it.
 + * "Removing" a node only sets its in_stack flag.  Passes over the
 + * nodes repeat until one whole pass pushes nothing.
 + *
 + * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
 + * means that either spilling will be required, or optimistic coloring
 + * should be applied.
 + */
 +GLboolean
 +ra_simplify(struct ra_graph *g)
 +{
 +   GLboolean progress = GL_TRUE;
 +   int i;
 +
 +   while (progress) {
 +      progress = GL_FALSE;
 +
 +      for (i = g->count - 1; i >= 0; i--) { /* walk nodes from the highest index down */
 +	 if (g->nodes[i].in_stack)
 +	    continue;
 +
 +	 if (pq_test(g, i)) {
 +	    g->stack[g->stack_count] = i;
 +	    g->stack_count++;
 +	    g->nodes[i].in_stack = GL_TRUE;
 +	    progress = GL_TRUE; /* a removal may make other nodes colorable */
 +	 }
 +      }
 +   }
 +
 +   for (i = 0; i < g->count; i++) {
 +      if (!g->nodes[i].in_stack) /* at least one node could not be removed */
 +	 return GL_FALSE;
 +   }
 +
 +   return GL_TRUE;
 +}
 +
 +/**
 + * Pops nodes from the stack back into the graph, coloring them with
 + * registers as they go.
 + *
 + * Nodes are taken from the top of the stack.  Each one gets the
 + * lowest-numbered register of its class that does not conflict with
 + * the register of any adjacent node that is no longer on the stack.
 + *
 + * If all nodes were trivially colorable, then this must succeed.  If
 + * not (optimistic coloring), then it may return GL_FALSE; in that
 + * case the failing node and everything below it stay on the stack.
 + */
 +GLboolean
 +ra_select(struct ra_graph *g)
 +{
 +   int i;
 +
 +   while (g->stack_count != 0) {
 +      unsigned int r;
 +      int n = g->stack[g->stack_count - 1];
 +      struct ra_class *c = g->regs->classes[g->nodes[n].class];
 +
 +      /* Find the lowest-numbered reg which is not used by a member
 +       * of the graph adjacent to us.
 +       */
 +      for (r = 0; r < g->regs->count; r++) {
 +	 if (!c->regs[r]) /* r is not in this node's class */
 +	    continue;
 +
 +	 /* Check if any of our neighbors conflict with this register choice. */
 +	 for (i = 0; i < g->nodes[n].adjacency_count; i++) {
 +	    unsigned int n2 = g->nodes[n].adjacency_list[i];
 +
 +	    if (!g->nodes[n2].in_stack &&
 +		g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
 +	       break;
 +	    }
 +	 }
 +	 if (i == g->nodes[n].adjacency_count) /* no neighbor objected: r is usable */
 +	    break;
 +      }
 +      if (r == g->regs->count) /* every register was rejected */
 +	 return GL_FALSE;
 +
 +      g->nodes[n].reg = r;
 +      g->nodes[n].in_stack = GL_FALSE;
 +      g->stack_count--;
 +   }
 +
 +   return GL_TRUE;
 +}
 +
 +/**
 + * Optimistic register coloring: every node that ra_simplify() left in
 + * the graph is pushed on the stack anyway, in increasing node order.
 + *
 + * Being pushed last, those nodes are the first ones ra_select() tries
 + * to color.  If that works out, the locally-colorable nodes beneath
 + * them are still locally-colorable and the rest of the allocation
 + * will succeed.
 + */
 +void
 +ra_optimistic_color(struct ra_graph *g)
 +{
 +   unsigned int n;
 +
 +   for (n = 0; n < g->count; n++) {
 +      struct ra_node *node = &g->nodes[n];
 +
 +      if (!node->in_stack) {
 +         g->stack[g->stack_count++] = n;
 +         node->in_stack = GL_TRUE;
 +      }
 +   }
 +}
 +
 +/**
 + * Runs a complete allocation without spilling: simplify, fall back to
 + * optimistic coloring when simplification leaves nodes behind, then
 + * select.  Returns the result of ra_select().
 + */
 +GLboolean
 +ra_allocate_no_spills(struct ra_graph *g)
 +{
 +   const GLboolean fully_simplified = ra_simplify(g);
 +
 +   if (!fully_simplified) {
 +      ra_optimistic_color(g);
 +   }
 +
 +   return ra_select(g);
 +}
 +
 +/**
 + * Returns the register stored for node n (~0 until one is assigned).
 + */
 +unsigned int
 +ra_get_node_reg(struct ra_graph *g, unsigned int n)
 +{
 +   const struct ra_node *node = &g->nodes[n];
 +
 +   return node->reg;
 +}
 +
 +/**
 + * Estimates how much spilling node n would help.
 + *
 + * With N the class of n, each neighbor n2 (other than n itself)
 + * contributes q[class of n2] / p, both taken from class N.  This is
 + * similar to the "count number of edges" approach of traditional
 + * graph coloring, but takes classes into account.
 + */
 +static float
 +ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
 +{
 +   const struct ra_node *node = &g->nodes[n];
 +   const struct ra_class *node_class = g->regs->classes[node->class];
 +   float total = 0;
 +   unsigned int k;
 +
 +   for (k = 0; k < node->adjacency_count; k++) {
 +      const unsigned int other = node->adjacency_list[k];
 +
 +      /* The self-adjacency entry is not an interference. */
 +      if (other == n)
 +         continue;
 +
 +      total += ((float)node_class->q[g->nodes[other].class] /
 +                node_class->p);
 +   }
 +
 +   return total;
 +}
 +
 +/**
 + * Returns a node number to be spilled according to the cost/benefit using
 + * the pq test, or -1 if there are no spillable nodes.
 + *
 + * The node with the highest benefit/cost ratio wins; on a tie the
 + * lowest-numbered node is kept.  Nodes whose spill cost is <= 0.0 are
 + * skipped, and a node whose ratio is not above 0.0 is never chosen.
 + */
 +int
 +ra_get_best_spill_node(struct ra_graph *g)
 +{
 +   int best_node = -1;
 +   /* Must be a float: an integer here would truncate every stored
 +    * ratio, so ratios below 1.0 would all compare as 0.
 +    */
 +   float best_ratio = 0.0;
 +   unsigned int n;
 +
 +   for (n = 0; n < g->count; n++) {
 +      float cost = g->nodes[n].spill_cost;
 +      float ratio;
 +
 +      if (cost <= 0.0)
 +         continue;
 +
 +      ratio = ra_get_spill_benefit(g, n) / cost;
 +
 +      if (ratio > best_ratio) {
 +         best_ratio = ratio;
 +         best_node = (int) n;
 +      }
 +   }
 +
 +   return best_node;
 +}
 +
 +/**
 + * Stores the spill cost of node n.
 + *
 + * Only nodes with a spill cost set (cost != 0.0) will be considered
 + * for register spilling.
 + */
 +void
 +ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
 +{
 +   struct ra_node *node = &g->nodes[n];
 +
 +   node->spill_cost = cost;
 +}
 diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index f159ed2db..9cc40c3d9 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -372,6 +372,14 @@ internal_format(struct gl_context *ctx, GLenum format, GLenum type)              return
                 ctx->Extensions.ARB_texture_float ? GL_RGBA32F :
                 ctx->Extensions.EXT_texture_snorm ? GL_RGBA16_SNORM : GL_RGBA16;
 +
 +         case GL_UNSIGNED_INT_5_9_9_9_REV:
 +            assert(ctx->Extensions.EXT_texture_shared_exponent);
 +            return GL_RGB9_E5;
 +
 +         case GL_UNSIGNED_INT_10F_11F_11F_REV:
 +            assert(ctx->Extensions.EXT_packed_float);
 +            return GL_R11F_G11F_B10F;
           }
        }
     }
 diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c index 8f697fa51..0c5e1991c 100644 --- a/mesalib/src/mesa/state_tracker/st_extensions.c +++ b/mesalib/src/mesa/state_tracker/st_extensions.c @@ -515,6 +515,12 @@ void st_init_extensions(struct st_context *st)        ctx->Extensions.ARB_half_float_vertex = GL_TRUE;
     }
 +   if (screen->is_format_supported(screen, PIPE_FORMAT_R32G32B32A32_FIXED,
 +                                   PIPE_BUFFER, 0,
 +                                   PIPE_BIND_VERTEX_BUFFER)) {
 +      ctx->Extensions.ARB_ES2_compatibility = GL_TRUE;
 +   }
 +
     if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
  #if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */
        ctx->Extensions.ARB_geometry_shader4 = GL_TRUE;
 @@ -576,4 +582,17 @@ void st_init_extensions(struct st_context *st)     if (st->pipe->texture_barrier) {
        ctx->Extensions.NV_texture_barrier = GL_TRUE;
     }
 +
 +   if (screen->is_format_supported(screen, PIPE_FORMAT_R9G9B9E5_FLOAT,
 +                                   PIPE_TEXTURE_2D, 0,
 +                                   PIPE_BIND_SAMPLER_VIEW)) {
 +      ctx->Extensions.EXT_texture_shared_exponent = GL_TRUE;
 +   }
 +
 +   if (screen->is_format_supported(screen, PIPE_FORMAT_R11G11B10_FLOAT,
 +                                   PIPE_TEXTURE_2D, 0,
 +                                   PIPE_BIND_RENDER_TARGET |
 +                                   PIPE_BIND_SAMPLER_VIEW)) {
 +      ctx->Extensions.EXT_packed_float = GL_TRUE;
 +   }
  }
 diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c index 32b44b806..8568c68e1 100644 --- a/mesalib/src/mesa/state_tracker/st_format.c +++ b/mesalib/src/mesa/state_tracker/st_format.c @@ -325,6 +325,11 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat)     case MESA_FORMAT_SIGNED_I16:
        return PIPE_FORMAT_I16_SNORM;
 +   case MESA_FORMAT_RGB9_E5_FLOAT:
 +      return PIPE_FORMAT_R9G9B9E5_FLOAT;
 +   case MESA_FORMAT_R11_G11_B10_FLOAT:
 +      return PIPE_FORMAT_R11G11B10_FLOAT;
 +
     default:
        assert(0);
        return PIPE_FORMAT_NONE;
 @@ -545,6 +550,11 @@ st_pipe_format_to_mesa_format(enum pipe_format format)     case PIPE_FORMAT_I16_SNORM:
        return MESA_FORMAT_SIGNED_I16;
 +   case PIPE_FORMAT_R9G9B9E5_FLOAT:
 +      return MESA_FORMAT_RGB9_E5_FLOAT;
 +   case PIPE_FORMAT_R11G11B10_FLOAT:
 +      return MESA_FORMAT_R11_G11_B10_FLOAT;
 +
     default:
        assert(0);
        return MESA_FORMAT_NONE;
 @@ -1483,6 +1493,20 @@ st_choose_format(struct pipe_screen *screen, GLenum internalFormat,                 target, sample_count, bindings);
        }
 +   case GL_RGB9_E5:
 +      if (screen->is_format_supported(screen, PIPE_FORMAT_R9G9B9E5_FLOAT, target,
 +                                      sample_count, bindings)) {
 +         return PIPE_FORMAT_R9G9B9E5_FLOAT;
 +      }
 +      return PIPE_FORMAT_NONE;
 +
 +   case GL_R11F_G11F_B10F:
 +      if (screen->is_format_supported(screen, PIPE_FORMAT_R11G11B10_FLOAT, target,
 +                                      sample_count, bindings)) {
 +         return PIPE_FORMAT_R11G11B10_FLOAT;
 +      }
 +      return PIPE_FORMAT_NONE;
 +
     default:
        return PIPE_FORMAT_NONE;
     }
 | 
