diff options
Diffstat (limited to 'mesalib/src/gallium/auxiliary')
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_vbuf.c | 120 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_vbuf.h | 6 |
2 files changed, 99 insertions, 27 deletions
diff --git a/mesalib/src/gallium/auxiliary/util/u_vbuf.c b/mesalib/src/gallium/auxiliary/util/u_vbuf.c index 7fb492882..b712b52de 100644 --- a/mesalib/src/gallium/auxiliary/util/u_vbuf.c +++ b/mesalib/src/gallium/auxiliary/util/u_vbuf.c @@ -25,6 +25,66 @@ * **************************************************************************/ +/** + * This module uploads user buffers and translates the vertex buffers which + * contain incompatible vertices (i.e. not supported by the driver/hardware) + * into compatible ones, based on the Gallium CAPs. + * + * It does not upload index buffers. + * + * The module heavily uses bitmasks to represent per-buffer and + * per-vertex-element flags to avoid looping over the list of buffers just + * to see if there's a non-zero stride, or user buffer, or unsupported format, + * etc. + * + * There are 3 categories of vertex elements, which are processed separately: + * - per-vertex attribs (stride != 0, instance_divisor == 0) + * - instanced attribs (stride != 0, instance_divisor > 0) + * - constant attribs (stride == 0) + * + * All needed uploads and translations are performed every draw command, but + * only the subset of vertices needed for that draw command is uploaded or + * translated. (the module never translates whole buffers) + * + * + * The module consists of two main parts: + * + * + * 1) Translate (u_vbuf_translate_begin/end) + * + * This is pretty much a vertex fetch fallback. It translates vertices from + * one vertex buffer to another in an unused vertex buffer slot. It does + * whatever is needed to make the vertices readable by the hardware (changes + * vertex formats and aligns offsets and strides). The translate module is + * used here. + * + * Each of the 3 categories is translated to a separate buffer. + * Only the [min_index, max_index] range is translated. For instanced attribs, + * the range is [start_instance, start_instance+instance_count]. For constant + * attribs, the range is [0, 1]. + * + * + * 2) User buffer uploading (u_vbuf_upload_buffers) + * + * Only the [min_index, max_index] range is uploaded (just like Translate) + * with a single memcpy. + * + * This method works best for non-indexed draw operations or indexed draw + * operations where the [min_index, max_index] range is not being way bigger + * than the vertex count. + * + * If the range is too big (e.g. one triangle with indices {0, 1, 10000}), + * the per-vertex attribs are uploaded via the translate module, all packed + * into one vertex buffer, and the indexed draw call is turned into + * a non-indexed one in the process. This adds additional complexity + * to the translate part, but it prevents bad apps from bringing your frame + * rate down. + * + * + * If there is nothing to do, it forwards every command to the driver. + * The module also has its own CSO cache of vertex element states. + */ + #include "util/u_vbuf.h" #include "util/u_dump.h" @@ -49,6 +109,8 @@ struct u_vbuf_elements { enum pipe_format native_format[PIPE_MAX_ATTRIBS]; unsigned native_format_size[PIPE_MAX_ATTRIBS]; + /* Which buffers are used by the vertex element state. */ + uint32_t used_vb_mask; /* This might mean two things: * - src_format != native_format, as discussed above. * - src_offset % 4 != 0 (if the caps don't allow such an offset). */ @@ -430,6 +492,8 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, struct translate_key key[VB_NUM]; unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ unsigned i, type; + unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & + mgr->ve->used_vb_mask; int start[VB_NUM] = { start_vertex, /* VERTEX */ @@ -453,20 +517,20 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, if (!mgr->vertex_buffer[vb_index].stride) { if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && - !(mgr->incompatible_vb_mask & (1 << vb_index))) { + !(incompatible_vb_mask & (1 << vb_index))) { continue; } mask[VB_CONST] |= 1 << vb_index; } else if (mgr->ve->ve[i].instance_divisor) { if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && - !(mgr->incompatible_vb_mask & (1 << vb_index))) { + !(incompatible_vb_mask & (1 << vb_index))) { continue; } mask[VB_INSTANCE] |= 1 << vb_index; } else { if (!unroll_indices && !(mgr->ve->incompatible_elem_mask & (1 << i)) && - !(mgr->incompatible_vb_mask & (1 << vb_index))) { + !(incompatible_vb_mask & (1 << vb_index))) { continue; } mask[VB_VERTEX] |= 1 << vb_index; @@ -488,7 +552,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, bit = 1 << vb_index; if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && - !(mgr->incompatible_vb_mask & (1 << vb_index)) && + !(incompatible_vb_mask & (1 << vb_index)) && (!unroll_indices || !(mask[VB_VERTEX] & bit))) { continue; } @@ -690,6 +754,7 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, } } + ve->used_vb_mask = used_buffers; ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; @@ -826,18 +891,18 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, { unsigned i; unsigned nr_velems = mgr->ve->count; - unsigned nr_vbufs = util_last_bit(mgr->enabled_vb_mask); struct pipe_vertex_element *velems = mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; unsigned start_offset[PIPE_MAX_ATTRIBS]; - unsigned end_offset[PIPE_MAX_ATTRIBS] = {0}; + unsigned end_offset[PIPE_MAX_ATTRIBS]; + uint32_t buffer_mask = 0; /* Determine how much data needs to be uploaded. */ for (i = 0; i < nr_velems; i++) { struct pipe_vertex_element *velem = &velems[i]; unsigned index = velem->vertex_buffer_index; struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; - unsigned instance_div, first, size; + unsigned instance_div, first, size, index_bit; /* Skip the buffers generated by translate. */ if (index == mgr->fallback_vbs[VB_VERTEX] || @@ -867,8 +932,10 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i]; } + index_bit = 1 << index; + /* Update offsets. */ - if (!end_offset[index]) { + if (!(buffer_mask & index_bit)) { start_offset[index] = first; end_offset[index] = first + size; } else { @@ -877,19 +944,20 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, if (first + size > end_offset[index]) end_offset[index] = first + size; } + + buffer_mask |= index_bit; } /* Upload buffers. */ - for (i = 0; i < nr_vbufs; i++) { - unsigned start, end = end_offset[i]; + while (buffer_mask) { + unsigned start, end; struct pipe_vertex_buffer *real_vb; const uint8_t *ptr; - if (!end) { - continue; - } + i = u_bit_scan(&buffer_mask); start = start_offset[i]; + end = end_offset[i]; assert(start < end); real_vb = &mgr->real_vertex_buffer[i]; @@ -907,9 +975,10 @@ static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr) /* See if there are any per-vertex attribs which will be uploaded or * translated. Use bitmasks to get the info instead of looping over vertex * elements. */ - return ((mgr->user_vb_mask | mgr->incompatible_vb_mask | - mgr->ve->incompatible_vb_mask_any) & - mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask) != 0; + return (mgr->ve->used_vb_mask & + ((mgr->user_vb_mask | mgr->incompatible_vb_mask | + mgr->ve->incompatible_vb_mask_any) & + mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0; } static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr) @@ -918,9 +987,10 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr) * * We could query whether each buffer is busy, but that would * be way more costly than this. */ - return (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask & - mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any & - mgr->nonzero_stride_vb_mask) != 0; + return (mgr->ve->used_vb_mask & + (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask & + mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any & + mgr->nonzero_stride_vb_mask)) != 0; } static void u_vbuf_get_minmax_index(struct pipe_context *pipe, @@ -1041,15 +1111,17 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) int start_vertex, min_index; unsigned num_vertices; boolean unroll_indices = FALSE; - uint32_t user_vb_mask = mgr->user_vb_mask; + uint32_t used_vb_mask = mgr->ve->used_vb_mask; + uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; + uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask; /* Normal draw. No fallback and no user buffers. */ - if (!mgr->incompatible_vb_mask && + if (!incompatible_vb_mask && !mgr->ve->incompatible_elem_mask && !user_vb_mask) { /* Set vertex buffers if needed. */ - if (mgr->dirty_real_vb_mask) { + if (mgr->dirty_real_vb_mask & used_vb_mask) { u_vbuf_set_driver_vertex_buffers(mgr); } @@ -1102,7 +1174,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) /* Translate vertices with non-native layouts or formats. */ if (unroll_indices || - mgr->incompatible_vb_mask || + incompatible_vb_mask || mgr->ve->incompatible_elem_mask) { /* XXX check the return value */ u_vbuf_translate_begin(mgr, start_vertex, num_vertices, @@ -1110,7 +1182,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) info->start, info->count, min_index, unroll_indices); - user_vb_mask &= ~(mgr->incompatible_vb_mask | + user_vb_mask &= ~(incompatible_vb_mask | mgr->ve->incompatible_vb_mask_all); } diff --git a/mesalib/src/gallium/auxiliary/util/u_vbuf.h b/mesalib/src/gallium/auxiliary/util/u_vbuf.h index 0f8227164..a608184e5 100644 --- a/mesalib/src/gallium/auxiliary/util/u_vbuf.h +++ b/mesalib/src/gallium/auxiliary/util/u_vbuf.h @@ -28,9 +28,9 @@ #ifndef U_VBUF_H #define U_VBUF_H -/* This module builds upon u_upload_mgr and translate_cache and takes care of - * user buffer uploads and vertex format fallbacks. It's designed - * for the drivers which don't always use the Draw module. (e.g. for HWTCL) +/* This module takes care of user buffer uploads and vertex format fallbacks. + * It's designed for the drivers which don't want to use the Draw module. + * There is a more detailed description at the beginning of the .c file. */ #include "pipe/p_context.h" |