From b5d1fd89898edb34f73679b542c754d837d44cf8 Mon Sep 17 00:00:00 2001 From: marha Date: Sun, 13 Mar 2011 20:15:26 +0000 Subject: xkeyboard-config libxcb pixman mesalib git update 13 Mar 2011 --- libxcb/configure.ac | 1 + libxcb/src/xcb_in.c | 146 +- libxcb/src/xcb_out.c | 734 +- libxcb/src/xcbint.h | 2 +- mesalib/configs/linux-llvm | 88 +- mesalib/configure.ac | 3820 +++--- mesalib/scons/gallium.py | 1249 +- mesalib/scons/llvm.py | 332 +- mesalib/scons/udis86.py | 44 - mesalib/src/glsl/glsl_types.h | 956 +- mesalib/src/mesa/main/ff_fragment_shader.cpp | 1435 ++- mesalib/src/mesa/main/mtypes.h | 1 - mesalib/src/mesa/main/state.c | 19 +- mesalib/src/mesa/main/texenvprogram.h | 2 +- mesalib/src/mesa/program/program.c | 2 +- mesalib/src/mesa/state_tracker/st_cb_clear.c | 1180 +- mesalib/src/mesa/state_tracker/st_cb_texture.c | 2 +- pixman/pixman/pixman-arm-common.h | 773 +- pixman/pixman/pixman-arm-neon-asm.S | 5308 ++++---- pixman/pixman/pixman-arm-neon-asm.h | 17 + pixman/pixman/pixman-arm-neon.c | 946 +- pixman/pixman/pixman-arm-simd-asm.S | 66 +- pixman/pixman/pixman-arm-simd.c | 855 +- xorg-server/xkeyboard-config/rules/base.lists.part | 6 +- xorg-server/xkeyboard-config/rules/base.xml.in | 12411 ++++++++++--------- xorg-server/xkeyboard-config/symbols/inet | 13 + 26 files changed, 15349 insertions(+), 15059 deletions(-) delete mode 100644 mesalib/scons/udis86.py diff --git a/libxcb/configure.ac b/libxcb/configure.ac index fc9f17ef6..362ba0535 100644 --- a/libxcb/configure.ac +++ b/libxcb/configure.ac @@ -259,6 +259,7 @@ echo " Xfixes..............: ${BUILD_XFIXES}" echo " Xfree86-dri.........: ${BUILD_XFREE86_DRI}" echo " xinerama............: ${BUILD_XINERAMA}" echo " xinput..............: ${BUILD_XINPUT}" +echo " xkb.................: ${BUILD_XKB}" echo " xprint..............: ${BUILD_XPRINT}" echo " xtest...............: ${BUILD_XTEST}" echo " xv..................: ${BUILD_XV}" diff --git a/libxcb/src/xcb_in.c b/libxcb/src/xcb_in.c index b4c48fb67..c9cb5f445 100644 --- a/libxcb/src/xcb_in.c +++ b/libxcb/src/xcb_in.c @@ -75,7 +75,7 @@ typedef struct pending_reply { } pending_reply; typedef struct reader_list { - unsigned int request; + uint64_t request; pthread_cond_t *data; struct reader_list *next; } reader_list; @@ -208,10 +208,10 @@ static int read_packet(xcb_connection_t *c) c->in.current_reply_tail = &cur->next; for(reader = c->in.readers; reader && - XCB_SEQUENCE_COMPARE_32(reader->request, <=, c->in.request_read); + XCB_SEQUENCE_COMPARE(reader->request, <=, c->in.request_read); reader = reader->next) { - if(XCB_SEQUENCE_COMPARE_32(reader->request, ==, c->in.request_read)) + if(reader->request == c->in.request_read) { pthread_cond_signal(reader->data); break; @@ -301,7 +301,7 @@ static int read_block(const int fd, void *buf, const ssize_t len) return len; } -static int poll_for_reply(xcb_connection_t *c, unsigned int request, void **reply, xcb_generic_error_t **error) +static int poll_for_reply(xcb_connection_t *c, uint64_t request, void **reply, xcb_generic_error_t **error) { struct reply_list *head; @@ -310,7 +310,7 @@ static int poll_for_reply(xcb_connection_t *c, unsigned int request, void **repl head = 0; /* We've read requests past the one we want, so if it has replies we have * them all and they're in the replies map. */ - else if(XCB_SEQUENCE_COMPARE_32(request, <, c->in.request_read)) + else if(XCB_SEQUENCE_COMPARE(request, <, c->in.request_read)) { head = _xcb_map_remove(c->in.replies, request); if(head && head->next) @@ -318,7 +318,7 @@ static int poll_for_reply(xcb_connection_t *c, unsigned int request, void **repl } /* We're currently processing the responses to the request we want, and we * have a reply ready to return. So just return it without blocking. */ - else if(XCB_SEQUENCE_COMPARE_32(request, ==, c->in.request_read) && c->in.current_reply) + else if(request == c->in.request_read && c->in.current_reply) { head = c->in.current_reply; c->in.current_reply = head->next; @@ -327,7 +327,7 @@ static int poll_for_reply(xcb_connection_t *c, unsigned int request, void **repl } /* We know this request can't have any more replies, and we've already * established it doesn't have a reply now. Don't bother blocking. */ - else if(XCB_SEQUENCE_COMPARE_32(request, ==, c->in.request_completed)) + else if(request == c->in.request_completed) head = 0; /* We may have more replies on the way for this request: block until we're * sure. */ @@ -356,25 +356,12 @@ static int poll_for_reply(xcb_connection_t *c, unsigned int request, void **repl return 1; } -/* Public interface */ - -void *xcb_wait_for_reply(xcb_connection_t *c, unsigned int request, xcb_generic_error_t **e) +static void *wait_for_reply(xcb_connection_t *c, uint64_t request, xcb_generic_error_t **e) { - uint64_t widened_request; void *ret = 0; - if(e) - *e = 0; - if(c->has_error) - return 0; - - pthread_mutex_lock(&c->iolock); - - widened_request = (c->out.request & UINT64_C(0xffffffff00000000)) | request; - if(widened_request > c->out.request) - widened_request -= UINT64_C(1) << 32; /* If this request has not been written yet, write it. */ - if(c->out.return_socket || _xcb_out_flush_to(c, widened_request)) + if(c->out.return_socket || _xcb_out_flush_to(c, request)) { pthread_cond_t cond = PTHREAD_COND_INITIALIZER; reader_list reader; @@ -382,7 +369,7 @@ void *xcb_wait_for_reply(xcb_connection_t *c, unsigned int request, xcb_generic_ for(prev_reader = &c->in.readers; *prev_reader && - XCB_SEQUENCE_COMPARE_32((*prev_reader)->request, <=, request); + XCB_SEQUENCE_COMPARE((*prev_reader)->request, <=, request); prev_reader = &(*prev_reader)->next) { /* empty */; @@ -398,7 +385,7 @@ void *xcb_wait_for_reply(xcb_connection_t *c, unsigned int request, xcb_generic_ for(prev_reader = &c->in.readers; *prev_reader && - XCB_SEQUENCE_COMPARE_32((*prev_reader)->request, <=, request); + XCB_SEQUENCE_COMPARE((*prev_reader)->request, <=, request); prev_reader = &(*prev_reader)->next) { if(*prev_reader == &reader) @@ -411,6 +398,29 @@ void *xcb_wait_for_reply(xcb_connection_t *c, unsigned int request, xcb_generic_ } _xcb_in_wake_up_next_reader(c); + return ret; +} + +static uint64_t widen(xcb_connection_t *c, unsigned int request) +{ + uint64_t widened_request = (c->out.request & UINT64_C(0xffffffff00000000)) | request; + if(widened_request > c->out.request) + widened_request -= UINT64_C(1) << 32; + return widened_request; +} + +/* Public interface */ + +void *xcb_wait_for_reply(xcb_connection_t *c, unsigned int request, xcb_generic_error_t **e) +{ + void *ret; + if(e) + *e = 0; + if(c->has_error) + return 0; + + pthread_mutex_lock(&c->iolock); + ret = wait_for_reply(c, widen(c, request), e); pthread_mutex_unlock(&c->iolock); return ret; } @@ -436,66 +446,27 @@ static void insert_pending_discard(xcb_connection_t *c, pending_reply **prev_nex c->in.pending_replies_tail = &pend->next; } -static void discard_reply(xcb_connection_t *c, unsigned int request) +static void discard_reply(xcb_connection_t *c, uint64_t request) { - pending_reply *pend = 0; + void *reply; pending_reply **prev_pend; - uint64_t widened_request; - /* We've read requests past the one we want, so if it has replies we have - * them all and they're in the replies map. */ - if(XCB_SEQUENCE_COMPARE_32(request, <, c->in.request_read)) - { - struct reply_list *head; - head = _xcb_map_remove(c->in.replies, request); - while (head) - { - struct reply_list *next = head->next; - free(head->reply); - free(head); - head = next; - } - return; - } - - /* We're currently processing the responses to the request we want, and we - * have a reply ready to return. Free it, and mark the pend to free any further - * replies. */ - if(XCB_SEQUENCE_COMPARE_32(request, ==, c->in.request_read) && c->in.current_reply) - { - struct reply_list *head; - head = c->in.current_reply; - c->in.current_reply = NULL; - c->in.current_reply_tail = &c->in.current_reply; - while (head) - { - struct reply_list *next = head->next; - free(head->reply); - free(head); - head = next; - } - - pend = c->in.pending_replies; - if(pend && - !(XCB_SEQUENCE_COMPARE(pend->first_request, <=, c->in.request_read) && - (pend->workaround == WORKAROUND_EXTERNAL_SOCKET_OWNER || - XCB_SEQUENCE_COMPARE(c->in.request_read, <=, pend->last_request)))) - pend = 0; - if(pend) - pend->flags |= XCB_REQUEST_DISCARD_REPLY; - else - insert_pending_discard(c, &c->in.pending_replies, c->in.request_read); + /* Free any replies or errors that we've already read. Stop if + * xcb_wait_for_reply would block or we've run out of replies. */ + while(poll_for_reply(c, request, &reply, 0) && reply) + free(reply); + /* If we've proven there are no more responses coming, we're done. */ + if(XCB_SEQUENCE_COMPARE(request, <=, c->in.request_completed)) return; - } /* Walk the list of pending requests. Mark the first match for deletion. */ for(prev_pend = &c->in.pending_replies; *prev_pend; prev_pend = &(*prev_pend)->next) { - if(XCB_SEQUENCE_COMPARE_32((*prev_pend)->first_request, >, request)) + if(XCB_SEQUENCE_COMPARE((*prev_pend)->first_request, >, request)) break; - if(XCB_SEQUENCE_COMPARE_32((*prev_pend)->first_request, ==, request)) + if((*prev_pend)->first_request == request) { /* Pending reply found. Mark for discard: */ (*prev_pend)->flags |= XCB_REQUEST_DISCARD_REPLY; @@ -504,11 +475,7 @@ static void discard_reply(xcb_connection_t *c, unsigned int request) } /* Pending reply not found (likely due to _unchecked request). Create one: */ - widened_request = (c->out.request & UINT64_C(0xffffffff00000000)) | request; - if(widened_request > c->out.request) - widened_request -= UINT64_C(1) << 32; - - insert_pending_discard(c, prev_pend, widened_request); + insert_pending_discard(c, prev_pend, request); } void xcb_discard_reply(xcb_connection_t *c, unsigned int sequence) @@ -521,7 +488,7 @@ void xcb_discard_reply(xcb_connection_t *c, unsigned int sequence) return; pthread_mutex_lock(&c->iolock); - discard_reply(c, sequence); + discard_reply(c, widen(c, sequence)); pthread_mutex_unlock(&c->iolock); } @@ -537,7 +504,7 @@ int xcb_poll_for_reply(xcb_connection_t *c, unsigned int request, void **reply, } assert(reply != 0); pthread_mutex_lock(&c->iolock); - ret = poll_for_reply(c, request, reply, error); + ret = poll_for_reply(c, widen(c, request), reply, error); pthread_mutex_unlock(&c->iolock); return ret; } @@ -575,21 +542,22 @@ xcb_generic_event_t *xcb_poll_for_event(xcb_connection_t *c) xcb_generic_error_t *xcb_request_check(xcb_connection_t *c, xcb_void_cookie_t cookie) { - /* FIXME: this could hold the lock to avoid syncing unnecessarily, but - * that would require factoring the locking out of xcb_get_input_focus, - * xcb_get_input_focus_reply, and xcb_wait_for_reply. */ - xcb_generic_error_t *ret; + uint64_t request; + xcb_generic_error_t *ret = 0; void *reply; if(c->has_error) return 0; - if(XCB_SEQUENCE_COMPARE_32(cookie.sequence,>=,c->in.request_expected) - && XCB_SEQUENCE_COMPARE_32(cookie.sequence,>,c->in.request_completed)) + pthread_mutex_lock(&c->iolock); + request = widen(c, cookie.sequence); + if(XCB_SEQUENCE_COMPARE(request, >=, c->in.request_expected) + && XCB_SEQUENCE_COMPARE(request, >, c->in.request_completed)) { - free(xcb_get_input_focus_reply(c, xcb_get_input_focus(c), &ret)); - assert(!ret); + _xcb_out_send_sync(c); + _xcb_out_flush_to(c, c->out.request); } - reply = xcb_wait_for_reply(c, cookie.sequence, &ret); + reply = wait_for_reply(c, request, &ret); assert(!reply); + pthread_mutex_unlock(&c->iolock); return ret; } diff --git a/libxcb/src/xcb_out.c b/libxcb/src/xcb_out.c index fbce7a0ea..4f27de116 100644 --- a/libxcb/src/xcb_out.c +++ b/libxcb/src/xcb_out.c @@ -1,362 +1,372 @@ -/* Copyright (C) 2001-2004 Bart Massey and Jamey Sharp. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Except as contained in this notice, the names of the authors or their - * institutions shall not be used in advertising or otherwise to promote the - * sale, use or other dealings in this Software without prior written - * authorization from the authors. - */ - -/* Stuff that sends stuff to the server. */ - -#include -#include -#include -#include - -#include "xcb.h" -#include "xcbext.h" -#include "xcbint.h" -#include "bigreq.h" - -static int write_block(xcb_connection_t *c, struct iovec *vector, int count) -{ - while(count && c->out.queue_len + vector[0].iov_len <= sizeof(c->out.queue)) - { - memcpy(c->out.queue + c->out.queue_len, vector[0].iov_base, vector[0].iov_len); - c->out.queue_len += vector[0].iov_len; - vector[0].iov_base = (char *) vector[0].iov_base + vector[0].iov_len; - vector[0].iov_len = 0; - ++vector, --count; - } - if(!count) - return 1; - - --vector, ++count; - vector[0].iov_base = c->out.queue; - vector[0].iov_len = c->out.queue_len; - c->out.queue_len = 0; - return _xcb_out_send(c, vector, count); -} - -static void get_socket_back(xcb_connection_t *c) -{ - while(c->out.return_socket && c->out.socket_moving) - pthread_cond_wait(&c->out.socket_cond, &c->iolock); - if(!c->out.return_socket) - return; - - c->out.socket_moving = 1; - pthread_mutex_unlock(&c->iolock); - c->out.return_socket(c->out.socket_closure); - pthread_mutex_lock(&c->iolock); - c->out.socket_moving = 0; - - pthread_cond_broadcast(&c->out.socket_cond); - c->out.return_socket = 0; - c->out.socket_closure = 0; - _xcb_in_replies_done(c); -} - -/* Public interface */ - -void xcb_prefetch_maximum_request_length(xcb_connection_t *c) -{ - if(c->has_error) - return; - pthread_mutex_lock(&c->out.reqlenlock); - if(c->out.maximum_request_length_tag == LAZY_NONE) - { - const xcb_query_extension_reply_t *ext; - ext = xcb_get_extension_data(c, &xcb_big_requests_id); - if(ext && ext->present) - { - c->out.maximum_request_length_tag = LAZY_COOKIE; - c->out.maximum_request_length.cookie = xcb_big_requests_enable(c); - } - else - { - c->out.maximum_request_length_tag = LAZY_FORCED; - c->out.maximum_request_length.value = c->setup->maximum_request_length; - } - } - pthread_mutex_unlock(&c->out.reqlenlock); -} - -uint32_t xcb_get_maximum_request_length(xcb_connection_t *c) -{ - if(c->has_error) - return 0; - xcb_prefetch_maximum_request_length(c); - pthread_mutex_lock(&c->out.reqlenlock); - if(c->out.maximum_request_length_tag == LAZY_COOKIE) - { - xcb_big_requests_enable_reply_t *r = xcb_big_requests_enable_reply(c, c->out.maximum_request_length.cookie, 0); - c->out.maximum_request_length_tag = LAZY_FORCED; - if(r) - { - c->out.maximum_request_length.value = r->maximum_request_length; - free(r); - } - else - c->out.maximum_request_length.value = c->setup->maximum_request_length; - } - pthread_mutex_unlock(&c->out.reqlenlock); - return c->out.maximum_request_length.value; -} - -unsigned int xcb_send_request(xcb_connection_t *c, int flags, struct iovec *vector, const xcb_protocol_request_t *req) -{ - static const union { - struct { - uint8_t major; - uint8_t pad; - uint16_t len; - } fields; - uint32_t packet; - } sync_req = { { /* GetInputFocus */ 43, 0, 1 } }; - uint64_t request; - uint32_t prefix[3] = { 0 }; - int veclen = req->count; - enum workarounds workaround = WORKAROUND_NONE; - - if(c->has_error) - return 0; - - assert(c != 0); - assert(vector != 0); - assert(req->count > 0); - - if(!(flags & XCB_REQUEST_RAW)) - { - static const char pad[3]; - unsigned int i; - uint16_t shortlen = 0; - size_t longlen = 0; - assert(vector[0].iov_len >= 4); - /* set the major opcode, and the minor opcode for extensions */ - if(req->ext) - { - const xcb_query_extension_reply_t *extension = xcb_get_extension_data(c, req->ext); - if(!(extension && extension->present)) - { - _xcb_conn_shutdown(c); - return 0; - } - ((uint8_t *) vector[0].iov_base)[0] = extension->major_opcode; - ((uint8_t *) vector[0].iov_base)[1] = req->opcode; - } - else - ((uint8_t *) vector[0].iov_base)[0] = req->opcode; - - /* put together the length field, possibly using BIGREQUESTS */ - for(i = 0; i < req->count; ++i) - { - longlen += vector[i].iov_len; - if(!vector[i].iov_base) - { - vector[i].iov_base = (char *) pad; - assert(vector[i].iov_len <= sizeof(pad)); - } - } - assert((longlen & 3) == 0); - longlen >>= 2; - - if(longlen <= c->setup->maximum_request_length) - { - /* we don't need BIGREQUESTS. */ - shortlen = longlen; - longlen = 0; - } - else if(longlen > xcb_get_maximum_request_length(c)) - { - _xcb_conn_shutdown(c); - return 0; /* server can't take this; maybe need BIGREQUESTS? */ - } - - /* set the length field. */ - ((uint16_t *) vector[0].iov_base)[1] = shortlen; - if(!shortlen) - prefix[2] = ++longlen; - } - flags &= ~XCB_REQUEST_RAW; - - /* do we need to work around the X server bug described in glx.xml? */ - /* XXX: GetFBConfigs won't use BIG-REQUESTS in any sane - * configuration, but that should be handled here anyway. */ - if(req->ext && !req->isvoid && !strcmp(req->ext->name, "GLX") && - ((req->opcode == 17 && ((uint32_t *) vector[0].iov_base)[1] == 0x10004) || - req->opcode == 21)) - workaround = WORKAROUND_GLX_GET_FB_CONFIGS_BUG; - - /* get a sequence number and arrange for delivery. */ - pthread_mutex_lock(&c->iolock); - /* wait for other writing threads to get out of my way. */ - while(c->out.writing) - pthread_cond_wait(&c->out.cond, &c->iolock); - get_socket_back(c); - - request = ++c->out.request; - /* send GetInputFocus (sync_req) when 64k-2 requests have been sent without - * a reply. - * Also send sync_req (could use NoOp) at 32-bit wrap to avoid having - * applications see sequence 0 as that is used to indicate - * an error in sending the request */ - while((req->isvoid && - c->out.request == c->in.request_expected + (1 << 16) - 1) || - request == 0) - { - prefix[0] = sync_req.packet; - _xcb_in_expect_reply(c, request, WORKAROUND_NONE, XCB_REQUEST_DISCARD_REPLY); - c->in.request_expected = c->out.request; - request = ++c->out.request; - } - - if(workaround != WORKAROUND_NONE || flags != 0) - _xcb_in_expect_reply(c, request, workaround, flags); - if(!req->isvoid) - c->in.request_expected = c->out.request; - - if(prefix[0] || prefix[2]) - { - --vector, ++veclen; - if(prefix[2]) - { - prefix[1] = ((uint32_t *) vector[1].iov_base)[0]; - vector[1].iov_base = (uint32_t *) vector[1].iov_base + 1; - vector[1].iov_len -= sizeof(uint32_t); - } - vector[0].iov_len = sizeof(uint32_t) * ((prefix[0] ? 1 : 0) + (prefix[2] ? 2 : 0)); - vector[0].iov_base = prefix + !prefix[0]; - } - - if(!write_block(c, vector, veclen)) - { - _xcb_conn_shutdown(c); - request = 0; - } - pthread_mutex_unlock(&c->iolock); - return request; -} - -int xcb_take_socket(xcb_connection_t *c, void (*return_socket)(void *closure), void *closure, int flags, uint64_t *sent) -{ - int ret; - if(c->has_error) - return 0; - pthread_mutex_lock(&c->iolock); - get_socket_back(c); - ret = _xcb_out_flush_to(c, c->out.request); - if(ret) - { - c->out.return_socket = return_socket; - c->out.socket_closure = closure; - if(flags) - _xcb_in_expect_reply(c, c->out.request, WORKAROUND_EXTERNAL_SOCKET_OWNER, flags); - assert(c->out.request == c->out.request_written); - *sent = c->out.request; - } - pthread_mutex_unlock(&c->iolock); - return ret; -} - -int xcb_writev(xcb_connection_t *c, struct iovec *vector, int count, uint64_t requests) -{ - int ret; - if(c->has_error) - return 0; - pthread_mutex_lock(&c->iolock); - c->out.request += requests; - ret = _xcb_out_send(c, vector, count); - pthread_mutex_unlock(&c->iolock); - return ret; -} - -int xcb_flush(xcb_connection_t *c) -{ - int ret; - if(c->has_error) - return 0; - pthread_mutex_lock(&c->iolock); - ret = _xcb_out_flush_to(c, c->out.request); - pthread_mutex_unlock(&c->iolock); - return ret; -} - -/* Private interface */ - -int _xcb_out_init(_xcb_out *out) -{ - if(pthread_cond_init(&out->socket_cond, 0)) - return 0; - out->return_socket = 0; - out->socket_closure = 0; - out->socket_moving = 0; - - if(pthread_cond_init(&out->cond, 0)) - return 0; - out->writing = 0; - - out->queue_len = 0; - - out->request = 0; - out->request_written = 0; - - if(pthread_mutex_init(&out->reqlenlock, 0)) - return 0; - out->maximum_request_length_tag = LAZY_NONE; - - return 1; -} - -void _xcb_out_destroy(_xcb_out *out) -{ - pthread_cond_destroy(&out->cond); - pthread_mutex_destroy(&out->reqlenlock); -} - -int _xcb_out_send(xcb_connection_t *c, struct iovec *vector, int count) -{ - int ret = 1; - while(ret && count) - ret = _xcb_conn_wait(c, &c->out.cond, &vector, &count); - c->out.request_written = c->out.request; - pthread_cond_broadcast(&c->out.cond); - _xcb_in_wake_up_next_reader(c); - return ret; -} - -int _xcb_out_flush_to(xcb_connection_t *c, uint64_t request) -{ - assert(XCB_SEQUENCE_COMPARE(request, <=, c->out.request)); - if(XCB_SEQUENCE_COMPARE(c->out.request_written, >=, request)) - return 1; - if(c->out.queue_len) - { - struct iovec vec; - vec.iov_base = c->out.queue; - vec.iov_len = c->out.queue_len; - c->out.queue_len = 0; - return _xcb_out_send(c, &vec, 1); - } - while(c->out.writing) - pthread_cond_wait(&c->out.cond, &c->iolock); - assert(XCB_SEQUENCE_COMPARE(c->out.request_written, >=, request)); - return 1; -} +/* Copyright (C) 2001-2004 Bart Massey and Jamey Sharp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the names of the authors or their + * institutions shall not be used in advertising or otherwise to promote the + * sale, use or other dealings in this Software without prior written + * authorization from the authors. + */ + +/* Stuff that sends stuff to the server. */ + +#include +#include +#include +#include + +#include "xcb.h" +#include "xcbext.h" +#include "xcbint.h" +#include "bigreq.h" + +static inline void send_request(xcb_connection_t *c, int isvoid, enum workarounds workaround, int flags, struct iovec *vector, int count) +{ + if(c->has_error) + return; + + ++c->out.request; + if(!isvoid) + c->in.request_expected = c->out.request; + if(workaround != WORKAROUND_NONE || flags != 0) + _xcb_in_expect_reply(c, c->out.request, workaround, flags); + + while(count && c->out.queue_len + vector[0].iov_len <= sizeof(c->out.queue)) + { + memcpy(c->out.queue + c->out.queue_len, vector[0].iov_base, vector[0].iov_len); + c->out.queue_len += vector[0].iov_len; + vector[0].iov_base = (char *) vector[0].iov_base + vector[0].iov_len; + vector[0].iov_len = 0; + ++vector, --count; + } + if(!count) + return; + + --vector, ++count; + vector[0].iov_base = c->out.queue; + vector[0].iov_len = c->out.queue_len; + c->out.queue_len = 0; + _xcb_out_send(c, vector, count); +} + +static void send_sync(xcb_connection_t *c) +{ + static const union { + struct { + uint8_t major; + uint8_t pad; + uint16_t len; + } fields; + uint32_t packet; + } sync_req = { { /* GetInputFocus */ 43, 0, 1 } }; + struct iovec vector[2]; + vector[1].iov_base = (char *) &sync_req; + vector[1].iov_len = sizeof(sync_req); + send_request(c, 0, WORKAROUND_NONE, XCB_REQUEST_DISCARD_REPLY, vector + 1, 1); +} + +static void get_socket_back(xcb_connection_t *c) +{ + while(c->out.return_socket && c->out.socket_moving) + pthread_cond_wait(&c->out.socket_cond, &c->iolock); + if(!c->out.return_socket) + return; + + c->out.socket_moving = 1; + pthread_mutex_unlock(&c->iolock); + c->out.return_socket(c->out.socket_closure); + pthread_mutex_lock(&c->iolock); + c->out.socket_moving = 0; + + pthread_cond_broadcast(&c->out.socket_cond); + c->out.return_socket = 0; + c->out.socket_closure = 0; + _xcb_in_replies_done(c); +} + +/* Public interface */ + +void xcb_prefetch_maximum_request_length(xcb_connection_t *c) +{ + if(c->has_error) + return; + pthread_mutex_lock(&c->out.reqlenlock); + if(c->out.maximum_request_length_tag == LAZY_NONE) + { + const xcb_query_extension_reply_t *ext; + ext = xcb_get_extension_data(c, &xcb_big_requests_id); + if(ext && ext->present) + { + c->out.maximum_request_length_tag = LAZY_COOKIE; + c->out.maximum_request_length.cookie = xcb_big_requests_enable(c); + } + else + { + c->out.maximum_request_length_tag = LAZY_FORCED; + c->out.maximum_request_length.value = c->setup->maximum_request_length; + } + } + pthread_mutex_unlock(&c->out.reqlenlock); +} + +uint32_t xcb_get_maximum_request_length(xcb_connection_t *c) +{ + if(c->has_error) + return 0; + xcb_prefetch_maximum_request_length(c); + pthread_mutex_lock(&c->out.reqlenlock); + if(c->out.maximum_request_length_tag == LAZY_COOKIE) + { + xcb_big_requests_enable_reply_t *r = xcb_big_requests_enable_reply(c, c->out.maximum_request_length.cookie, 0); + c->out.maximum_request_length_tag = LAZY_FORCED; + if(r) + { + c->out.maximum_request_length.value = r->maximum_request_length; + free(r); + } + else + c->out.maximum_request_length.value = c->setup->maximum_request_length; + } + pthread_mutex_unlock(&c->out.reqlenlock); + return c->out.maximum_request_length.value; +} + +unsigned int xcb_send_request(xcb_connection_t *c, int flags, struct iovec *vector, const xcb_protocol_request_t *req) +{ + uint64_t request; + uint32_t prefix[2]; + int veclen = req->count; + enum workarounds workaround = WORKAROUND_NONE; + + if(c->has_error) + return 0; + + assert(c != 0); + assert(vector != 0); + assert(req->count > 0); + + if(!(flags & XCB_REQUEST_RAW)) + { + static const char pad[3]; + unsigned int i; + uint16_t shortlen = 0; + size_t longlen = 0; + assert(vector[0].iov_len >= 4); + /* set the major opcode, and the minor opcode for extensions */ + if(req->ext) + { + const xcb_query_extension_reply_t *extension = xcb_get_extension_data(c, req->ext); + if(!(extension && extension->present)) + { + _xcb_conn_shutdown(c); + return 0; + } + ((uint8_t *) vector[0].iov_base)[0] = extension->major_opcode; + ((uint8_t *) vector[0].iov_base)[1] = req->opcode; + } + else + ((uint8_t *) vector[0].iov_base)[0] = req->opcode; + + /* put together the length field, possibly using BIGREQUESTS */ + for(i = 0; i < req->count; ++i) + { + longlen += vector[i].iov_len; + if(!vector[i].iov_base) + { + vector[i].iov_base = (char *) pad; + assert(vector[i].iov_len <= sizeof(pad)); + } + } + assert((longlen & 3) == 0); + longlen >>= 2; + + if(longlen <= c->setup->maximum_request_length) + { + /* we don't need BIGREQUESTS. */ + shortlen = longlen; + longlen = 0; + } + else if(longlen > xcb_get_maximum_request_length(c)) + { + _xcb_conn_shutdown(c); + return 0; /* server can't take this; maybe need BIGREQUESTS? */ + } + + /* set the length field. */ + ((uint16_t *) vector[0].iov_base)[1] = shortlen; + if(!shortlen) + { + prefix[0] = ((uint32_t *) vector[0].iov_base)[0]; + prefix[1] = ++longlen; + vector[0].iov_base = (uint32_t *) vector[0].iov_base + 1; + vector[0].iov_len -= sizeof(uint32_t); + --vector, ++veclen; + vector[0].iov_base = prefix; + vector[0].iov_len = sizeof(prefix); + } + } + flags &= ~XCB_REQUEST_RAW; + + /* do we need to work around the X server bug described in glx.xml? */ + /* XXX: GetFBConfigs won't use BIG-REQUESTS in any sane + * configuration, but that should be handled here anyway. */ + if(req->ext && !req->isvoid && !strcmp(req->ext->name, "GLX") && + ((req->opcode == 17 && ((uint32_t *) vector[0].iov_base)[1] == 0x10004) || + req->opcode == 21)) + workaround = WORKAROUND_GLX_GET_FB_CONFIGS_BUG; + + /* get a sequence number and arrange for delivery. */ + pthread_mutex_lock(&c->iolock); + /* wait for other writing threads to get out of my way. */ + while(c->out.writing) + pthread_cond_wait(&c->out.cond, &c->iolock); + get_socket_back(c); + + /* send GetInputFocus (sync_req) when 64k-2 requests have been sent without + * a reply. */ + if(req->isvoid && c->out.request == c->in.request_expected + (1 << 16) - 2) + send_sync(c); + /* Also send sync_req (could use NoOp) at 32-bit wrap to avoid having + * applications see sequence 0 as that is used to indicate + * an error in sending the request */ + if((unsigned int) (c->out.request + 1) == 0) + send_sync(c); + + /* The above send_sync calls could drop the I/O lock, but this + * thread will still exclude any other thread that tries to write, + * so the sequence number postconditions still hold. */ + send_request(c, req->isvoid, workaround, flags, vector, veclen); + request = c->has_error ? 0 : c->out.request; + pthread_mutex_unlock(&c->iolock); + return request; +} + +int xcb_take_socket(xcb_connection_t *c, void (*return_socket)(void *closure), void *closure, int flags, uint64_t *sent) +{ + int ret; + if(c->has_error) + return 0; + pthread_mutex_lock(&c->iolock); + get_socket_back(c); + ret = _xcb_out_flush_to(c, c->out.request); + if(ret) + { + c->out.return_socket = return_socket; + c->out.socket_closure = closure; + if(flags) + _xcb_in_expect_reply(c, c->out.request, WORKAROUND_EXTERNAL_SOCKET_OWNER, flags); + assert(c->out.request == c->out.request_written); + *sent = c->out.request; + } + pthread_mutex_unlock(&c->iolock); + return ret; +} + +int xcb_writev(xcb_connection_t *c, struct iovec *vector, int count, uint64_t requests) +{ + int ret; + if(c->has_error) + return 0; + pthread_mutex_lock(&c->iolock); + c->out.request += requests; + ret = _xcb_out_send(c, vector, count); + pthread_mutex_unlock(&c->iolock); + return ret; +} + +int xcb_flush(xcb_connection_t *c) +{ + int ret; + if(c->has_error) + return 0; + pthread_mutex_lock(&c->iolock); + ret = _xcb_out_flush_to(c, c->out.request); + pthread_mutex_unlock(&c->iolock); + return ret; +} + +/* Private interface */ + +int _xcb_out_init(_xcb_out *out) +{ + if(pthread_cond_init(&out->socket_cond, 0)) + return 0; + out->return_socket = 0; + out->socket_closure = 0; + out->socket_moving = 0; + + if(pthread_cond_init(&out->cond, 0)) + return 0; + out->writing = 0; + + out->queue_len = 0; + + out->request = 0; + out->request_written = 0; + + if(pthread_mutex_init(&out->reqlenlock, 0)) + return 0; + out->maximum_request_length_tag = LAZY_NONE; + + return 1; +} + +void _xcb_out_destroy(_xcb_out *out) +{ + pthread_cond_destroy(&out->cond); + pthread_mutex_destroy(&out->reqlenlock); +} + +int _xcb_out_send(xcb_connection_t *c, struct iovec *vector, int count) +{ + int ret = 1; + while(ret && count) + ret = _xcb_conn_wait(c, &c->out.cond, &vector, &count); + c->out.request_written = c->out.request; + pthread_cond_broadcast(&c->out.cond); + _xcb_in_wake_up_next_reader(c); + return ret; +} + +void _xcb_out_send_sync(xcb_connection_t *c) +{ + /* wait for other writing threads to get out of my way. */ + while(c->out.writing) + pthread_cond_wait(&c->out.cond, &c->iolock); + get_socket_back(c); + send_sync(c); +} + +int _xcb_out_flush_to(xcb_connection_t *c, uint64_t request) +{ + assert(XCB_SEQUENCE_COMPARE(request, <=, c->out.request)); + if(XCB_SEQUENCE_COMPARE(c->out.request_written, >=, request)) + return 1; + if(c->out.queue_len) + { + struct iovec vec; + vec.iov_base = c->out.queue; + vec.iov_len = c->out.queue_len; + c->out.queue_len = 0; + return _xcb_out_send(c, &vec, 1); + } + while(c->out.writing) + pthread_cond_wait(&c->out.cond, &c->iolock); + assert(XCB_SEQUENCE_COMPARE(c->out.request_written, >=, request)); + return 1; +} diff --git a/libxcb/src/xcbint.h b/libxcb/src/xcbint.h index 6613433ce..5950823f0 100644 --- a/libxcb/src/xcbint.h +++ b/libxcb/src/xcbint.h @@ -54,7 +54,6 @@ enum lazy_reply_tag #define XCB_PAD(i) (-(i) & 3) #define XCB_SEQUENCE_COMPARE(a,op,b) ((int64_t) ((a) - (b)) op 0) -#define XCB_SEQUENCE_COMPARE_32(a,op,b) (((int) (a) - (int) (b)) op 0) #ifndef offsetof #define offsetof(type,member) ((size_t) &((type *)0)->member) @@ -107,6 +106,7 @@ int _xcb_out_init(_xcb_out *out); void _xcb_out_destroy(_xcb_out *out); int _xcb_out_send(xcb_connection_t *c, struct iovec *vector, int count); +void _xcb_out_send_sync(xcb_connection_t *c); int _xcb_out_flush_to(xcb_connection_t *c, uint64_t request); diff --git a/mesalib/configs/linux-llvm b/mesalib/configs/linux-llvm index e6999539a..cd4730d67 100644 --- a/mesalib/configs/linux-llvm +++ b/mesalib/configs/linux-llvm @@ -1,44 +1,44 @@ -# -*-makefile-*- -# Configuration for Linux and LLVM with optimizations -# Builds the llvmpipe gallium driver - -include $(TOP)/configs/linux - -CONFIG_NAME = linux-llvm - -# Add llvmpipe driver -GALLIUM_DRIVERS_DIRS += llvmpipe - -OPT_FLAGS = -O3 -ansi -pedantic -ARCH_FLAGS = -mmmx -msse -msse2 -mstackrealign - -DEFINES += -DNDEBUG -DGALLIUM_LLVMPIPE -DHAVE_UDIS86 - -# override -std=c99 -CFLAGS += -std=gnu99 - -LLVM_VERSION := $(shell llvm-config --version) - -ifeq ($(LLVM_VERSION),) - $(warning Could not find LLVM! Make Sure 'llvm-config' is in the path) - MESA_LLVM=0 -else - MESA_LLVM=1 - HAVE_LLVM := 0x0$(subst .,0,$(LLVM_VERSION:svn=)) - DEFINES += -DHAVE_LLVM=$(HAVE_LLVM) -# $(info Using LLVM version: $(LLVM_VERSION)) -endif - -ifeq ($(MESA_LLVM),1) - LLVM_CFLAGS=`llvm-config --cppflags` - LLVM_CXXFLAGS=`llvm-config --cxxflags backend bitreader engine ipo interpreter instrumentation` -Wno-long-long - LLVM_LDFLAGS = $(shell llvm-config --ldflags backend bitreader engine ipo interpreter instrumentation) - LLVM_LIBS = $(shell llvm-config --libs backend bitwriter bitreader engine ipo interpreter instrumentation) - MKLIB_OPTIONS=-cplusplus -else - LLVM_CFLAGS= - LLVM_CXXFLAGS= -endif - -LD = g++ -GL_LIB_DEPS = $(LLVM_LDFLAGS) $(LLVM_LIBS) $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread -lstdc++ -ludis86 +# -*-makefile-*- +# Configuration for Linux and LLVM with optimizations +# Builds the llvmpipe gallium driver + +include $(TOP)/configs/linux + +CONFIG_NAME = linux-llvm + +# Add llvmpipe driver +GALLIUM_DRIVERS_DIRS += llvmpipe + +OPT_FLAGS = -O3 -ansi -pedantic +ARCH_FLAGS = -mmmx -msse -msse2 -mstackrealign + +DEFINES += -DNDEBUG -DGALLIUM_LLVMPIPE + +# override -std=c99 +CFLAGS += -std=gnu99 + +LLVM_VERSION := $(shell llvm-config --version) + +ifeq ($(LLVM_VERSION),) + $(warning Could not find LLVM! Make Sure 'llvm-config' is in the path) + MESA_LLVM=0 +else + MESA_LLVM=1 + HAVE_LLVM := 0x0$(subst .,0,$(LLVM_VERSION:svn=)) + DEFINES += -DHAVE_LLVM=$(HAVE_LLVM) +# $(info Using LLVM version: $(LLVM_VERSION)) +endif + +ifeq ($(MESA_LLVM),1) + LLVM_CFLAGS=`llvm-config --cppflags` + LLVM_CXXFLAGS=`llvm-config --cxxflags backend bitreader engine ipo interpreter instrumentation` -Wno-long-long + LLVM_LDFLAGS = $(shell llvm-config --ldflags backend bitreader engine ipo interpreter instrumentation) + LLVM_LIBS = $(shell llvm-config --libs backend bitwriter bitreader engine ipo interpreter instrumentation) + MKLIB_OPTIONS=-cplusplus +else + LLVM_CFLAGS= + LLVM_CXXFLAGS= +endif + +LD = g++ +GL_LIB_DEPS = $(LLVM_LDFLAGS) $(LLVM_LIBS) $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread -lstdc++ diff --git a/mesalib/configure.ac b/mesalib/configure.ac index d33ca301f..7b8010a76 100644 --- a/mesalib/configure.ac +++ b/mesalib/configure.ac @@ -1,1913 +1,1907 @@ -dnl Process this file with autoconf to create configure. - -AC_PREREQ([2.59]) - -dnl Versioning - scrape the version from configs/default -m4_define([mesa_version], - [m4_esyscmd([${MAKE-make} -s -f bin/version.mk version | tr -d '\n' | tr -d '\r'])]) -m4_ifval(mesa_version,, - [m4_fatal([Failed to get the Mesa version from `make -f bin/version.mk version`])]) - -dnl Tell the user about autoconf.html in the --help output -m4_divert_once([HELP_END], [ -See docs/autoconf.html for more details on the options for Mesa.]) - -AC_INIT([Mesa],[mesa_version], - [https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa]) -AC_CONFIG_AUX_DIR([bin]) -AC_CANONICAL_HOST - -dnl Versions for external dependencies -LIBDRM_REQUIRED=2.4.24 -LIBDRM_RADEON_REQUIRED=2.4.24 -LIBDRM_INTEL_REQUIRED=2.4.24 -DRI2PROTO_REQUIRED=2.1 -GLPROTO_REQUIRED=1.4.11 -LIBDRM_XORG_REQUIRED=2.4.24 -LIBKMS_XORG_REQUIRED=1.0.0 - -dnl Check for progs -AC_PROG_CPP -AC_PROG_CC -AC_PROG_CXX -AC_CHECK_PROGS([MAKE], [gmake make]) -AC_CHECK_PROGS([PYTHON2], [python2 python]) -AC_PATH_PROG([MKDEP], [makedepend]) -AC_PATH_PROG([SED], [sed]) - -if test "x$MKDEP" = "x"; then - AC_MSG_ERROR([makedepend is required to build Mesa]) -fi - -dnl Our fallback install-sh is a symlink to minstall. Use the existing -dnl configuration in that case. -AC_PROG_INSTALL -test "x$INSTALL" = "x$ac_install_sh" && INSTALL='$(MINSTALL)' - -dnl We need a POSIX shell for parts of the build. Assume we have one -dnl in most cases. -case "$host_os" in -solaris*) - # Solaris /bin/sh is too old/non-POSIX compliant - AC_PATH_PROGS(POSIX_SHELL, [ksh93 ksh sh]) - SHELL="$POSIX_SHELL" - ;; -esac - -dnl clang is mostly GCC-compatible, but its version is much lower, -dnl so we have to check for it. -AC_MSG_CHECKING([if compiling with clang]) - -AC_COMPILE_IFELSE( -[AC_LANG_PROGRAM([], [[ -#ifndef __clang__ - not clang -#endif -]])], -[CLANG=yes], [CLANG=no]) - -AC_MSG_RESULT([$CLANG]) - -dnl If we're using GCC, make sure that it is at least version 3.3.0. Older -dnl versions are explictly not supported. -if test "x$GCC" = xyes -a "x$CLANG" = xno; then - AC_MSG_CHECKING([whether gcc version is sufficient]) - major=0 - minor=0 - - GCC_VERSION=`$CC -dumpversion` - if test $? -eq 0; then - major=`echo $GCC_VERSION | cut -d. -f1` - minor=`echo $GCC_VERSION | cut -d. -f1` - fi - - if test $major -lt 3 -o $major -eq 3 -a $minor -lt 3 ; then - AC_MSG_RESULT([no]) - AC_MSG_ERROR([If using GCC, version 3.3.0 or later is required.]) - else - AC_MSG_RESULT([yes]) - fi -fi - - -MKDEP_OPTIONS=-fdepend -dnl Ask gcc where it's keeping its secret headers -if test "x$GCC" = xyes; then - for dir in include include-fixed; do - GCC_INCLUDES=`$CC -print-file-name=$dir` - if test "x$GCC_INCLUDES" != x && \ - test "$GCC_INCLUDES" != "$dir" && \ - test -d "$GCC_INCLUDES"; then - MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES" - fi - done -fi -AC_SUBST([MKDEP_OPTIONS]) - -dnl Make sure the pkg-config macros are defined -m4_ifndef([PKG_PROG_PKG_CONFIG], - [m4_fatal([Could not locate the pkg-config autoconf macros. - These are usually located in /usr/share/aclocal/pkg.m4. If your macros - are in a different location, try setting the environment variable - ACLOCAL="aclocal -I/other/macro/dir" before running autoreconf.])]) -PKG_PROG_PKG_CONFIG() - -dnl LIB_DIR - library basename -LIB_DIR=`echo $libdir | $SED 's%.*/%%'` -AC_SUBST([LIB_DIR]) - -dnl Cache LDFLAGS so we can add EXTRA_LIB_PATH and restore it later -_SAVE_LDFLAGS="$LDFLAGS" -AC_ARG_VAR([EXTRA_LIB_PATH],[Extra -L paths for the linker]) -AC_SUBST([EXTRA_LIB_PATH]) - -dnl Cache CPPFLAGS so we can add *_INCLUDES and restore it later -_SAVE_CPPFLAGS="$CPPFLAGS" -AC_ARG_VAR([X11_INCLUDES],[Extra -I paths for X11 headers]) -AC_SUBST([X11_INCLUDES]) - -dnl Compiler macros -DEFINES="" -AC_SUBST([DEFINES]) -case "$host_os" in -linux*|*-gnu*|gnu*) - DEFINES="$DEFINES -D_GNU_SOURCE -DPTHREADS" - ;; -solaris*) - DEFINES="$DEFINES -DPTHREADS -DSVR4" - ;; -cygwin*) - DEFINES="$DEFINES -DPTHREADS" - ;; -esac - -dnl Add flags for gcc and g++ -if test "x$GCC" = xyes; then - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -std=c99" - if test "x$CLANG" = "xno"; then - CFLAGS="$CFLAGS -ffast-math" - fi - - # Enable -fvisibility=hidden if using a gcc that supports it - save_CFLAGS="$CFLAGS" - AC_MSG_CHECKING([whether $CC supports -fvisibility=hidden]) - VISIBILITY_CFLAGS="-fvisibility=hidden" - CFLAGS="$CFLAGS $VISIBILITY_CFLAGS" - AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]), - [VISIBILITY_CFLAGS=""; AC_MSG_RESULT([no])]); - - # Restore CFLAGS; VISIBILITY_CFLAGS are added to it where needed. - CFLAGS=$save_CFLAGS - - # Work around aliasing bugs - developers should comment this out - CFLAGS="$CFLAGS -fno-strict-aliasing" -fi -if test "x$GXX" = xyes; then - CXXFLAGS="$CXXFLAGS -Wall" - - # Enable -fvisibility=hidden if using a gcc that supports it - save_CXXFLAGS="$CXXFLAGS" - AC_MSG_CHECKING([whether $CXX supports -fvisibility=hidden]) - VISIBILITY_CXXFLAGS="-fvisibility=hidden" - CXXFLAGS="$CXXFLAGS $VISIBILITY_CXXFLAGS" - AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]), - [VISIBILITY_CXXFLAGS="" ; AC_MSG_RESULT([no])]); - - # Restore CXXFLAGS; VISIBILITY_CXXFLAGS are added to it where needed. - CXXFLAGS=$save_CXXFLAGS - - # Work around aliasing bugs - developers should comment this out - CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" -fi - -AC_SUBST([VISIBILITY_CFLAGS]) -AC_SUBST([VISIBILITY_CXXFLAGS]) - -dnl These should be unnecessary, but let the user set them if they want -AC_ARG_VAR([OPT_FLAGS], [Additional optimization flags for the compiler. - Default is to use CFLAGS.]) -AC_ARG_VAR([ARCH_FLAGS], [Additional architecture specific flags for the - compiler. Default is to use CFLAGS.]) -AC_SUBST([OPT_FLAGS]) -AC_SUBST([ARCH_FLAGS]) - -dnl -dnl Hacks to enable 32 or 64 bit build -dnl -AC_ARG_ENABLE([32-bit], - [AS_HELP_STRING([--enable-32-bit], - [build 32-bit libraries @<:@default=auto@:>@])], - [enable_32bit="$enableval"], - [enable_32bit=auto] -) -if test "x$enable_32bit" = xyes; then - if test "x$GCC" = xyes; then - CFLAGS="$CFLAGS -m32" - ARCH_FLAGS="$ARCH_FLAGS -m32" - fi - if test "x$GXX" = xyes; then - CXXFLAGS="$CXXFLAGS -m32" - fi -fi -AC_ARG_ENABLE([64-bit], - [AS_HELP_STRING([--enable-64-bit], - [build 64-bit libraries @<:@default=auto@:>@])], - [enable_64bit="$enableval"], - [enable_64bit=auto] -) -if test "x$enable_64bit" = xyes; then - if test "x$GCC" = xyes; then - CFLAGS="$CFLAGS -m64" - fi - if test "x$GXX" = xyes; then - CXXFLAGS="$CXXFLAGS -m64" - fi -fi - -dnl -dnl shared/static libraries, mimic libtool options -dnl -AC_ARG_ENABLE([static], - [AS_HELP_STRING([--enable-static], - [build static libraries @<:@default=disabled@:>@])], - [enable_static="$enableval"], - [enable_static=no] -) -case "x$enable_static" in -xyes|xno ) ;; -x ) enable_static=no ;; -* ) - AC_MSG_ERROR([Static library option '$enable_static' is not a valid]) - ;; -esac -AC_ARG_ENABLE([shared], - [AS_HELP_STRING([--disable-shared], - [build shared libraries @<:@default=enabled@:>@])], - [enable_shared="$enableval"], - [enable_shared=yes] -) -case "x$enable_shared" in -xyes|xno ) ;; -x ) enable_shared=yes ;; -* ) - AC_MSG_ERROR([Shared library option '$enable_shared' is not a valid]) - ;; -esac - -dnl Can't have static and shared libraries, default to static if user -dnl explicitly requested. If both disabled, set to static since shared -dnl was explicitly requirested. -case "x$enable_static$enable_shared" in -xyesyes ) - AC_MSG_WARN([Can't build static and shared libraries, disabling shared]) - enable_shared=no - ;; -xnono ) - AC_MSG_WARN([Can't disable both static and shared libraries, enabling static]) - enable_static=yes - ;; -esac - -dnl -dnl mklib options -dnl -AC_ARG_VAR([MKLIB_OPTIONS],[Options for the Mesa library script, mklib]) -if test "$enable_static" = yes; then - MKLIB_OPTIONS="$MKLIB_OPTIONS -static" -fi -AC_SUBST([MKLIB_OPTIONS]) - -dnl -dnl other compiler options -dnl -AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], - [use debug compiler flags and macros @<:@default=disabled@:>@])], - [enable_debug="$enableval"], - [enable_debug=no] -) -if test "x$enable_debug" = xyes; then - DEFINES="$DEFINES -DDEBUG" - if test "x$GCC" = xyes; then - CFLAGS="$CFLAGS -g" - fi - if test "x$GXX" = xyes; then - CXXFLAGS="$CXXFLAGS -g" - fi -fi - -dnl -dnl library names -dnl -LIB_PREFIX_GLOB='lib' -LIB_VERSION_SEPARATOR='.' -if test "$enable_static" = yes; then - LIB_EXTENSION='a' -else - case "$host_os" in - darwin* ) - LIB_EXTENSION='dylib' ;; - cygwin* ) - dnl prefix can be 'cyg' or 'lib' - LIB_PREFIX_GLOB='???' - LIB_VERSION_SEPARATOR='-' - LIB_EXTENSION='dll' ;; - aix* ) - LIB_EXTENSION='a' ;; - * ) - LIB_EXTENSION='so' ;; - esac -fi - -GL_LIB_NAME='lib$(GL_LIB).'${LIB_EXTENSION} -GLU_LIB_NAME='lib$(GLU_LIB).'${LIB_EXTENSION} -GLUT_LIB_NAME='lib$(GLUT_LIB).'${LIB_EXTENSION} -GLW_LIB_NAME='lib$(GLW_LIB).'${LIB_EXTENSION} -OSMESA_LIB_NAME='lib$(OSMESA_LIB).'${LIB_EXTENSION} -EGL_LIB_NAME='lib$(EGL_LIB).'${LIB_EXTENSION} -GLESv1_CM_LIB_NAME='lib$(GLESv1_CM_LIB).'${LIB_EXTENSION} -GLESv2_LIB_NAME='lib$(GLESv2_LIB).'${LIB_EXTENSION} -VG_LIB_NAME='lib$(VG_LIB).'${LIB_EXTENSION} -GLAPI_LIB_NAME='lib$(GLAPI_LIB).'${LIB_EXTENSION} -WAYLAND_EGL_LIB_NAME='lib$(WAYLAND_EGL_LIB).'${LIB_EXTENSION} - -GL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLU_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLU_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLUT_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLUT_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLW_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLW_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -OSMESA_LIB_GLOB=${LIB_PREFIX_GLOB}'$(OSMESA_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLESv1_CM_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLESv1_CM_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLESv2_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLESv2_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -VG_LIB_GLOB=${LIB_PREFIX_GLOB}'$(VG_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -GLAPI_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLAPI_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' -WAYLAND_EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(WAYLAND_EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' - -AC_SUBST([GL_LIB_NAME]) -AC_SUBST([GLU_LIB_NAME]) -AC_SUBST([GLUT_LIB_NAME]) -AC_SUBST([GLW_LIB_NAME]) -AC_SUBST([OSMESA_LIB_NAME]) -AC_SUBST([EGL_LIB_NAME]) -AC_SUBST([GLESv1_CM_LIB_NAME]) -AC_SUBST([GLESv2_LIB_NAME]) -AC_SUBST([VG_LIB_NAME]) -AC_SUBST([GLAPI_LIB_NAME]) -AC_SUBST([WAYLAND_EGL_LIB_NAME]) - -AC_SUBST([GL_LIB_GLOB]) -AC_SUBST([GLU_LIB_GLOB]) -AC_SUBST([GLUT_LIB_GLOB]) -AC_SUBST([GLW_LIB_GLOB]) -AC_SUBST([OSMESA_LIB_GLOB]) -AC_SUBST([EGL_LIB_GLOB]) -AC_SUBST([GLESv1_CM_LIB_GLOB]) -AC_SUBST([GLESv2_LIB_GLOB]) -AC_SUBST([VG_LIB_GLOB]) -AC_SUBST([GLAPI_LIB_GLOB]) -AC_SUBST([WAYLAND_EGL_LIB_GLOB]) - -dnl -dnl Arch/platform-specific settings -dnl -AC_ARG_ENABLE([asm], - [AS_HELP_STRING([--disable-asm], - [disable assembly usage @<:@default=enabled on supported plaforms@:>@])], - [enable_asm="$enableval"], - [enable_asm=yes] -) -asm_arch="" -ASM_FLAGS="" -MESA_ASM_SOURCES="" -GLAPI_ASM_SOURCES="" -AC_MSG_CHECKING([whether to enable assembly]) -test "x$enable_asm" = xno && AC_MSG_RESULT([no]) -# disable if cross compiling on x86/x86_64 since we must run gen_matypes -if test "x$enable_asm" = xyes && test "x$cross_compiling" = xyes; then - case "$host_cpu" in - i?86 | x86_64) - enable_asm=no - AC_MSG_RESULT([no, cross compiling]) - ;; - esac -fi -# check for supported arches -if test "x$enable_asm" = xyes; then - case "$host_cpu" in - i?86) - case "$host_os" in - linux* | *freebsd* | dragonfly* | *netbsd*) - test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86 - ;; - esac - ;; - x86_64) - case "$host_os" in - linux* | *freebsd* | dragonfly* | *netbsd*) - test "x$enable_32bit" = xyes && asm_arch=x86 || asm_arch=x86_64 - ;; - esac - ;; - powerpc) - case "$host_os" in - linux*) - asm_arch=ppc - ;; - esac - ;; - sparc*) - case "$host_os" in - linux*) - asm_arch=sparc - ;; - esac - ;; - esac - - case "$asm_arch" in - x86) - ASM_FLAGS="-DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM" - MESA_ASM_SOURCES='$(X86_SOURCES)' - GLAPI_ASM_SOURCES='$(X86_API)' - AC_MSG_RESULT([yes, x86]) - ;; - x86_64) - ASM_FLAGS="-DUSE_X86_64_ASM" - MESA_ASM_SOURCES='$(X86-64_SOURCES)' - GLAPI_ASM_SOURCES='$(X86-64_API)' - AC_MSG_RESULT([yes, x86_64]) - ;; - ppc) - ASM_FLAGS="-DUSE_PPC_ASM -DUSE_VMX_ASM" - MESA_ASM_SOURCES='$(PPC_SOURCES)' - AC_MSG_RESULT([yes, ppc]) - ;; - sparc) - ASM_FLAGS="-DUSE_SPARC_ASM" - MESA_ASM_SOURCES='$(SPARC_SOURCES)' - GLAPI_ASM_SOURCES='$(SPARC_API)' - AC_MSG_RESULT([yes, sparc]) - ;; - *) - AC_MSG_RESULT([no, platform not supported]) - ;; - esac -fi -AC_SUBST([ASM_FLAGS]) -AC_SUBST([MESA_ASM_SOURCES]) -AC_SUBST([GLAPI_ASM_SOURCES]) - -dnl PIC code macro -MESA_PIC_FLAGS - -dnl Check to see if dlopen is in default libraries (like Solaris, which -dnl has it in libc), or if libdl is needed to get it. -AC_CHECK_FUNC([dlopen], [], - [AC_CHECK_LIB([dl], [dlopen], [DLOPEN_LIBS="-ldl"])]) -AC_SUBST([DLOPEN_LIBS]) - -dnl See if posix_memalign is available -AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"]) - -dnl SELinux awareness. -AC_ARG_ENABLE([selinux], - [AS_HELP_STRING([--enable-selinux], - [Build SELinux-aware Mesa @<:@default=disabled@:>@])], - [MESA_SELINUX="$enableval"], - [MESA_SELINUX=no]) -if test "x$enable_selinux" = "xyes"; then - AC_CHECK_HEADER([selinux/selinux.h],[], - [AC_MSG_ERROR([SELinux headers not found])]) - AC_CHECK_LIB([selinux],[is_selinux_enabled],[], - [AC_MSG_ERROR([SELinux library not found])]) - SELINUX_LIBS="-lselinux" - DEFINES="$DEFINES -DMESA_SELINUX" -fi - -dnl Determine which APIs to support -AC_ARG_ENABLE([opengl], - [AS_HELP_STRING([--disable-opengl], - [disable support for standard OpenGL API @<:@default=no@:>@])], - [enable_opengl="$enableval"], - [enable_opengl=yes]) -AC_ARG_ENABLE([gles1], - [AS_HELP_STRING([--enable-gles1], - [enable support for OpenGL ES 1.x API @<:@default=no@:>@])], - [enable_gles1="$enableval"], - [enable_gles1=no]) -AC_ARG_ENABLE([gles2], - [AS_HELP_STRING([--enable-gles2], - [enable support for OpenGL ES 2.x API @<:@default=no@:>@])], - [enable_gles2="$enableval"], - [enable_gles2=no]) -AC_ARG_ENABLE([gles-overlay], - [AS_HELP_STRING([--enable-gles-overlay], - [DEPRECATED. Same as --enable-gles1 and --enable-gles2])], - [enable_gles1="$enableval"; enable_gles2="$enableval"], - []) - -AC_ARG_ENABLE([openvg], - [AS_HELP_STRING([--enable-openvg], - [enable support for OpenVG API @<:@default=no@:>@])], - [enable_openvg="$enableval"], - [enable_openvg=no]) - -dnl smooth the transition; should be removed eventually -if test "x$enable_openvg" = xno; then - case "x$with_state_trackers" in - x*vega*) - AC_MSG_WARN([vega state tracker is enabled without --enable-openvg]) - enable_openvg=yes - ;; - esac -fi - -if test "x$enable_opengl" = xno -a \ - "x$enable_gles1" = xno -a \ - "x$enable_gles2" = xno -a \ - "x$enable_openvg" = xno; then - AC_MSG_ERROR([at least one API should be enabled]) -fi - -API_DEFINES="" -if test "x$enable_opengl" = xno; then - API_DEFINES="$API_DEFINES -DFEATURE_GL=0" -else - API_DEFINES="$API_DEFINES -DFEATURE_GL=1" -fi -if test "x$enable_gles1" = xyes; then - API_DEFINES="$API_DEFINES -DFEATURE_ES1=1" -fi -if test "x$enable_gles2" = xyes; then - API_DEFINES="$API_DEFINES -DFEATURE_ES2=1" -fi -AC_SUBST([API_DEFINES]) - -AC_ARG_ENABLE([shared-glapi], - [AS_HELP_STRING([--enable-shared-glapi], - [EXPERIMENTAL. Enable shared glapi for OpenGL @<:@default=no@:>@])], - [enable_shared_glapi="$enableval"], - [enable_shared_glapi=no]) - -SHARED_GLAPI="0" -if test "x$enable_shared_glapi" = xyes; then - SHARED_GLAPI="1" -fi -AC_SUBST([SHARED_GLAPI]) - -dnl -dnl Driver configuration. Options are xlib, dri and osmesa right now. -dnl More later: fbdev, ... -dnl -default_driver="xlib" - -case "$host_os" in -linux*) - case "$host_cpu" in - i*86|x86_64|powerpc*|sparc*) default_driver="dri";; - esac - ;; -*freebsd* | dragonfly* | *netbsd*) - case "$host_cpu" in - i*86|x86_64|powerpc*|sparc*) default_driver="dri";; - esac - ;; -esac - -if test "x$enable_opengl" = xno; then - default_driver="no" -fi - -AC_ARG_WITH([driver], - [AS_HELP_STRING([--with-driver=DRIVER], - [driver for Mesa: xlib,dri,osmesa @<:@default=dri when available, or xlib@:>@])], - [mesa_driver="$withval"], - [mesa_driver="$default_driver"]) -dnl Check for valid option -case "x$mesa_driver" in -xxlib|xdri|xosmesa) - if test "x$enable_opengl" = xno; then - AC_MSG_ERROR([Driver '$mesa_driver' requires OpenGL enabled]) - fi - ;; -xno) - ;; -*) - AC_MSG_ERROR([Driver '$mesa_driver' is not a valid option]) - ;; -esac - -dnl -dnl Driver specific build directories -dnl - -dnl this variable will be prepended to SRC_DIRS and is not exported -CORE_DIRS="" - -SRC_DIRS="" -GLU_DIRS="sgi" -GALLIUM_DIRS="auxiliary drivers state_trackers" -GALLIUM_TARGET_DIRS="" -GALLIUM_WINSYS_DIRS="sw" -GALLIUM_DRIVERS_DIRS="softpipe failover galahad trace rbug noop identity" -GALLIUM_STATE_TRACKERS_DIRS="" - -# build shared-glapi if enabled for OpenGL or if OpenGL ES is enabled -case "x$enable_shared_glapi$enable_gles1$enable_gles2" in -x*yes*) - CORE_DIRS="$CORE_DIRS mapi/shared-glapi" - ;; -esac - -# build glapi if OpenGL is enabled -if test "x$enable_opengl" = xyes; then - CORE_DIRS="$CORE_DIRS mapi/glapi" -fi - -# build es1api if OpenGL ES 1.x is enabled -if test "x$enable_gles1" = xyes; then - CORE_DIRS="$CORE_DIRS mapi/es1api" -fi - -# build es2api if OpenGL ES 2.x is enabled -if test "x$enable_gles2" = xyes; then - CORE_DIRS="$CORE_DIRS mapi/es2api" -fi - -# build vgapi if OpenVG is enabled -if test "x$enable_openvg" = xyes; then - CORE_DIRS="$CORE_DIRS mapi/vgapi" -fi - -# build glsl and mesa if OpenGL or OpenGL ES is enabled -case "x$enable_opengl$enable_gles1$enable_gles2" in -x*yes*) - CORE_DIRS="$CORE_DIRS glsl mesa" - ;; -esac - -case "$mesa_driver" in -xlib) - DRIVER_DIRS="x11" - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib" - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS libgl-xlib" - ;; -dri) - SRC_DIRS="$SRC_DIRS glx" - DRIVER_DIRS="dri" - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib sw/dri" - ;; -osmesa) - DRIVER_DIRS="osmesa" - ;; -no) - DRIVER_DRIS="" - ;; -esac -AC_SUBST([SRC_DIRS]) -AC_SUBST([GLU_DIRS]) -AC_SUBST([DRIVER_DIRS]) -AC_SUBST([GALLIUM_DIRS]) -AC_SUBST([GALLIUM_TARGET_DIRS]) -AC_SUBST([GALLIUM_WINSYS_DIRS]) -AC_SUBST([GALLIUM_DRIVERS_DIRS]) -AC_SUBST([GALLIUM_STATE_TRACKERS_DIRS]) -AC_SUBST([MESA_LLVM]) - -dnl -dnl Find out if X is available. The variable have_x is set if libX11 is -dnl found to mimic AC_PATH_XTRA. -dnl -if test -n "$PKG_CONFIG"; then - AC_MSG_CHECKING([pkg-config files for X11 are available]) - PKG_CHECK_EXISTS([x11],[ - x11_pkgconfig=yes - have_x=yes - ],[ - x11_pkgconfig=no - ]) - AC_MSG_RESULT([$x11_pkgconfig]) -else - x11_pkgconfig=no -fi -dnl Use the autoconf macro if no pkg-config files -if test "$x11_pkgconfig" = yes; then - PKG_CHECK_MODULES([X11], [x11]) -else - AC_PATH_XTRA - test -z "$X11_CFLAGS" && X11_CFLAGS="$X_CFLAGS" - test -z "$X11_LIBS" && X11_LIBS="$X_LIBS -lX11" - AC_SUBST([X11_CFLAGS]) - AC_SUBST([X11_LIBS]) -fi - -dnl Try to tell the user that the --x-* options are only used when -dnl pkg-config is not available. This must be right after AC_PATH_XTRA. -m4_divert_once([HELP_BEGIN], -[These options are only used when the X libraries cannot be found by the -pkg-config utility.]) - -dnl We need X for xlib and dri, so bomb now if it's not found -case "$mesa_driver" in -xlib|dri) - if test "$no_x" = yes; then - AC_MSG_ERROR([X11 development libraries needed for $mesa_driver driver]) - fi - ;; -esac - -dnl XCB - this is only used for GLX right now -AC_ARG_ENABLE([xcb], - [AS_HELP_STRING([--enable-xcb], - [use XCB for GLX @<:@default=disabled@:>@])], - [enable_xcb="$enableval"], - [enable_xcb=no]) -if test "x$enable_xcb" = xyes; then - DEFINES="$DEFINES -DUSE_XCB" -else - enable_xcb=no -fi - -dnl -dnl libGL configuration per driver -dnl -case "$mesa_driver" in -xlib) - if test "$x11_pkgconfig" = yes; then - PKG_CHECK_MODULES([XLIBGL], [x11 xext]) - GL_PC_REQ_PRIV="x11 xext" - X11_INCLUDES="$X11_INCLUDES $XLIBGL_CFLAGS" - GL_LIB_DEPS="$XLIBGL_LIBS" - else - # should check these... - X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" - GL_LIB_DEPS="$X_LIBS -lX11 -lXext" - GL_PC_LIB_PRIV="$GL_LIB_DEPS" - GL_PC_CFLAGS="$X11_INCLUDES" - fi - GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread" - GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread" - - # if static, move the external libraries to the programs - # and empty the libraries for libGL - if test "$enable_static" = yes; then - APP_LIB_DEPS="$APP_LIB_DEPS $GL_LIB_DEPS" - GL_LIB_DEPS="" - fi - ;; -dri|no) # these checks are still desired when there is no mesa_driver - # DRI must be shared, I think - if test "$enable_static" = yes; then - AC_MSG_ERROR([Can't use static libraries for DRI drivers]) - fi - - # Check for libdrm - PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED]) - PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED]) - PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED]) - GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED glproto >= $GLPROTO_REQUIRED" - DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" - - # find the DRI deps for libGL - if test "$x11_pkgconfig" = yes; then - dri_modules="x11 xext xdamage xfixes" - - # add xf86vidmode if available - PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no) - if test "$HAVE_XF86VIDMODE" = yes ; then - dri_modules="$dri_modules xxf86vm" - fi - - # add xcb modules if necessary - if test "$enable_xcb" = yes; then - dri_modules="$dri_modules x11-xcb xcb-glx" - fi - - PKG_CHECK_MODULES([DRIGL], [$dri_modules]) - GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" - X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS" - GL_LIB_DEPS="$DRIGL_LIBS" - else - # should check these... - X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" - GL_LIB_DEPS="$X_LIBS -lX11 -lXext -lXxf86vm -lXdamage -lXfixes" - GL_PC_LIB_PRIV="$GL_LIB_DEPS" - GL_PC_CFLAGS="$X11_INCLUDES" - - # XCB can only be used from pkg-config - if test "$enable_xcb" = yes; then - PKG_CHECK_MODULES([XCB],[x11-xcb xcb-glx]) - GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV x11-xcb xcb-glx" - X11_INCLUDES="$X11_INCLUDES $XCB_CFLAGS" - GL_LIB_DEPS="$GL_LIB_DEPS $XCB_LIBS" - fi - fi - - # need DRM libs, -lpthread, etc. - GL_LIB_DEPS="$GL_LIB_DEPS $LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" - GL_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" - GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" - GLESv1_CM_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" - GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" - GLESv2_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" - ;; -osmesa) - # No libGL for osmesa - GL_LIB_DEPS="" - ;; -esac -AC_SUBST([GL_LIB_DEPS]) -AC_SUBST([GL_PC_REQ_PRIV]) -AC_SUBST([GL_PC_LIB_PRIV]) -AC_SUBST([GL_PC_CFLAGS]) -AC_SUBST([DRI_PC_REQ_PRIV]) -AC_SUBST([GLESv1_CM_LIB_DEPS]) -AC_SUBST([GLESv1_CM_PC_LIB_PRIV]) -AC_SUBST([GLESv2_LIB_DEPS]) -AC_SUBST([GLESv2_PC_LIB_PRIV]) - -GLAPI_LIB_DEPS="-lpthread" -AC_SUBST([GLAPI_LIB_DEPS]) - - -dnl Setup default DRI CFLAGS -DRI_CFLAGS='$(CFLAGS)' -DRI_CXXFLAGS='$(CXXFLAGS)' -DRI_LIB_DEPS='$(TOP)/src/mesa/libmesa.a' -MESA_MODULES='$(TOP)/src/mesa/libmesa.a' - -AC_ARG_ENABLE([shared-dricore], - [AS_HELP_STRING([--enable-shared-dricore], - [link DRI modules with shared core DRI routines @<:@default=disabled@:>@])], - [enable_dricore="$enableval"], - [enable_dricore=no]) -if test "$mesa_driver" = dri ; then - if test "$enable_dricore" = yes ; then - if test "$GCC$GXX" != yesyes ; then - AC_MSG_WARN([Shared dricore requires GCC-compatible rpath handling. Disabling shared dricore]) - enable_dricore=no - else - DRICORE_GLSL_LIBS='$(TOP)/$(LIB_DIR)/libglsl.so' - DRICORE_LIBS='$(TOP)/$(LIB_DIR)/libdricore.so' - DRICORE_LIB_DEPS='-L$(TOP)/$(LIB_DIR) -Wl,-R$(DRI_DRIVER_INSTALL_DIR) -lglsl' - DRI_LIB_DEPS='-L$(TOP)/$(LIB_DIR) -Wl,-R$(DRI_DRIVER_INSTALL_DIR) -ldricore -lglsl' - DRI_CFLAGS='$(CFLAGS_NOVISIBILITY) -DUSE_DRICORE' - DRI_CXXFLAGS='$(CXXFLAGS_NOVISIBILITY) -DUSE_DRICORE' - MESA_MODULES='$(DRICORE_LIBS) $(DRICORE_GLSL_LIBS)' - fi - fi -fi -AC_SUBST([DRICORE_LIBS]) -AC_SUBST([DRICORE_GLSL_LIBS]) -AC_SUBST([DRICORE_LIB_DEPS]) -AC_SUBST([DRI_CXXFLAGS]) -AC_SUBST([DRI_CFLAGS]) -AC_SUBST([MESA_MODULES]) - -AC_SUBST([HAVE_XF86VIDMODE]) - -PKG_CHECK_MODULES([LIBDRM_RADEON], - [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED], - HAVE_LIBDRM_RADEON=yes, - HAVE_LIBDRM_RADEON=no) - -dnl -dnl More X11 setup -dnl -if test "$mesa_driver" = xlib; then - DEFINES="$DEFINES -DUSE_XSHM" -fi - -dnl -dnl TLS detection -dnl - -AC_ARG_ENABLE([glx-tls], - [AS_HELP_STRING([--enable-glx-tls], - [enable TLS support in GLX @<:@default=disabled@:>@])], - [GLX_USE_TLS="$enableval"], - [GLX_USE_TLS=no]) -AC_SUBST(GLX_TLS, ${GLX_USE_TLS}) - -AS_IF([test "x$GLX_USE_TLS" = xyes], - [DEFINES="${DEFINES} -DGLX_USE_TLS -DPTHREADS"]) - -dnl -dnl More DRI setup -dnl -dnl Directory for DRI drivers -AC_ARG_WITH([dri-driverdir], - [AS_HELP_STRING([--with-dri-driverdir=DIR], - [directory for the DRI drivers @<:@${libdir}/dri@:>@])], - [DRI_DRIVER_INSTALL_DIR="$withval"], - [DRI_DRIVER_INSTALL_DIR='${libdir}/dri']) -AC_SUBST([DRI_DRIVER_INSTALL_DIR]) -dnl Extra search path for DRI drivers -AC_ARG_WITH([dri-searchpath], - [AS_HELP_STRING([--with-dri-searchpath=DIRS...], - [semicolon delimited DRI driver search directories @<:@${libdir}/dri@:>@])], - [DRI_DRIVER_SEARCH_DIR="$withval"], - [DRI_DRIVER_SEARCH_DIR='${DRI_DRIVER_INSTALL_DIR}']) -AC_SUBST([DRI_DRIVER_SEARCH_DIR]) -dnl Direct rendering or just indirect rendering -AC_ARG_ENABLE([driglx-direct], - [AS_HELP_STRING([--disable-driglx-direct], - [enable direct rendering in GLX and EGL for DRI @<:@default=enabled@:>@])], - [driglx_direct="$enableval"], - [driglx_direct="yes"]) -dnl Which drivers to build - default is chosen by platform -AC_ARG_WITH([dri-drivers], - [AS_HELP_STRING([--with-dri-drivers@<:@=DIRS...@:>@], - [comma delimited DRI drivers list, e.g. - "swrast,i965,radeon" @<:@default=auto@:>@])], - [with_dri_drivers="$withval"], - [with_dri_drivers=yes]) -if test "x$with_dri_drivers" = x; then - with_dri_drivers=no -fi - -dnl If $with_dri_drivers is yes, directories will be added through -dnl platform checks -DRI_DIRS="" -case "$with_dri_drivers" in -no) ;; -yes) - DRI_DIRS="yes" - ;; -*) - # verify the requested driver directories exist - dri_drivers=`IFS=', '; echo $with_dri_drivers` - for driver in $dri_drivers; do - test -d "$srcdir/src/mesa/drivers/dri/$driver" || \ - AC_MSG_ERROR([DRI driver directory '$driver' doesn't exist]) - done - DRI_DIRS="$dri_drivers" - ;; -esac - -dnl Set DRI_DIRS, DEFINES and LIB_DEPS -if test "$mesa_driver" = dri -o "$mesa_driver" = no; then - # Platform specific settings and drivers to build - case "$host_os" in - linux*) - DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" - if test "x$driglx_direct" = xyes; then - DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" - fi - DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING -DHAVE_ALIAS" - - case "$host_cpu" in - x86_64) - # sis is missing because they have not be converted to use - # the new interface. i810 are missing because there is no - # x86-64 system where they could *ever* be used. - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 r600 radeon \ - savage tdfx unichrome swrast" - fi - ;; - powerpc*) - # Build only the drivers for cards that exist on PowerPC. - # At some point MGA will be added, but not yet. - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="mach64 r128 r200 r300 r600 radeon tdfx swrast" - fi - ;; - sparc*) - # Build only the drivers for cards that exist on sparc` - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="mach64 r128 r200 r300 r600 radeon swrast" - fi - ;; - esac - ;; - freebsd* | dragonfly* | *netbsd*) - DEFINES="$DEFINES -DPTHREADS -DUSE_EXTERNAL_DXTN_LIB=1" - DEFINES="$DEFINES -DIN_DRI_DRIVER -DHAVE_ALIAS" - DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING" - if test "x$driglx_direct" = xyes; then - DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" - fi - - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon tdfx \ - unichrome savage sis swrast" - fi - ;; - gnu*) - DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" - DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING -DHAVE_ALIAS" - ;; - solaris*) - DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" - DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING" - if test "x$driglx_direct" = xyes; then - DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" - fi - ;; - esac - - # default drivers - if test "x$DRI_DIRS" = "xyes"; then - DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon \ - savage sis tdfx unichrome swrast" - fi - - DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/ */ /g'` - - # Check for expat - if test "$mesa_driver" = dri; then - EXPAT_INCLUDES="" - EXPAT_LIB=-lexpat - AC_ARG_WITH([expat], - [AS_HELP_STRING([--with-expat=DIR], - [expat install directory])],[ - EXPAT_INCLUDES="-I$withval/include" - CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES" - LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR" - EXPAT_LIB="-L$withval/$LIB_DIR -lexpat" - ]) - AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])]) - AC_CHECK_LIB([expat],[XML_ParserCreate],[], - [AC_MSG_ERROR([Expat required for DRI.])]) - fi - - # put all the necessary libs together, including possibly libdricore - DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIB -lm -lpthread $DLOPEN_LIBS" -fi -AC_SUBST([DRI_DIRS]) -AC_SUBST([EXPAT_INCLUDES]) -AC_SUBST([DRI_LIB_DEPS]) - -case $DRI_DIRS in -*i915*|*i965*) - PKG_CHECK_MODULES([INTEL], [libdrm_intel >= $LIBDRM_INTEL_REQUIRED]) - ;; -esac - -case $DRI_DIRS in -*radeon*|*r200*|*r300*|*r600*) - if test "x$HAVE_LIBDRM_RADEON" = xyes; then - RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS" - RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS - fi - ;; -esac -AC_SUBST([RADEON_CFLAGS]) -AC_SUBST([RADEON_LDFLAGS]) - - -dnl -dnl OSMesa configuration -dnl -if test "$mesa_driver" = xlib; then - default_gl_osmesa=yes -else - default_gl_osmesa=no -fi -AC_ARG_ENABLE([gl-osmesa], - [AS_HELP_STRING([--enable-gl-osmesa], - [enable OSMesa with libGL @<:@default=enabled for xlib driver@:>@])], - [gl_osmesa="$enableval"], - [gl_osmesa="$default_gl_osmesa"]) -if test "x$gl_osmesa" = xyes; then - if test "x$enable_opengl" = xno; then - AC_MSG_ERROR([OpenGL is not available for OSMesa driver]) - fi - if test "$mesa_driver" = osmesa; then - AC_MSG_ERROR([libGL is not available for OSMesa driver]) - else - DRIVER_DIRS="$DRIVER_DIRS osmesa" - fi -fi - -dnl Configure the channel bits for OSMesa (libOSMesa, libOSMesa16, ...) -AC_ARG_WITH([osmesa-bits], - [AS_HELP_STRING([--with-osmesa-bits=BITS], - [OSMesa channel bits and library name: 8, 16, 32 @<:@default=8@:>@])], - [osmesa_bits="$withval"], - [osmesa_bits=8]) -if test "$mesa_driver" != osmesa && test "x$osmesa_bits" != x8; then - AC_MSG_WARN([Ignoring OSMesa channel bits for non-OSMesa driver]) - osmesa_bits=8 -fi -case "x$osmesa_bits" in -x8) - OSMESA_LIB=OSMesa - ;; -x16|x32) - OSMESA_LIB="OSMesa$osmesa_bits" - DEFINES="$DEFINES -DCHAN_BITS=$osmesa_bits -DDEFAULT_SOFTWARE_DEPTH_BITS=31" - ;; -*) - AC_MSG_ERROR([OSMesa bits '$osmesa_bits' is not a valid option]) - ;; -esac -AC_SUBST([OSMESA_LIB]) - -case "$DRIVER_DIRS" in -*osmesa*) - # only link libraries with osmesa if shared - if test "$enable_static" = no; then - OSMESA_LIB_DEPS="-lm -lpthread $SELINUX_LIBS $DLOPEN_LIBS" - else - OSMESA_LIB_DEPS="" - fi - OSMESA_MESA_DEPS="" - OSMESA_PC_LIB_PRIV="-lm -lpthread $SELINUX_LIBS $DLOPEN_LIBS" - ;; -esac -AC_SUBST([OSMESA_LIB_DEPS]) -AC_SUBST([OSMESA_MESA_DEPS]) -AC_SUBST([OSMESA_PC_REQ]) -AC_SUBST([OSMESA_PC_LIB_PRIV]) - -dnl -dnl EGL configuration -dnl -AC_ARG_ENABLE([egl], - [AS_HELP_STRING([--disable-egl], - [disable EGL library @<:@default=enabled@:>@])], - [enable_egl="$enableval"], - [enable_egl=yes]) -if test "x$enable_egl" = xno; then - if test "x$mesa_driver" = xno; then - AC_MSG_ERROR([cannot disable EGL when there is no mesa driver]) - fi - if test "x$enable_openvg" = xyes; then - AC_MSG_ERROR([cannot enable OpenVG without EGL]) - fi -fi -if test "x$enable_egl" = xyes; then - SRC_DIRS="$SRC_DIRS egl" - EGL_LIB_DEPS="$DLOPEN_LIBS -lpthread" - EGL_DRIVERS_DIRS="" - if test "$enable_static" != yes; then - # build egl_glx when libGL is built - if test "$mesa_driver" = xlib -o "$mesa_driver" = dri; then - EGL_DRIVERS_DIRS="glx" - fi - - if test "$mesa_driver" = dri; then - # build egl_dri2 when xcb-dri2 is available - PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb-dri2 xcb-xfixes], - [have_xcb_dri2=yes],[have_xcb_dri2=no]) - PKG_CHECK_MODULES([LIBUDEV], [libudev > 150], - [have_libudev=yes],[have_libudev=no]) - - if test "$have_xcb_dri2" = yes; then - EGL_DRIVER_DRI2=dri2 - DEFINES="$DEFINES -DHAVE_XCB_DRI2" - if test "$have_libudev" = yes; then - DEFINES="$DEFINES -DHAVE_LIBUDEV" - fi - # workaround a bug in xcb-dri2 generated by xcb-proto 1.6 - AC_CHECK_LIB(xcb-dri2, xcb_dri2_connect_alignment_pad, [], - [DEFINES="$DEFINES -DXCB_DRI2_CONNECT_DEVICE_NAME_BROKEN"]) - fi - fi - - EGL_DRIVERS_DIRS="$EGL_DRIVERS_DIRS $EGL_DRIVER_DRI2" - fi -fi -AC_SUBST([EGL_LIB_DEPS]) -AC_SUBST([EGL_DRIVERS_DIRS]) - -dnl -dnl GLU configuration -dnl -AC_ARG_ENABLE([glu], - [AS_HELP_STRING([--disable-glu], - [enable OpenGL Utility library @<:@default=enabled@:>@])], - [enable_glu="$enableval"], - [enable_glu=yes]) - -if test "x$enable_glu" = xyes -a "x$mesa_driver" = xno; then - AC_MSG_NOTICE([Disabling GLU since there is no OpenGL driver]) - enable_glu=no -fi - -if test "x$enable_glu" = xyes; then - SRC_DIRS="$SRC_DIRS glu" - - case "$mesa_driver" in - osmesa) - # Link libGLU to libOSMesa instead of libGL - GLU_LIB_DEPS="" - GLU_PC_REQ="osmesa" - if test "$enable_static" = no; then - GLU_MESA_DEPS='-l$(OSMESA_LIB)' - else - GLU_MESA_DEPS="" - fi - ;; - *) - # If static, empty GLU_LIB_DEPS and add libs for programs to link - GLU_PC_REQ="gl" - GLU_PC_LIB_PRIV="-lm" - if test "$enable_static" = no; then - GLU_LIB_DEPS="-lm" - GLU_MESA_DEPS='-l$(GL_LIB)' - else - GLU_LIB_DEPS="" - GLU_MESA_DEPS="" - APP_LIB_DEPS="$APP_LIB_DEPS -lstdc++" - fi - ;; - esac -fi -if test "$enable_static" = no; then - GLU_LIB_DEPS="$GLU_LIB_DEPS $OS_CPLUSPLUS_LIBS" -fi -GLU_PC_LIB_PRIV="$GLU_PC_LIB_PRIV $OS_CPLUSPLUS_LIBS" -AC_SUBST([GLU_LIB_DEPS]) -AC_SUBST([GLU_MESA_DEPS]) -AC_SUBST([GLU_PC_REQ]) -AC_SUBST([GLU_PC_REQ_PRIV]) -AC_SUBST([GLU_PC_LIB_PRIV]) -AC_SUBST([GLU_PC_CFLAGS]) - -dnl -dnl GLw configuration -dnl -AC_ARG_ENABLE([glw], - [AS_HELP_STRING([--disable-glw], - [enable Xt/Motif widget library @<:@default=enabled@:>@])], - [enable_glw="$enableval"], - [enable_glw=yes]) -dnl Don't build GLw on osmesa -if test "x$enable_glw" = xyes; then - case "$mesa_driver" in - osmesa|no) - AC_MSG_NOTICE([Disabling GLw since there is no OpenGL driver]) - enable_glw=no - ;; - esac -fi -AC_ARG_ENABLE([motif], - [AS_HELP_STRING([--enable-motif], - [use Motif widgets in GLw @<:@default=disabled@:>@])], - [enable_motif="$enableval"], - [enable_motif=no]) - -if test "x$enable_glw" = xyes; then - SRC_DIRS="$SRC_DIRS glw" - if test "$x11_pkgconfig" = yes; then - PKG_CHECK_MODULES([GLW],[x11 xt]) - GLW_PC_REQ_PRIV="x11 xt" - GLW_LIB_DEPS="$GLW_LIBS" - else - # should check these... - GLW_LIB_DEPS="$X_LIBS -lXt -lX11" - GLW_PC_LIB_PRIV="$GLW_LIB_DEPS" - GLW_PC_CFLAGS="$X11_INCLUDES" - fi - - GLW_SOURCES="GLwDrawA.c" - MOTIF_CFLAGS= - if test "x$enable_motif" = xyes; then - GLW_SOURCES="$GLW_SOURCES GLwMDrawA.c" - AC_PATH_PROG([MOTIF_CONFIG], [motif-config], [no]) - if test "x$MOTIF_CONFIG" != xno; then - MOTIF_CFLAGS=`$MOTIF_CONFIG --cflags` - MOTIF_LIBS=`$MOTIF_CONFIG --libs` - else - AC_CHECK_HEADER([Xm/PrimitiveP.h], [], - [AC_MSG_ERROR([Can't locate Motif headers])]) - AC_CHECK_LIB([Xm], [XmGetPixmap], [MOTIF_LIBS="-lXm"], - [AC_MSG_ERROR([Can't locate Motif Xm library])]) - fi - # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11 - GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS" - GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV" - GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS" - fi - - # If static, empty GLW_LIB_DEPS and add libs for programs to link - GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV" - if test "$enable_static" = no; then - GLW_MESA_DEPS='-l$(GL_LIB)' - GLW_LIB_DEPS="$GLW_LIB_DEPS" - else - APP_LIB_DEPS="$APP_LIB_DEPS $GLW_LIB_DEPS" - GLW_LIB_DEPS="" - GLW_MESA_DEPS="" - fi -fi -AC_SUBST([GLW_LIB_DEPS]) -AC_SUBST([GLW_MESA_DEPS]) -AC_SUBST([GLW_SOURCES]) -AC_SUBST([MOTIF_CFLAGS]) -AC_SUBST([GLW_PC_REQ_PRIV]) -AC_SUBST([GLW_PC_LIB_PRIV]) -AC_SUBST([GLW_PC_CFLAGS]) - -dnl -dnl GLUT configuration -dnl -if test -f "$srcdir/include/GL/glut.h"; then - default_glut=yes -else - default_glut=no -fi -AC_ARG_ENABLE([glut], - [AS_HELP_STRING([--disable-glut], - [enable GLUT library @<:@default=enabled if source available@:>@])], - [enable_glut="$enableval"], - [enable_glut="$default_glut"]) - -dnl Don't build glut on osmesa -if test "x$enable_glut" = xyes; then - case "$mesa_driver" in - osmesa|no) - AC_MSG_NOTICE([Disabling glut since there is no OpenGL driver]) - enable_glut=no - ;; - esac -fi -dnl Can't build glut if GLU not available -if test "x$enable_glu$enable_glut" = xnoyes; then - AC_MSG_WARN([Disabling glut since GLU is disabled]) - enable_glut=no -fi - -if test "x$enable_glut" = xyes; then - SRC_DIRS="$SRC_DIRS glut/glx" - if test "$x11_pkgconfig" = yes; then - PKG_CHECK_MODULES([GLUT],[x11 xmu xi]) - GLUT_PC_REQ_PRIV="x11 xmu xi" - GLUT_LIB_DEPS="$GLUT_LIBS" - else - # should check these... - GLUT_LIB_DEPS="$X_LIBS -lX11 -lXmu -lXi" - GLUT_PC_LIB_PRIV="$GLUT_LIB_DEPS" - GLUT_PC_CFLAGS="$X11_INCLUDES" - fi - if test "x$GCC" = xyes; then - GLUT_CFLAGS="$GLUT_CFLAGS -fexceptions" - fi - GLUT_LIB_DEPS="$GLUT_LIB_DEPS -lm" - GLUT_PC_LIB_PRIV="$GLUT_PC_LIB_PRIV -lm" - - # If static, empty GLUT_LIB_DEPS and add libs for programs to link - if test "$enable_static" = no; then - GLUT_MESA_DEPS='-l$(GLU_LIB) -l$(GL_LIB)' - else - APP_LIB_DEPS="$APP_LIB_DEPS $GLUT_LIB_DEPS" - GLUT_LIB_DEPS="" - GLUT_MESA_DEPS="" - fi -fi -AC_SUBST([GLUT_LIB_DEPS]) -AC_SUBST([GLUT_MESA_DEPS]) -AC_SUBST([GLUT_CFLAGS]) -AC_SUBST([GLUT_PC_REQ_PRIV]) -AC_SUBST([GLUT_PC_LIB_PRIV]) -AC_SUBST([GLUT_PC_CFLAGS]) - -dnl -dnl Program library dependencies -dnl Only libm is added here if necessary as the libraries should -dnl be pulled in by the linker -dnl -if test "x$APP_LIB_DEPS" = x; then - case "$host_os" in - solaris*) - APP_LIB_DEPS="-lX11 -lsocket -lnsl -lm" - ;; - cygwin*) - APP_LIB_DEPS="-lX11" - ;; - *) - APP_LIB_DEPS="-lm" - ;; - esac -fi -AC_SUBST([APP_LIB_DEPS]) -AC_SUBST([PROGRAM_DIRS]) - -dnl -dnl Gallium configuration -dnl -AC_ARG_ENABLE([gallium], - [AS_HELP_STRING([--disable-gallium], - [build gallium @<:@default=enabled@:>@])], - [enable_gallium="$enableval"], - [enable_gallium=yes]) -if test "x$enable_gallium" = xno -a "x$enable_openvg" = xyes; then - AC_MSG_ERROR([cannot enable OpenVG without Gallium]) -fi -if test "x$enable_gallium" = xyes; then - SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets" - AC_CHECK_HEADER([udis86.h], [HAS_UDIS86="yes"], - [HAS_UDIS86="no"]) - AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no]) -fi - -AC_SUBST([LLVM_CFLAGS]) -AC_SUBST([LLVM_LIBS]) -AC_SUBST([LLVM_LDFLAGS]) -AC_SUBST([LLVM_VERSION]) - -dnl -dnl Gallium state trackers configuration -dnl - -AC_ARG_ENABLE([gallium-egl], - [AS_HELP_STRING([--enable-gallium-egl], - [enable gallium EGL state tracker @<:@default=auto@:>@])], - [enable_gallium_egl="$enableval"], - [enable_gallium_egl=auto]) -if test "x$enable_gallium_egl" = xauto; then - case "$mesa_driver" in - dri|no) - enable_gallium_egl=$enable_egl - ;; - *) - enable_gallium_egl=$enable_openvg - ;; - esac -fi -case "x$enable_egl$enable_gallium_egl" in -xnoyes) - AC_MSG_ERROR([cannot build Gallium EGL state tracker without EGL]) -esac - -AC_ARG_WITH([state-trackers], - [AS_HELP_STRING([--with-state-trackers@<:@=DIRS...@:>@], - [comma delimited state_trackers list, e.g. - "egl,glx" @<:@default=auto@:>@])], - [with_state_trackers="$withval"], - [with_state_trackers=yes]) - -case "$with_state_trackers" in -no) - GALLIUM_STATE_TRACKERS_DIRS="" - ;; -yes) - # look at what else is built - case "$mesa_driver" in - xlib) - GALLIUM_STATE_TRACKERS_DIRS=glx - ;; - dri) - GALLIUM_STATE_TRACKERS_DIRS="dri" - HAVE_ST_DRI="yes" - # Have only tested st/xorg on 1.6.0 servers - PKG_CHECK_MODULES(XORG, [xorg-server >= 1.6.0 libdrm >= $LIBDRM_XORG_REQUIRED libkms >= $LIBKMS_XORG_REQUIRED], - HAVE_ST_XORG="yes"; GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xorg", - HAVE_ST_XORG="no") - ;; - esac - - if test "x$enable_egl" = xyes; then - if test "$enable_openvg" = yes; then - GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS vega" - st_egl="yes" - fi - - if test "$enable_gallium_egl" = yes; then - GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS egl" - HAVE_ST_EGL="yes" - fi - fi - ;; -*) - # verify the requested state tracker exist - state_trackers="" - _state_trackers=`IFS=', '; echo $with_state_trackers` - for tracker in $_state_trackers; do - case "$tracker" in - dri) - if test "x$mesa_driver" != xdri; then - AC_MSG_ERROR([cannot build dri state tracker without mesa driver set to dri]) - fi - HAVE_ST_DRI="yes" - ;; - egl) - if test "x$enable_egl" != xyes; then - AC_MSG_ERROR([cannot build egl state tracker without EGL library]) - fi - HAVE_ST_EGL="yes" - ;; - xorg) - PKG_CHECK_MODULES([XORG], [xorg-server >= 1.6.0]) - PKG_CHECK_MODULES([LIBDRM_XORG], [libdrm >= $LIBDRM_XORG_REQUIRED]) - PKG_CHECK_MODULES([LIBKMS_XORG], [libkms >= $LIBKMS_XORG_REQUIRED]) - HAVE_ST_XORG="yes" - ;; - vega) - if test "x$enable_openvg" != xyes; then - AC_MSG_ERROR([cannot build vega state tracker without --enable-openvg]) - fi - have_st_vega="yes" - ;; - esac - - if test -n "$tracker"; then - test -d "$srcdir/src/gallium/state_trackers/$tracker" || \ - AC_MSG_ERROR([state tracker '$tracker' doesn't exist]) - if test -n "$state_trackers"; then - state_trackers="$state_trackers $tracker" - else - state_trackers="$tracker" - fi - fi - done - GALLIUM_STATE_TRACKERS_DIRS="$state_trackers" - - # append --enable-openvg/--enable-gallium-egl to --with-state-trackers - if test "x$have_st_vega" != xyes -a "x$enable_openvg" = xyes; then - AC_MSG_ERROR([--with-state-trackers specified but vega is missing]) - fi - if test "x$HAVE_ST_EGL" != xyes -a "x$enable_gallium_egl" = xyes; then - AC_MSG_ERROR([--with-state-trackers specified but egl is missing]) - fi - ;; -esac - - -EGL_CLIENT_APIS="" -VG_LIB_DEPS="" - -case "x$enable_opengl$enable_gles1$enable_gles2" in -x*yes*) - EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(GL_LIB)' - ;; -esac -if test "x$enable_openvg" = xyes; then - EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(VG_LIB)' - VG_LIB_DEPS="$VG_LIB_DEPS -lpthread" -fi - -AC_SUBST([VG_LIB_DEPS]) -AC_SUBST([EGL_CLIENT_APIS]) - -if test "x$HAVE_ST_EGL" = xyes; then - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS egl" -fi - -if test "x$HAVE_ST_XORG" = xyes; then - PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], - HAVE_XEXTPROTO_71="yes"; DEFINES="$DEFINES -DHAVE_XEXTPROTO_71", - HAVE_XEXTPROTO_71="no") -fi - -AC_ARG_WITH([egl-platforms], - [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@], - [comma delimited native platforms libEGL supports, e.g. - "x11,drm" @<:@default=auto@:>@])], - [with_egl_platforms="$withval"], - [with_egl_platforms=yes]) -AC_ARG_WITH([egl-displays], - [AS_HELP_STRING([--with-egl-displays@<:@=DIRS...@:>@], - [DEPRECATED. Use --with-egl-platforms instead])], - [with_egl_platforms="$withval"]) - -EGL_PLATFORMS="" -WAYLAND_EGL_LIB_DEPS="" - -case "$with_egl_platforms" in -yes) - if test "x$enable_egl" = xyes && test "x$mesa_driver" != xosmesa; then - EGL_PLATFORMS="x11" - if test "$mesa_driver" = dri; then - EGL_PLATFORMS="$EGL_PLATFORMS drm" - fi - fi - ;; -*) - if test "x$enable_egl" != xyes; then - AC_MSG_ERROR([cannot build egl state tracker without EGL library]) - fi - # verify the requested driver directories exist - egl_platforms=`IFS=', '; echo $with_egl_platforms` - for plat in $egl_platforms; do - test -d "$srcdir/src/gallium/state_trackers/egl/$plat" || \ - AC_MSG_ERROR([EGL platform '$plat' doesn't exist]) - if test "$plat" = "fbdev"; then - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/fbdev" - fi - if test "$plat" = "wayland"; then - PKG_CHECK_MODULES([WAYLAND], [wayland-client wayland-server],, \ - [AC_MSG_ERROR([cannot find libwayland-client])]) - WAYLAND_EGL_LIB_DEPS="$WAYLAND_LIBS $LIBDRM_LIBS" - fi - done - EGL_PLATFORMS="$egl_platforms" - ;; -esac -AC_SUBST([EGL_PLATFORMS]) - -AC_SUBST([WAYLAND_EGL_LIB_DEPS]) -WAYLAND_EGL_PC_REQ_PRIV="wayland-client libdrm" -WAYLAND_EGL_PC_LIB_PRIV= -WAYLAND_EGL_PC_CFLAGS= - -AC_SUBST([WAYLAND_EGL_PC_REQ_PRIV]) -AC_SUBST([WAYLAND_EGL_PC_LIB_PRIV]) -AC_SUBST([WAYLAND_EGL_PC_CFLAGS]) - - -AC_ARG_WITH([egl-driver-dir], - [AS_HELP_STRING([--with-egl-driver-dir=DIR], - [directory for EGL drivers [[default=${libdir}/egl]]])], - [EGL_DRIVER_INSTALL_DIR="$withval"], - [EGL_DRIVER_INSTALL_DIR='${libdir}/egl']) -AC_SUBST([EGL_DRIVER_INSTALL_DIR]) - -AC_ARG_WITH([xorg-driver-dir], - [AS_HELP_STRING([--with-xorg-driver-dir=DIR], - [Default xorg driver directory[[default=${libdir}/xorg/modules/drivers]]])], - [XORG_DRIVER_INSTALL_DIR="$withval"], - [XORG_DRIVER_INSTALL_DIR="${libdir}/xorg/modules/drivers"]) -AC_SUBST([XORG_DRIVER_INSTALL_DIR]) - -AC_ARG_WITH([max-width], - [AS_HELP_STRING([--with-max-width=N], - [Maximum framebuffer width (4096)])], - [DEFINES="${DEFINES} -DMAX_WIDTH=${withval}"; - AS_IF([test "${withval}" -gt "4096"], - [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] -) -AC_ARG_WITH([max-height], - [AS_HELP_STRING([--with-max-height=N], - [Maximum framebuffer height (4096)])], - [DEFINES="${DEFINES} -DMAX_HEIGHT=${withval}"; - AS_IF([test "${withval}" -gt "4096"], - [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] -) - -dnl -dnl Gallium LLVM -dnl -AC_ARG_ENABLE([gallium-llvm], - [AS_HELP_STRING([--enable-gallium-llvm], - [build gallium LLVM support @<:@default=disabled@:>@])], - [enable_gallium_llvm="$enableval"], - [enable_gallium_llvm=auto]) -if test "x$enable_gallium_llvm" = xyes; then - if test "x$LLVM_CONFIG" != xno; then - LLVM_VERSION=`$LLVM_CONFIG --version` - LLVM_CFLAGS=`$LLVM_CONFIG --cppflags` - LLVM_LIBS="`$LLVM_CONFIG --libs jit interpreter nativecodegen bitwriter` -lstdc++" - - if test "x$HAS_UDIS86" != xno; then - LLVM_LIBS="$LLVM_LIBS -ludis86" - DEFINES="$DEFINES -DHAVE_UDIS86" - fi - LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" - DEFINES="$DEFINES -DGALLIUM_LLVMPIPE -D__STDC_CONSTANT_MACROS" - MESA_LLVM=1 - else - MESA_LLVM=0 - fi -else - MESA_LLVM=0 -fi - -dnl -dnl Gallium helper functions -dnl -gallium_check_st() { - if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_XORG" = xyes; then - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1" - fi - if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $2" - fi - if test "x$HAVE_ST_XORG" = xyes && test "x$3" != x; then - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $3" - fi -} - - -dnl -dnl Gallium SVGA configuration -dnl -AC_ARG_ENABLE([gallium-svga], - [AS_HELP_STRING([--enable-gallium-svga], - [build gallium SVGA @<:@default=disabled@:>@])], - [enable_gallium_svga="$enableval"], - [enable_gallium_svga=auto]) -if test "x$enable_gallium_svga" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS svga" - gallium_check_st "svga/drm" "dri-vmwgfx" "xorg-vmwgfx" -elif test "x$enable_gallium_svga" = xauto; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS svga" -fi - -dnl -dnl Gallium i915 configuration -dnl -AC_ARG_ENABLE([gallium-i915], - [AS_HELP_STRING([--enable-gallium-i915], - [build gallium i915 @<:@default=disabled@:>@])], - [enable_gallium_i915="$enableval"], - [enable_gallium_i915=auto]) -if test "x$enable_gallium_i915" = xyes; then - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS i915/sw" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" - gallium_check_st "i915/drm" "dri-i915" "xorg-i915" -elif test "x$enable_gallium_i915" = xauto; then - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS i915/sw" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" -fi - -dnl -dnl Gallium i965 configuration -dnl -AC_ARG_ENABLE([gallium-i965], - [AS_HELP_STRING([--enable-gallium-i965], - [build gallium i965 @<:@default=disabled@:>@])], - [enable_gallium_i965="$enableval"], - [enable_gallium_i965=auto]) -if test "x$enable_gallium_i965" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i965" - gallium_check_st "i965/drm" "dri-i965" "xorg-i965" -elif test "x$enable_gallium_i965" = xauto; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i965" -fi - -dnl -dnl Gallium Radeon r300g configuration -dnl -AC_ARG_ENABLE([gallium-radeon], - [AS_HELP_STRING([--enable-gallium-radeon], - [build gallium radeon @<:@default=disabled@:>@])], - [enable_gallium_radeon="$enableval"], - [enable_gallium_radeon=auto]) -if test "x$enable_gallium_radeon" = xauto; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" - gallium_check_st "radeon/drm" "dri-r300" -fi -if test "x$enable_gallium_radeon" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" - gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon" -fi - -dnl -dnl Gallium Radeon r600g configuration -dnl -AC_ARG_ENABLE([gallium-r600], - [AS_HELP_STRING([--enable-gallium-r600], - [build gallium radeon @<:@default=disabled@:>@])], - [enable_gallium_r600="$enableval"], - [enable_gallium_r600=auto]) -if test "x$enable_gallium_r600" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600" - gallium_check_st "r600/drm" "dri-r600" -fi - -dnl -dnl Gallium Nouveau configuration -dnl -AC_ARG_ENABLE([gallium-nouveau], - [AS_HELP_STRING([--enable-gallium-nouveau], - [build gallium nouveau @<:@default=disabled@:>@])], - [enable_gallium_nouveau="$enableval"], - [enable_gallium_nouveau=no]) -if test "x$enable_gallium_nouveau" = xyes; then - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0" - gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" -fi - -dnl -dnl Gallium swrast configuration -dnl -AC_ARG_ENABLE([gallium-swrast], - [AS_HELP_STRING([--enable-gallium-swrast], - [build gallium swrast @<:@default=auto@:>@])], - [enable_gallium_swrast="$enableval"], - [enable_gallium_swrast=auto]) -if test "x$enable_gallium_swrast" = xyes || test "x$enable_gallium_swrast" = xauto; then - if test "x$HAVE_ST_DRI" = xyes; then - GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast" - fi -fi - -dnl prepend CORE_DIRS to SRC_DIRS -SRC_DIRS="$CORE_DIRS $SRC_DIRS" - -dnl Restore LDFLAGS and CPPFLAGS -LDFLAGS="$_SAVE_LDFLAGS" -CPPFLAGS="$_SAVE_CPPFLAGS" - -dnl Substitute the config -AC_CONFIG_FILES([configs/autoconf]) - -dnl Replace the configs/current symlink -AC_CONFIG_COMMANDS([configs],[ -if test -f configs/current || test -L configs/current; then - rm -f configs/current -fi -ln -s autoconf configs/current -]) - -AC_OUTPUT - -dnl -dnl Output some configuration info for the user -dnl -echo "" -echo " prefix: $prefix" -echo " exec_prefix: $exec_prefix" -echo " libdir: $libdir" -echo " includedir: $includedir" - -dnl API info -echo "" -echo " OpenGL: $enable_opengl (ES1: $enable_gles1 ES2: $enable_gles2)" -echo " OpenVG: $enable_openvg" - -dnl Driver info -echo "" -echo " Driver: $mesa_driver" -if test "$mesa_driver" != no; then - if echo "$DRIVER_DIRS" | grep 'osmesa' >/dev/null 2>&1; then - echo " OSMesa: lib$OSMESA_LIB" - else - echo " OSMesa: no" - fi - if test "$mesa_driver" = dri; then - # cleanup the drivers var - dri_dirs=`echo $DRI_DIRS | $SED 's/^ *//;s/ */ /;s/ *$//'` - if test "x$DRI_DIRS" = x; then - echo " DRI drivers: no" - else - echo " DRI drivers: $dri_dirs" - fi - echo " DRI driver dir: $DRI_DRIVER_INSTALL_DIR" - echo " Use XCB: $enable_xcb" - echo " Shared dricore: $enable_dricore" - fi -fi -echo "" -echo " GLU: $enable_glu" -echo " GLw: $enable_glw (Motif: $enable_motif)" -echo " glut: $enable_glut" - -dnl EGL -echo "" -echo " EGL: $enable_egl" -if test "$enable_egl" = yes; then - echo " EGL platforms: $EGL_PLATFORMS" - - egl_drivers="" - for d in $EGL_DRIVERS_DIRS; do - egl_drivers="$egl_drivers builtin:egl_$d" - done - - if test "$enable_gallium" = yes -a "$HAVE_ST_EGL" = yes; then - echo " EGL drivers: ${egl_drivers} egl_gallium" - echo " EGL Gallium STs:$EGL_CLIENT_APIS" - else - echo " EGL drivers: $egl_drivers" - fi -fi - -echo "" -if test "x$MESA_LLVM" = x1; then - echo " llvm: yes" - echo " llvm-config: $LLVM_CONFIG" - echo " llvm-version: $LLVM_VERSION" -else - echo " llvm: no" -fi - -echo "" -if echo "$SRC_DIRS" | grep 'gallium' >/dev/null 2>&1; then - echo " Gallium: yes" - echo " Gallium dirs: $GALLIUM_DIRS" - echo " Target dirs: $GALLIUM_TARGET_DIRS" - echo " Winsys dirs: $GALLIUM_WINSYS_DIRS" - echo " Driver dirs: $GALLIUM_DRIVERS_DIRS" - echo " Trackers dirs: $GALLIUM_STATE_TRACKERS_DIRS" -else - echo " Gallium: no" -fi - -dnl Libraries -echo "" -echo " Shared libs: $enable_shared" -echo " Static libs: $enable_static" - -dnl Compiler options -# cleanup the CFLAGS/CXXFLAGS/DEFINES vars -cflags=`echo $CFLAGS $OPT_FLAGS $PIC_FLAGS $ARCH_FLAGS | \ - $SED 's/^ *//;s/ */ /;s/ *$//'` -cxxflags=`echo $CXXFLAGS $OPT_FLAGS $PIC_FLAGS $ARCH_FLAGS | \ - $SED 's/^ *//;s/ */ /;s/ *$//'` -defines=`echo $DEFINES $ASM_FLAGS | $SED 's/^ *//;s/ */ /;s/ *$//'` -echo "" -echo " CFLAGS: $cflags" -echo " CXXFLAGS: $cxxflags" -echo " Macros: $defines" -echo "" -echo " PYTHON2: $PYTHON2" - -echo "" -echo " Run '${MAKE-make}' to build Mesa" -echo "" +dnl Process this file with autoconf to create configure. + +AC_PREREQ([2.59]) + +dnl Versioning - scrape the version from configs/default +m4_define([mesa_version], + [m4_esyscmd([${MAKE-make} -s -f bin/version.mk version | tr -d '\n' | tr -d '\r'])]) +m4_ifval(mesa_version,, + [m4_fatal([Failed to get the Mesa version from `make -f bin/version.mk version`])]) + +dnl Tell the user about autoconf.html in the --help output +m4_divert_once([HELP_END], [ +See docs/autoconf.html for more details on the options for Mesa.]) + +AC_INIT([Mesa],[mesa_version], + [https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa]) +AC_CONFIG_AUX_DIR([bin]) +AC_CANONICAL_HOST + +dnl Versions for external dependencies +LIBDRM_REQUIRED=2.4.24 +LIBDRM_RADEON_REQUIRED=2.4.24 +LIBDRM_INTEL_REQUIRED=2.4.24 +DRI2PROTO_REQUIRED=2.1 +GLPROTO_REQUIRED=1.4.11 +LIBDRM_XORG_REQUIRED=2.4.24 +LIBKMS_XORG_REQUIRED=1.0.0 + +dnl Check for progs +AC_PROG_CPP +AC_PROG_CC +AC_PROG_CXX +AC_CHECK_PROGS([MAKE], [gmake make]) +AC_CHECK_PROGS([PYTHON2], [python2 python]) +AC_PATH_PROG([MKDEP], [makedepend]) +AC_PATH_PROG([SED], [sed]) + +if test "x$MKDEP" = "x"; then + AC_MSG_ERROR([makedepend is required to build Mesa]) +fi + +dnl Our fallback install-sh is a symlink to minstall. Use the existing +dnl configuration in that case. +AC_PROG_INSTALL +test "x$INSTALL" = "x$ac_install_sh" && INSTALL='$(MINSTALL)' + +dnl We need a POSIX shell for parts of the build. Assume we have one +dnl in most cases. +case "$host_os" in +solaris*) + # Solaris /bin/sh is too old/non-POSIX compliant + AC_PATH_PROGS(POSIX_SHELL, [ksh93 ksh sh]) + SHELL="$POSIX_SHELL" + ;; +esac + +dnl clang is mostly GCC-compatible, but its version is much lower, +dnl so we have to check for it. +AC_MSG_CHECKING([if compiling with clang]) + +AC_COMPILE_IFELSE( +[AC_LANG_PROGRAM([], [[ +#ifndef __clang__ + not clang +#endif +]])], +[CLANG=yes], [CLANG=no]) + +AC_MSG_RESULT([$CLANG]) + +dnl If we're using GCC, make sure that it is at least version 3.3.0. Older +dnl versions are explictly not supported. +if test "x$GCC" = xyes -a "x$CLANG" = xno; then + AC_MSG_CHECKING([whether gcc version is sufficient]) + major=0 + minor=0 + + GCC_VERSION=`$CC -dumpversion` + if test $? -eq 0; then + major=`echo $GCC_VERSION | cut -d. -f1` + minor=`echo $GCC_VERSION | cut -d. -f1` + fi + + if test $major -lt 3 -o $major -eq 3 -a $minor -lt 3 ; then + AC_MSG_RESULT([no]) + AC_MSG_ERROR([If using GCC, version 3.3.0 or later is required.]) + else + AC_MSG_RESULT([yes]) + fi +fi + + +MKDEP_OPTIONS=-fdepend +dnl Ask gcc where it's keeping its secret headers +if test "x$GCC" = xyes; then + for dir in include include-fixed; do + GCC_INCLUDES=`$CC -print-file-name=$dir` + if test "x$GCC_INCLUDES" != x && \ + test "$GCC_INCLUDES" != "$dir" && \ + test -d "$GCC_INCLUDES"; then + MKDEP_OPTIONS="$MKDEP_OPTIONS -I$GCC_INCLUDES" + fi + done +fi +AC_SUBST([MKDEP_OPTIONS]) + +dnl Make sure the pkg-config macros are defined +m4_ifndef([PKG_PROG_PKG_CONFIG], + [m4_fatal([Could not locate the pkg-config autoconf macros. + These are usually located in /usr/share/aclocal/pkg.m4. If your macros + are in a different location, try setting the environment variable + ACLOCAL="aclocal -I/other/macro/dir" before running autoreconf.])]) +PKG_PROG_PKG_CONFIG() + +dnl LIB_DIR - library basename +LIB_DIR=`echo $libdir | $SED 's%.*/%%'` +AC_SUBST([LIB_DIR]) + +dnl Cache LDFLAGS so we can add EXTRA_LIB_PATH and restore it later +_SAVE_LDFLAGS="$LDFLAGS" +AC_ARG_VAR([EXTRA_LIB_PATH],[Extra -L paths for the linker]) +AC_SUBST([EXTRA_LIB_PATH]) + +dnl Cache CPPFLAGS so we can add *_INCLUDES and restore it later +_SAVE_CPPFLAGS="$CPPFLAGS" +AC_ARG_VAR([X11_INCLUDES],[Extra -I paths for X11 headers]) +AC_SUBST([X11_INCLUDES]) + +dnl Compiler macros +DEFINES="" +AC_SUBST([DEFINES]) +case "$host_os" in +linux*|*-gnu*|gnu*) + DEFINES="$DEFINES -D_GNU_SOURCE -DPTHREADS" + ;; +solaris*) + DEFINES="$DEFINES -DPTHREADS -DSVR4" + ;; +cygwin*) + DEFINES="$DEFINES -DPTHREADS" + ;; +esac + +dnl Add flags for gcc and g++ +if test "x$GCC" = xyes; then + CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -std=c99" + if test "x$CLANG" = "xno"; then + CFLAGS="$CFLAGS -ffast-math" + fi + + # Enable -fvisibility=hidden if using a gcc that supports it + save_CFLAGS="$CFLAGS" + AC_MSG_CHECKING([whether $CC supports -fvisibility=hidden]) + VISIBILITY_CFLAGS="-fvisibility=hidden" + CFLAGS="$CFLAGS $VISIBILITY_CFLAGS" + AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]), + [VISIBILITY_CFLAGS=""; AC_MSG_RESULT([no])]); + + # Restore CFLAGS; VISIBILITY_CFLAGS are added to it where needed. + CFLAGS=$save_CFLAGS + + # Work around aliasing bugs - developers should comment this out + CFLAGS="$CFLAGS -fno-strict-aliasing" +fi +if test "x$GXX" = xyes; then + CXXFLAGS="$CXXFLAGS -Wall" + + # Enable -fvisibility=hidden if using a gcc that supports it + save_CXXFLAGS="$CXXFLAGS" + AC_MSG_CHECKING([whether $CXX supports -fvisibility=hidden]) + VISIBILITY_CXXFLAGS="-fvisibility=hidden" + CXXFLAGS="$CXXFLAGS $VISIBILITY_CXXFLAGS" + AC_LINK_IFELSE([AC_LANG_PROGRAM()], AC_MSG_RESULT([yes]), + [VISIBILITY_CXXFLAGS="" ; AC_MSG_RESULT([no])]); + + # Restore CXXFLAGS; VISIBILITY_CXXFLAGS are added to it where needed. + CXXFLAGS=$save_CXXFLAGS + + # Work around aliasing bugs - developers should comment this out + CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" +fi + +AC_SUBST([VISIBILITY_CFLAGS]) +AC_SUBST([VISIBILITY_CXXFLAGS]) + +dnl These should be unnecessary, but let the user set them if they want +AC_ARG_VAR([OPT_FLAGS], [Additional optimization flags for the compiler. + Default is to use CFLAGS.]) +AC_ARG_VAR([ARCH_FLAGS], [Additional architecture specific flags for the + compiler. Default is to use CFLAGS.]) +AC_SUBST([OPT_FLAGS]) +AC_SUBST([ARCH_FLAGS]) + +dnl +dnl Hacks to enable 32 or 64 bit build +dnl +AC_ARG_ENABLE([32-bit], + [AS_HELP_STRING([--enable-32-bit], + [build 32-bit libraries @<:@default=auto@:>@])], + [enable_32bit="$enableval"], + [enable_32bit=auto] +) +if test "x$enable_32bit" = xyes; then + if test "x$GCC" = xyes; then + CFLAGS="$CFLAGS -m32" + ARCH_FLAGS="$ARCH_FLAGS -m32" + fi + if test "x$GXX" = xyes; then + CXXFLAGS="$CXXFLAGS -m32" + fi +fi +AC_ARG_ENABLE([64-bit], + [AS_HELP_STRING([--enable-64-bit], + [build 64-bit libraries @<:@default=auto@:>@])], + [enable_64bit="$enableval"], + [enable_64bit=auto] +) +if test "x$enable_64bit" = xyes; then + if test "x$GCC" = xyes; then + CFLAGS="$CFLAGS -m64" + fi + if test "x$GXX" = xyes; then + CXXFLAGS="$CXXFLAGS -m64" + fi +fi + +dnl +dnl shared/static libraries, mimic libtool options +dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static], + [build static libraries @<:@default=disabled@:>@])], + [enable_static="$enableval"], + [enable_static=no] +) +case "x$enable_static" in +xyes|xno ) ;; +x ) enable_static=no ;; +* ) + AC_MSG_ERROR([Static library option '$enable_static' is not a valid]) + ;; +esac +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--disable-shared], + [build shared libraries @<:@default=enabled@:>@])], + [enable_shared="$enableval"], + [enable_shared=yes] +) +case "x$enable_shared" in +xyes|xno ) ;; +x ) enable_shared=yes ;; +* ) + AC_MSG_ERROR([Shared library option '$enable_shared' is not a valid]) + ;; +esac + +dnl Can't have static and shared libraries, default to static if user +dnl explicitly requested. If both disabled, set to static since shared +dnl was explicitly requirested. +case "x$enable_static$enable_shared" in +xyesyes ) + AC_MSG_WARN([Can't build static and shared libraries, disabling shared]) + enable_shared=no + ;; +xnono ) + AC_MSG_WARN([Can't disable both static and shared libraries, enabling static]) + enable_static=yes + ;; +esac + +dnl +dnl mklib options +dnl +AC_ARG_VAR([MKLIB_OPTIONS],[Options for the Mesa library script, mklib]) +if test "$enable_static" = yes; then + MKLIB_OPTIONS="$MKLIB_OPTIONS -static" +fi +AC_SUBST([MKLIB_OPTIONS]) + +dnl +dnl other compiler options +dnl +AC_ARG_ENABLE([debug], + [AS_HELP_STRING([--enable-debug], + [use debug compiler flags and macros @<:@default=disabled@:>@])], + [enable_debug="$enableval"], + [enable_debug=no] +) +if test "x$enable_debug" = xyes; then + DEFINES="$DEFINES -DDEBUG" + if test "x$GCC" = xyes; then + CFLAGS="$CFLAGS -g" + fi + if test "x$GXX" = xyes; then + CXXFLAGS="$CXXFLAGS -g" + fi +fi + +dnl +dnl library names +dnl +LIB_PREFIX_GLOB='lib' +LIB_VERSION_SEPARATOR='.' +if test "$enable_static" = yes; then + LIB_EXTENSION='a' +else + case "$host_os" in + darwin* ) + LIB_EXTENSION='dylib' ;; + cygwin* ) + dnl prefix can be 'cyg' or 'lib' + LIB_PREFIX_GLOB='???' + LIB_VERSION_SEPARATOR='-' + LIB_EXTENSION='dll' ;; + aix* ) + LIB_EXTENSION='a' ;; + * ) + LIB_EXTENSION='so' ;; + esac +fi + +GL_LIB_NAME='lib$(GL_LIB).'${LIB_EXTENSION} +GLU_LIB_NAME='lib$(GLU_LIB).'${LIB_EXTENSION} +GLUT_LIB_NAME='lib$(GLUT_LIB).'${LIB_EXTENSION} +GLW_LIB_NAME='lib$(GLW_LIB).'${LIB_EXTENSION} +OSMESA_LIB_NAME='lib$(OSMESA_LIB).'${LIB_EXTENSION} +EGL_LIB_NAME='lib$(EGL_LIB).'${LIB_EXTENSION} +GLESv1_CM_LIB_NAME='lib$(GLESv1_CM_LIB).'${LIB_EXTENSION} +GLESv2_LIB_NAME='lib$(GLESv2_LIB).'${LIB_EXTENSION} +VG_LIB_NAME='lib$(VG_LIB).'${LIB_EXTENSION} +GLAPI_LIB_NAME='lib$(GLAPI_LIB).'${LIB_EXTENSION} +WAYLAND_EGL_LIB_NAME='lib$(WAYLAND_EGL_LIB).'${LIB_EXTENSION} + +GL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLU_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLU_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLUT_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLUT_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLW_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLW_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +OSMESA_LIB_GLOB=${LIB_PREFIX_GLOB}'$(OSMESA_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLESv1_CM_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLESv1_CM_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLESv2_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLESv2_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +VG_LIB_GLOB=${LIB_PREFIX_GLOB}'$(VG_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +GLAPI_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLAPI_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' +WAYLAND_EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(WAYLAND_EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*' + +AC_SUBST([GL_LIB_NAME]) +AC_SUBST([GLU_LIB_NAME]) +AC_SUBST([GLUT_LIB_NAME]) +AC_SUBST([GLW_LIB_NAME]) +AC_SUBST([OSMESA_LIB_NAME]) +AC_SUBST([EGL_LIB_NAME]) +AC_SUBST([GLESv1_CM_LIB_NAME]) +AC_SUBST([GLESv2_LIB_NAME]) +AC_SUBST([VG_LIB_NAME]) +AC_SUBST([GLAPI_LIB_NAME]) +AC_SUBST([WAYLAND_EGL_LIB_NAME]) + +AC_SUBST([GL_LIB_GLOB]) +AC_SUBST([GLU_LIB_GLOB]) +AC_SUBST([GLUT_LIB_GLOB]) +AC_SUBST([GLW_LIB_GLOB]) +AC_SUBST([OSMESA_LIB_GLOB]) +AC_SUBST([EGL_LIB_GLOB]) +AC_SUBST([GLESv1_CM_LIB_GLOB]) +AC_SUBST([GLESv2_LIB_GLOB]) +AC_SUBST([VG_LIB_GLOB]) +AC_SUBST([GLAPI_LIB_GLOB]) +AC_SUBST([WAYLAND_EGL_LIB_GLOB]) + +dnl +dnl Arch/platform-specific settings +dnl +AC_ARG_ENABLE([asm], + [AS_HELP_STRING([--disable-asm], + [disable assembly usage @<:@default=enabled on supported plaforms@:>@])], + [enable_asm="$enableval"], + [enable_asm=yes] +) +asm_arch="" +ASM_FLAGS="" +MESA_ASM_SOURCES="" +GLAPI_ASM_SOURCES="" +AC_MSG_CHECKING([whether to enable assembly]) +test "x$enable_asm" = xno && AC_MSG_RESULT([no]) +# disable if cross compiling on x86/x86_64 since we must run gen_matypes +if test "x$enable_asm" = xyes && test "x$cross_compiling" = xyes; then + case "$host_cpu" in + i?86 | x86_64) + enable_asm=no + AC_MSG_RESULT([no, cross compiling]) + ;; + esac +fi +# check for supported arches +if test "x$enable_asm" = xyes; then + case "$host_cpu" in + i?86) + case "$host_os" in + linux* | *freebsd* | dragonfly* | *netbsd*) + test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86 + ;; + esac + ;; + x86_64) + case "$host_os" in + linux* | *freebsd* | dragonfly* | *netbsd*) + test "x$enable_32bit" = xyes && asm_arch=x86 || asm_arch=x86_64 + ;; + esac + ;; + powerpc) + case "$host_os" in + linux*) + asm_arch=ppc + ;; + esac + ;; + sparc*) + case "$host_os" in + linux*) + asm_arch=sparc + ;; + esac + ;; + esac + + case "$asm_arch" in + x86) + ASM_FLAGS="-DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM" + MESA_ASM_SOURCES='$(X86_SOURCES)' + GLAPI_ASM_SOURCES='$(X86_API)' + AC_MSG_RESULT([yes, x86]) + ;; + x86_64) + ASM_FLAGS="-DUSE_X86_64_ASM" + MESA_ASM_SOURCES='$(X86-64_SOURCES)' + GLAPI_ASM_SOURCES='$(X86-64_API)' + AC_MSG_RESULT([yes, x86_64]) + ;; + ppc) + ASM_FLAGS="-DUSE_PPC_ASM -DUSE_VMX_ASM" + MESA_ASM_SOURCES='$(PPC_SOURCES)' + AC_MSG_RESULT([yes, ppc]) + ;; + sparc) + ASM_FLAGS="-DUSE_SPARC_ASM" + MESA_ASM_SOURCES='$(SPARC_SOURCES)' + GLAPI_ASM_SOURCES='$(SPARC_API)' + AC_MSG_RESULT([yes, sparc]) + ;; + *) + AC_MSG_RESULT([no, platform not supported]) + ;; + esac +fi +AC_SUBST([ASM_FLAGS]) +AC_SUBST([MESA_ASM_SOURCES]) +AC_SUBST([GLAPI_ASM_SOURCES]) + +dnl PIC code macro +MESA_PIC_FLAGS + +dnl Check to see if dlopen is in default libraries (like Solaris, which +dnl has it in libc), or if libdl is needed to get it. +AC_CHECK_FUNC([dlopen], [], + [AC_CHECK_LIB([dl], [dlopen], [DLOPEN_LIBS="-ldl"])]) +AC_SUBST([DLOPEN_LIBS]) + +dnl See if posix_memalign is available +AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"]) + +dnl SELinux awareness. +AC_ARG_ENABLE([selinux], + [AS_HELP_STRING([--enable-selinux], + [Build SELinux-aware Mesa @<:@default=disabled@:>@])], + [MESA_SELINUX="$enableval"], + [MESA_SELINUX=no]) +if test "x$enable_selinux" = "xyes"; then + AC_CHECK_HEADER([selinux/selinux.h],[], + [AC_MSG_ERROR([SELinux headers not found])]) + AC_CHECK_LIB([selinux],[is_selinux_enabled],[], + [AC_MSG_ERROR([SELinux library not found])]) + SELINUX_LIBS="-lselinux" + DEFINES="$DEFINES -DMESA_SELINUX" +fi + +dnl Determine which APIs to support +AC_ARG_ENABLE([opengl], + [AS_HELP_STRING([--disable-opengl], + [disable support for standard OpenGL API @<:@default=no@:>@])], + [enable_opengl="$enableval"], + [enable_opengl=yes]) +AC_ARG_ENABLE([gles1], + [AS_HELP_STRING([--enable-gles1], + [enable support for OpenGL ES 1.x API @<:@default=no@:>@])], + [enable_gles1="$enableval"], + [enable_gles1=no]) +AC_ARG_ENABLE([gles2], + [AS_HELP_STRING([--enable-gles2], + [enable support for OpenGL ES 2.x API @<:@default=no@:>@])], + [enable_gles2="$enableval"], + [enable_gles2=no]) +AC_ARG_ENABLE([gles-overlay], + [AS_HELP_STRING([--enable-gles-overlay], + [DEPRECATED. Same as --enable-gles1 and --enable-gles2])], + [enable_gles1="$enableval"; enable_gles2="$enableval"], + []) + +AC_ARG_ENABLE([openvg], + [AS_HELP_STRING([--enable-openvg], + [enable support for OpenVG API @<:@default=no@:>@])], + [enable_openvg="$enableval"], + [enable_openvg=no]) + +dnl smooth the transition; should be removed eventually +if test "x$enable_openvg" = xno; then + case "x$with_state_trackers" in + x*vega*) + AC_MSG_WARN([vega state tracker is enabled without --enable-openvg]) + enable_openvg=yes + ;; + esac +fi + +if test "x$enable_opengl" = xno -a \ + "x$enable_gles1" = xno -a \ + "x$enable_gles2" = xno -a \ + "x$enable_openvg" = xno; then + AC_MSG_ERROR([at least one API should be enabled]) +fi + +API_DEFINES="" +if test "x$enable_opengl" = xno; then + API_DEFINES="$API_DEFINES -DFEATURE_GL=0" +else + API_DEFINES="$API_DEFINES -DFEATURE_GL=1" +fi +if test "x$enable_gles1" = xyes; then + API_DEFINES="$API_DEFINES -DFEATURE_ES1=1" +fi +if test "x$enable_gles2" = xyes; then + API_DEFINES="$API_DEFINES -DFEATURE_ES2=1" +fi +AC_SUBST([API_DEFINES]) + +AC_ARG_ENABLE([shared-glapi], + [AS_HELP_STRING([--enable-shared-glapi], + [EXPERIMENTAL. Enable shared glapi for OpenGL @<:@default=no@:>@])], + [enable_shared_glapi="$enableval"], + [enable_shared_glapi=no]) + +SHARED_GLAPI="0" +if test "x$enable_shared_glapi" = xyes; then + SHARED_GLAPI="1" +fi +AC_SUBST([SHARED_GLAPI]) + +dnl +dnl Driver configuration. Options are xlib, dri and osmesa right now. +dnl More later: fbdev, ... +dnl +default_driver="xlib" + +case "$host_os" in +linux*) + case "$host_cpu" in + i*86|x86_64|powerpc*|sparc*) default_driver="dri";; + esac + ;; +*freebsd* | dragonfly* | *netbsd*) + case "$host_cpu" in + i*86|x86_64|powerpc*|sparc*) default_driver="dri";; + esac + ;; +esac + +if test "x$enable_opengl" = xno; then + default_driver="no" +fi + +AC_ARG_WITH([driver], + [AS_HELP_STRING([--with-driver=DRIVER], + [driver for Mesa: xlib,dri,osmesa @<:@default=dri when available, or xlib@:>@])], + [mesa_driver="$withval"], + [mesa_driver="$default_driver"]) +dnl Check for valid option +case "x$mesa_driver" in +xxlib|xdri|xosmesa) + if test "x$enable_opengl" = xno; then + AC_MSG_ERROR([Driver '$mesa_driver' requires OpenGL enabled]) + fi + ;; +xno) + ;; +*) + AC_MSG_ERROR([Driver '$mesa_driver' is not a valid option]) + ;; +esac + +dnl +dnl Driver specific build directories +dnl + +dnl this variable will be prepended to SRC_DIRS and is not exported +CORE_DIRS="" + +SRC_DIRS="" +GLU_DIRS="sgi" +GALLIUM_DIRS="auxiliary drivers state_trackers" +GALLIUM_TARGET_DIRS="" +GALLIUM_WINSYS_DIRS="sw" +GALLIUM_DRIVERS_DIRS="softpipe failover galahad trace rbug noop identity" +GALLIUM_STATE_TRACKERS_DIRS="" + +# build shared-glapi if enabled for OpenGL or if OpenGL ES is enabled +case "x$enable_shared_glapi$enable_gles1$enable_gles2" in +x*yes*) + CORE_DIRS="$CORE_DIRS mapi/shared-glapi" + ;; +esac + +# build glapi if OpenGL is enabled +if test "x$enable_opengl" = xyes; then + CORE_DIRS="$CORE_DIRS mapi/glapi" +fi + +# build es1api if OpenGL ES 1.x is enabled +if test "x$enable_gles1" = xyes; then + CORE_DIRS="$CORE_DIRS mapi/es1api" +fi + +# build es2api if OpenGL ES 2.x is enabled +if test "x$enable_gles2" = xyes; then + CORE_DIRS="$CORE_DIRS mapi/es2api" +fi + +# build vgapi if OpenVG is enabled +if test "x$enable_openvg" = xyes; then + CORE_DIRS="$CORE_DIRS mapi/vgapi" +fi + +# build glsl and mesa if OpenGL or OpenGL ES is enabled +case "x$enable_opengl$enable_gles1$enable_gles2" in +x*yes*) + CORE_DIRS="$CORE_DIRS glsl mesa" + ;; +esac + +case "$mesa_driver" in +xlib) + DRIVER_DIRS="x11" + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib" + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS libgl-xlib" + ;; +dri) + SRC_DIRS="$SRC_DIRS glx" + DRIVER_DIRS="dri" + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib sw/dri" + ;; +osmesa) + DRIVER_DIRS="osmesa" + ;; +no) + DRIVER_DRIS="" + ;; +esac +AC_SUBST([SRC_DIRS]) +AC_SUBST([GLU_DIRS]) +AC_SUBST([DRIVER_DIRS]) +AC_SUBST([GALLIUM_DIRS]) +AC_SUBST([GALLIUM_TARGET_DIRS]) +AC_SUBST([GALLIUM_WINSYS_DIRS]) +AC_SUBST([GALLIUM_DRIVERS_DIRS]) +AC_SUBST([GALLIUM_STATE_TRACKERS_DIRS]) +AC_SUBST([MESA_LLVM]) + +dnl +dnl Find out if X is available. The variable have_x is set if libX11 is +dnl found to mimic AC_PATH_XTRA. +dnl +if test -n "$PKG_CONFIG"; then + AC_MSG_CHECKING([pkg-config files for X11 are available]) + PKG_CHECK_EXISTS([x11],[ + x11_pkgconfig=yes + have_x=yes + ],[ + x11_pkgconfig=no + ]) + AC_MSG_RESULT([$x11_pkgconfig]) +else + x11_pkgconfig=no +fi +dnl Use the autoconf macro if no pkg-config files +if test "$x11_pkgconfig" = yes; then + PKG_CHECK_MODULES([X11], [x11]) +else + AC_PATH_XTRA + test -z "$X11_CFLAGS" && X11_CFLAGS="$X_CFLAGS" + test -z "$X11_LIBS" && X11_LIBS="$X_LIBS -lX11" + AC_SUBST([X11_CFLAGS]) + AC_SUBST([X11_LIBS]) +fi + +dnl Try to tell the user that the --x-* options are only used when +dnl pkg-config is not available. This must be right after AC_PATH_XTRA. +m4_divert_once([HELP_BEGIN], +[These options are only used when the X libraries cannot be found by the +pkg-config utility.]) + +dnl We need X for xlib and dri, so bomb now if it's not found +case "$mesa_driver" in +xlib|dri) + if test "$no_x" = yes; then + AC_MSG_ERROR([X11 development libraries needed for $mesa_driver driver]) + fi + ;; +esac + +dnl XCB - this is only used for GLX right now +AC_ARG_ENABLE([xcb], + [AS_HELP_STRING([--enable-xcb], + [use XCB for GLX @<:@default=disabled@:>@])], + [enable_xcb="$enableval"], + [enable_xcb=no]) +if test "x$enable_xcb" = xyes; then + DEFINES="$DEFINES -DUSE_XCB" +else + enable_xcb=no +fi + +dnl +dnl libGL configuration per driver +dnl +case "$mesa_driver" in +xlib) + if test "$x11_pkgconfig" = yes; then + PKG_CHECK_MODULES([XLIBGL], [x11 xext]) + GL_PC_REQ_PRIV="x11 xext" + X11_INCLUDES="$X11_INCLUDES $XLIBGL_CFLAGS" + GL_LIB_DEPS="$XLIBGL_LIBS" + else + # should check these... + X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" + GL_LIB_DEPS="$X_LIBS -lX11 -lXext" + GL_PC_LIB_PRIV="$GL_LIB_DEPS" + GL_PC_CFLAGS="$X11_INCLUDES" + fi + GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread" + GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread" + + # if static, move the external libraries to the programs + # and empty the libraries for libGL + if test "$enable_static" = yes; then + APP_LIB_DEPS="$APP_LIB_DEPS $GL_LIB_DEPS" + GL_LIB_DEPS="" + fi + ;; +dri|no) # these checks are still desired when there is no mesa_driver + # DRI must be shared, I think + if test "$enable_static" = yes; then + AC_MSG_ERROR([Can't use static libraries for DRI drivers]) + fi + + # Check for libdrm + PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED]) + PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED]) + PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED]) + GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED glproto >= $GLPROTO_REQUIRED" + DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" + + # find the DRI deps for libGL + if test "$x11_pkgconfig" = yes; then + dri_modules="x11 xext xdamage xfixes" + + # add xf86vidmode if available + PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no) + if test "$HAVE_XF86VIDMODE" = yes ; then + dri_modules="$dri_modules xxf86vm" + fi + + # add xcb modules if necessary + if test "$enable_xcb" = yes; then + dri_modules="$dri_modules x11-xcb xcb-glx" + fi + + PKG_CHECK_MODULES([DRIGL], [$dri_modules]) + GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" + X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS" + GL_LIB_DEPS="$DRIGL_LIBS" + else + # should check these... + X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" + GL_LIB_DEPS="$X_LIBS -lX11 -lXext -lXxf86vm -lXdamage -lXfixes" + GL_PC_LIB_PRIV="$GL_LIB_DEPS" + GL_PC_CFLAGS="$X11_INCLUDES" + + # XCB can only be used from pkg-config + if test "$enable_xcb" = yes; then + PKG_CHECK_MODULES([XCB],[x11-xcb xcb-glx]) + GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV x11-xcb xcb-glx" + X11_INCLUDES="$X11_INCLUDES $XCB_CFLAGS" + GL_LIB_DEPS="$GL_LIB_DEPS $XCB_LIBS" + fi + fi + + # need DRM libs, -lpthread, etc. + GL_LIB_DEPS="$GL_LIB_DEPS $LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" + GL_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" + GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" + GLESv1_CM_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" + GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS" + GLESv2_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS" + ;; +osmesa) + # No libGL for osmesa + GL_LIB_DEPS="" + ;; +esac +AC_SUBST([GL_LIB_DEPS]) +AC_SUBST([GL_PC_REQ_PRIV]) +AC_SUBST([GL_PC_LIB_PRIV]) +AC_SUBST([GL_PC_CFLAGS]) +AC_SUBST([DRI_PC_REQ_PRIV]) +AC_SUBST([GLESv1_CM_LIB_DEPS]) +AC_SUBST([GLESv1_CM_PC_LIB_PRIV]) +AC_SUBST([GLESv2_LIB_DEPS]) +AC_SUBST([GLESv2_PC_LIB_PRIV]) + +GLAPI_LIB_DEPS="-lpthread" +AC_SUBST([GLAPI_LIB_DEPS]) + + +dnl Setup default DRI CFLAGS +DRI_CFLAGS='$(CFLAGS)' +DRI_CXXFLAGS='$(CXXFLAGS)' +DRI_LIB_DEPS='$(TOP)/src/mesa/libmesa.a' +MESA_MODULES='$(TOP)/src/mesa/libmesa.a' + +AC_ARG_ENABLE([shared-dricore], + [AS_HELP_STRING([--enable-shared-dricore], + [link DRI modules with shared core DRI routines @<:@default=disabled@:>@])], + [enable_dricore="$enableval"], + [enable_dricore=no]) +if test "$mesa_driver" = dri ; then + if test "$enable_dricore" = yes ; then + if test "$GCC$GXX" != yesyes ; then + AC_MSG_WARN([Shared dricore requires GCC-compatible rpath handling. Disabling shared dricore]) + enable_dricore=no + else + DRICORE_GLSL_LIBS='$(TOP)/$(LIB_DIR)/libglsl.so' + DRICORE_LIBS='$(TOP)/$(LIB_DIR)/libdricore.so' + DRICORE_LIB_DEPS='-L$(TOP)/$(LIB_DIR) -Wl,-R$(DRI_DRIVER_INSTALL_DIR) -lglsl' + DRI_LIB_DEPS='-L$(TOP)/$(LIB_DIR) -Wl,-R$(DRI_DRIVER_INSTALL_DIR) -ldricore -lglsl' + DRI_CFLAGS='$(CFLAGS_NOVISIBILITY) -DUSE_DRICORE' + DRI_CXXFLAGS='$(CXXFLAGS_NOVISIBILITY) -DUSE_DRICORE' + MESA_MODULES='$(DRICORE_LIBS) $(DRICORE_GLSL_LIBS)' + fi + fi +fi +AC_SUBST([DRICORE_LIBS]) +AC_SUBST([DRICORE_GLSL_LIBS]) +AC_SUBST([DRICORE_LIB_DEPS]) +AC_SUBST([DRI_CXXFLAGS]) +AC_SUBST([DRI_CFLAGS]) +AC_SUBST([MESA_MODULES]) + +AC_SUBST([HAVE_XF86VIDMODE]) + +PKG_CHECK_MODULES([LIBDRM_RADEON], + [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED], + HAVE_LIBDRM_RADEON=yes, + HAVE_LIBDRM_RADEON=no) + +dnl +dnl More X11 setup +dnl +if test "$mesa_driver" = xlib; then + DEFINES="$DEFINES -DUSE_XSHM" +fi + +dnl +dnl TLS detection +dnl + +AC_ARG_ENABLE([glx-tls], + [AS_HELP_STRING([--enable-glx-tls], + [enable TLS support in GLX @<:@default=disabled@:>@])], + [GLX_USE_TLS="$enableval"], + [GLX_USE_TLS=no]) +AC_SUBST(GLX_TLS, ${GLX_USE_TLS}) + +AS_IF([test "x$GLX_USE_TLS" = xyes], + [DEFINES="${DEFINES} -DGLX_USE_TLS -DPTHREADS"]) + +dnl +dnl More DRI setup +dnl +dnl Directory for DRI drivers +AC_ARG_WITH([dri-driverdir], + [AS_HELP_STRING([--with-dri-driverdir=DIR], + [directory for the DRI drivers @<:@${libdir}/dri@:>@])], + [DRI_DRIVER_INSTALL_DIR="$withval"], + [DRI_DRIVER_INSTALL_DIR='${libdir}/dri']) +AC_SUBST([DRI_DRIVER_INSTALL_DIR]) +dnl Extra search path for DRI drivers +AC_ARG_WITH([dri-searchpath], + [AS_HELP_STRING([--with-dri-searchpath=DIRS...], + [semicolon delimited DRI driver search directories @<:@${libdir}/dri@:>@])], + [DRI_DRIVER_SEARCH_DIR="$withval"], + [DRI_DRIVER_SEARCH_DIR='${DRI_DRIVER_INSTALL_DIR}']) +AC_SUBST([DRI_DRIVER_SEARCH_DIR]) +dnl Direct rendering or just indirect rendering +AC_ARG_ENABLE([driglx-direct], + [AS_HELP_STRING([--disable-driglx-direct], + [enable direct rendering in GLX and EGL for DRI @<:@default=enabled@:>@])], + [driglx_direct="$enableval"], + [driglx_direct="yes"]) +dnl Which drivers to build - default is chosen by platform +AC_ARG_WITH([dri-drivers], + [AS_HELP_STRING([--with-dri-drivers@<:@=DIRS...@:>@], + [comma delimited DRI drivers list, e.g. + "swrast,i965,radeon" @<:@default=auto@:>@])], + [with_dri_drivers="$withval"], + [with_dri_drivers=yes]) +if test "x$with_dri_drivers" = x; then + with_dri_drivers=no +fi + +dnl If $with_dri_drivers is yes, directories will be added through +dnl platform checks +DRI_DIRS="" +case "$with_dri_drivers" in +no) ;; +yes) + DRI_DIRS="yes" + ;; +*) + # verify the requested driver directories exist + dri_drivers=`IFS=', '; echo $with_dri_drivers` + for driver in $dri_drivers; do + test -d "$srcdir/src/mesa/drivers/dri/$driver" || \ + AC_MSG_ERROR([DRI driver directory '$driver' doesn't exist]) + done + DRI_DIRS="$dri_drivers" + ;; +esac + +dnl Set DRI_DIRS, DEFINES and LIB_DEPS +if test "$mesa_driver" = dri -o "$mesa_driver" = no; then + # Platform specific settings and drivers to build + case "$host_os" in + linux*) + DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" + if test "x$driglx_direct" = xyes; then + DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" + fi + DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING -DHAVE_ALIAS" + + case "$host_cpu" in + x86_64) + # sis is missing because they have not be converted to use + # the new interface. i810 are missing because there is no + # x86-64 system where they could *ever* be used. + if test "x$DRI_DIRS" = "xyes"; then + DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 r600 radeon \ + savage tdfx unichrome swrast" + fi + ;; + powerpc*) + # Build only the drivers for cards that exist on PowerPC. + # At some point MGA will be added, but not yet. + if test "x$DRI_DIRS" = "xyes"; then + DRI_DIRS="mach64 r128 r200 r300 r600 radeon tdfx swrast" + fi + ;; + sparc*) + # Build only the drivers for cards that exist on sparc` + if test "x$DRI_DIRS" = "xyes"; then + DRI_DIRS="mach64 r128 r200 r300 r600 radeon swrast" + fi + ;; + esac + ;; + freebsd* | dragonfly* | *netbsd*) + DEFINES="$DEFINES -DPTHREADS -DUSE_EXTERNAL_DXTN_LIB=1" + DEFINES="$DEFINES -DIN_DRI_DRIVER -DHAVE_ALIAS" + DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING" + if test "x$driglx_direct" = xyes; then + DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" + fi + + if test "x$DRI_DIRS" = "xyes"; then + DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon tdfx \ + unichrome savage sis swrast" + fi + ;; + gnu*) + DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" + DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING -DHAVE_ALIAS" + ;; + solaris*) + DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1 -DIN_DRI_DRIVER" + DEFINES="$DEFINES -DGLX_INDIRECT_RENDERING" + if test "x$driglx_direct" = xyes; then + DEFINES="$DEFINES -DGLX_DIRECT_RENDERING" + fi + ;; + esac + + # default drivers + if test "x$DRI_DIRS" = "xyes"; then + DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon \ + savage sis tdfx unichrome swrast" + fi + + DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/ */ /g'` + + # Check for expat + if test "$mesa_driver" = dri; then + EXPAT_INCLUDES="" + EXPAT_LIB=-lexpat + AC_ARG_WITH([expat], + [AS_HELP_STRING([--with-expat=DIR], + [expat install directory])],[ + EXPAT_INCLUDES="-I$withval/include" + CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES" + LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR" + EXPAT_LIB="-L$withval/$LIB_DIR -lexpat" + ]) + AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])]) + AC_CHECK_LIB([expat],[XML_ParserCreate],[], + [AC_MSG_ERROR([Expat required for DRI.])]) + fi + + # put all the necessary libs together, including possibly libdricore + DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIB -lm -lpthread $DLOPEN_LIBS" +fi +AC_SUBST([DRI_DIRS]) +AC_SUBST([EXPAT_INCLUDES]) +AC_SUBST([DRI_LIB_DEPS]) + +case $DRI_DIRS in +*i915*|*i965*) + PKG_CHECK_MODULES([INTEL], [libdrm_intel >= $LIBDRM_INTEL_REQUIRED]) + ;; +esac + +case $DRI_DIRS in +*radeon*|*r200*|*r300*|*r600*) + if test "x$HAVE_LIBDRM_RADEON" = xyes; then + RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS" + RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS + fi + ;; +esac +AC_SUBST([RADEON_CFLAGS]) +AC_SUBST([RADEON_LDFLAGS]) + + +dnl +dnl OSMesa configuration +dnl +if test "$mesa_driver" = xlib; then + default_gl_osmesa=yes +else + default_gl_osmesa=no +fi +AC_ARG_ENABLE([gl-osmesa], + [AS_HELP_STRING([--enable-gl-osmesa], + [enable OSMesa with libGL @<:@default=enabled for xlib driver@:>@])], + [gl_osmesa="$enableval"], + [gl_osmesa="$default_gl_osmesa"]) +if test "x$gl_osmesa" = xyes; then + if test "x$enable_opengl" = xno; then + AC_MSG_ERROR([OpenGL is not available for OSMesa driver]) + fi + if test "$mesa_driver" = osmesa; then + AC_MSG_ERROR([libGL is not available for OSMesa driver]) + else + DRIVER_DIRS="$DRIVER_DIRS osmesa" + fi +fi + +dnl Configure the channel bits for OSMesa (libOSMesa, libOSMesa16, ...) +AC_ARG_WITH([osmesa-bits], + [AS_HELP_STRING([--with-osmesa-bits=BITS], + [OSMesa channel bits and library name: 8, 16, 32 @<:@default=8@:>@])], + [osmesa_bits="$withval"], + [osmesa_bits=8]) +if test "$mesa_driver" != osmesa && test "x$osmesa_bits" != x8; then + AC_MSG_WARN([Ignoring OSMesa channel bits for non-OSMesa driver]) + osmesa_bits=8 +fi +case "x$osmesa_bits" in +x8) + OSMESA_LIB=OSMesa + ;; +x16|x32) + OSMESA_LIB="OSMesa$osmesa_bits" + DEFINES="$DEFINES -DCHAN_BITS=$osmesa_bits -DDEFAULT_SOFTWARE_DEPTH_BITS=31" + ;; +*) + AC_MSG_ERROR([OSMesa bits '$osmesa_bits' is not a valid option]) + ;; +esac +AC_SUBST([OSMESA_LIB]) + +case "$DRIVER_DIRS" in +*osmesa*) + # only link libraries with osmesa if shared + if test "$enable_static" = no; then + OSMESA_LIB_DEPS="-lm -lpthread $SELINUX_LIBS $DLOPEN_LIBS" + else + OSMESA_LIB_DEPS="" + fi + OSMESA_MESA_DEPS="" + OSMESA_PC_LIB_PRIV="-lm -lpthread $SELINUX_LIBS $DLOPEN_LIBS" + ;; +esac +AC_SUBST([OSMESA_LIB_DEPS]) +AC_SUBST([OSMESA_MESA_DEPS]) +AC_SUBST([OSMESA_PC_REQ]) +AC_SUBST([OSMESA_PC_LIB_PRIV]) + +dnl +dnl EGL configuration +dnl +AC_ARG_ENABLE([egl], + [AS_HELP_STRING([--disable-egl], + [disable EGL library @<:@default=enabled@:>@])], + [enable_egl="$enableval"], + [enable_egl=yes]) +if test "x$enable_egl" = xno; then + if test "x$mesa_driver" = xno; then + AC_MSG_ERROR([cannot disable EGL when there is no mesa driver]) + fi + if test "x$enable_openvg" = xyes; then + AC_MSG_ERROR([cannot enable OpenVG without EGL]) + fi +fi +if test "x$enable_egl" = xyes; then + SRC_DIRS="$SRC_DIRS egl" + EGL_LIB_DEPS="$DLOPEN_LIBS -lpthread" + EGL_DRIVERS_DIRS="" + if test "$enable_static" != yes; then + # build egl_glx when libGL is built + if test "$mesa_driver" = xlib -o "$mesa_driver" = dri; then + EGL_DRIVERS_DIRS="glx" + fi + + if test "$mesa_driver" = dri; then + # build egl_dri2 when xcb-dri2 is available + PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb-dri2 xcb-xfixes], + [have_xcb_dri2=yes],[have_xcb_dri2=no]) + PKG_CHECK_MODULES([LIBUDEV], [libudev > 150], + [have_libudev=yes],[have_libudev=no]) + + if test "$have_xcb_dri2" = yes; then + EGL_DRIVER_DRI2=dri2 + DEFINES="$DEFINES -DHAVE_XCB_DRI2" + if test "$have_libudev" = yes; then + DEFINES="$DEFINES -DHAVE_LIBUDEV" + fi + # workaround a bug in xcb-dri2 generated by xcb-proto 1.6 + AC_CHECK_LIB(xcb-dri2, xcb_dri2_connect_alignment_pad, [], + [DEFINES="$DEFINES -DXCB_DRI2_CONNECT_DEVICE_NAME_BROKEN"]) + fi + fi + + EGL_DRIVERS_DIRS="$EGL_DRIVERS_DIRS $EGL_DRIVER_DRI2" + fi +fi +AC_SUBST([EGL_LIB_DEPS]) +AC_SUBST([EGL_DRIVERS_DIRS]) + +dnl +dnl GLU configuration +dnl +AC_ARG_ENABLE([glu], + [AS_HELP_STRING([--disable-glu], + [enable OpenGL Utility library @<:@default=enabled@:>@])], + [enable_glu="$enableval"], + [enable_glu=yes]) + +if test "x$enable_glu" = xyes -a "x$mesa_driver" = xno; then + AC_MSG_NOTICE([Disabling GLU since there is no OpenGL driver]) + enable_glu=no +fi + +if test "x$enable_glu" = xyes; then + SRC_DIRS="$SRC_DIRS glu" + + case "$mesa_driver" in + osmesa) + # Link libGLU to libOSMesa instead of libGL + GLU_LIB_DEPS="" + GLU_PC_REQ="osmesa" + if test "$enable_static" = no; then + GLU_MESA_DEPS='-l$(OSMESA_LIB)' + else + GLU_MESA_DEPS="" + fi + ;; + *) + # If static, empty GLU_LIB_DEPS and add libs for programs to link + GLU_PC_REQ="gl" + GLU_PC_LIB_PRIV="-lm" + if test "$enable_static" = no; then + GLU_LIB_DEPS="-lm" + GLU_MESA_DEPS='-l$(GL_LIB)' + else + GLU_LIB_DEPS="" + GLU_MESA_DEPS="" + APP_LIB_DEPS="$APP_LIB_DEPS -lstdc++" + fi + ;; + esac +fi +if test "$enable_static" = no; then + GLU_LIB_DEPS="$GLU_LIB_DEPS $OS_CPLUSPLUS_LIBS" +fi +GLU_PC_LIB_PRIV="$GLU_PC_LIB_PRIV $OS_CPLUSPLUS_LIBS" +AC_SUBST([GLU_LIB_DEPS]) +AC_SUBST([GLU_MESA_DEPS]) +AC_SUBST([GLU_PC_REQ]) +AC_SUBST([GLU_PC_REQ_PRIV]) +AC_SUBST([GLU_PC_LIB_PRIV]) +AC_SUBST([GLU_PC_CFLAGS]) + +dnl +dnl GLw configuration +dnl +AC_ARG_ENABLE([glw], + [AS_HELP_STRING([--disable-glw], + [enable Xt/Motif widget library @<:@default=enabled@:>@])], + [enable_glw="$enableval"], + [enable_glw=yes]) +dnl Don't build GLw on osmesa +if test "x$enable_glw" = xyes; then + case "$mesa_driver" in + osmesa|no) + AC_MSG_NOTICE([Disabling GLw since there is no OpenGL driver]) + enable_glw=no + ;; + esac +fi +AC_ARG_ENABLE([motif], + [AS_HELP_STRING([--enable-motif], + [use Motif widgets in GLw @<:@default=disabled@:>@])], + [enable_motif="$enableval"], + [enable_motif=no]) + +if test "x$enable_glw" = xyes; then + SRC_DIRS="$SRC_DIRS glw" + if test "$x11_pkgconfig" = yes; then + PKG_CHECK_MODULES([GLW],[x11 xt]) + GLW_PC_REQ_PRIV="x11 xt" + GLW_LIB_DEPS="$GLW_LIBS" + else + # should check these... + GLW_LIB_DEPS="$X_LIBS -lXt -lX11" + GLW_PC_LIB_PRIV="$GLW_LIB_DEPS" + GLW_PC_CFLAGS="$X11_INCLUDES" + fi + + GLW_SOURCES="GLwDrawA.c" + MOTIF_CFLAGS= + if test "x$enable_motif" = xyes; then + GLW_SOURCES="$GLW_SOURCES GLwMDrawA.c" + AC_PATH_PROG([MOTIF_CONFIG], [motif-config], [no]) + if test "x$MOTIF_CONFIG" != xno; then + MOTIF_CFLAGS=`$MOTIF_CONFIG --cflags` + MOTIF_LIBS=`$MOTIF_CONFIG --libs` + else + AC_CHECK_HEADER([Xm/PrimitiveP.h], [], + [AC_MSG_ERROR([Can't locate Motif headers])]) + AC_CHECK_LIB([Xm], [XmGetPixmap], [MOTIF_LIBS="-lXm"], + [AC_MSG_ERROR([Can't locate Motif Xm library])]) + fi + # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11 + GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS" + GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV" + GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS" + fi + + # If static, empty GLW_LIB_DEPS and add libs for programs to link + GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV" + if test "$enable_static" = no; then + GLW_MESA_DEPS='-l$(GL_LIB)' + GLW_LIB_DEPS="$GLW_LIB_DEPS" + else + APP_LIB_DEPS="$APP_LIB_DEPS $GLW_LIB_DEPS" + GLW_LIB_DEPS="" + GLW_MESA_DEPS="" + fi +fi +AC_SUBST([GLW_LIB_DEPS]) +AC_SUBST([GLW_MESA_DEPS]) +AC_SUBST([GLW_SOURCES]) +AC_SUBST([MOTIF_CFLAGS]) +AC_SUBST([GLW_PC_REQ_PRIV]) +AC_SUBST([GLW_PC_LIB_PRIV]) +AC_SUBST([GLW_PC_CFLAGS]) + +dnl +dnl GLUT configuration +dnl +if test -f "$srcdir/include/GL/glut.h"; then + default_glut=yes +else + default_glut=no +fi +AC_ARG_ENABLE([glut], + [AS_HELP_STRING([--disable-glut], + [enable GLUT library @<:@default=enabled if source available@:>@])], + [enable_glut="$enableval"], + [enable_glut="$default_glut"]) + +dnl Don't build glut on osmesa +if test "x$enable_glut" = xyes; then + case "$mesa_driver" in + osmesa|no) + AC_MSG_NOTICE([Disabling glut since there is no OpenGL driver]) + enable_glut=no + ;; + esac +fi +dnl Can't build glut if GLU not available +if test "x$enable_glu$enable_glut" = xnoyes; then + AC_MSG_WARN([Disabling glut since GLU is disabled]) + enable_glut=no +fi + +if test "x$enable_glut" = xyes; then + SRC_DIRS="$SRC_DIRS glut/glx" + if test "$x11_pkgconfig" = yes; then + PKG_CHECK_MODULES([GLUT],[x11 xmu xi]) + GLUT_PC_REQ_PRIV="x11 xmu xi" + GLUT_LIB_DEPS="$GLUT_LIBS" + else + # should check these... + GLUT_LIB_DEPS="$X_LIBS -lX11 -lXmu -lXi" + GLUT_PC_LIB_PRIV="$GLUT_LIB_DEPS" + GLUT_PC_CFLAGS="$X11_INCLUDES" + fi + if test "x$GCC" = xyes; then + GLUT_CFLAGS="$GLUT_CFLAGS -fexceptions" + fi + GLUT_LIB_DEPS="$GLUT_LIB_DEPS -lm" + GLUT_PC_LIB_PRIV="$GLUT_PC_LIB_PRIV -lm" + + # If static, empty GLUT_LIB_DEPS and add libs for programs to link + if test "$enable_static" = no; then + GLUT_MESA_DEPS='-l$(GLU_LIB) -l$(GL_LIB)' + else + APP_LIB_DEPS="$APP_LIB_DEPS $GLUT_LIB_DEPS" + GLUT_LIB_DEPS="" + GLUT_MESA_DEPS="" + fi +fi +AC_SUBST([GLUT_LIB_DEPS]) +AC_SUBST([GLUT_MESA_DEPS]) +AC_SUBST([GLUT_CFLAGS]) +AC_SUBST([GLUT_PC_REQ_PRIV]) +AC_SUBST([GLUT_PC_LIB_PRIV]) +AC_SUBST([GLUT_PC_CFLAGS]) + +dnl +dnl Program library dependencies +dnl Only libm is added here if necessary as the libraries should +dnl be pulled in by the linker +dnl +if test "x$APP_LIB_DEPS" = x; then + case "$host_os" in + solaris*) + APP_LIB_DEPS="-lX11 -lsocket -lnsl -lm" + ;; + cygwin*) + APP_LIB_DEPS="-lX11" + ;; + *) + APP_LIB_DEPS="-lm" + ;; + esac +fi +AC_SUBST([APP_LIB_DEPS]) +AC_SUBST([PROGRAM_DIRS]) + +dnl +dnl Gallium configuration +dnl +AC_ARG_ENABLE([gallium], + [AS_HELP_STRING([--disable-gallium], + [build gallium @<:@default=enabled@:>@])], + [enable_gallium="$enableval"], + [enable_gallium=yes]) +if test "x$enable_gallium" = xno -a "x$enable_openvg" = xyes; then + AC_MSG_ERROR([cannot enable OpenVG without Gallium]) +fi +if test "x$enable_gallium" = xyes; then + SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets" + AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no]) +fi + +AC_SUBST([LLVM_CFLAGS]) +AC_SUBST([LLVM_LIBS]) +AC_SUBST([LLVM_LDFLAGS]) +AC_SUBST([LLVM_VERSION]) + +dnl +dnl Gallium state trackers configuration +dnl + +AC_ARG_ENABLE([gallium-egl], + [AS_HELP_STRING([--enable-gallium-egl], + [enable gallium EGL state tracker @<:@default=auto@:>@])], + [enable_gallium_egl="$enableval"], + [enable_gallium_egl=auto]) +if test "x$enable_gallium_egl" = xauto; then + case "$mesa_driver" in + dri|no) + enable_gallium_egl=$enable_egl + ;; + *) + enable_gallium_egl=$enable_openvg + ;; + esac +fi +case "x$enable_egl$enable_gallium_egl" in +xnoyes) + AC_MSG_ERROR([cannot build Gallium EGL state tracker without EGL]) +esac + +AC_ARG_WITH([state-trackers], + [AS_HELP_STRING([--with-state-trackers@<:@=DIRS...@:>@], + [comma delimited state_trackers list, e.g. + "egl,glx" @<:@default=auto@:>@])], + [with_state_trackers="$withval"], + [with_state_trackers=yes]) + +case "$with_state_trackers" in +no) + GALLIUM_STATE_TRACKERS_DIRS="" + ;; +yes) + # look at what else is built + case "$mesa_driver" in + xlib) + GALLIUM_STATE_TRACKERS_DIRS=glx + ;; + dri) + GALLIUM_STATE_TRACKERS_DIRS="dri" + HAVE_ST_DRI="yes" + # Have only tested st/xorg on 1.6.0 servers + PKG_CHECK_MODULES(XORG, [xorg-server >= 1.6.0 libdrm >= $LIBDRM_XORG_REQUIRED libkms >= $LIBKMS_XORG_REQUIRED], + HAVE_ST_XORG="yes"; GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xorg", + HAVE_ST_XORG="no") + ;; + esac + + if test "x$enable_egl" = xyes; then + if test "$enable_openvg" = yes; then + GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS vega" + st_egl="yes" + fi + + if test "$enable_gallium_egl" = yes; then + GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS egl" + HAVE_ST_EGL="yes" + fi + fi + ;; +*) + # verify the requested state tracker exist + state_trackers="" + _state_trackers=`IFS=', '; echo $with_state_trackers` + for tracker in $_state_trackers; do + case "$tracker" in + dri) + if test "x$mesa_driver" != xdri; then + AC_MSG_ERROR([cannot build dri state tracker without mesa driver set to dri]) + fi + HAVE_ST_DRI="yes" + ;; + egl) + if test "x$enable_egl" != xyes; then + AC_MSG_ERROR([cannot build egl state tracker without EGL library]) + fi + HAVE_ST_EGL="yes" + ;; + xorg) + PKG_CHECK_MODULES([XORG], [xorg-server >= 1.6.0]) + PKG_CHECK_MODULES([LIBDRM_XORG], [libdrm >= $LIBDRM_XORG_REQUIRED]) + PKG_CHECK_MODULES([LIBKMS_XORG], [libkms >= $LIBKMS_XORG_REQUIRED]) + HAVE_ST_XORG="yes" + ;; + vega) + if test "x$enable_openvg" != xyes; then + AC_MSG_ERROR([cannot build vega state tracker without --enable-openvg]) + fi + have_st_vega="yes" + ;; + esac + + if test -n "$tracker"; then + test -d "$srcdir/src/gallium/state_trackers/$tracker" || \ + AC_MSG_ERROR([state tracker '$tracker' doesn't exist]) + if test -n "$state_trackers"; then + state_trackers="$state_trackers $tracker" + else + state_trackers="$tracker" + fi + fi + done + GALLIUM_STATE_TRACKERS_DIRS="$state_trackers" + + # append --enable-openvg/--enable-gallium-egl to --with-state-trackers + if test "x$have_st_vega" != xyes -a "x$enable_openvg" = xyes; then + AC_MSG_ERROR([--with-state-trackers specified but vega is missing]) + fi + if test "x$HAVE_ST_EGL" != xyes -a "x$enable_gallium_egl" = xyes; then + AC_MSG_ERROR([--with-state-trackers specified but egl is missing]) + fi + ;; +esac + + +EGL_CLIENT_APIS="" +VG_LIB_DEPS="" + +case "x$enable_opengl$enable_gles1$enable_gles2" in +x*yes*) + EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(GL_LIB)' + ;; +esac +if test "x$enable_openvg" = xyes; then + EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(VG_LIB)' + VG_LIB_DEPS="$VG_LIB_DEPS -lpthread" +fi + +AC_SUBST([VG_LIB_DEPS]) +AC_SUBST([EGL_CLIENT_APIS]) + +if test "x$HAVE_ST_EGL" = xyes; then + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS egl" +fi + +if test "x$HAVE_ST_XORG" = xyes; then + PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], + HAVE_XEXTPROTO_71="yes"; DEFINES="$DEFINES -DHAVE_XEXTPROTO_71", + HAVE_XEXTPROTO_71="no") +fi + +AC_ARG_WITH([egl-platforms], + [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@], + [comma delimited native platforms libEGL supports, e.g. + "x11,drm" @<:@default=auto@:>@])], + [with_egl_platforms="$withval"], + [with_egl_platforms=yes]) +AC_ARG_WITH([egl-displays], + [AS_HELP_STRING([--with-egl-displays@<:@=DIRS...@:>@], + [DEPRECATED. Use --with-egl-platforms instead])], + [with_egl_platforms="$withval"]) + +EGL_PLATFORMS="" +WAYLAND_EGL_LIB_DEPS="" + +case "$with_egl_platforms" in +yes) + if test "x$enable_egl" = xyes && test "x$mesa_driver" != xosmesa; then + EGL_PLATFORMS="x11" + if test "$mesa_driver" = dri; then + EGL_PLATFORMS="$EGL_PLATFORMS drm" + fi + fi + ;; +*) + if test "x$enable_egl" != xyes; then + AC_MSG_ERROR([cannot build egl state tracker without EGL library]) + fi + # verify the requested driver directories exist + egl_platforms=`IFS=', '; echo $with_egl_platforms` + for plat in $egl_platforms; do + test -d "$srcdir/src/gallium/state_trackers/egl/$plat" || \ + AC_MSG_ERROR([EGL platform '$plat' doesn't exist]) + if test "$plat" = "fbdev"; then + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/fbdev" + fi + if test "$plat" = "wayland"; then + PKG_CHECK_MODULES([WAYLAND], [wayland-client wayland-server],, \ + [AC_MSG_ERROR([cannot find libwayland-client])]) + WAYLAND_EGL_LIB_DEPS="$WAYLAND_LIBS $LIBDRM_LIBS" + fi + done + EGL_PLATFORMS="$egl_platforms" + ;; +esac +AC_SUBST([EGL_PLATFORMS]) + +AC_SUBST([WAYLAND_EGL_LIB_DEPS]) +WAYLAND_EGL_PC_REQ_PRIV="wayland-client libdrm" +WAYLAND_EGL_PC_LIB_PRIV= +WAYLAND_EGL_PC_CFLAGS= + +AC_SUBST([WAYLAND_EGL_PC_REQ_PRIV]) +AC_SUBST([WAYLAND_EGL_PC_LIB_PRIV]) +AC_SUBST([WAYLAND_EGL_PC_CFLAGS]) + + +AC_ARG_WITH([egl-driver-dir], + [AS_HELP_STRING([--with-egl-driver-dir=DIR], + [directory for EGL drivers [[default=${libdir}/egl]]])], + [EGL_DRIVER_INSTALL_DIR="$withval"], + [EGL_DRIVER_INSTALL_DIR='${libdir}/egl']) +AC_SUBST([EGL_DRIVER_INSTALL_DIR]) + +AC_ARG_WITH([xorg-driver-dir], + [AS_HELP_STRING([--with-xorg-driver-dir=DIR], + [Default xorg driver directory[[default=${libdir}/xorg/modules/drivers]]])], + [XORG_DRIVER_INSTALL_DIR="$withval"], + [XORG_DRIVER_INSTALL_DIR="${libdir}/xorg/modules/drivers"]) +AC_SUBST([XORG_DRIVER_INSTALL_DIR]) + +AC_ARG_WITH([max-width], + [AS_HELP_STRING([--with-max-width=N], + [Maximum framebuffer width (4096)])], + [DEFINES="${DEFINES} -DMAX_WIDTH=${withval}"; + AS_IF([test "${withval}" -gt "4096"], + [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] +) +AC_ARG_WITH([max-height], + [AS_HELP_STRING([--with-max-height=N], + [Maximum framebuffer height (4096)])], + [DEFINES="${DEFINES} -DMAX_HEIGHT=${withval}"; + AS_IF([test "${withval}" -gt "4096"], + [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] +) + +dnl +dnl Gallium LLVM +dnl +AC_ARG_ENABLE([gallium-llvm], + [AS_HELP_STRING([--enable-gallium-llvm], + [build gallium LLVM support @<:@default=disabled@:>@])], + [enable_gallium_llvm="$enableval"], + [enable_gallium_llvm=auto]) +if test "x$enable_gallium_llvm" = xyes; then + if test "x$LLVM_CONFIG" != xno; then + LLVM_VERSION=`$LLVM_CONFIG --version` + LLVM_CFLAGS=`$LLVM_CONFIG --cppflags` + LLVM_LIBS="`$LLVM_CONFIG --libs jit interpreter nativecodegen bitwriter` -lstdc++" + + LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe" + DEFINES="$DEFINES -DGALLIUM_LLVMPIPE -D__STDC_CONSTANT_MACROS" + MESA_LLVM=1 + else + MESA_LLVM=0 + fi +else + MESA_LLVM=0 +fi + +dnl +dnl Gallium helper functions +dnl +gallium_check_st() { + if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_XORG" = xyes; then + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1" + fi + if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $2" + fi + if test "x$HAVE_ST_XORG" = xyes && test "x$3" != x; then + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $3" + fi +} + + +dnl +dnl Gallium SVGA configuration +dnl +AC_ARG_ENABLE([gallium-svga], + [AS_HELP_STRING([--enable-gallium-svga], + [build gallium SVGA @<:@default=disabled@:>@])], + [enable_gallium_svga="$enableval"], + [enable_gallium_svga=auto]) +if test "x$enable_gallium_svga" = xyes; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS svga" + gallium_check_st "svga/drm" "dri-vmwgfx" "xorg-vmwgfx" +elif test "x$enable_gallium_svga" = xauto; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS svga" +fi + +dnl +dnl Gallium i915 configuration +dnl +AC_ARG_ENABLE([gallium-i915], + [AS_HELP_STRING([--enable-gallium-i915], + [build gallium i915 @<:@default=disabled@:>@])], + [enable_gallium_i915="$enableval"], + [enable_gallium_i915=auto]) +if test "x$enable_gallium_i915" = xyes; then + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS i915/sw" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" + gallium_check_st "i915/drm" "dri-i915" "xorg-i915" +elif test "x$enable_gallium_i915" = xauto; then + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS i915/sw" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i915" +fi + +dnl +dnl Gallium i965 configuration +dnl +AC_ARG_ENABLE([gallium-i965], + [AS_HELP_STRING([--enable-gallium-i965], + [build gallium i965 @<:@default=disabled@:>@])], + [enable_gallium_i965="$enableval"], + [enable_gallium_i965=auto]) +if test "x$enable_gallium_i965" = xyes; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i965" + gallium_check_st "i965/drm" "dri-i965" "xorg-i965" +elif test "x$enable_gallium_i965" = xauto; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS i965" +fi + +dnl +dnl Gallium Radeon r300g configuration +dnl +AC_ARG_ENABLE([gallium-radeon], + [AS_HELP_STRING([--enable-gallium-radeon], + [build gallium radeon @<:@default=disabled@:>@])], + [enable_gallium_radeon="$enableval"], + [enable_gallium_radeon=auto]) +if test "x$enable_gallium_radeon" = xauto; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" + gallium_check_st "radeon/drm" "dri-r300" +fi +if test "x$enable_gallium_radeon" = xyes; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300" + gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon" +fi + +dnl +dnl Gallium Radeon r600g configuration +dnl +AC_ARG_ENABLE([gallium-r600], + [AS_HELP_STRING([--enable-gallium-r600], + [build gallium radeon @<:@default=disabled@:>@])], + [enable_gallium_r600="$enableval"], + [enable_gallium_r600=auto]) +if test "x$enable_gallium_r600" = xyes; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600" + gallium_check_st "r600/drm" "dri-r600" +fi + +dnl +dnl Gallium Nouveau configuration +dnl +AC_ARG_ENABLE([gallium-nouveau], + [AS_HELP_STRING([--enable-gallium-nouveau], + [build gallium nouveau @<:@default=disabled@:>@])], + [enable_gallium_nouveau="$enableval"], + [enable_gallium_nouveau=no]) +if test "x$enable_gallium_nouveau" = xyes; then + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0" + gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" +fi + +dnl +dnl Gallium swrast configuration +dnl +AC_ARG_ENABLE([gallium-swrast], + [AS_HELP_STRING([--enable-gallium-swrast], + [build gallium swrast @<:@default=auto@:>@])], + [enable_gallium_swrast="$enableval"], + [enable_gallium_swrast=auto]) +if test "x$enable_gallium_swrast" = xyes || test "x$enable_gallium_swrast" = xauto; then + if test "x$HAVE_ST_DRI" = xyes; then + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast" + fi +fi + +dnl prepend CORE_DIRS to SRC_DIRS +SRC_DIRS="$CORE_DIRS $SRC_DIRS" + +dnl Restore LDFLAGS and CPPFLAGS +LDFLAGS="$_SAVE_LDFLAGS" +CPPFLAGS="$_SAVE_CPPFLAGS" + +dnl Substitute the config +AC_CONFIG_FILES([configs/autoconf]) + +dnl Replace the configs/current symlink +AC_CONFIG_COMMANDS([configs],[ +if test -f configs/current || test -L configs/current; then + rm -f configs/current +fi +ln -s autoconf configs/current +]) + +AC_OUTPUT + +dnl +dnl Output some configuration info for the user +dnl +echo "" +echo " prefix: $prefix" +echo " exec_prefix: $exec_prefix" +echo " libdir: $libdir" +echo " includedir: $includedir" + +dnl API info +echo "" +echo " OpenGL: $enable_opengl (ES1: $enable_gles1 ES2: $enable_gles2)" +echo " OpenVG: $enable_openvg" + +dnl Driver info +echo "" +echo " Driver: $mesa_driver" +if test "$mesa_driver" != no; then + if echo "$DRIVER_DIRS" | grep 'osmesa' >/dev/null 2>&1; then + echo " OSMesa: lib$OSMESA_LIB" + else + echo " OSMesa: no" + fi + if test "$mesa_driver" = dri; then + # cleanup the drivers var + dri_dirs=`echo $DRI_DIRS | $SED 's/^ *//;s/ */ /;s/ *$//'` + if test "x$DRI_DIRS" = x; then + echo " DRI drivers: no" + else + echo " DRI drivers: $dri_dirs" + fi + echo " DRI driver dir: $DRI_DRIVER_INSTALL_DIR" + echo " Use XCB: $enable_xcb" + echo " Shared dricore: $enable_dricore" + fi +fi +echo "" +echo " GLU: $enable_glu" +echo " GLw: $enable_glw (Motif: $enable_motif)" +echo " glut: $enable_glut" + +dnl EGL +echo "" +echo " EGL: $enable_egl" +if test "$enable_egl" = yes; then + echo " EGL platforms: $EGL_PLATFORMS" + + egl_drivers="" + for d in $EGL_DRIVERS_DIRS; do + egl_drivers="$egl_drivers builtin:egl_$d" + done + + if test "$enable_gallium" = yes -a "$HAVE_ST_EGL" = yes; then + echo " EGL drivers: ${egl_drivers} egl_gallium" + echo " EGL Gallium STs:$EGL_CLIENT_APIS" + else + echo " EGL drivers: $egl_drivers" + fi +fi + +echo "" +if test "x$MESA_LLVM" = x1; then + echo " llvm: yes" + echo " llvm-config: $LLVM_CONFIG" + echo " llvm-version: $LLVM_VERSION" +else + echo " llvm: no" +fi + +echo "" +if echo "$SRC_DIRS" | grep 'gallium' >/dev/null 2>&1; then + echo " Gallium: yes" + echo " Gallium dirs: $GALLIUM_DIRS" + echo " Target dirs: $GALLIUM_TARGET_DIRS" + echo " Winsys dirs: $GALLIUM_WINSYS_DIRS" + echo " Driver dirs: $GALLIUM_DRIVERS_DIRS" + echo " Trackers dirs: $GALLIUM_STATE_TRACKERS_DIRS" +else + echo " Gallium: no" +fi + +dnl Libraries +echo "" +echo " Shared libs: $enable_shared" +echo " Static libs: $enable_static" + +dnl Compiler options +# cleanup the CFLAGS/CXXFLAGS/DEFINES vars +cflags=`echo $CFLAGS $OPT_FLAGS $PIC_FLAGS $ARCH_FLAGS | \ + $SED 's/^ *//;s/ */ /;s/ *$//'` +cxxflags=`echo $CXXFLAGS $OPT_FLAGS $PIC_FLAGS $ARCH_FLAGS | \ + $SED 's/^ *//;s/ */ /;s/ *$//'` +defines=`echo $DEFINES $ASM_FLAGS | $SED 's/^ *//;s/ */ /;s/ *$//'` +echo "" +echo " CFLAGS: $cflags" +echo " CXXFLAGS: $cxxflags" +echo " Macros: $defines" +echo "" +echo " PYTHON2: $PYTHON2" + +echo "" +echo " Run '${MAKE-make}' to build Mesa" +echo "" diff --git a/mesalib/scons/gallium.py b/mesalib/scons/gallium.py index 34523d589..a61a9af8c 100644 --- a/mesalib/scons/gallium.py +++ b/mesalib/scons/gallium.py @@ -1,625 +1,624 @@ -"""gallium - -Frontend-tool for Gallium3D architecture. - -""" - -# -# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. -# All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sub license, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice (including the -# next paragraph) shall be included in all copies or substantial portions -# of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. -# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR -# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - - -import distutils.version -import os -import os.path -import re -import subprocess -import platform as _platform - -import SCons.Action -import SCons.Builder -import SCons.Scanner - - -def symlink(target, source, env): - target = str(target[0]) - source = str(source[0]) - if os.path.islink(target) or os.path.exists(target): - os.remove(target) - os.symlink(os.path.basename(source), target) - -def install(env, source, subdir): - target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'], subdir) - return env.Install(target_dir, source) - -def install_program(env, source): - return install(env, source, 'bin') - -def install_shared_library(env, sources, version = ()): - targets = [] - install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir']) - version = tuple(map(str, version)) - if env['SHLIBSUFFIX'] == '.dll': - dlls = env.FindIxes(sources, 'SHLIBPREFIX', 'SHLIBSUFFIX') - targets += install(env, dlls, 'bin') - libs = env.FindIxes(sources, 'LIBPREFIX', 'LIBSUFFIX') - targets += install(env, libs, 'lib') - else: - for source in sources: - target_dir = os.path.join(install_dir, 'lib') - target_name = '.'.join((str(source),) + version) - last = env.InstallAs(os.path.join(target_dir, target_name), source) - targets += last - while len(version): - version = version[:-1] - target_name = '.'.join((str(source),) + version) - action = SCons.Action.Action(symlink, "$TARGET -> $SOURCE") - last = env.Command(os.path.join(target_dir, target_name), last, action) - targets += last - return targets - - -def createInstallMethods(env): - env.AddMethod(install_program, 'InstallProgram') - env.AddMethod(install_shared_library, 'InstallSharedLibrary') - - -def num_jobs(): - try: - return int(os.environ['NUMBER_OF_PROCESSORS']) - except (ValueError, KeyError): - pass - - try: - return os.sysconf('SC_NPROCESSORS_ONLN') - except (ValueError, OSError, AttributeError): - pass - - try: - return int(os.popen2("sysctl -n hw.ncpu")[1].read()) - except ValueError: - pass - - return 1 - - -def pkg_config_modules(env, name, modules): - '''Simple wrapper for pkg-config.''' - - env[name] = False - - if env['platform'] == 'windows': - return - - if not env.Detect('pkg-config'): - return - - if subprocess.call(["pkg-config", "--exists", ' '.join(modules)]) != 0: - return - - # Put -I and -L flags directly into the environment, as these don't affect - # the compilation of targets that do not use them - try: - env.ParseConfig('pkg-config --cflags-only-I --libs-only-L ' + ' '.join(modules)) - except OSError: - return - - # Other flags may affect the compilation of unrelated targets, so store - # them with a prefix, (e.g., XXX_CFLAGS, XXX_LIBS, etc) - try: - flags = env.ParseFlags('!pkg-config --cflags-only-other --libs-only-l --libs-only-other ' + ' '.join(modules)) - except OSError: - return - prefix = name.upper() + '_' - for flag_name, flag_value in flags.iteritems(): - env[prefix + flag_name] = flag_value - - env[name] = True - - - -def generate(env): - """Common environment generation code""" - - # Tell tools which machine to compile for - env['TARGET_ARCH'] = env['machine'] - env['MSVS_ARCH'] = env['machine'] - - # Toolchain - platform = env['platform'] - if env['toolchain'] == 'default': - if platform == 'winddk': - env['toolchain'] = 'winddk' - elif platform == 'wince': - env['toolchain'] = 'wcesdk' - env.Tool(env['toolchain']) - - # Allow override compiler and specify additional flags from environment - if os.environ.has_key('CC'): - env['CC'] = os.environ['CC'] - # Update CCVERSION to match - pipe = SCons.Action._subproc(env, [env['CC'], '--version'], - stdin = 'devnull', - stderr = 'devnull', - stdout = subprocess.PIPE) - if pipe.wait() == 0: - line = pipe.stdout.readline() - match = re.search(r'[0-9]+(\.[0-9]+)+', line) - if match: - env['CCVERSION'] = match.group(0) - if os.environ.has_key('CFLAGS'): - env['CCFLAGS'] += SCons.Util.CLVar(os.environ['CFLAGS']) - if os.environ.has_key('CXX'): - env['CXX'] = os.environ['CXX'] - if os.environ.has_key('CXXFLAGS'): - env['CXXFLAGS'] += SCons.Util.CLVar(os.environ['CXXFLAGS']) - if os.environ.has_key('LDFLAGS'): - env['LINKFLAGS'] += SCons.Util.CLVar(os.environ['LDFLAGS']) - - env['gcc'] = 'gcc' in os.path.basename(env['CC']).split('-') - env['msvc'] = env['CC'] == 'cl' - - if env['msvc'] and env['toolchain'] == 'default' and env['machine'] == 'x86_64': - # MSVC x64 support is broken in earlier versions of scons - env.EnsurePythonVersion(2, 0) - - # shortcuts - machine = env['machine'] - platform = env['platform'] - x86 = env['machine'] == 'x86' - ppc = env['machine'] == 'ppc' - gcc = env['gcc'] - msvc = env['msvc'] - - # Determine whether we are cross compiling; in particular, whether we need - # to compile code generators with a different compiler as the target code. - host_platform = _platform.system().lower() - if host_platform.startswith('cygwin'): - host_platform = 'cygwin' - host_machine = os.environ.get('PROCESSOR_ARCHITEW6432', os.environ.get('PROCESSOR_ARCHITECTURE', _platform.machine())) - host_machine = { - 'x86': 'x86', - 'i386': 'x86', - 'i486': 'x86', - 'i586': 'x86', - 'i686': 'x86', - 'ppc' : 'ppc', - 'AMD64': 'x86_64', - 'x86_64': 'x86_64', - }.get(host_machine, 'generic') - env['crosscompile'] = platform != host_platform - if machine == 'x86_64' and host_machine != 'x86_64': - env['crosscompile'] = True - env['hostonly'] = False - - # Backwards compatability with the debug= profile= options - if env['build'] == 'debug': - if not env['debug']: - print 'scons: warning: debug option is deprecated and will be removed eventually; use instead' - print - print ' scons build=release' - print - env['build'] = 'release' - if env['profile']: - print 'scons: warning: profile option is deprecated and will be removed eventually; use instead' - print - print ' scons build=profile' - print - env['build'] = 'profile' - if False: - # Enforce SConscripts to use the new build variable - env.popitem('debug') - env.popitem('profile') - else: - # Backwards portability with older sconscripts - if env['build'] in ('debug', 'checked'): - env['debug'] = True - env['profile'] = False - if env['build'] == 'profile': - env['debug'] = False - env['profile'] = True - if env['build'] == 'release': - env['debug'] = False - env['profile'] = False - - # Put build output in a separate dir, which depends on the current - # configuration. See also http://www.scons.org/wiki/AdvancedBuildExample - build_topdir = 'build' - build_subdir = env['platform'] - if env['machine'] != 'generic': - build_subdir += '-' + env['machine'] - if env['build'] != 'release': - build_subdir += '-' + env['build'] - build_dir = os.path.join(build_topdir, build_subdir) - # Place the .sconsign file in the build dir too, to avoid issues with - # different scons versions building the same source file - env['build_dir'] = build_dir - env.SConsignFile(os.path.join(build_dir, '.sconsign')) - if 'SCONS_CACHE_DIR' in os.environ: - print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],) - env.CacheDir(os.environ['SCONS_CACHE_DIR']) - env['CONFIGUREDIR'] = os.path.join(build_dir, 'conf') - env['CONFIGURELOG'] = os.path.join(os.path.abspath(build_dir), 'config.log') - - # Parallel build - if env.GetOption('num_jobs') <= 1: - env.SetOption('num_jobs', num_jobs()) - - env.Decider('MD5-timestamp') - env.SetOption('max_drift', 60) - - # C preprocessor options - cppdefines = [] - if env['build'] in ('debug', 'checked'): - cppdefines += ['DEBUG'] - else: - cppdefines += ['NDEBUG'] - if env['build'] == 'profile': - cppdefines += ['PROFILE'] - if platform == 'windows': - cppdefines += [ - 'WIN32', - '_WINDOWS', - #'_UNICODE', - #'UNICODE', - # http://msdn.microsoft.com/en-us/library/aa383745.aspx - ('_WIN32_WINNT', '0x0601'), - ('WINVER', '0x0601'), - ] - if msvc and env['toolchain'] != 'winddk': - cppdefines += [ - 'VC_EXTRALEAN', - '_USE_MATH_DEFINES', - '_CRT_SECURE_NO_WARNINGS', - '_CRT_SECURE_NO_DEPRECATE', - '_SCL_SECURE_NO_WARNINGS', - '_SCL_SECURE_NO_DEPRECATE', - ] - if env['build'] in ('debug', 'checked'): - cppdefines += ['_DEBUG'] - if env['toolchain'] == 'winddk': - # Mimic WINDDK's builtin flags. See also: - # - WINDDK's bin/makefile.new i386mk.inc for more info. - # - buildchk_wxp_x86.log files, generated by the WINDDK's build - # - http://alter.org.ua/docs/nt_kernel/vc8_proj/ - if machine == 'x86': - cppdefines += ['_X86_', 'i386'] - if machine == 'x86_64': - cppdefines += ['_AMD64_', 'AMD64'] - if platform == 'winddk': - cppdefines += [ - 'STD_CALL', - ('CONDITION_HANDLING', '1'), - ('NT_INST', '0'), - ('WIN32', '100'), - ('_NT1X_', '100'), - ('WINNT', '1'), - ('_WIN32_WINNT', '0x0501'), # minimum required OS version - ('WINVER', '0x0501'), - ('_WIN32_IE', '0x0603'), - ('WIN32_LEAN_AND_MEAN', '1'), - ('DEVL', '1'), - ('__BUILDMACHINE__', 'WinDDK'), - ('FPO', '0'), - ] - if env['build'] in ('debug', 'checked'): - cppdefines += [('DBG', 1)] - if platform == 'wince': - cppdefines += [ - '_CRT_SECURE_NO_DEPRECATE', - '_USE_32BIT_TIME_T', - 'UNICODE', - '_UNICODE', - ('UNDER_CE', '600'), - ('_WIN32_WCE', '0x600'), - 'WINCEOEM', - 'WINCEINTERNAL', - 'WIN32', - 'STRICT', - 'x86', - '_X86_', - 'INTERNATIONAL', - ('INTLMSG_CODEPAGE', '1252'), - ] - if platform == 'windows': - cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_USER'] - if platform == 'winddk': - cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_DISPLAY'] - if platform == 'wince': - cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_CE'] - cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_CE_OGL'] - if platform == 'embedded': - cppdefines += ['PIPE_OS_EMBEDDED'] - env.Append(CPPDEFINES = cppdefines) - - # C compiler options - cflags = [] # C - cxxflags = [] # C++ - ccflags = [] # C & C++ - if gcc: - ccversion = env['CCVERSION'] - if env['build'] == 'debug': - ccflags += ['-O0'] - elif ccversion.startswith('4.2.'): - # gcc 4.2.x optimizer is broken - print "warning: gcc 4.2.x optimizer is broken -- disabling optimizations" - ccflags += ['-O0'] - else: - ccflags += ['-O3'] - ccflags += ['-g3'] - if env['build'] in ('checked', 'profile'): - # See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling? - ccflags += [ - '-fno-omit-frame-pointer', - '-fno-optimize-sibling-calls', - ] - if env['machine'] == 'x86': - ccflags += [ - '-m32', - #'-march=pentium4', - ] - if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.2') \ - and (platform != 'windows' or env['build'] == 'debug' or True): - # NOTE: We need to ensure stack is realigned given that we - # produce shared objects, and have no control over the stack - # alignment policy of the application. Therefore we need - # -mstackrealign ore -mincoming-stack-boundary=2. - # - # XXX: -O and -mstackrealign causes stack corruption on MinGW - # - # XXX: We could have SSE without -mstackrealign if we always used - # __attribute__((force_align_arg_pointer)), but that's not - # always the case. - ccflags += [ - '-mstackrealign', # ensure stack is aligned - '-mmmx', '-msse', '-msse2', # enable SIMD intrinsics - #'-mfpmath=sse', - ] - if platform in ['windows', 'darwin']: - # Workaround http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37216 - ccflags += ['-fno-common'] - if env['machine'] == 'x86_64': - ccflags += ['-m64'] - if platform == 'darwin': - ccflags += ['-fno-common'] - # See also: - # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - ccflags += [ - '-Wall', - '-Wno-long-long', - '-ffast-math', - '-fmessage-length=0', # be nice to Eclipse - ] - cflags += [ - '-Wmissing-prototypes', - '-std=gnu99', - ] - if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.0'): - ccflags += [ - '-Wmissing-field-initializers', - ] - if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.2'): - ccflags += [ - '-Werror=pointer-arith', - ] - cflags += [ - '-Werror=declaration-after-statement', - ] - if msvc: - # See also: - # - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx - # - cl /? - if env['build'] == 'debug': - ccflags += [ - '/Od', # disable optimizations - '/Oi', # enable intrinsic functions - '/Oy-', # disable frame pointer omission - ] - else: - ccflags += [ - '/O2', # optimize for speed - ] - if env['build'] == 'release': - ccflags += [ - '/GL', # enable whole program optimization - ] - else: - ccflags += [ - '/GL-', # disable whole program optimization - ] - ccflags += [ - '/fp:fast', # fast floating point - '/W3', # warning level - #'/Wp64', # enable 64 bit porting warnings - ] - if env['machine'] == 'x86': - ccflags += [ - #'/arch:SSE2', # use the SSE2 instructions - ] - if platform == 'windows': - ccflags += [ - # TODO - ] - if platform == 'winddk': - ccflags += [ - '/Zl', # omit default library name in .OBJ - '/Zp8', # 8bytes struct member alignment - '/Gy', # separate functions for linker - '/Gm-', # disable minimal rebuild - '/WX', # treat warnings as errors - '/Gz', # __stdcall Calling convention - '/GX-', # disable C++ EH - '/GR-', # disable C++ RTTI - '/GF', # enable read-only string pooling - '/G6', # optimize for PPro, P-II, P-III - '/Ze', # enable extensions - '/Gi-', # disable incremental compilation - '/QIfdiv-', # disable Pentium FDIV fix - '/hotpatch', # prepares an image for hotpatching. - #'/Z7', #enable old-style debug info - ] - if platform == 'wince': - # See also C:\WINCE600\public\common\oak\misc\makefile.def - ccflags += [ - '/Zl', # omit default library name in .OBJ - '/GF', # enable read-only string pooling - '/GR-', # disable C++ RTTI - '/GS', # enable security checks - # Allow disabling language conformance to maintain backward compat - #'/Zc:wchar_t-', # don't force wchar_t as native type, instead of typedef - #'/Zc:forScope-', # don't enforce Standard C++ for scoping rules - #'/wd4867', - #'/wd4430', - #'/MT', - #'/U_MT', - ] - # Automatic pdb generation - # See http://scons.tigris.org/issues/show_bug.cgi?id=1656 - env.EnsureSConsVersion(0, 98, 0) - env['PDB'] = '${TARGET.base}.pdb' - env.Append(CCFLAGS = ccflags) - env.Append(CFLAGS = cflags) - env.Append(CXXFLAGS = cxxflags) - - if env['platform'] == 'windows' and msvc: - # Choose the appropriate MSVC CRT - # http://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx - if env['build'] in ('debug', 'checked'): - env.Append(CCFLAGS = ['/MTd']) - env.Append(SHCCFLAGS = ['/LDd']) - else: - env.Append(CCFLAGS = ['/MT']) - env.Append(SHCCFLAGS = ['/LD']) - - # Assembler options - if gcc: - if env['machine'] == 'x86': - env.Append(ASFLAGS = ['-m32']) - if env['machine'] == 'x86_64': - env.Append(ASFLAGS = ['-m64']) - - # Linker options - linkflags = [] - shlinkflags = [] - if gcc: - if env['machine'] == 'x86': - linkflags += ['-m32'] - if env['machine'] == 'x86_64': - linkflags += ['-m64'] - if env['platform'] not in ('darwin'): - shlinkflags += [ - '-Wl,-Bsymbolic', - ] - # Handle circular dependencies in the libraries - if env['platform'] in ('darwin'): - pass - else: - env['_LIBFLAGS'] = '-Wl,--start-group ' + env['_LIBFLAGS'] + ' -Wl,--end-group' - if msvc: - if env['build'] == 'release': - # enable Link-time Code Generation - linkflags += ['/LTCG'] - env.Append(ARFLAGS = ['/LTCG']) - if platform == 'windows' and msvc: - # See also: - # - http://msdn2.microsoft.com/en-us/library/y0zzbyt4.aspx - linkflags += [ - '/fixed:no', - '/incremental:no', - ] - if platform == 'winddk': - linkflags += [ - '/merge:_PAGE=PAGE', - '/merge:_TEXT=.text', - '/section:INIT,d', - '/opt:ref', - '/opt:icf', - '/ignore:4198,4010,4037,4039,4065,4070,4078,4087,4089,4221', - '/incremental:no', - '/fullbuild', - '/release', - '/nodefaultlib', - '/wx', - '/debug', - '/debugtype:cv', - '/version:5.1', - '/osversion:5.1', - '/functionpadmin:5', - '/safeseh', - '/pdbcompress', - '/stack:0x40000,0x1000', - '/driver', - '/align:0x80', - '/subsystem:native,5.01', - '/base:0x10000', - - '/entry:DrvEnableDriver', - ] - if env['build'] != 'release': - linkflags += [ - '/MAP', # http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx - ] - if platform == 'wince': - linkflags += [ - '/nodefaultlib', - #'/incremental:no', - #'/fullbuild', - '/entry:_DllMainCRTStartup', - ] - env.Append(LINKFLAGS = linkflags) - env.Append(SHLINKFLAGS = shlinkflags) - - # We have C++ in several libraries, so always link with the C++ compiler - if env['gcc']: - env['LINK'] = env['CXX'] - - # Default libs - env.Append(LIBS = []) - - # Load tools - env.Tool('lex') - env.Tool('yacc') - if env['llvm']: - env.Tool('llvm') - env.Tool('udis86') - - pkg_config_modules(env, 'x11', ['x11', 'xext']) - pkg_config_modules(env, 'drm', ['libdrm']) - pkg_config_modules(env, 'drm_intel', ['libdrm_intel']) - pkg_config_modules(env, 'drm_radeon', ['libdrm_radeon']) - pkg_config_modules(env, 'xorg', ['xorg-server']) - pkg_config_modules(env, 'kms', ['libkms']) - - env['dri'] = env['x11'] and env['drm'] - - # Custom builders and methods - env.Tool('custom') - createInstallMethods(env) - - # for debugging - #print env.Dump() - - -def exists(env): - return 1 +"""gallium + +Frontend-tool for Gallium3D architecture. + +""" + +# +# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + + +import distutils.version +import os +import os.path +import re +import subprocess +import platform as _platform + +import SCons.Action +import SCons.Builder +import SCons.Scanner + + +def symlink(target, source, env): + target = str(target[0]) + source = str(source[0]) + if os.path.islink(target) or os.path.exists(target): + os.remove(target) + os.symlink(os.path.basename(source), target) + +def install(env, source, subdir): + target_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir'], subdir) + return env.Install(target_dir, source) + +def install_program(env, source): + return install(env, source, 'bin') + +def install_shared_library(env, sources, version = ()): + targets = [] + install_dir = os.path.join(env.Dir('#.').srcnode().abspath, env['build_dir']) + version = tuple(map(str, version)) + if env['SHLIBSUFFIX'] == '.dll': + dlls = env.FindIxes(sources, 'SHLIBPREFIX', 'SHLIBSUFFIX') + targets += install(env, dlls, 'bin') + libs = env.FindIxes(sources, 'LIBPREFIX', 'LIBSUFFIX') + targets += install(env, libs, 'lib') + else: + for source in sources: + target_dir = os.path.join(install_dir, 'lib') + target_name = '.'.join((str(source),) + version) + last = env.InstallAs(os.path.join(target_dir, target_name), source) + targets += last + while len(version): + version = version[:-1] + target_name = '.'.join((str(source),) + version) + action = SCons.Action.Action(symlink, "$TARGET -> $SOURCE") + last = env.Command(os.path.join(target_dir, target_name), last, action) + targets += last + return targets + + +def createInstallMethods(env): + env.AddMethod(install_program, 'InstallProgram') + env.AddMethod(install_shared_library, 'InstallSharedLibrary') + + +def num_jobs(): + try: + return int(os.environ['NUMBER_OF_PROCESSORS']) + except (ValueError, KeyError): + pass + + try: + return os.sysconf('SC_NPROCESSORS_ONLN') + except (ValueError, OSError, AttributeError): + pass + + try: + return int(os.popen2("sysctl -n hw.ncpu")[1].read()) + except ValueError: + pass + + return 1 + + +def pkg_config_modules(env, name, modules): + '''Simple wrapper for pkg-config.''' + + env[name] = False + + if env['platform'] == 'windows': + return + + if not env.Detect('pkg-config'): + return + + if subprocess.call(["pkg-config", "--exists", ' '.join(modules)]) != 0: + return + + # Put -I and -L flags directly into the environment, as these don't affect + # the compilation of targets that do not use them + try: + env.ParseConfig('pkg-config --cflags-only-I --libs-only-L ' + ' '.join(modules)) + except OSError: + return + + # Other flags may affect the compilation of unrelated targets, so store + # them with a prefix, (e.g., XXX_CFLAGS, XXX_LIBS, etc) + try: + flags = env.ParseFlags('!pkg-config --cflags-only-other --libs-only-l --libs-only-other ' + ' '.join(modules)) + except OSError: + return + prefix = name.upper() + '_' + for flag_name, flag_value in flags.iteritems(): + env[prefix + flag_name] = flag_value + + env[name] = True + + + +def generate(env): + """Common environment generation code""" + + # Tell tools which machine to compile for + env['TARGET_ARCH'] = env['machine'] + env['MSVS_ARCH'] = env['machine'] + + # Toolchain + platform = env['platform'] + if env['toolchain'] == 'default': + if platform == 'winddk': + env['toolchain'] = 'winddk' + elif platform == 'wince': + env['toolchain'] = 'wcesdk' + env.Tool(env['toolchain']) + + # Allow override compiler and specify additional flags from environment + if os.environ.has_key('CC'): + env['CC'] = os.environ['CC'] + # Update CCVERSION to match + pipe = SCons.Action._subproc(env, [env['CC'], '--version'], + stdin = 'devnull', + stderr = 'devnull', + stdout = subprocess.PIPE) + if pipe.wait() == 0: + line = pipe.stdout.readline() + match = re.search(r'[0-9]+(\.[0-9]+)+', line) + if match: + env['CCVERSION'] = match.group(0) + if os.environ.has_key('CFLAGS'): + env['CCFLAGS'] += SCons.Util.CLVar(os.environ['CFLAGS']) + if os.environ.has_key('CXX'): + env['CXX'] = os.environ['CXX'] + if os.environ.has_key('CXXFLAGS'): + env['CXXFLAGS'] += SCons.Util.CLVar(os.environ['CXXFLAGS']) + if os.environ.has_key('LDFLAGS'): + env['LINKFLAGS'] += SCons.Util.CLVar(os.environ['LDFLAGS']) + + env['gcc'] = 'gcc' in os.path.basename(env['CC']).split('-') + env['msvc'] = env['CC'] == 'cl' + + if env['msvc'] and env['toolchain'] == 'default' and env['machine'] == 'x86_64': + # MSVC x64 support is broken in earlier versions of scons + env.EnsurePythonVersion(2, 0) + + # shortcuts + machine = env['machine'] + platform = env['platform'] + x86 = env['machine'] == 'x86' + ppc = env['machine'] == 'ppc' + gcc = env['gcc'] + msvc = env['msvc'] + + # Determine whether we are cross compiling; in particular, whether we need + # to compile code generators with a different compiler as the target code. + host_platform = _platform.system().lower() + if host_platform.startswith('cygwin'): + host_platform = 'cygwin' + host_machine = os.environ.get('PROCESSOR_ARCHITEW6432', os.environ.get('PROCESSOR_ARCHITECTURE', _platform.machine())) + host_machine = { + 'x86': 'x86', + 'i386': 'x86', + 'i486': 'x86', + 'i586': 'x86', + 'i686': 'x86', + 'ppc' : 'ppc', + 'AMD64': 'x86_64', + 'x86_64': 'x86_64', + }.get(host_machine, 'generic') + env['crosscompile'] = platform != host_platform + if machine == 'x86_64' and host_machine != 'x86_64': + env['crosscompile'] = True + env['hostonly'] = False + + # Backwards compatability with the debug= profile= options + if env['build'] == 'debug': + if not env['debug']: + print 'scons: warning: debug option is deprecated and will be removed eventually; use instead' + print + print ' scons build=release' + print + env['build'] = 'release' + if env['profile']: + print 'scons: warning: profile option is deprecated and will be removed eventually; use instead' + print + print ' scons build=profile' + print + env['build'] = 'profile' + if False: + # Enforce SConscripts to use the new build variable + env.popitem('debug') + env.popitem('profile') + else: + # Backwards portability with older sconscripts + if env['build'] in ('debug', 'checked'): + env['debug'] = True + env['profile'] = False + if env['build'] == 'profile': + env['debug'] = False + env['profile'] = True + if env['build'] == 'release': + env['debug'] = False + env['profile'] = False + + # Put build output in a separate dir, which depends on the current + # configuration. See also http://www.scons.org/wiki/AdvancedBuildExample + build_topdir = 'build' + build_subdir = env['platform'] + if env['machine'] != 'generic': + build_subdir += '-' + env['machine'] + if env['build'] != 'release': + build_subdir += '-' + env['build'] + build_dir = os.path.join(build_topdir, build_subdir) + # Place the .sconsign file in the build dir too, to avoid issues with + # different scons versions building the same source file + env['build_dir'] = build_dir + env.SConsignFile(os.path.join(build_dir, '.sconsign')) + if 'SCONS_CACHE_DIR' in os.environ: + print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],) + env.CacheDir(os.environ['SCONS_CACHE_DIR']) + env['CONFIGUREDIR'] = os.path.join(build_dir, 'conf') + env['CONFIGURELOG'] = os.path.join(os.path.abspath(build_dir), 'config.log') + + # Parallel build + if env.GetOption('num_jobs') <= 1: + env.SetOption('num_jobs', num_jobs()) + + env.Decider('MD5-timestamp') + env.SetOption('max_drift', 60) + + # C preprocessor options + cppdefines = [] + if env['build'] in ('debug', 'checked'): + cppdefines += ['DEBUG'] + else: + cppdefines += ['NDEBUG'] + if env['build'] == 'profile': + cppdefines += ['PROFILE'] + if platform == 'windows': + cppdefines += [ + 'WIN32', + '_WINDOWS', + #'_UNICODE', + #'UNICODE', + # http://msdn.microsoft.com/en-us/library/aa383745.aspx + ('_WIN32_WINNT', '0x0601'), + ('WINVER', '0x0601'), + ] + if msvc and env['toolchain'] != 'winddk': + cppdefines += [ + 'VC_EXTRALEAN', + '_USE_MATH_DEFINES', + '_CRT_SECURE_NO_WARNINGS', + '_CRT_SECURE_NO_DEPRECATE', + '_SCL_SECURE_NO_WARNINGS', + '_SCL_SECURE_NO_DEPRECATE', + ] + if env['build'] in ('debug', 'checked'): + cppdefines += ['_DEBUG'] + if env['toolchain'] == 'winddk': + # Mimic WINDDK's builtin flags. See also: + # - WINDDK's bin/makefile.new i386mk.inc for more info. + # - buildchk_wxp_x86.log files, generated by the WINDDK's build + # - http://alter.org.ua/docs/nt_kernel/vc8_proj/ + if machine == 'x86': + cppdefines += ['_X86_', 'i386'] + if machine == 'x86_64': + cppdefines += ['_AMD64_', 'AMD64'] + if platform == 'winddk': + cppdefines += [ + 'STD_CALL', + ('CONDITION_HANDLING', '1'), + ('NT_INST', '0'), + ('WIN32', '100'), + ('_NT1X_', '100'), + ('WINNT', '1'), + ('_WIN32_WINNT', '0x0501'), # minimum required OS version + ('WINVER', '0x0501'), + ('_WIN32_IE', '0x0603'), + ('WIN32_LEAN_AND_MEAN', '1'), + ('DEVL', '1'), + ('__BUILDMACHINE__', 'WinDDK'), + ('FPO', '0'), + ] + if env['build'] in ('debug', 'checked'): + cppdefines += [('DBG', 1)] + if platform == 'wince': + cppdefines += [ + '_CRT_SECURE_NO_DEPRECATE', + '_USE_32BIT_TIME_T', + 'UNICODE', + '_UNICODE', + ('UNDER_CE', '600'), + ('_WIN32_WCE', '0x600'), + 'WINCEOEM', + 'WINCEINTERNAL', + 'WIN32', + 'STRICT', + 'x86', + '_X86_', + 'INTERNATIONAL', + ('INTLMSG_CODEPAGE', '1252'), + ] + if platform == 'windows': + cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_USER'] + if platform == 'winddk': + cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_DISPLAY'] + if platform == 'wince': + cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_CE'] + cppdefines += ['PIPE_SUBSYSTEM_WINDOWS_CE_OGL'] + if platform == 'embedded': + cppdefines += ['PIPE_OS_EMBEDDED'] + env.Append(CPPDEFINES = cppdefines) + + # C compiler options + cflags = [] # C + cxxflags = [] # C++ + ccflags = [] # C & C++ + if gcc: + ccversion = env['CCVERSION'] + if env['build'] == 'debug': + ccflags += ['-O0'] + elif ccversion.startswith('4.2.'): + # gcc 4.2.x optimizer is broken + print "warning: gcc 4.2.x optimizer is broken -- disabling optimizations" + ccflags += ['-O0'] + else: + ccflags += ['-O3'] + ccflags += ['-g3'] + if env['build'] in ('checked', 'profile'): + # See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling? + ccflags += [ + '-fno-omit-frame-pointer', + '-fno-optimize-sibling-calls', + ] + if env['machine'] == 'x86': + ccflags += [ + '-m32', + #'-march=pentium4', + ] + if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.2') \ + and (platform != 'windows' or env['build'] == 'debug' or True): + # NOTE: We need to ensure stack is realigned given that we + # produce shared objects, and have no control over the stack + # alignment policy of the application. Therefore we need + # -mstackrealign ore -mincoming-stack-boundary=2. + # + # XXX: -O and -mstackrealign causes stack corruption on MinGW + # + # XXX: We could have SSE without -mstackrealign if we always used + # __attribute__((force_align_arg_pointer)), but that's not + # always the case. + ccflags += [ + '-mstackrealign', # ensure stack is aligned + '-mmmx', '-msse', '-msse2', # enable SIMD intrinsics + #'-mfpmath=sse', + ] + if platform in ['windows', 'darwin']: + # Workaround http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37216 + ccflags += ['-fno-common'] + if env['machine'] == 'x86_64': + ccflags += ['-m64'] + if platform == 'darwin': + ccflags += ['-fno-common'] + # See also: + # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html + ccflags += [ + '-Wall', + '-Wno-long-long', + '-ffast-math', + '-fmessage-length=0', # be nice to Eclipse + ] + cflags += [ + '-Wmissing-prototypes', + '-std=gnu99', + ] + if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.0'): + ccflags += [ + '-Wmissing-field-initializers', + ] + if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.2'): + ccflags += [ + '-Werror=pointer-arith', + ] + cflags += [ + '-Werror=declaration-after-statement', + ] + if msvc: + # See also: + # - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx + # - cl /? + if env['build'] == 'debug': + ccflags += [ + '/Od', # disable optimizations + '/Oi', # enable intrinsic functions + '/Oy-', # disable frame pointer omission + ] + else: + ccflags += [ + '/O2', # optimize for speed + ] + if env['build'] == 'release': + ccflags += [ + '/GL', # enable whole program optimization + ] + else: + ccflags += [ + '/GL-', # disable whole program optimization + ] + ccflags += [ + '/fp:fast', # fast floating point + '/W3', # warning level + #'/Wp64', # enable 64 bit porting warnings + ] + if env['machine'] == 'x86': + ccflags += [ + #'/arch:SSE2', # use the SSE2 instructions + ] + if platform == 'windows': + ccflags += [ + # TODO + ] + if platform == 'winddk': + ccflags += [ + '/Zl', # omit default library name in .OBJ + '/Zp8', # 8bytes struct member alignment + '/Gy', # separate functions for linker + '/Gm-', # disable minimal rebuild + '/WX', # treat warnings as errors + '/Gz', # __stdcall Calling convention + '/GX-', # disable C++ EH + '/GR-', # disable C++ RTTI + '/GF', # enable read-only string pooling + '/G6', # optimize for PPro, P-II, P-III + '/Ze', # enable extensions + '/Gi-', # disable incremental compilation + '/QIfdiv-', # disable Pentium FDIV fix + '/hotpatch', # prepares an image for hotpatching. + #'/Z7', #enable old-style debug info + ] + if platform == 'wince': + # See also C:\WINCE600\public\common\oak\misc\makefile.def + ccflags += [ + '/Zl', # omit default library name in .OBJ + '/GF', # enable read-only string pooling + '/GR-', # disable C++ RTTI + '/GS', # enable security checks + # Allow disabling language conformance to maintain backward compat + #'/Zc:wchar_t-', # don't force wchar_t as native type, instead of typedef + #'/Zc:forScope-', # don't enforce Standard C++ for scoping rules + #'/wd4867', + #'/wd4430', + #'/MT', + #'/U_MT', + ] + # Automatic pdb generation + # See http://scons.tigris.org/issues/show_bug.cgi?id=1656 + env.EnsureSConsVersion(0, 98, 0) + env['PDB'] = '${TARGET.base}.pdb' + env.Append(CCFLAGS = ccflags) + env.Append(CFLAGS = cflags) + env.Append(CXXFLAGS = cxxflags) + + if env['platform'] == 'windows' and msvc: + # Choose the appropriate MSVC CRT + # http://msdn.microsoft.com/en-us/library/2kzt1wy3.aspx + if env['build'] in ('debug', 'checked'): + env.Append(CCFLAGS = ['/MTd']) + env.Append(SHCCFLAGS = ['/LDd']) + else: + env.Append(CCFLAGS = ['/MT']) + env.Append(SHCCFLAGS = ['/LD']) + + # Assembler options + if gcc: + if env['machine'] == 'x86': + env.Append(ASFLAGS = ['-m32']) + if env['machine'] == 'x86_64': + env.Append(ASFLAGS = ['-m64']) + + # Linker options + linkflags = [] + shlinkflags = [] + if gcc: + if env['machine'] == 'x86': + linkflags += ['-m32'] + if env['machine'] == 'x86_64': + linkflags += ['-m64'] + if env['platform'] not in ('darwin'): + shlinkflags += [ + '-Wl,-Bsymbolic', + ] + # Handle circular dependencies in the libraries + if env['platform'] in ('darwin'): + pass + else: + env['_LIBFLAGS'] = '-Wl,--start-group ' + env['_LIBFLAGS'] + ' -Wl,--end-group' + if msvc: + if env['build'] == 'release': + # enable Link-time Code Generation + linkflags += ['/LTCG'] + env.Append(ARFLAGS = ['/LTCG']) + if platform == 'windows' and msvc: + # See also: + # - http://msdn2.microsoft.com/en-us/library/y0zzbyt4.aspx + linkflags += [ + '/fixed:no', + '/incremental:no', + ] + if platform == 'winddk': + linkflags += [ + '/merge:_PAGE=PAGE', + '/merge:_TEXT=.text', + '/section:INIT,d', + '/opt:ref', + '/opt:icf', + '/ignore:4198,4010,4037,4039,4065,4070,4078,4087,4089,4221', + '/incremental:no', + '/fullbuild', + '/release', + '/nodefaultlib', + '/wx', + '/debug', + '/debugtype:cv', + '/version:5.1', + '/osversion:5.1', + '/functionpadmin:5', + '/safeseh', + '/pdbcompress', + '/stack:0x40000,0x1000', + '/driver', + '/align:0x80', + '/subsystem:native,5.01', + '/base:0x10000', + + '/entry:DrvEnableDriver', + ] + if env['build'] != 'release': + linkflags += [ + '/MAP', # http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx + ] + if platform == 'wince': + linkflags += [ + '/nodefaultlib', + #'/incremental:no', + #'/fullbuild', + '/entry:_DllMainCRTStartup', + ] + env.Append(LINKFLAGS = linkflags) + env.Append(SHLINKFLAGS = shlinkflags) + + # We have C++ in several libraries, so always link with the C++ compiler + if env['gcc']: + env['LINK'] = env['CXX'] + + # Default libs + env.Append(LIBS = []) + + # Load tools + env.Tool('lex') + env.Tool('yacc') + if env['llvm']: + env.Tool('llvm') + + pkg_config_modules(env, 'x11', ['x11', 'xext']) + pkg_config_modules(env, 'drm', ['libdrm']) + pkg_config_modules(env, 'drm_intel', ['libdrm_intel']) + pkg_config_modules(env, 'drm_radeon', ['libdrm_radeon']) + pkg_config_modules(env, 'xorg', ['xorg-server']) + pkg_config_modules(env, 'kms', ['libkms']) + + env['dri'] = env['x11'] and env['drm'] + + # Custom builders and methods + env.Tool('custom') + createInstallMethods(env) + + # for debugging + #print env.Dump() + + +def exists(env): + return 1 diff --git a/mesalib/scons/llvm.py b/mesalib/scons/llvm.py index 3fef9e090..364cfe366 100644 --- a/mesalib/scons/llvm.py +++ b/mesalib/scons/llvm.py @@ -1,166 +1,166 @@ -"""llvm - -Tool-specific initialization for LLVM - -""" - -# -# Copyright (c) 2009 VMware, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY -# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -import os -import os.path -import re -import sys -import distutils.version - -import SCons.Errors -import SCons.Util - - -def generate(env): - env['llvm'] = False - - try: - llvm_dir = os.environ['LLVM'] - except KeyError: - # Do nothing -- use the system headers/libs - llvm_dir = None - else: - if not os.path.isdir(llvm_dir): - raise SCons.Errors.InternalError, "Specified LLVM directory not found" - - if env['debug']: - llvm_subdir = 'Debug' - else: - llvm_subdir = 'Release' - - llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin') - if not os.path.isdir(llvm_bin_dir): - llvm_bin_dir = os.path.join(llvm_dir, 'bin') - if not os.path.isdir(llvm_bin_dir): - raise SCons.Errors.InternalError, "LLVM binary directory not found" - - env.PrependENVPath('PATH', llvm_bin_dir) - - if env['platform'] == 'windows': - # XXX: There is no llvm-config on Windows, so assume a standard layout - if llvm_dir is None: - print 'scons: LLVM environment variable must be specified when building for windows' - return - - # Try to determine the LLVM version from llvm/Config/config.h - llvm_config = os.path.join(llvm_dir, 'include/llvm/Config/config.h') - if not os.path.exists(llvm_config): - print 'scons: could not find %s' % llvm_config - return - llvm_version_re = re.compile(r'^#define PACKAGE_VERSION "([^"]*)"') - llvm_version = None - for line in open(llvm_config, 'rt'): - mo = llvm_version_re.match(line) - if mo: - llvm_version = mo.group(1) - llvm_version = distutils.version.LooseVersion(llvm_version) - break - if llvm_version is None: - print 'scons: could not determine the LLVM version from %s' % llvm_config - return - - env.Prepend(CPPPATH = [os.path.join(llvm_dir, 'include')]) - env.AppendUnique(CPPDEFINES = [ - '__STDC_LIMIT_MACROS', - '__STDC_CONSTANT_MACROS', - 'HAVE_STDINT_H', - ]) - env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')]) - if llvm_version >= distutils.version.LooseVersion('2.7'): - # 2.7 - env.Prepend(LIBS = [ - 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter', - 'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine', - 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', - 'LLVMMCParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen', - 'LLVMSelectionDAG', 'LLVMX86Info', 'LLVMAsmPrinter', - 'LLVMCodeGen', 'LLVMScalarOpts', 'LLVMInstCombine', - 'LLVMTransformUtils', 'LLVMipa', 'LLVMAsmParser', - 'LLVMArchive', 'LLVMBitReader', 'LLVMAnalysis', 'LLVMTarget', - 'LLVMMC', 'LLVMCore', 'LLVMSupport', 'LLVMSystem', - ]) - else: - # 2.6 - env.Prepend(LIBS = [ - 'LLVMX86AsmParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen', - 'LLVMX86Info', 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter', - 'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine', - 'LLVMDebugger', 'LLVMBitWriter', 'LLVMAsmParser', - 'LLVMArchive', 'LLVMBitReader', 'LLVMSelectionDAG', - 'LLVMAsmPrinter', 'LLVMCodeGen', 'LLVMScalarOpts', - 'LLVMTransformUtils', 'LLVMipa', 'LLVMAnalysis', - 'LLVMTarget', 'LLVMMC', 'LLVMCore', 'LLVMSupport', - 'LLVMSystem', - ]) - env.Append(LIBS = [ - 'imagehlp', - 'psapi', - ]) - if env['msvc']: - # Some of the LLVM C headers use the inline keyword without - # defining it. - env.Append(CPPDEFINES = [('inline', '__inline')]) - if env['build'] in ('debug', 'checked'): - # LLVM libraries are static, build with /MT, and they - # automatically link agains LIBCMT. When we're doing a - # debug build we'll be linking against LIBCMTD, so disable - # that. - env.Append(LINKFLAGS = ['/nodefaultlib:LIBCMT']) - else: - if not env.Detect('llvm-config'): - print 'scons: llvm-config script not found' % llvm_version - return - - llvm_version = env.backtick('llvm-config --version').rstrip() - llvm_version = distutils.version.LooseVersion(llvm_version) - - try: - env.ParseConfig('llvm-config --cppflags') - env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter') - env.ParseConfig('llvm-config --ldflags') - except OSError: - print 'scons: llvm-config version %s failed' % llvm_version - return - - assert llvm_version is not None - env['llvm'] = True - - print 'scons: Found LLVM version %s' % llvm_version - env['LLVM_VERSION'] = llvm_version - - # Define HAVE_LLVM macro with the major/minor version number (e.g., 0x0206 for 2.6) - llvm_version_major = int(llvm_version.version[0]) - llvm_version_minor = int(llvm_version.version[1]) - llvm_version_hex = '0x%02x%02x' % (llvm_version_major, llvm_version_minor) - env.Prepend(CPPDEFINES = [('HAVE_LLVM', llvm_version_hex)]) - -def exists(env): - return True - -# vim:set ts=4 sw=4 et: +"""llvm + +Tool-specific initialization for LLVM + +""" + +# +# Copyright (c) 2009 VMware, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +import os +import os.path +import re +import sys +import distutils.version + +import SCons.Errors +import SCons.Util + + +def generate(env): + env['llvm'] = False + + try: + llvm_dir = os.environ['LLVM'] + except KeyError: + # Do nothing -- use the system headers/libs + llvm_dir = None + else: + if not os.path.isdir(llvm_dir): + raise SCons.Errors.InternalError, "Specified LLVM directory not found" + + if env['debug']: + llvm_subdir = 'Debug' + else: + llvm_subdir = 'Release' + + llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin') + if not os.path.isdir(llvm_bin_dir): + llvm_bin_dir = os.path.join(llvm_dir, 'bin') + if not os.path.isdir(llvm_bin_dir): + raise SCons.Errors.InternalError, "LLVM binary directory not found" + + env.PrependENVPath('PATH', llvm_bin_dir) + + if env['platform'] == 'windows': + # XXX: There is no llvm-config on Windows, so assume a standard layout + if llvm_dir is None: + print 'scons: LLVM environment variable must be specified when building for windows' + return + + # Try to determine the LLVM version from llvm/Config/config.h + llvm_config = os.path.join(llvm_dir, 'include/llvm/Config/config.h') + if not os.path.exists(llvm_config): + print 'scons: could not find %s' % llvm_config + return + llvm_version_re = re.compile(r'^#define PACKAGE_VERSION "([^"]*)"') + llvm_version = None + for line in open(llvm_config, 'rt'): + mo = llvm_version_re.match(line) + if mo: + llvm_version = mo.group(1) + llvm_version = distutils.version.LooseVersion(llvm_version) + break + if llvm_version is None: + print 'scons: could not determine the LLVM version from %s' % llvm_config + return + + env.Prepend(CPPPATH = [os.path.join(llvm_dir, 'include')]) + env.AppendUnique(CPPDEFINES = [ + '__STDC_LIMIT_MACROS', + '__STDC_CONSTANT_MACROS', + 'HAVE_STDINT_H', + ]) + env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')]) + if llvm_version >= distutils.version.LooseVersion('2.7'): + # 2.7 + env.Prepend(LIBS = [ + 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter', + 'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine', + 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', + 'LLVMMCParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen', + 'LLVMSelectionDAG', 'LLVMX86Info', 'LLVMAsmPrinter', + 'LLVMCodeGen', 'LLVMScalarOpts', 'LLVMInstCombine', + 'LLVMTransformUtils', 'LLVMipa', 'LLVMAsmParser', + 'LLVMArchive', 'LLVMBitReader', 'LLVMAnalysis', 'LLVMTarget', + 'LLVMMC', 'LLVMCore', 'LLVMSupport', 'LLVMSystem', + ]) + else: + # 2.6 + env.Prepend(LIBS = [ + 'LLVMX86AsmParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen', + 'LLVMX86Info', 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter', + 'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine', + 'LLVMDebugger', 'LLVMBitWriter', 'LLVMAsmParser', + 'LLVMArchive', 'LLVMBitReader', 'LLVMSelectionDAG', + 'LLVMAsmPrinter', 'LLVMCodeGen', 'LLVMScalarOpts', + 'LLVMTransformUtils', 'LLVMipa', 'LLVMAnalysis', + 'LLVMTarget', 'LLVMMC', 'LLVMCore', 'LLVMSupport', + 'LLVMSystem', + ]) + env.Append(LIBS = [ + 'imagehlp', + 'psapi', + ]) + if env['msvc']: + # Some of the LLVM C headers use the inline keyword without + # defining it. + env.Append(CPPDEFINES = [('inline', '__inline')]) + if env['build'] in ('debug', 'checked'): + # LLVM libraries are static, build with /MT, and they + # automatically link agains LIBCMT. When we're doing a + # debug build we'll be linking against LIBCMTD, so disable + # that. + env.Append(LINKFLAGS = ['/nodefaultlib:LIBCMT']) + else: + if not env.Detect('llvm-config'): + print 'scons: llvm-config script not found' % llvm_version + return + + llvm_version = env.backtick('llvm-config --version').rstrip() + llvm_version = distutils.version.LooseVersion(llvm_version) + + try: + env.ParseConfig('llvm-config --cppflags') + env.ParseConfig('llvm-config --libs') + env.ParseConfig('llvm-config --ldflags') + except OSError: + print 'scons: llvm-config version %s failed' % llvm_version + return + + assert llvm_version is not None + env['llvm'] = True + + print 'scons: Found LLVM version %s' % llvm_version + env['LLVM_VERSION'] = llvm_version + + # Define HAVE_LLVM macro with the major/minor version number (e.g., 0x0206 for 2.6) + llvm_version_major = int(llvm_version.version[0]) + llvm_version_minor = int(llvm_version.version[1]) + llvm_version_hex = '0x%02x%02x' % (llvm_version_major, llvm_version_minor) + env.Prepend(CPPDEFINES = [('HAVE_LLVM', llvm_version_hex)]) + +def exists(env): + return True + +# vim:set ts=4 sw=4 et: diff --git a/mesalib/scons/udis86.py b/mesalib/scons/udis86.py deleted file mode 100644 index 82dd01a8f..000000000 --- a/mesalib/scons/udis86.py +++ /dev/null @@ -1,44 +0,0 @@ -"""udis86 - -Tool-specific initialization for udis86 - -""" - -# -# Copyright (c) 2009 VMware, Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY -# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -def generate(env): - conf = env.Configure() - - if conf.CheckHeader('udis86.h'): # and conf.CheckLib('udis86'): - env['UDIS86'] = True - env.Prepend(LIBS = ['udis86']) - else: - env['UDIS86'] = False - - conf.Finish() - -def exists(env): - return True - -# vim:set ts=4 sw=4 et: diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index 3c2672c01..e4c84c953 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -1,478 +1,478 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef GLSL_TYPES_H -#define GLSL_TYPES_H - -#include -#include - -extern "C" { -#include "GL/gl.h" -} - -#include "ralloc.h" - -struct _mesa_glsl_parse_state; -struct glsl_symbol_table; - -extern "C" void -_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state); - -extern "C" void -_mesa_glsl_release_types(void); - -enum glsl_base_type { - GLSL_TYPE_UINT = 0, - GLSL_TYPE_INT, - GLSL_TYPE_FLOAT, - GLSL_TYPE_BOOL, - GLSL_TYPE_SAMPLER, - GLSL_TYPE_STRUCT, - GLSL_TYPE_ARRAY, - GLSL_TYPE_VOID, - GLSL_TYPE_ERROR -}; - -enum glsl_sampler_dim { - GLSL_SAMPLER_DIM_1D = 0, - GLSL_SAMPLER_DIM_2D, - GLSL_SAMPLER_DIM_3D, - GLSL_SAMPLER_DIM_CUBE, - GLSL_SAMPLER_DIM_RECT, - GLSL_SAMPLER_DIM_BUF -}; - - -struct glsl_type { - GLenum gl_type; - glsl_base_type base_type; - - unsigned sampler_dimensionality:3; - unsigned sampler_shadow:1; - unsigned sampler_array:1; - unsigned sampler_type:2; /**< Type of data returned using this sampler. - * only \c GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, - * and \c GLSL_TYPE_UINT are valid. - */ - - /* Callers of this ralloc-based new need not call delete. It's - * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ - static void* operator new(size_t size) - { - if (glsl_type::mem_ctx == NULL) { - glsl_type::mem_ctx = ralloc_context(NULL); - assert(glsl_type::mem_ctx != NULL); - } - - void *type; - - type = ralloc_size(glsl_type::mem_ctx, size); - assert(type != NULL); - - return type; - } - - /* If the user *does* call delete, that's OK, we will just - * ralloc_free in that case. */ - static void operator delete(void *type) - { - ralloc_free(type); - } - - /** - * \name Vector and matrix element counts - * - * For scalars, each of these values will be 1. For non-numeric types - * these will be 0. - */ - /*@{*/ - unsigned vector_elements:3; /**< 1, 2, 3, or 4 vector elements. */ - unsigned matrix_columns:3; /**< 1, 2, 3, or 4 matrix columns. */ - /*@}*/ - - /** - * Name of the data type - * - * This may be \c NULL for anonymous structures, for arrays, or for - * function types. - */ - const char *name; - - /** - * For \c GLSL_TYPE_ARRAY, this is the length of the array. For - * \c GLSL_TYPE_STRUCT, it is the number of elements in the structure and - * the number of values pointed to by \c fields.structure (below). - */ - unsigned length; - - /** - * Subtype of composite data types. - */ - union { - const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ - struct glsl_struct_field *structure; /**< List of struct fields. */ - } fields; - - - /** - * \name Pointers to various public type singletons - */ - /*@{*/ - static const glsl_type *const error_type; - static const glsl_type *const void_type; - static const glsl_type *const int_type; - static const glsl_type *const ivec4_type; - static const glsl_type *const uint_type; - static const glsl_type *const uvec2_type; - static const glsl_type *const uvec3_type; - static const glsl_type *const uvec4_type; - static const glsl_type *const float_type; - static const glsl_type *const vec2_type; - static const glsl_type *const vec3_type; - static const glsl_type *const vec4_type; - static const glsl_type *const bool_type; - static const glsl_type *const mat2_type; - static const glsl_type *const mat2x3_type; - static const glsl_type *const mat2x4_type; - static const glsl_type *const mat3x2_type; - static const glsl_type *const mat3_type; - static const glsl_type *const mat3x4_type; - static const glsl_type *const mat4x2_type; - static const glsl_type *const mat4x3_type; - static const glsl_type *const mat4_type; - /*@}*/ - - - /** - * For numeric and boolean derrived types returns the basic scalar type - * - * If the type is a numeric or boolean scalar, vector, or matrix type, - * this function gets the scalar type of the individual components. For - * all other types, including arrays of numeric or boolean types, the - * error type is returned. - */ - const glsl_type *get_base_type() const; - - /** - * Query the type of elements in an array - * - * \return - * Pointer to the type of elements in the array for array types, or \c NULL - * for non-array types. - */ - const glsl_type *element_type() const - { - return is_array() ? fields.array : NULL; - } - - /** - * Get the instance of a built-in scalar, vector, or matrix type - */ - static const glsl_type *get_instance(unsigned base_type, unsigned rows, - unsigned columns); - - /** - * Get the instance of an array type - */ - static const glsl_type *get_array_instance(const glsl_type *base, - unsigned elements); - - /** - * Get the instance of a record type - */ - static const glsl_type *get_record_instance(const glsl_struct_field *fields, - unsigned num_fields, - const char *name); - - /** - * Query the total number of scalars that make up a scalar, vector or matrix - */ - unsigned components() const - { - return vector_elements * matrix_columns; - } - - /** - * Calculate the number of components slots required to hold this type - * - * This is used to determine how many uniform or varying locations a type - * might occupy. - */ - unsigned component_slots() const; - - - /** - * Query whether or not a type is a scalar (non-vector and non-matrix). - */ - bool is_scalar() const - { - return (vector_elements == 1) - && (base_type >= GLSL_TYPE_UINT) - && (base_type <= GLSL_TYPE_BOOL); - } - - /** - * Query whether or not a type is a vector - */ - bool is_vector() const - { - return (vector_elements > 1) - && (matrix_columns == 1) - && (base_type >= GLSL_TYPE_UINT) - && (base_type <= GLSL_TYPE_BOOL); - } - - /** - * Query whether or not a type is a matrix - */ - bool is_matrix() const - { - /* GLSL only has float matrices. */ - return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT); - } - - /** - * Query whether or not a type is a non-array numeric type - */ - bool is_numeric() const - { - return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_FLOAT); - } - - /** - * Query whether or not a type is an integral type - */ - bool is_integer() const - { - return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT); - } - - /** - * Query whether or not a type is a float type - */ - bool is_float() const - { - return base_type == GLSL_TYPE_FLOAT; - } - - /** - * Query whether or not a type is a non-array boolean type - */ - bool is_boolean() const - { - return base_type == GLSL_TYPE_BOOL; - } - - /** - * Query whether or not a type is a sampler - */ - bool is_sampler() const - { - return base_type == GLSL_TYPE_SAMPLER; - } - - /** - * Query whether or not a type is an array - */ - bool is_array() const - { - return base_type == GLSL_TYPE_ARRAY; - } - - /** - * Query whether or not a type is a record - */ - bool is_record() const - { - return base_type == GLSL_TYPE_STRUCT; - } - - /** - * Query whether or not a type is the void type singleton. - */ - bool is_void() const - { - return base_type == GLSL_TYPE_VOID; - } - - /** - * Query whether or not a type is the error type singleton. - */ - bool is_error() const - { - return base_type == GLSL_TYPE_ERROR; - } - - /** - * Query the full type of a matrix row - * - * \return - * If the type is not a matrix, \c glsl_type::error_type is returned. - * Otherwise a type matching the rows of the matrix is returned. - */ - const glsl_type *row_type() const - { - return is_matrix() - ? get_instance(base_type, matrix_columns, 1) - : error_type; - } - - /** - * Query the full type of a matrix column - * - * \return - * If the type is not a matrix, \c glsl_type::error_type is returned. - * Otherwise a type matching the columns of the matrix is returned. - */ - const glsl_type *column_type() const - { - return is_matrix() - ? get_instance(base_type, vector_elements, 1) - : error_type; - } - - - /** - * Get the type of a structure field - * - * \return - * Pointer to the type of the named field. If the type is not a structure - * or the named field does not exist, \c glsl_type::error_type is returned. - */ - const glsl_type *field_type(const char *name) const; - - - /** - * Get the location of a filed within a record type - */ - int field_index(const char *name) const; - - - /** - * Query the number of elements in an array type - * - * \return - * The number of elements in the array for array types or -1 for non-array - * types. If the number of elements in the array has not yet been declared, - * zero is returned. - */ - int array_size() const - { - return is_array() ? length : -1; - } - -private: - /** - * ralloc context for all glsl_type allocations - * - * Set on the first call to \c glsl_type::new. - */ - static void *mem_ctx; - - void init_ralloc_type_ctx(void); - - /** Constructor for vector and matrix types */ - glsl_type(GLenum gl_type, - glsl_base_type base_type, unsigned vector_elements, - unsigned matrix_columns, const char *name); - - /** Constructor for sampler types */ - glsl_type(GLenum gl_type, - enum glsl_sampler_dim dim, bool shadow, bool array, - unsigned type, const char *name); - - /** Constructor for record types */ - glsl_type(const glsl_struct_field *fields, unsigned num_fields, - const char *name); - - /** Constructor for array types */ - glsl_type(const glsl_type *array, unsigned length); - - /** Hash table containing the known array types. */ - static struct hash_table *array_types; - - /** Hash table containing the known record types. */ - static struct hash_table *record_types; - - static int record_key_compare(const void *a, const void *b); - static unsigned record_key_hash(const void *key); - - /** - * \name Pointers to various type singletons - */ - /*@{*/ - static const glsl_type _error_type; - static const glsl_type _void_type; - static const glsl_type _sampler3D_type; - static const glsl_type builtin_core_types[]; - static const glsl_type builtin_structure_types[]; - static const glsl_type builtin_110_deprecated_structure_types[]; - static const glsl_type builtin_110_types[]; - static const glsl_type builtin_120_types[]; - static const glsl_type builtin_130_types[]; - static const glsl_type builtin_ARB_texture_rectangle_types[]; - static const glsl_type builtin_EXT_texture_array_types[]; - static const glsl_type builtin_EXT_texture_buffer_object_types[]; - /*@}*/ - - /** - * \name Methods to populate a symbol table with built-in types. - * - * \internal - * This is one of the truely annoying things about C++. Methods that are - * completely internal and private to a type still have to be advertised to - * the world in a public header file. - */ - /*@{*/ - static void generate_100ES_types(glsl_symbol_table *); - static void generate_110_types(glsl_symbol_table *); - static void generate_120_types(glsl_symbol_table *); - static void generate_130_types(glsl_symbol_table *); - static void generate_ARB_texture_rectangle_types(glsl_symbol_table *, bool); - static void generate_EXT_texture_array_types(glsl_symbol_table *, bool); - static void generate_OES_texture_3D_types(glsl_symbol_table *, bool); - /*@}*/ - - /** - * \name Friend functions. - * - * These functions are friends because they must have C linkage and the - * need to call various private methods or access various private static - * data. - */ - /*@{*/ - friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *); - friend void _mesa_glsl_release_types(void); - /*@}*/ -}; - -struct glsl_struct_field { - const struct glsl_type *type; - const char *name; -}; - -#endif /* GLSL_TYPES_H */ +/* -*- c++ -*- */ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef GLSL_TYPES_H +#define GLSL_TYPES_H + +#include +#include + +extern "C" { +#include "GL/gl.h" +} + +#include "ralloc.h" + +struct _mesa_glsl_parse_state; +struct glsl_symbol_table; + +extern "C" void +_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state); + +extern "C" void +_mesa_glsl_release_types(void); + +enum glsl_base_type { + GLSL_TYPE_UINT = 0, + GLSL_TYPE_INT, + GLSL_TYPE_FLOAT, + GLSL_TYPE_BOOL, + GLSL_TYPE_SAMPLER, + GLSL_TYPE_STRUCT, + GLSL_TYPE_ARRAY, + GLSL_TYPE_VOID, + GLSL_TYPE_ERROR +}; + +enum glsl_sampler_dim { + GLSL_SAMPLER_DIM_1D = 0, + GLSL_SAMPLER_DIM_2D, + GLSL_SAMPLER_DIM_3D, + GLSL_SAMPLER_DIM_CUBE, + GLSL_SAMPLER_DIM_RECT, + GLSL_SAMPLER_DIM_BUF +}; + + +struct glsl_type { + GLenum gl_type; + glsl_base_type base_type; + + unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */ + unsigned sampler_shadow:1; + unsigned sampler_array:1; + unsigned sampler_type:2; /**< Type of data returned using this sampler. + * only \c GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, + * and \c GLSL_TYPE_UINT are valid. + */ + + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ + static void* operator new(size_t size) + { + if (glsl_type::mem_ctx == NULL) { + glsl_type::mem_ctx = ralloc_context(NULL); + assert(glsl_type::mem_ctx != NULL); + } + + void *type; + + type = ralloc_size(glsl_type::mem_ctx, size); + assert(type != NULL); + + return type; + } + + /* If the user *does* call delete, that's OK, we will just + * ralloc_free in that case. */ + static void operator delete(void *type) + { + ralloc_free(type); + } + + /** + * \name Vector and matrix element counts + * + * For scalars, each of these values will be 1. For non-numeric types + * these will be 0. + */ + /*@{*/ + unsigned vector_elements:3; /**< 1, 2, 3, or 4 vector elements. */ + unsigned matrix_columns:3; /**< 1, 2, 3, or 4 matrix columns. */ + /*@}*/ + + /** + * Name of the data type + * + * This may be \c NULL for anonymous structures, for arrays, or for + * function types. + */ + const char *name; + + /** + * For \c GLSL_TYPE_ARRAY, this is the length of the array. For + * \c GLSL_TYPE_STRUCT, it is the number of elements in the structure and + * the number of values pointed to by \c fields.structure (below). + */ + unsigned length; + + /** + * Subtype of composite data types. + */ + union { + const struct glsl_type *array; /**< Type of array elements. */ + const struct glsl_type *parameters; /**< Parameters to function. */ + struct glsl_struct_field *structure; /**< List of struct fields. */ + } fields; + + + /** + * \name Pointers to various public type singletons + */ + /*@{*/ + static const glsl_type *const error_type; + static const glsl_type *const void_type; + static const glsl_type *const int_type; + static const glsl_type *const ivec4_type; + static const glsl_type *const uint_type; + static const glsl_type *const uvec2_type; + static const glsl_type *const uvec3_type; + static const glsl_type *const uvec4_type; + static const glsl_type *const float_type; + static const glsl_type *const vec2_type; + static const glsl_type *const vec3_type; + static const glsl_type *const vec4_type; + static const glsl_type *const bool_type; + static const glsl_type *const mat2_type; + static const glsl_type *const mat2x3_type; + static const glsl_type *const mat2x4_type; + static const glsl_type *const mat3x2_type; + static const glsl_type *const mat3_type; + static const glsl_type *const mat3x4_type; + static const glsl_type *const mat4x2_type; + static const glsl_type *const mat4x3_type; + static const glsl_type *const mat4_type; + /*@}*/ + + + /** + * For numeric and boolean derrived types returns the basic scalar type + * + * If the type is a numeric or boolean scalar, vector, or matrix type, + * this function gets the scalar type of the individual components. For + * all other types, including arrays of numeric or boolean types, the + * error type is returned. + */ + const glsl_type *get_base_type() const; + + /** + * Query the type of elements in an array + * + * \return + * Pointer to the type of elements in the array for array types, or \c NULL + * for non-array types. + */ + const glsl_type *element_type() const + { + return is_array() ? fields.array : NULL; + } + + /** + * Get the instance of a built-in scalar, vector, or matrix type + */ + static const glsl_type *get_instance(unsigned base_type, unsigned rows, + unsigned columns); + + /** + * Get the instance of an array type + */ + static const glsl_type *get_array_instance(const glsl_type *base, + unsigned elements); + + /** + * Get the instance of a record type + */ + static const glsl_type *get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name); + + /** + * Query the total number of scalars that make up a scalar, vector or matrix + */ + unsigned components() const + { + return vector_elements * matrix_columns; + } + + /** + * Calculate the number of components slots required to hold this type + * + * This is used to determine how many uniform or varying locations a type + * might occupy. + */ + unsigned component_slots() const; + + + /** + * Query whether or not a type is a scalar (non-vector and non-matrix). + */ + bool is_scalar() const + { + return (vector_elements == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a vector + */ + bool is_vector() const + { + return (vector_elements > 1) + && (matrix_columns == 1) + && (base_type >= GLSL_TYPE_UINT) + && (base_type <= GLSL_TYPE_BOOL); + } + + /** + * Query whether or not a type is a matrix + */ + bool is_matrix() const + { + /* GLSL only has float matrices. */ + return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT); + } + + /** + * Query whether or not a type is a non-array numeric type + */ + bool is_numeric() const + { + return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_FLOAT); + } + + /** + * Query whether or not a type is an integral type + */ + bool is_integer() const + { + return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT); + } + + /** + * Query whether or not a type is a float type + */ + bool is_float() const + { + return base_type == GLSL_TYPE_FLOAT; + } + + /** + * Query whether or not a type is a non-array boolean type + */ + bool is_boolean() const + { + return base_type == GLSL_TYPE_BOOL; + } + + /** + * Query whether or not a type is a sampler + */ + bool is_sampler() const + { + return base_type == GLSL_TYPE_SAMPLER; + } + + /** + * Query whether or not a type is an array + */ + bool is_array() const + { + return base_type == GLSL_TYPE_ARRAY; + } + + /** + * Query whether or not a type is a record + */ + bool is_record() const + { + return base_type == GLSL_TYPE_STRUCT; + } + + /** + * Query whether or not a type is the void type singleton. + */ + bool is_void() const + { + return base_type == GLSL_TYPE_VOID; + } + + /** + * Query whether or not a type is the error type singleton. + */ + bool is_error() const + { + return base_type == GLSL_TYPE_ERROR; + } + + /** + * Query the full type of a matrix row + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the rows of the matrix is returned. + */ + const glsl_type *row_type() const + { + return is_matrix() + ? get_instance(base_type, matrix_columns, 1) + : error_type; + } + + /** + * Query the full type of a matrix column + * + * \return + * If the type is not a matrix, \c glsl_type::error_type is returned. + * Otherwise a type matching the columns of the matrix is returned. + */ + const glsl_type *column_type() const + { + return is_matrix() + ? get_instance(base_type, vector_elements, 1) + : error_type; + } + + + /** + * Get the type of a structure field + * + * \return + * Pointer to the type of the named field. If the type is not a structure + * or the named field does not exist, \c glsl_type::error_type is returned. + */ + const glsl_type *field_type(const char *name) const; + + + /** + * Get the location of a filed within a record type + */ + int field_index(const char *name) const; + + + /** + * Query the number of elements in an array type + * + * \return + * The number of elements in the array for array types or -1 for non-array + * types. If the number of elements in the array has not yet been declared, + * zero is returned. + */ + int array_size() const + { + return is_array() ? length : -1; + } + +private: + /** + * ralloc context for all glsl_type allocations + * + * Set on the first call to \c glsl_type::new. + */ + static void *mem_ctx; + + void init_ralloc_type_ctx(void); + + /** Constructor for vector and matrix types */ + glsl_type(GLenum gl_type, + glsl_base_type base_type, unsigned vector_elements, + unsigned matrix_columns, const char *name); + + /** Constructor for sampler types */ + glsl_type(GLenum gl_type, + enum glsl_sampler_dim dim, bool shadow, bool array, + unsigned type, const char *name); + + /** Constructor for record types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name); + + /** Constructor for array types */ + glsl_type(const glsl_type *array, unsigned length); + + /** Hash table containing the known array types. */ + static struct hash_table *array_types; + + /** Hash table containing the known record types. */ + static struct hash_table *record_types; + + static int record_key_compare(const void *a, const void *b); + static unsigned record_key_hash(const void *key); + + /** + * \name Pointers to various type singletons + */ + /*@{*/ + static const glsl_type _error_type; + static const glsl_type _void_type; + static const glsl_type _sampler3D_type; + static const glsl_type builtin_core_types[]; + static const glsl_type builtin_structure_types[]; + static const glsl_type builtin_110_deprecated_structure_types[]; + static const glsl_type builtin_110_types[]; + static const glsl_type builtin_120_types[]; + static const glsl_type builtin_130_types[]; + static const glsl_type builtin_ARB_texture_rectangle_types[]; + static const glsl_type builtin_EXT_texture_array_types[]; + static const glsl_type builtin_EXT_texture_buffer_object_types[]; + /*@}*/ + + /** + * \name Methods to populate a symbol table with built-in types. + * + * \internal + * This is one of the truely annoying things about C++. Methods that are + * completely internal and private to a type still have to be advertised to + * the world in a public header file. + */ + /*@{*/ + static void generate_100ES_types(glsl_symbol_table *); + static void generate_110_types(glsl_symbol_table *); + static void generate_120_types(glsl_symbol_table *); + static void generate_130_types(glsl_symbol_table *); + static void generate_ARB_texture_rectangle_types(glsl_symbol_table *, bool); + static void generate_EXT_texture_array_types(glsl_symbol_table *, bool); + static void generate_OES_texture_3D_types(glsl_symbol_table *, bool); + /*@}*/ + + /** + * \name Friend functions. + * + * These functions are friends because they must have C linkage and the + * need to call various private methods or access various private static + * data. + */ + /*@{*/ + friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *); + friend void _mesa_glsl_release_types(void); + /*@}*/ +}; + +struct glsl_struct_field { + const struct glsl_type *type; + const char *name; +}; + +#endif /* GLSL_TYPES_H */ diff --git a/mesalib/src/mesa/main/ff_fragment_shader.cpp b/mesalib/src/mesa/main/ff_fragment_shader.cpp index ed513397a..0bc534df5 100644 --- a/mesalib/src/mesa/main/ff_fragment_shader.cpp +++ b/mesalib/src/mesa/main/ff_fragment_shader.cpp @@ -3,7 +3,6 @@ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * Copyright 2009 VMware, Inc. All Rights Reserved. - * Copyright © 2010 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -31,8 +30,6 @@ extern "C" { #include "glheader.h" #include "imports.h" #include "mtypes.h" -#include "main/uniforms.h" -#include "main/macros.h" #include "program/program.h" #include "program/prog_parameter.h" #include "program/prog_cache.h" @@ -42,13 +39,6 @@ extern "C" { #include "program/programopt.h" #include "texenvprogram.h" } -#include "../glsl/glsl_types.h" -#include "../glsl/ir.h" -#include "../glsl/glsl_symbol_table.h" -#include "../glsl/glsl_parser_extras.h" -#include "../glsl/ir_optimization.h" -#include "../glsl/ir_print_visitor.h" -#include "../program/ir_to_mesa.h" /* * Note on texture units: @@ -69,7 +59,7 @@ struct texenvprog_cache_item { GLuint hash; void *key; - struct gl_shader_program *data; + struct gl_fragment_program *data; struct texenvprog_cache_item *next; }; @@ -86,6 +76,13 @@ texenv_doing_secondary_color(struct gl_context *ctx) return GL_FALSE; } +/** + * Up to nine instructions per tex unit, plus fog, specular color. + */ +#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12) + +#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM) + struct mode_opt { #ifdef __GNUC__ __extension__ GLubyte Source:4; /**< SRC_x */ @@ -119,6 +116,8 @@ struct state_key { GLuint NumArgsA:3; /**< up to MAX_COMBINER_TERMS */ GLuint ModeA:5; /**< MODE_x */ + GLuint texture_cyl_wrap:1; /**< For gallium test/debug only */ + struct mode_opt OptRGB[MAX_COMBINER_TERMS]; struct mode_opt OptA[MAX_COMBINER_TERMS]; } unit[MAX_TEXTURE_UNITS]; @@ -471,6 +470,10 @@ static GLuint make_state_key( struct gl_context *ctx, struct state_key *key ) key->unit[i].OptRGB[1].Operand = OPR_SRC_COLOR; key->unit[i].OptRGB[1].Source = texUnit->BumpTarget - GL_TEXTURE0 + SRC_TEXTURE0; } + + /* this is a back-door for enabling cylindrical texture wrap mode */ + if (texObj->Priority == 0.125) + key->unit[i].texture_cyl_wrap = 1; } /* _NEW_LIGHT | _NEW_FOG */ @@ -499,15 +502,40 @@ static GLuint make_state_key( struct gl_context *ctx, struct state_key *key ) } +/** + * Use uregs to represent registers internally, translate to Mesa's + * expected formats on emit. + * + * NOTE: These are passed by value extensively in this file rather + * than as usual by pointer reference. If this disturbs you, try + * remembering they are just 32bits in size. + * + * GCC is smart enough to deal with these dword-sized structures in + * much the same way as if I had defined them as dwords and was using + * macros to access and set the fields. This is much nicer and easier + * to evolve. + */ +struct ureg { + GLuint file:4; + GLuint idx:8; + GLuint negatebase:1; + GLuint swz:12; + GLuint pad:7; +}; + +static const struct ureg undef = { + PROGRAM_UNDEFINED, + 255, + 0, + 0, + 0 +}; + + /** State used to build the fragment program: */ struct texenv_fragment_program { - struct gl_shader_program *shader_program; - struct gl_shader *shader; struct gl_fragment_program *program; - exec_list *instructions; - exec_list *top_instructions; - void *mem_ctx; struct state_key *state; GLbitfield alu_temps; /**< Track texture indirections, see spec. */ @@ -515,35 +543,385 @@ struct texenv_fragment_program { GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */ GLboolean error; - ir_variable *src_texture[MAX_TEXTURE_COORD_UNITS]; + struct ureg src_texture[MAX_TEXTURE_COORD_UNITS]; /* Reg containing each texture unit's sampled texture color, * else undef. */ - /* Texcoord override from bumpmapping. */ - struct ir_variable *texcoord_tex[MAX_TEXTURE_COORD_UNITS]; - + struct ureg texcoord_tex[MAX_TEXTURE_COORD_UNITS]; /* Reg containing texcoord for a texture unit, * needed for bump mapping, else undef. */ - ir_rvalue *src_previous; /**< Reg containing color from previous + struct ureg src_previous; /**< Reg containing color from previous * stage. May need to be decl'd. */ GLuint last_tex_stage; /**< Number of last enabled texture unit */ + + struct ureg half; + struct ureg one; + struct ureg zero; }; -static ir_rvalue * -get_source(struct texenv_fragment_program *p, - GLuint src, GLuint unit) + + +static struct ureg make_ureg(GLuint file, GLuint idx) +{ + struct ureg reg; + reg.file = file; + reg.idx = idx; + reg.negatebase = 0; + reg.swz = SWIZZLE_NOOP; + reg.pad = 0; + return reg; +} + +static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) +{ + reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), + GET_SWZ(reg.swz, y), + GET_SWZ(reg.swz, z), + GET_SWZ(reg.swz, w)); + + return reg; +} + +static struct ureg swizzle1( struct ureg reg, int x ) +{ + return swizzle(reg, x, x, x, x); +} + +static struct ureg negate( struct ureg reg ) +{ + reg.negatebase ^= 1; + return reg; +} + +static GLboolean is_undef( struct ureg reg ) +{ + return reg.file == PROGRAM_UNDEFINED; +} + + +static struct ureg get_temp( struct texenv_fragment_program *p ) +{ + GLint bit; + + /* First try and reuse temps which have been used already: + */ + bit = _mesa_ffs( ~p->temp_in_use & p->alu_temps ); + + /* Then any unused temporary: + */ + if (!bit) + bit = _mesa_ffs( ~p->temp_in_use ); + + if (!bit) { + _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); + exit(1); + } + + if ((GLuint) bit > p->program->Base.NumTemporaries) + p->program->Base.NumTemporaries = bit; + + p->temp_in_use |= 1<<(bit-1); + return make_ureg(PROGRAM_TEMPORARY, (bit-1)); +} + +static struct ureg get_tex_temp( struct texenv_fragment_program *p ) +{ + int bit; + + /* First try to find available temp not previously used (to avoid + * starting a new texture indirection). According to the spec, the + * ~p->temps_output isn't necessary, but will keep it there for + * now: + */ + bit = _mesa_ffs( ~p->temp_in_use & ~p->alu_temps & ~p->temps_output ); + + /* Then any unused temporary: + */ + if (!bit) + bit = _mesa_ffs( ~p->temp_in_use ); + + if (!bit) { + _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); + exit(1); + } + + if ((GLuint) bit > p->program->Base.NumTemporaries) + p->program->Base.NumTemporaries = bit; + + p->temp_in_use |= 1<<(bit-1); + return make_ureg(PROGRAM_TEMPORARY, (bit-1)); +} + + +/** Mark a temp reg as being no longer allocatable. */ +static void reserve_temp( struct texenv_fragment_program *p, struct ureg r ) +{ + if (r.file == PROGRAM_TEMPORARY) + p->temps_output |= (1 << r.idx); +} + + +static void release_temps(struct gl_context *ctx, struct texenv_fragment_program *p ) +{ + GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps; + + /* KW: To support tex_env_crossbar, don't release the registers in + * temps_output. + */ + if (max_temp >= sizeof(int) * 8) + p->temp_in_use = p->temps_output; + else + p->temp_in_use = ~((1<temps_output; +} + + +static struct ureg register_param5( struct texenv_fragment_program *p, + GLint s0, + GLint s1, + GLint s2, + GLint s3, + GLint s4) +{ + int tokens[STATE_LENGTH]; + GLuint idx; + tokens[0] = s0; + tokens[1] = s1; + tokens[2] = s2; + tokens[3] = s3; + tokens[4] = s4; + idx = _mesa_add_state_reference(p->program->Base.Parameters, + (gl_state_index *)tokens); + return make_ureg(PROGRAM_STATE_VAR, idx); +} + + +#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) +#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) +#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) +#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) + +static GLuint frag_to_vert_attrib( GLuint attrib ) +{ + switch (attrib) { + case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0; + case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1; + default: + assert(attrib >= FRAG_ATTRIB_TEX0); + assert(attrib <= FRAG_ATTRIB_TEX7); + return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0; + } +} + + +static struct ureg register_input( struct texenv_fragment_program *p, GLuint input ) +{ + if (p->state->inputs_available & (1<program->Base.InputsRead |= (1 << input); + return make_ureg(PROGRAM_INPUT, input); + } + else { + GLuint idx = frag_to_vert_attrib( input ); + return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx ); + } +} + + +static void emit_arg( struct prog_src_register *reg, + struct ureg ureg ) +{ + reg->File = ureg.file; + reg->Index = ureg.idx; + reg->Swizzle = ureg.swz; + reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE; + reg->Abs = GL_FALSE; +} + +static void emit_dst( struct prog_dst_register *dst, + struct ureg ureg, GLuint mask ) +{ + dst->File = ureg.file; + dst->Index = ureg.idx; + dst->WriteMask = mask; + dst->CondMask = COND_TR; /* always pass cond test */ + dst->CondSwizzle = SWIZZLE_NOOP; +} + +static struct prog_instruction * +emit_op(struct texenv_fragment_program *p, + enum prog_opcode op, + struct ureg dest, + GLuint mask, + GLboolean saturate, + struct ureg src0, + struct ureg src1, + struct ureg src2 ) +{ + const GLuint nr = p->program->Base.NumInstructions++; + struct prog_instruction *inst = &p->program->Base.Instructions[nr]; + + assert(nr < MAX_INSTRUCTIONS); + + _mesa_init_instructions(inst, 1); + inst->Opcode = op; + + emit_arg( &inst->SrcReg[0], src0 ); + emit_arg( &inst->SrcReg[1], src1 ); + emit_arg( &inst->SrcReg[2], src2 ); + + inst->SaturateMode = saturate ? SATURATE_ZERO_ONE : SATURATE_OFF; + + emit_dst( &inst->DstReg, dest, mask ); + +#if 0 + /* Accounting for indirection tracking: + */ + if (dest.file == PROGRAM_TEMPORARY) + p->temps_output |= 1 << dest.idx; +#endif + + return inst; +} + + +static struct ureg emit_arith( struct texenv_fragment_program *p, + enum prog_opcode op, + struct ureg dest, + GLuint mask, + GLboolean saturate, + struct ureg src0, + struct ureg src1, + struct ureg src2 ) +{ + emit_op(p, op, dest, mask, saturate, src0, src1, src2); + + /* Accounting for indirection tracking: + */ + if (src0.file == PROGRAM_TEMPORARY) + p->alu_temps |= 1 << src0.idx; + + if (!is_undef(src1) && src1.file == PROGRAM_TEMPORARY) + p->alu_temps |= 1 << src1.idx; + + if (!is_undef(src2) && src2.file == PROGRAM_TEMPORARY) + p->alu_temps |= 1 << src2.idx; + + if (dest.file == PROGRAM_TEMPORARY) + p->alu_temps |= 1 << dest.idx; + + p->program->Base.NumAluInstructions++; + return dest; +} + +static struct ureg emit_texld( struct texenv_fragment_program *p, + enum prog_opcode op, + struct ureg dest, + GLuint destmask, + GLuint tex_unit, + GLuint tex_idx, + GLuint tex_shadow, + struct ureg coord ) +{ + struct prog_instruction *inst = emit_op( p, op, + dest, destmask, + GL_FALSE, /* don't saturate? */ + coord, /* arg 0? */ + undef, + undef); + + inst->TexSrcTarget = tex_idx; + inst->TexSrcUnit = tex_unit; + inst->TexShadow = tex_shadow; + + p->program->Base.NumTexInstructions++; + + /* Accounting for indirection tracking: + */ + reserve_temp(p, dest); + +#if 0 + /* Is this a texture indirection? + */ + if ((coord.file == PROGRAM_TEMPORARY && + (p->temps_output & (1<alu_temps & (1<program->Base.NumTexIndirections++; + p->temps_output = 1<alu_temps = 0; + assert(0); /* KW: texture env crossbar */ + } +#endif + + return dest; +} + + +static struct ureg register_const4f( struct texenv_fragment_program *p, + GLfloat s0, + GLfloat s1, + GLfloat s2, + GLfloat s3) +{ + GLfloat values[4]; + GLuint idx, swizzle; + struct ureg r; + values[0] = s0; + values[1] = s1; + values[2] = s2; + values[3] = s3; + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + &swizzle ); + r = make_ureg(PROGRAM_CONSTANT, idx); + r.swz = swizzle; + return r; +} + +#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) +#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) +#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) +#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) + + +static struct ureg get_one( struct texenv_fragment_program *p ) +{ + if (is_undef(p->one)) + p->one = register_scalar_const(p, 1.0); + return p->one; +} + +static struct ureg get_half( struct texenv_fragment_program *p ) { - ir_variable *var; - ir_dereference *deref; + if (is_undef(p->half)) + p->half = register_scalar_const(p, 0.5); + return p->half; +} + +static struct ureg get_zero( struct texenv_fragment_program *p ) +{ + if (is_undef(p->zero)) + p->zero = register_scalar_const(p, 0.0); + return p->zero; +} + + +static void program_error( struct texenv_fragment_program *p, const char *msg ) +{ + _mesa_problem(NULL, "%s", msg); + p->error = 1; +} +static struct ureg get_source( struct texenv_fragment_program *p, + GLuint src, GLuint unit ) +{ switch (src) { case SRC_TEXTURE: - return new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]); + assert(!is_undef(p->src_texture[unit])); + return p->src_texture[unit]; case SRC_TEXTURE0: case SRC_TEXTURE1: @@ -553,69 +931,66 @@ get_source(struct texenv_fragment_program *p, case SRC_TEXTURE5: case SRC_TEXTURE6: case SRC_TEXTURE7: - return new(p->mem_ctx) - ir_dereference_variable(p->src_texture[src - SRC_TEXTURE0]); + assert(!is_undef(p->src_texture[src - SRC_TEXTURE0])); + return p->src_texture[src - SRC_TEXTURE0]; case SRC_CONSTANT: - var = p->shader->symbols->get_variable("gl_TextureEnvColor"); - assert(var); - deref = new(p->mem_ctx) ir_dereference_variable(var); - var->max_array_access = MAX2(var->max_array_access, unit); - return new(p->mem_ctx) ir_dereference_array(deref, - new(p->mem_ctx) ir_constant(unit)); + return register_param2(p, STATE_TEXENV_COLOR, unit); case SRC_PRIMARY_COLOR: - var = p->shader->symbols->get_variable("gl_Color"); - assert(var); - return new(p->mem_ctx) ir_dereference_variable(var); + return register_input(p, FRAG_ATTRIB_COL0); case SRC_ZERO: - return new(p->mem_ctx) ir_constant(0.0f); + return get_zero(p); case SRC_PREVIOUS: - if (!p->src_previous) { - var = p->shader->symbols->get_variable("gl_Color"); - assert(var); - return new(p->mem_ctx) ir_dereference_variable(var); - } else { - return p->src_previous->clone(p->mem_ctx, NULL); - } + if (is_undef(p->src_previous)) + return register_input(p, FRAG_ATTRIB_COL0); + else + return p->src_previous; default: assert(0); - return NULL; + return undef; } } -static ir_rvalue * -emit_combine_source(struct texenv_fragment_program *p, - GLuint unit, - GLuint source, - GLuint operand) +static struct ureg emit_combine_source( struct texenv_fragment_program *p, + GLuint mask, + GLuint unit, + GLuint source, + GLuint operand ) { - ir_rvalue *src; + struct ureg arg, src, one; src = get_source(p, source, unit); switch (operand) { case OPR_ONE_MINUS_SRC_COLOR: - return new(p->mem_ctx) ir_expression(ir_binop_sub, - new(p->mem_ctx) ir_constant(1.0f), - src); + /* Get unused tmp, + * Emit tmp = 1.0 - arg.xyzw + */ + arg = get_temp( p ); + one = get_one( p ); + return emit_arith( p, OPCODE_SUB, arg, mask, 0, one, src, undef); case OPR_SRC_ALPHA: - return new(p->mem_ctx) ir_swizzle(src, 3, 3, 3, 3, 1); - + if (mask == WRITEMASK_W) + return src; + else + return swizzle1( src, SWIZZLE_W ); case OPR_ONE_MINUS_SRC_ALPHA: - return new(p->mem_ctx) ir_expression(ir_binop_sub, - new(p->mem_ctx) ir_constant(1.0f), - new(p->mem_ctx) ir_swizzle(src, - 3, 3, - 3, 3, 1)); + /* Get unused tmp, + * Emit tmp = 1.0 - arg.wwww + */ + arg = get_temp(p); + one = get_one(p); + return emit_arith(p, OPCODE_SUB, arg, mask, 0, + one, swizzle1(src, SWIZZLE_W), undef); case OPR_ZERO: - return new(p->mem_ctx) ir_constant(0.0f); + return get_zero(p); case OPR_ONE: - return new(p->mem_ctx) ir_constant(1.0f); + return get_one(p); case OPR_SRC_COLOR: return src; default: @@ -664,104 +1039,112 @@ static GLboolean args_match( const struct state_key *key, GLuint unit ) return GL_TRUE; } -static ir_rvalue * -smear(struct texenv_fragment_program *p, ir_rvalue *val) -{ - if (!val->type->is_scalar()) - return val; - - return new(p->mem_ctx) ir_swizzle(val, 0, 0, 0, 0, 4); -} - -static ir_rvalue * -emit_combine(struct texenv_fragment_program *p, - GLuint unit, - GLuint nr, - GLuint mode, - const struct mode_opt *opt) +static struct ureg emit_combine( struct texenv_fragment_program *p, + struct ureg dest, + GLuint mask, + GLboolean saturate, + GLuint unit, + GLuint nr, + GLuint mode, + const struct mode_opt *opt) { - ir_rvalue *src[MAX_COMBINER_TERMS]; - ir_rvalue *tmp0, *tmp1; + struct ureg src[MAX_COMBINER_TERMS]; + struct ureg tmp, half; GLuint i; assert(nr <= MAX_COMBINER_TERMS); for (i = 0; i < nr; i++) - src[i] = emit_combine_source( p, unit, opt[i].Source, opt[i].Operand ); + src[i] = emit_combine_source( p, mask, unit, opt[i].Source, opt[i].Operand ); switch (mode) { case MODE_REPLACE: - return src[0]; - + if (mask == WRITEMASK_XYZW && !saturate) + return src[0]; + else + return emit_arith( p, OPCODE_MOV, dest, mask, saturate, src[0], undef, undef ); case MODE_MODULATE: - return new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]); - + return emit_arith( p, OPCODE_MUL, dest, mask, saturate, + src[0], src[1], undef ); case MODE_ADD: - return new(p->mem_ctx) ir_expression(ir_binop_add, src[0], src[1]); - + return emit_arith( p, OPCODE_ADD, dest, mask, saturate, + src[0], src[1], undef ); case MODE_ADD_SIGNED: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, src[0], src[1]); - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, - new(p->mem_ctx) ir_constant(-0.5f)); - + /* tmp = arg0 + arg1 + * result = tmp - .5 + */ + half = get_half(p); + tmp = get_temp( p ); + emit_arith( p, OPCODE_ADD, tmp, mask, 0, src[0], src[1], undef ); + emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp, half, undef ); + return dest; case MODE_INTERPOLATE: - /* Arg0 * (Arg2) + Arg1 * (1-Arg2) */ - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]); - - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_sub, - new(p->mem_ctx) ir_constant(1.0f), - src[2]->clone(p->mem_ctx, NULL)); - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[1], tmp1); - - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1); + /* Arg0 * (Arg2) + Arg1 * (1-Arg2) -- note arguments are reordered: + */ + return emit_arith( p, OPCODE_LRP, dest, mask, saturate, src[2], src[0], src[1] ); case MODE_SUBTRACT: - return new(p->mem_ctx) ir_expression(ir_binop_sub, src[0], src[1]); + return emit_arith( p, OPCODE_SUB, dest, mask, saturate, src[0], src[1], undef ); case MODE_DOT3_RGBA: case MODE_DOT3_RGBA_EXT: case MODE_DOT3_RGB_EXT: case MODE_DOT3_RGB: { - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], - new(p->mem_ctx) ir_constant(2.0f)); - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, - new(p->mem_ctx) ir_constant(-1.0f)); - tmp0 = new(p->mem_ctx) ir_swizzle(smear(p, tmp0), 0, 1, 2, 3, 3); - - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[1], - new(p->mem_ctx) ir_constant(2.0f)); - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp1, - new(p->mem_ctx) ir_constant(-1.0f)); - tmp1 = new(p->mem_ctx) ir_swizzle(smear(p, tmp1), 0, 1, 2, 3, 3); - - return new(p->mem_ctx) ir_expression(ir_binop_dot, tmp0, tmp1); - } - case MODE_MODULATE_ADD_ATI: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]); - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, src[1]); + struct ureg tmp0 = get_temp( p ); + struct ureg tmp1 = get_temp( p ); + struct ureg neg1 = register_scalar_const(p, -1); + struct ureg two = register_scalar_const(p, 2); - case MODE_MODULATE_SIGNED_ADD_ATI: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]); - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, src[1]); - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, - new(p->mem_ctx) ir_constant(-0.5f)); + /* tmp0 = 2*src0 - 1 + * tmp1 = 2*src1 - 1 + * + * dst = tmp0 dot3 tmp1 + */ + emit_arith( p, OPCODE_MAD, tmp0, WRITEMASK_XYZW, 0, + two, src[0], neg1); + if (memcmp(&src[0], &src[1], sizeof(struct ureg)) == 0) + tmp1 = tmp0; + else + emit_arith( p, OPCODE_MAD, tmp1, WRITEMASK_XYZW, 0, + two, src[1], neg1); + emit_arith( p, OPCODE_DP3, dest, mask, saturate, tmp0, tmp1, undef); + return dest; + } + case MODE_MODULATE_ADD_ATI: + /* Arg0 * Arg2 + Arg1 */ + return emit_arith( p, OPCODE_MAD, dest, mask, saturate, + src[0], src[2], src[1] ); + case MODE_MODULATE_SIGNED_ADD_ATI: { + /* Arg0 * Arg2 + Arg1 - 0.5 */ + struct ureg tmp0 = get_temp(p); + half = get_half(p); + emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[0], src[2], src[1] ); + emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef ); + return dest; + } case MODE_MODULATE_SUBTRACT_ATI: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]); - return new(p->mem_ctx) ir_expression(ir_binop_sub, tmp0, src[1]); - + /* Arg0 * Arg2 - Arg1 */ + emit_arith( p, OPCODE_MAD, dest, mask, 0, src[0], src[2], negate(src[1]) ); + return dest; case MODE_ADD_PRODUCTS: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]); - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[2], src[3]); - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1); - + /* Arg0 * Arg1 + Arg2 * Arg3 */ + { + struct ureg tmp0 = get_temp(p); + emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef ); + emit_arith( p, OPCODE_MAD, dest, mask, saturate, src[2], src[3], tmp0 ); + } + return dest; case MODE_ADD_PRODUCTS_SIGNED: - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]); - tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[2], src[3]); - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1); - return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, - new(p->mem_ctx) ir_constant(-0.5f)); - + /* Arg0 * Arg1 + Arg2 * Arg3 - 0.5 */ + { + struct ureg tmp0 = get_temp(p); + half = get_half(p); + emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef ); + emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[2], src[3], tmp0 ); + emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef ); + } + return dest; case MODE_BUMP_ENVMAP_ATI: /* special - not handled here */ assert(0); @@ -772,24 +1155,17 @@ emit_combine(struct texenv_fragment_program *p, } } -static ir_rvalue * -saturate(struct texenv_fragment_program *p, ir_rvalue *val) -{ - val = new(p->mem_ctx) ir_expression(ir_binop_min, val, - new(p->mem_ctx) ir_constant(1.0f)); - return new(p->mem_ctx) ir_expression(ir_binop_max, val, - new(p->mem_ctx) ir_constant(0.0f)); -} /** * Generate instructions for one texture unit's env/combiner mode. */ -static ir_rvalue * +static struct ureg emit_texenv(struct texenv_fragment_program *p, GLuint unit) { const struct state_key *key = p->state; GLboolean rgb_saturate, alpha_saturate; GLuint rgb_shift, alpha_shift; + struct ureg out, dest; if (!key->unit[unit].enabled) { return get_source(p, SRC_PREVIOUS, 0); @@ -831,232 +1207,129 @@ emit_texenv(struct texenv_fragment_program *p, GLuint unit) else alpha_saturate = GL_FALSE; - ir_variable *temp_var = new(p->mem_ctx) ir_variable(glsl_type::vec4_type, - "texenv_combine", - ir_var_temporary); - p->instructions->push_tail(temp_var); - - ir_dereference *deref; - ir_assignment *assign; - ir_rvalue *val; + /* If this is the very last calculation (and various other conditions + * are met), emit directly to the color output register. Otherwise, + * emit to a temporary register. + */ + if (key->separate_specular || + unit != p->last_tex_stage || + alpha_shift || + key->num_draw_buffers != 1 || + rgb_shift) + dest = get_temp( p ); + else + dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); /* Emit the RGB and A combine ops */ if (key->unit[unit].ModeRGB == key->unit[unit].ModeA && args_match(key, unit)) { - val = emit_combine(p, unit, - key->unit[unit].NumArgsRGB, - key->unit[unit].ModeRGB, - key->unit[unit].OptRGB); - val = smear(p, val); - if (rgb_saturate) - val = saturate(p, val); - - deref = new(p->mem_ctx) ir_dereference_variable(temp_var); - assign = new(p->mem_ctx) ir_assignment(deref, val, NULL); - p->instructions->push_tail(assign); + out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate, + unit, + key->unit[unit].NumArgsRGB, + key->unit[unit].ModeRGB, + key->unit[unit].OptRGB); } else if (key->unit[unit].ModeRGB == MODE_DOT3_RGBA_EXT || key->unit[unit].ModeRGB == MODE_DOT3_RGBA) { - ir_rvalue *val = emit_combine(p, unit, - key->unit[unit].NumArgsRGB, - key->unit[unit].ModeRGB, - key->unit[unit].OptRGB); - val = smear(p, val); - if (rgb_saturate) - val = saturate(p, val); - deref = new(p->mem_ctx) ir_dereference_variable(temp_var); - assign = new(p->mem_ctx) ir_assignment(deref, val, NULL); - p->instructions->push_tail(assign); + out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate, + unit, + key->unit[unit].NumArgsRGB, + key->unit[unit].ModeRGB, + key->unit[unit].OptRGB); } else { /* Need to do something to stop from re-emitting identical * argument calculations here: */ - val = emit_combine(p, unit, - key->unit[unit].NumArgsRGB, - key->unit[unit].ModeRGB, - key->unit[unit].OptRGB); - val = smear(p, val); - val = new(p->mem_ctx) ir_swizzle(val, 0, 1, 2, 3, 3); - if (rgb_saturate) - val = saturate(p, val); - deref = new(p->mem_ctx) ir_dereference_variable(temp_var); - assign = new(p->mem_ctx) ir_assignment(deref, val, NULL, WRITEMASK_XYZ); - p->instructions->push_tail(assign); - - val = emit_combine(p, unit, - key->unit[unit].NumArgsA, - key->unit[unit].ModeA, - key->unit[unit].OptA); - val = smear(p, val); - val = new(p->mem_ctx) ir_swizzle(val, 3, 3, 3, 3, 1); - if (alpha_saturate) - val = saturate(p, val); - deref = new(p->mem_ctx) ir_dereference_variable(temp_var); - assign = new(p->mem_ctx) ir_assignment(deref, val, NULL, WRITEMASK_W); - p->instructions->push_tail(assign); + out = emit_combine( p, dest, WRITEMASK_XYZ, rgb_saturate, + unit, + key->unit[unit].NumArgsRGB, + key->unit[unit].ModeRGB, + key->unit[unit].OptRGB); + out = emit_combine( p, dest, WRITEMASK_W, alpha_saturate, + unit, + key->unit[unit].NumArgsA, + key->unit[unit].ModeA, + key->unit[unit].OptA); } - deref = new(p->mem_ctx) ir_dereference_variable(temp_var); - /* Deal with the final shift: */ if (alpha_shift || rgb_shift) { - ir_constant *shift; + struct ureg shift; + GLboolean saturate = GL_TRUE; /* always saturate at this point */ if (rgb_shift == alpha_shift) { - shift = new(p->mem_ctx) ir_constant((float)(1 << rgb_shift)); + shift = register_scalar_const(p, (GLfloat)(1<mem_ctx) ir_constant(glsl_type::vec4_type, - (ir_constant_data *)const_data); + shift = register_const4f(p, + (GLfloat)(1<mem_ctx) ir_expression(ir_binop_mul, - deref, shift)); + return emit_arith( p, OPCODE_MUL, dest, WRITEMASK_XYZW, + saturate, out, shift, undef ); } else - return deref; + return out; } /** * Generate instruction for getting a texture source term. */ - static void load_texture( struct texenv_fragment_program *p, GLuint unit ) - { - ir_dereference *deref; - ir_assignment *assign; - - if (p->src_texture[unit]) - return; - - const GLuint texTarget = p->state->unit[unit].source_index; - ir_rvalue *texcoord; - - if (p->texcoord_tex[unit]) { - texcoord = new(p->mem_ctx) ir_dereference_variable(p->texcoord_tex[unit]); - } - else { - ir_variable *tc_array = p->shader->symbols->get_variable("gl_TexCoord"); - assert(tc_array); - texcoord = new(p->mem_ctx) ir_dereference_variable(tc_array); - ir_rvalue *index = new(p->mem_ctx) ir_constant(unit); - texcoord = new(p->mem_ctx) ir_dereference_array(texcoord, index); - tc_array->max_array_access = MAX2(tc_array->max_array_access, unit); - } - - if (!p->state->unit[unit].enabled) { - p->src_texture[unit] = new(p->mem_ctx) ir_variable(glsl_type::vec4_type, - "dummy_tex", - ir_var_temporary); - p->instructions->push_tail(p->src_texture[unit]); - - deref = new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]); - assign = new(p->mem_ctx) ir_assignment(deref, - new(p->mem_ctx) ir_constant(0.0f), - NULL); - p->instructions->push_tail(assign); - return ; - } - - const glsl_type *sampler_type = NULL; - int coords = 0; - - switch (texTarget) { - case TEXTURE_1D_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("sampler1DShadow"); - else - sampler_type = p->shader->symbols->get_type("sampler1D"); - coords = 1; - break; - case TEXTURE_1D_ARRAY_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("sampler1DArrayShadow"); - else - sampler_type = p->shader->symbols->get_type("sampler1DArray"); - coords = 2; - break; - case TEXTURE_2D_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("sampler2DShadow"); - else - sampler_type = p->shader->symbols->get_type("sampler2D"); - coords = 2; - break; - case TEXTURE_2D_ARRAY_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("sampler2DArrayShadow"); - else - sampler_type = p->shader->symbols->get_type("sampler2DArray"); - coords = 3; - break; - case TEXTURE_RECT_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("sampler2DRectShadow"); - else - sampler_type = p->shader->symbols->get_type("sampler2DRect"); - coords = 2; - break; - case TEXTURE_3D_INDEX: - assert(!p->state->unit[unit].shadow); - sampler_type = p->shader->symbols->get_type("sampler3D"); - coords = 3; - break; - case TEXTURE_CUBE_INDEX: - if (p->state->unit[unit].shadow) - sampler_type = p->shader->symbols->get_type("samplerCubeShadow"); - else - sampler_type = p->shader->symbols->get_type("samplerCube"); - coords = 3; - break; - } - - p->src_texture[unit] = new(p->mem_ctx) ir_variable(glsl_type::vec4_type, - "tex", - ir_var_temporary); - p->instructions->push_tail(p->src_texture[unit]); - - ir_texture *tex = new(p->mem_ctx) ir_texture(ir_tex); - - - char *sampler_name = ralloc_asprintf(p->mem_ctx, "sampler_%d", unit); - ir_variable *sampler = new(p->mem_ctx) ir_variable(sampler_type, - sampler_name, - ir_var_uniform); - p->top_instructions->push_head(sampler); - deref = new(p->mem_ctx) ir_dereference_variable(sampler); - tex->set_sampler(deref); - - tex->coordinate = new(p->mem_ctx) ir_swizzle(texcoord, 0, 1, 2, 3, coords); - - if (p->state->unit[unit].shadow) { - texcoord = texcoord->clone(p->mem_ctx, NULL); - tex->shadow_comparitor = new(p->mem_ctx) ir_swizzle(texcoord, - coords, 0, 0, 0, - 1); - coords++; - } - - texcoord = texcoord->clone(p->mem_ctx, NULL); - tex->projector = new(p->mem_ctx) ir_swizzle(texcoord, 3, 0, 0, 0, 1); - - deref = new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]); - assign = new(p->mem_ctx) ir_assignment(deref, tex, NULL); - p->instructions->push_tail(assign); - } +static void load_texture( struct texenv_fragment_program *p, GLuint unit ) +{ + if (is_undef(p->src_texture[unit])) { + const GLuint texTarget = p->state->unit[unit].source_index; + struct ureg texcoord; + struct ureg tmp = get_tex_temp( p ); -static void -load_texenv_source(struct texenv_fragment_program *p, - GLuint src, GLuint unit) + if (is_undef(p->texcoord_tex[unit])) { + texcoord = register_input(p, FRAG_ATTRIB_TEX0+unit); + } + else { + /* might want to reuse this reg for tex output actually */ + texcoord = p->texcoord_tex[unit]; + } + + /* TODO: Use D0_MASK_XY where possible. + */ + if (p->state->unit[unit].enabled) { + GLboolean shadow = GL_FALSE; + + if (p->state->unit[unit].shadow) { + p->program->Base.ShadowSamplers |= 1 << unit; + shadow = GL_TRUE; + } + + p->src_texture[unit] = emit_texld( p, OPCODE_TXP, + tmp, WRITEMASK_XYZW, + unit, texTarget, shadow, + texcoord ); + + p->program->Base.SamplersUsed |= (1 << unit); + /* This identity mapping should already be in place + * (see _mesa_init_program_struct()) but let's be safe. + */ + p->program->Base.SamplerUnits[unit] = unit; + } + else + p->src_texture[unit] = get_zero(p); + + if (p->state->unit[unit].texture_cyl_wrap) { + /* set flag which is checked by Mesa->Gallium program translation */ + p->program->Base.InputFlags[0] |= PROG_PARAM_BIT_CYL_WRAP; + } + + } +} + +static GLboolean load_texenv_source( struct texenv_fragment_program *p, + GLuint src, GLuint unit ) { switch (src) { case SRC_TEXTURE: @@ -1078,6 +1351,8 @@ load_texenv_source(struct texenv_fragment_program *p, /* not a texture src - do nothing */ break; } + + return GL_TRUE; } @@ -1104,214 +1379,108 @@ load_texunit_sources( struct texenv_fragment_program *p, GLuint unit ) /** * Generate instructions for loading bump map textures. */ -static void +static GLboolean load_texunit_bumpmap( struct texenv_fragment_program *p, GLuint unit ) { const struct state_key *key = p->state; GLuint bumpedUnitNr = key->unit[unit].OptRGB[1].Source - SRC_TEXTURE0; - ir_rvalue *bump; - ir_rvalue *texcoord; - ir_variable *rot_mat_0_var, *rot_mat_1_var; - ir_dereference_variable *rot_mat_0, *rot_mat_1; - - rot_mat_0_var = p->shader->symbols->get_variable("gl_MESABumpRotMatrix0"); - rot_mat_1_var = p->shader->symbols->get_variable("gl_MESABumpRotMatrix1"); - rot_mat_0 = new(p->mem_ctx) ir_dereference_variable(rot_mat_0_var); - rot_mat_1 = new(p->mem_ctx) ir_dereference_variable(rot_mat_1_var); - - ir_variable *tc_array = p->shader->symbols->get_variable("gl_TexCoord"); - assert(tc_array); - texcoord = new(p->mem_ctx) ir_dereference_variable(tc_array); - ir_rvalue *index = new(p->mem_ctx) ir_constant(bumpedUnitNr); - texcoord = new(p->mem_ctx) ir_dereference_array(texcoord, index); - tc_array->max_array_access = MAX2(tc_array->max_array_access, unit); + struct ureg texcDst, bumpMapRes; + struct ureg constdudvcolor = register_const4f(p, 0.0, 0.0, 0.0, 1.0); + struct ureg texcSrc = register_input(p, FRAG_ATTRIB_TEX0 + bumpedUnitNr); + struct ureg rotMat0 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_0, unit ); + struct ureg rotMat1 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_1, unit ); load_texenv_source( p, unit + SRC_TEXTURE0, unit ); + bumpMapRes = get_source(p, key->unit[unit].OptRGB[0].Source, unit); + texcDst = get_tex_temp( p ); + p->texcoord_tex[bumpedUnitNr] = texcDst; + /* Apply rot matrix and add coords to be available in next phase. - * dest = Arg1 + (Arg0.xx * rotMat0) + (Arg0.yy * rotMat1) + * dest = (Arg0.xxxx * rotMat0 + Arg1) + (Arg0.yyyy * rotMat1) * note only 2 coords are affected the rest are left unchanged (mul by 0) */ - ir_dereference *deref; - ir_assignment *assign; - ir_rvalue *bump_x, *bump_y; - - texcoord = smear(p, texcoord); - - /* bump_texcoord = texcoord */ - ir_variable *bumped = new(p->mem_ctx) ir_variable(texcoord->type, - "bump_texcoord", - ir_var_temporary); - p->instructions->push_tail(bumped); - - deref = new(p->mem_ctx) ir_dereference_variable(bumped); - assign = new(p->mem_ctx) ir_assignment(deref, texcoord, NULL); - p->instructions->push_tail(assign); - - /* bump_texcoord.xy += arg0.x * rotmat0 + arg0.y * rotmat1 */ - bump = get_source(p, key->unit[unit].OptRGB[0].Source, unit); - bump_x = new(p->mem_ctx) ir_swizzle(bump, 0, 0, 0, 0, 1); - bump = bump->clone(p->mem_ctx, NULL); - bump_y = new(p->mem_ctx) ir_swizzle(bump, 1, 0, 0, 0, 1); - - bump_x = new(p->mem_ctx) ir_expression(ir_binop_mul, bump_x, rot_mat_0); - bump_y = new(p->mem_ctx) ir_expression(ir_binop_mul, bump_y, rot_mat_1); - - ir_expression *expr; - expr = new(p->mem_ctx) ir_expression(ir_binop_add, bump_x, bump_y); - - deref = new(p->mem_ctx) ir_dereference_variable(bumped); - expr = new(p->mem_ctx) ir_expression(ir_binop_add, - new(p->mem_ctx) ir_swizzle(deref, - 0, 1, 1, 1, - 2), - expr); - - deref = new(p->mem_ctx) ir_dereference_variable(bumped); - assign = new(p->mem_ctx) ir_assignment(deref, expr, NULL, WRITEMASK_XY); - p->instructions->push_tail(assign); - - p->texcoord_tex[bumpedUnitNr] = bumped; + emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0, + swizzle1(bumpMapRes, SWIZZLE_X), rotMat0, texcSrc ); + emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0, + swizzle1(bumpMapRes, SWIZZLE_Y), rotMat1, texcDst ); + + /* Move 0,0,0,1 into bumpmap src if someone (crossbar) is foolish + * enough to access this later, should optimize away. + */ + emit_arith( p, OPCODE_MOV, bumpMapRes, WRITEMASK_XYZW, 0, + constdudvcolor, undef, undef ); + + return GL_TRUE; } /** - * Applies the fog calculations. - * - * This is basically like the ARB_fragment_prorgam fog options. Note - * that ffvertex_prog.c produces fogcoord for us when - * GL_FOG_COORDINATE_EXT is set to GL_FRAGMENT_DEPTH_EXT. + * Generate a new fragment program which implements the context's + * current texture env/combine mode. */ -static ir_rvalue * -emit_fog_instructions(struct texenv_fragment_program *p, - ir_rvalue *fragcolor) +static void +create_new_program(struct gl_context *ctx, struct state_key *key, + struct gl_fragment_program *program) { - struct state_key *key = p->state; - ir_rvalue *f, *temp; - ir_variable *params, *oparams; - ir_variable *fogcoord; - ir_assignment *assign; - - /* Temporary storage for the whole fog result. Fog calculations - * only affect rgb so we're hanging on to the .a value of fragcolor - * this way. - */ - ir_variable *fog_result = new(p->mem_ctx) ir_variable(glsl_type::vec4_type, - "fog_result", - ir_var_auto); - p->instructions->push_tail(fog_result); - temp = new(p->mem_ctx) ir_dereference_variable(fog_result); - assign = new(p->mem_ctx) ir_assignment(temp, fragcolor, NULL); - p->instructions->push_tail(assign); - - temp = new(p->mem_ctx) ir_dereference_variable(fog_result); - fragcolor = new(p->mem_ctx) ir_swizzle(temp, 0, 1, 2, 3, 3); - - oparams = p->shader->symbols->get_variable("gl_MESAFogParamsOptimized"); - fogcoord = p->shader->symbols->get_variable("gl_FogFragCoord"); - params = p->shader->symbols->get_variable("gl_Fog"); - f = new(p->mem_ctx) ir_dereference_variable(fogcoord); - - ir_variable *f_var = new(p->mem_ctx) ir_variable(glsl_type::float_type, - "fog_factor", ir_var_auto); - p->instructions->push_tail(f_var); - - switch (key->fog_mode) { - case FOG_LINEAR: - /* f = (end - z) / (end - start) - * - * gl_MesaFogParamsOptimized gives us (-1 / (end - start)) and - * (end / (end - start)) so we can generate a single MAD. - */ - temp = new(p->mem_ctx) ir_dereference_variable(oparams); - temp = new(p->mem_ctx) ir_swizzle(temp, 0, 0, 0, 0, 1); - f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp); - - temp = new(p->mem_ctx) ir_dereference_variable(oparams); - temp = new(p->mem_ctx) ir_swizzle(temp, 1, 0, 0, 0, 1); - f = new(p->mem_ctx) ir_expression(ir_binop_add, f, temp); - break; - case FOG_EXP: - /* f = e^(-(density * fogcoord)) - * - * gl_MesaFogParamsOptimized gives us density/ln(2) so we can - * use EXP2 which is generally the native instruction without - * having to do any further math on the fog density uniform. - */ - temp = new(p->mem_ctx) ir_dereference_variable(oparams); - temp = new(p->mem_ctx) ir_swizzle(temp, 2, 0, 0, 0, 1); - f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp); - f = new(p->mem_ctx) ir_expression(ir_unop_neg, f); - f = new(p->mem_ctx) ir_expression(ir_unop_exp2, f); - break; - case FOG_EXP2: - /* f = e^(-(density * fogcoord)^2) - * - * gl_MesaFogParamsOptimized gives us density/sqrt(ln(2)) so we - * can do this like FOG_EXP but with a squaring after the - * multiply by density. - */ - ir_variable *temp_var = new(p->mem_ctx) ir_variable(glsl_type::float_type, - "fog_temp", - ir_var_auto); - p->instructions->push_tail(temp_var); - - temp = new(p->mem_ctx) ir_dereference_variable(oparams); - temp = new(p->mem_ctx) ir_swizzle(temp, 3, 0, 0, 0, 1); - f = new(p->mem_ctx) ir_expression(ir_binop_mul, - f, temp); - - temp = new(p->mem_ctx) ir_dereference_variable(temp_var); - ir_assignment *assign = new(p->mem_ctx) ir_assignment(temp, f, NULL); - p->instructions->push_tail(assign); - - f = new(p->mem_ctx) ir_dereference_variable(temp_var); - temp = new(p->mem_ctx) ir_dereference_variable(temp_var); - f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp); - f = new(p->mem_ctx) ir_expression(ir_unop_neg, f); - f = new(p->mem_ctx) ir_expression(ir_unop_exp2, f); - break; - } - - f = saturate(p, f); + struct prog_instruction instBuffer[MAX_INSTRUCTIONS]; + struct texenv_fragment_program p; + GLuint unit; + struct ureg cf, out; + int i; - temp = new(p->mem_ctx) ir_dereference_variable(f_var); - assign = new(p->mem_ctx) ir_assignment(temp, f, NULL); - p->instructions->push_tail(assign); + memset(&p, 0, sizeof(p)); + p.state = key; + p.program = program; - f = new(p->mem_ctx) ir_dereference_variable(f_var); - f = new(p->mem_ctx) ir_expression(ir_binop_sub, - new(p->mem_ctx) ir_constant(1.0f), - f); - temp = new(p->mem_ctx) ir_dereference_variable(params); - temp = new(p->mem_ctx) ir_dereference_record(temp, "color"); - temp = new(p->mem_ctx) ir_swizzle(temp, 0, 1, 2, 3, 3); - temp = new(p->mem_ctx) ir_expression(ir_binop_mul, temp, f); + /* During code generation, use locally-allocated instruction buffer, + * then alloc dynamic storage below. + */ + p.program->Base.Instructions = instBuffer; + p.program->Base.Target = GL_FRAGMENT_PROGRAM_ARB; + p.program->Base.String = NULL; + p.program->Base.NumTexIndirections = 1; /* is this right? */ + p.program->Base.NumTexInstructions = 0; + p.program->Base.NumAluInstructions = 0; + p.program->Base.NumInstructions = 0; + p.program->Base.NumTemporaries = 0; + p.program->Base.NumParameters = 0; + p.program->Base.NumAttributes = 0; + p.program->Base.NumAddressRegs = 0; + p.program->Base.Parameters = _mesa_new_parameter_list(); + p.program->Base.InputsRead = 0x0; + + if (key->num_draw_buffers == 1) + p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR; + else { + for (i = 0; i < key->num_draw_buffers; i++) + p.program->Base.OutputsWritten |= (1 << (FRAG_RESULT_DATA0 + i)); + } - f = new(p->mem_ctx) ir_dereference_variable(f_var); - f = new(p->mem_ctx) ir_expression(ir_binop_mul, fragcolor, f); - f = new(p->mem_ctx) ir_expression(ir_binop_add, temp, f); + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + p.src_texture[unit] = undef; + p.texcoord_tex[unit] = undef; + } - ir_dereference *deref = new(p->mem_ctx) ir_dereference_variable(fog_result); - assign = new(p->mem_ctx) ir_assignment(deref, f, NULL, WRITEMASK_XYZ); - p->instructions->push_tail(assign); + p.src_previous = undef; + p.half = undef; + p.zero = undef; + p.one = undef; - return new(p->mem_ctx) ir_dereference_variable(fog_result); -} + p.last_tex_stage = 0; + release_temps(ctx, &p); -static void -emit_instructions(struct texenv_fragment_program *p) -{ - struct state_key *key = p->state; - GLuint unit; + if (key->enabled_units && key->num_draw_buffers) { + GLboolean needbumpstage = GL_FALSE; - if (key->enabled_units) { /* Zeroth pass - bump map textures first */ - for (unit = 0; unit < key->nr_enabled_units; unit++) { + for (unit = 0; unit < key->nr_enabled_units; unit++) if (key->unit[unit].enabled && key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) { - load_texunit_bumpmap(p, unit); + needbumpstage = GL_TRUE; + load_texunit_bumpmap( &p, unit ); } - } + if (needbumpstage) + p.program->Base.NumTexIndirections++; /* First pass - to support texture_env_crossbar, first identify * all referenced texture sources and emit texld instructions @@ -1319,157 +1488,104 @@ emit_instructions(struct texenv_fragment_program *p) */ for (unit = 0; unit < key->nr_enabled_units; unit++) if (key->unit[unit].enabled) { - load_texunit_sources(p, unit); - p->last_tex_stage = unit; + load_texunit_sources( &p, unit ); + p.last_tex_stage = unit; } /* Second pass - emit combine instructions to build final color: */ - for (unit = 0; unit < key->nr_enabled_units; unit++) { + for (unit = 0; unit < key->nr_enabled_units; unit++) if (key->unit[unit].enabled) { - p->src_previous = emit_texenv(p, unit); + p.src_previous = emit_texenv( &p, unit ); + reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */ + release_temps(ctx, &p); /* release all temps */ } - } } - ir_rvalue *cf = get_source(p, SRC_PREVIOUS, 0); - ir_dereference_variable *deref; - ir_assignment *assign; - - if (key->separate_specular) { - ir_rvalue *tmp0, *tmp1; - ir_variable *spec_result = new(p->mem_ctx) ir_variable(glsl_type::vec4_type, - "specular_add", - ir_var_temporary); - - p->instructions->push_tail(spec_result); - - deref = new(p->mem_ctx) ir_dereference_variable(spec_result); - assign = new(p->mem_ctx) ir_assignment(deref, cf, NULL); - p->instructions->push_tail(assign); - - deref = new(p->mem_ctx) ir_dereference_variable(spec_result); - tmp0 = new(p->mem_ctx) ir_swizzle(deref, 0, 1, 2, 3, 3); + cf = get_source( &p, SRC_PREVIOUS, 0 ); - ir_variable *secondary = - p->shader->symbols->get_variable("gl_SecondaryColor"); - assert(secondary); - deref = new(p->mem_ctx) ir_dereference_variable(secondary); - tmp1 = new(p->mem_ctx) ir_swizzle(deref, 0, 1, 2, 3, 3); - - tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, - tmp0, tmp1); - - deref = new(p->mem_ctx) ir_dereference_variable(spec_result); - assign = new(p->mem_ctx) ir_assignment(deref, tmp0, NULL, WRITEMASK_XYZ); - p->instructions->push_tail(assign); + for (i = 0; i < key->num_draw_buffers; i++) { + if (key->num_draw_buffers == 1) + out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR ); + else { + out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i ); + } - cf = new(p->mem_ctx) ir_dereference_variable(spec_result); + if (key->separate_specular) { + /* Emit specular add. + */ + struct ureg s = register_input(&p, FRAG_ATTRIB_COL1); + emit_arith( &p, OPCODE_ADD, out, WRITEMASK_XYZ, 0, cf, s, undef ); + emit_arith( &p, OPCODE_MOV, out, WRITEMASK_W, 0, cf, undef, undef ); + } + else if (memcmp(&cf, &out, sizeof(cf)) != 0) { + /* Will wind up in here if no texture enabled or a couple of + * other scenarios (GL_REPLACE for instance). + */ + emit_arith( &p, OPCODE_MOV, out, WRITEMASK_XYZW, 0, cf, undef, undef ); + } } + /* Finish up: + */ + emit_arith( &p, OPCODE_END, undef, WRITEMASK_XYZW, 0, undef, undef, undef); if (key->fog_enabled) { - cf = emit_fog_instructions(p, cf); + /* Pull fog mode from struct gl_context, the value in the state key is + * a reduced value and not what is expected in FogOption + */ + p.program->FogOption = ctx->Fog.Mode; + p.program->Base.InputsRead |= FRAG_BIT_FOGC; } - - ir_variable *frag_color = p->shader->symbols->get_variable("gl_FragColor"); - assert(frag_color); - deref = new(p->mem_ctx) ir_dereference_variable(frag_color); - assign = new(p->mem_ctx) ir_assignment(deref, cf, NULL); - p->instructions->push_tail(assign); -} - -/** - * Generate a new fragment program which implements the context's - * current texture env/combine mode. - */ -static struct gl_shader_program * -create_new_program(struct gl_context *ctx, struct state_key *key) -{ - struct texenv_fragment_program p; - unsigned int unit; - _mesa_glsl_parse_state *state; - - memset(&p, 0, sizeof(p)); - p.mem_ctx = ralloc_context(NULL); - p.shader = ctx->Driver.NewShader(ctx, 0, GL_FRAGMENT_SHADER); - p.shader->ir = new(p.shader) exec_list; - state = new(p.shader) _mesa_glsl_parse_state(ctx, GL_FRAGMENT_SHADER, - p.shader); - p.shader->symbols = state->symbols; - p.top_instructions = p.shader->ir; - p.instructions = p.shader->ir; - p.state = key; - p.shader_program = ctx->Driver.NewShaderProgram(ctx, 0); - - state->language_version = 120; - _mesa_glsl_initialize_types(state); - _mesa_glsl_initialize_variables(p.instructions, state); - - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { - p.src_texture[unit] = NULL; - p.texcoord_tex[unit] = NULL; + else { + p.program->FogOption = GL_NONE; } - p.src_previous = NULL; + if (p.program->Base.NumTexIndirections > ctx->Const.FragmentProgram.MaxTexIndirections) + program_error(&p, "Exceeded max nr indirect texture lookups"); - p.last_tex_stage = 0; + if (p.program->Base.NumTexInstructions > ctx->Const.FragmentProgram.MaxTexInstructions) + program_error(&p, "Exceeded max TEX instructions"); - ir_function *main_f = new(p.mem_ctx) ir_function("main"); - p.instructions->push_tail(main_f); - state->symbols->add_function(main_f); + if (p.program->Base.NumAluInstructions > ctx->Const.FragmentProgram.MaxAluInstructions) + program_error(&p, "Exceeded max ALU instructions"); - ir_function_signature *main_sig = - new(p.mem_ctx) ir_function_signature(p.shader->symbols->get_type("void")); - main_sig->is_defined = true; - main_f->add_signature(main_sig); + ASSERT(p.program->Base.NumInstructions <= MAX_INSTRUCTIONS); - p.instructions = &main_sig->body; - if (key->num_draw_buffers) - emit_instructions(&p); - - validate_ir_tree(p.shader->ir); - - while (do_common_optimization(p.shader->ir, false, 32)) - ; - reparent_ir(p.shader->ir, p.shader->ir); + /* Allocate final instruction array */ + p.program->Base.Instructions + = _mesa_alloc_instructions(p.program->Base.NumInstructions); + if (!p.program->Base.Instructions) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "generating tex env program"); + return; + } + _mesa_copy_instructions(p.program->Base.Instructions, instBuffer, + p.program->Base.NumInstructions); - p.shader->CompileStatus = true; - p.shader->Version = state->language_version; - p.shader->num_builtins_to_link = state->num_builtins_to_link; - p.shader_program->Shaders = - (gl_shader **)malloc(sizeof(*p.shader_program->Shaders)); - p.shader_program->Shaders[0] = p.shader; - p.shader_program->NumShaders = 1; + if (key->num_draw_buffers && p.program->FogOption) { + _mesa_append_fog_code(ctx, p.program); + p.program->FogOption = GL_NONE; + } - _mesa_glsl_link_shader(ctx, p.shader_program); - /* Set the sampler uniforms, and relink to get them into the linked - * program. + /* Notify driver the fragment program has (actually) changed. */ - struct gl_fragment_program *fp = p.shader_program->FragmentProgram; - for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) { - char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i); - int loc = _mesa_get_uniform_location(ctx, p.shader_program, name); - if (loc != -1) { - /* Avoid using _mesa_uniform() because it flags state - * updates, so if we're generating this shader_program in a - * state update, we end up recursing. Instead, just set the - * value, which is picked up at re-link. - */ - loc = (loc & 0xffff) + (loc >> 16); - int sampler = fp->Base.Parameters->ParameterValues[loc][0]; - fp->Base.SamplerUnits[sampler] = i; - } + if (ctx->Driver.ProgramStringNotify) { + GLboolean ok = ctx->Driver.ProgramStringNotify(ctx, + GL_FRAGMENT_PROGRAM_ARB, + &p.program->Base); + /* Driver should be able to handle any texenv programs as long as + * the driver correctly reported max number of texture units correctly, + * etc. + */ + ASSERT(ok); + (void) ok; /* silence unused var warning */ } - _mesa_update_shader_textures_used(&fp->Base); - (void) ctx->Driver.ProgramStringNotify(ctx, fp->Base.Target, &fp->Base); - if (!p.shader_program->LinkStatus) - _mesa_problem(ctx, "Failed to link fixed function fragment shader: %s\n", - p.shader_program->InfoLog); - - ralloc_free(p.mem_ctx); - return p.shader_program; + if (DISASSEM) { + _mesa_print_program(&p.program->Base); + printf("\n"); + } } extern "C" { @@ -1478,27 +1594,30 @@ extern "C" { * Return a fragment program which implements the current * fixed-function texture, fog and color-sum operations. */ -struct gl_shader_program * +struct gl_fragment_program * _mesa_get_fixed_func_fragment_program(struct gl_context *ctx) { - struct gl_shader_program *shader_program; + struct gl_fragment_program *prog; struct state_key key; GLuint keySize; - + keySize = make_state_key(ctx, &key); - - shader_program = (struct gl_shader_program *) + + prog = (struct gl_fragment_program *) _mesa_search_program_cache(ctx->FragmentProgram.Cache, &key, keySize); - if (!shader_program) { - shader_program = create_new_program(ctx, &key); + if (!prog) { + prog = (struct gl_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + create_new_program(ctx, &key, prog); - _mesa_shader_cache_insert(ctx, ctx->FragmentProgram.Cache, - &key, keySize, shader_program); + _mesa_program_cache_insert(ctx, ctx->FragmentProgram.Cache, + &key, keySize, &prog->Base); } - return shader_program; + return prog; } } diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index 520d96689..39b6f72cc 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -2191,7 +2191,6 @@ struct gl_shader_state struct gl_shader_program *CurrentVertexProgram; struct gl_shader_program *CurrentGeometryProgram; struct gl_shader_program *CurrentFragmentProgram; - struct gl_shader_program *_CurrentFragmentProgram; /** * Program used by glUniform calls. diff --git a/mesalib/src/mesa/main/state.c b/mesalib/src/mesa/main/state.c index 5651e3263..f50f2af1e 100644 --- a/mesalib/src/mesa/main/state.c +++ b/mesalib/src/mesa/main/state.c @@ -43,7 +43,6 @@ #include "pixel.h" #include "program/program.h" #include "program/prog_parameter.h" -#include "shaderobj.h" #include "state.h" #include "stencil.h" #include "texenvprogram.h" @@ -250,7 +249,7 @@ update_program(struct gl_context *ctx) { const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram; const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram; - struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram; + const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram; const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current; const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; @@ -276,31 +275,23 @@ update_program(struct gl_context *ctx) /* Use shader programs */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, fsProg->FragmentProgram); - _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram, - fsProg); } else if (ctx->FragmentProgram._Enabled) { - /* use user-defined fragment program */ + /* use user-defined vertex program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, ctx->FragmentProgram.Current); - _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram, - NULL); } else if (ctx->FragmentProgram._MaintainTexEnvProgram) { /* Use fragment program generated from fixed-function state. */ - struct gl_shader_program *f = _mesa_get_fixed_func_fragment_program(ctx); - _mesa_reference_shader_program(ctx, - &ctx->Shader._CurrentFragmentProgram, f); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, - f->FragmentProgram); + _mesa_get_fixed_func_fragment_program(ctx)); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, + ctx->FragmentProgram._Current); } else { /* no fragment program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); - _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram, - NULL); } if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { diff --git a/mesalib/src/mesa/main/texenvprogram.h b/mesalib/src/mesa/main/texenvprogram.h index dba775feb..0895ebacb 100644 --- a/mesalib/src/mesa/main/texenvprogram.h +++ b/mesalib/src/mesa/main/texenvprogram.h @@ -29,7 +29,7 @@ struct gl_context; -extern struct gl_shader_program * +extern struct gl_fragment_program * _mesa_get_fixed_func_fragment_program(struct gl_context *ctx); #endif diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c index 43f894a9b..6c97787e8 100644 --- a/mesalib/src/mesa/program/program.c +++ b/mesalib/src/mesa/program/program.c @@ -140,7 +140,7 @@ _mesa_free_program_data(struct gl_context *ctx) #endif #if FEATURE_NV_fragment_program || FEATURE_ARB_fragment_program _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); - _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache); + _mesa_delete_program_cache(ctx, ctx->FragmentProgram.Cache); #endif #if FEATURE_ARB_geometry_shader4 _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL); diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index 0e0c4326e..0130c7a5a 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -1,579 +1,601 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Brian Paul - * Michel Dänzer - */ - -#include "main/glheader.h" -#include "main/formats.h" -#include "main/macros.h" -#include "program/prog_instruction.h" -#include "st_context.h" -#include "st_atom.h" -#include "st_cb_accum.h" -#include "st_cb_clear.h" -#include "st_cb_fbo.h" -#include "st_format.h" -#include "st_program.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_format.h" -#include "util/u_inlines.h" -#include "util/u_simple_shaders.h" -#include "util/u_draw_quad.h" - -#include "cso_cache/cso_context.h" - - -/** - * Do per-context initialization for glClear. - */ -void -st_init_clear(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - struct pipe_screen *pscreen = st->pipe->screen; - - memset(&st->clear, 0, sizeof(st->clear)); - - st->clear.raster.gl_rasterization_rules = 1; - st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); - - /* fragment shader state: color pass-through program */ - st->clear.fs = util_make_fragment_passthrough_shader(pipe); - - /* vertex shader state: color/position pass-through */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR }; - const uint semantic_indexes[] = { 0, 0 }; - st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, - semantic_names, - semantic_indexes); - } -} - - -/** - * Free per-context state for glClear. - */ -void -st_destroy_clear(struct st_context *st) -{ - if (st->clear.fs) { - cso_delete_fragment_shader(st->cso_context, st->clear.fs); - st->clear.fs = NULL; - } - if (st->clear.vs) { - cso_delete_vertex_shader(st->cso_context, st->clear.vs); - st->clear.vs = NULL; - } - if (st->clear.vbuf) { - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf = NULL; - } -} - - -/** - * Draw a screen-aligned quadrilateral. - * Coords are clip coords with y=0=bottom. - */ -static void -draw_quad(struct st_context *st, - float x0, float y0, float x1, float y1, GLfloat z, - const GLfloat color[4]) -{ - struct pipe_context *pipe = st->pipe; - - /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as - * no_flush) updates to buffers where we know there is no conflict - * with previous data. Currently using max_slots > 1 will cause - * synchronous rendering if the driver flushes its command buffers - * between one bitmap and the next. Our flush hook below isn't - * sufficient to catch this as the driver doesn't tell us when it - * flushes its own command buffers. Until this gets fixed, pay the - * price of allocating a new buffer for each bitmap cache-flush to - * avoid synchronous rendering. - */ - const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ - GLuint i; - - if (st->clear.vbuf_slot >= max_slots) { - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf_slot = 0; - } - - if (!st->clear.vbuf) { - st->clear.vbuf = pipe_buffer_create(pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STREAM, - max_slots * sizeof(st->clear.vertices)); - } - - /* positions */ - st->clear.vertices[0][0][0] = x0; - st->clear.vertices[0][0][1] = y0; - - st->clear.vertices[1][0][0] = x1; - st->clear.vertices[1][0][1] = y0; - - st->clear.vertices[2][0][0] = x1; - st->clear.vertices[2][0][1] = y1; - - st->clear.vertices[3][0][0] = x0; - st->clear.vertices[3][0][1] = y1; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - st->clear.vertices[i][0][2] = z; - st->clear.vertices[i][0][3] = 1.0; - st->clear.vertices[i][1][0] = color[0]; - st->clear.vertices[i][1][1] = color[1]; - st->clear.vertices[i][1][2] = color[2]; - st->clear.vertices[i][1][3] = color[3]; - } - - /* put vertex data into vbuf */ - pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, - st->clear.vbuf_slot - * sizeof(st->clear.vertices), - sizeof(st->clear.vertices), - st->clear.vertices); - - /* draw */ - util_draw_vertex_buffer(pipe, - st->cso_context, - st->clear.vbuf, - st->clear.vbuf_slot * sizeof(st->clear.vertices), - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - /* Increment slot */ - st->clear.vbuf_slot++; -} - - - -/** - * Do glClear by drawing a quadrilateral. - * The vertices of the quad will be computed from the - * ctx->DrawBuffer->_X/Ymin/max fields. - */ -static void -clear_with_quad(struct gl_context *ctx, - GLboolean color, GLboolean depth, GLboolean stencil) -{ - struct st_context *st = st_context(ctx); - const struct gl_framebuffer *fb = ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat) fb->Width; - const GLfloat fb_height = (GLfloat) fb->Height; - const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; - const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; - const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; - const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; - float clearColor[4]; - - /* - printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, - color ? "color, " : "", - depth ? "depth, " : "", - stencil ? "stencil" : "", - x0, y0, - x1, y1); - */ - - cso_save_blend(st->cso_context); - cso_save_stencil_ref(st->cso_context); - cso_save_depth_stencil_alpha(st->cso_context); - cso_save_rasterizer(st->cso_context); - cso_save_viewport(st->cso_context); - cso_save_clip(st->cso_context); - cso_save_fragment_shader(st->cso_context); - cso_save_vertex_shader(st->cso_context); - cso_save_vertex_elements(st->cso_context); - cso_save_vertex_buffers(st->cso_context); - - /* blend state: RGBA masking */ - { - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - if (color) { - if (ctx->Color.ColorMask[0][0]) - blend.rt[0].colormask |= PIPE_MASK_R; - if (ctx->Color.ColorMask[0][1]) - blend.rt[0].colormask |= PIPE_MASK_G; - if (ctx->Color.ColorMask[0][2]) - blend.rt[0].colormask |= PIPE_MASK_B; - if (ctx->Color.ColorMask[0][3]) - blend.rt[0].colormask |= PIPE_MASK_A; - if (st->ctx->Color.DitherFlag) - blend.dither = 1; - } - cso_set_blend(st->cso_context, &blend); - } - - /* depth_stencil state: always pass/set to ref value */ - { - struct pipe_depth_stencil_alpha_state depth_stencil; - memset(&depth_stencil, 0, sizeof(depth_stencil)); - if (depth) { - depth_stencil.depth.enabled = 1; - depth_stencil.depth.writemask = 1; - depth_stencil.depth.func = PIPE_FUNC_ALWAYS; - } - - if (stencil) { - struct pipe_stencil_ref stencil_ref; - memset(&stencil_ref, 0, sizeof(stencil_ref)); - depth_stencil.stencil[0].enabled = 1; - depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; - depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].valuemask = 0xff; - depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; - stencil_ref.ref_value[0] = ctx->Stencil.Clear; - cso_set_stencil_ref(st->cso_context, &stencil_ref); - } - - cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); - } - - cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); - - cso_set_rasterizer(st->cso_context, &st->clear.raster); - - /* viewport state: viewport matching window dims */ - { - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * fb_width; - vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f); - vp.scale[2] = 1.0f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * fb_width; - vp.translate[1] = 0.5f * fb_height; - vp.translate[2] = 0.0f; - vp.translate[3] = 0.0f; - cso_set_viewport(st->cso_context, &vp); - } - - cso_set_clip(st->cso_context, &st->clear.clip); - cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); - cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); - - if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { - st_translate_color(ctx->Color.ClearColor, - ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, - clearColor); - } - - /* draw quad matching scissor rect */ - draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, clearColor); - - /* Restore pipe state */ - cso_restore_blend(st->cso_context); - cso_restore_stencil_ref(st->cso_context); - cso_restore_depth_stencil_alpha(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_viewport(st->cso_context); - cso_restore_clip(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_vertex_elements(st->cso_context); - cso_restore_vertex_buffers(st->cso_context); -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (!ctx->Color.ColorMask[0][0] || - !ctx->Color.ColorMask[0][1] || - !ctx->Color.ColorMask[0][2] || - !ctx->Color.ColorMask[0][3]) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the combiend depth/stencil buffer by - * drawing a quad. - */ -static INLINE GLboolean -check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - const GLuint stencilMax = 0xff; - GLboolean maskStencil - = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - - assert(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (maskStencil) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, - boolean ds_separate) -{ - const struct st_renderbuffer *strb = st_renderbuffer(rb); - const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the stencil buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, - boolean ds_separate) -{ - const struct st_renderbuffer *strb = st_renderbuffer(rb); - const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); - const GLuint stencilMax = 0xff; - const GLboolean maskStencil - = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - - assert(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); - - if (maskStencil) - return GL_TRUE; - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - /* This is correct, but it is necessary to look at the depth clear - * value held in the surface when it comes time to issue the clear, - * rather than taking depth and stencil clear values from the - * current state. - */ - if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) - return GL_TRUE; - - return GL_FALSE; -} - - - -/** - * Called when we need to flush. - */ -void -st_flush_clear(struct st_context *st) -{ - /* Release vertex buffer to avoid synchronous rendering if we were - * to map it in the next frame. - */ - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf_slot = 0; -} - - - -/** - * Called via ctx->Driver.Clear() - */ -static void -st_Clear(struct gl_context *ctx, GLbitfield mask) -{ - static const GLbitfield BUFFER_BITS_DS - = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); - struct st_context *st = st_context(ctx); - struct gl_renderbuffer *depthRb - = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; - struct gl_renderbuffer *stencilRb - = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; - GLbitfield quad_buffers = 0x0; - GLbitfield clear_buffers = 0x0; - GLuint i; - - /* This makes sure the pipe has the latest scissor, etc values */ - st_validate_state( st ); - - if (mask & BUFFER_BITS_COLOR) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; - - if (mask & (1 << b)) { - struct gl_renderbuffer *rb - = ctx->DrawBuffer->Attachment[b].Renderbuffer; - struct st_renderbuffer *strb = st_renderbuffer(rb); - - if (!strb || !strb->surface) - continue; - - if (check_clear_color_with_quad( ctx, rb )) - quad_buffers |= PIPE_CLEAR_COLOR; - else - clear_buffers |= PIPE_CLEAR_COLOR; - } - } - } - - if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { - /* clearing combined depth + stencil */ - struct st_renderbuffer *strb = st_renderbuffer(depthRb); - - if (strb->surface) { - if (check_clear_depth_stencil_with_quad(ctx, depthRb)) - quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - else - clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - } - } - else { - /* separate depth/stencil clears */ - /* I don't think truly separate buffers are actually possible in gallium or hw? */ - if (mask & BUFFER_BIT_DEPTH) { - struct st_renderbuffer *strb = st_renderbuffer(depthRb); - - if (strb->surface) { - if (check_clear_depth_with_quad(ctx, depthRb, - st->clear.enable_ds_separate)) - quad_buffers |= PIPE_CLEAR_DEPTH; - else - clear_buffers |= PIPE_CLEAR_DEPTH; - } - } - if (mask & BUFFER_BIT_STENCIL) { - struct st_renderbuffer *strb = st_renderbuffer(stencilRb); - - if (strb->surface) { - if (check_clear_stencil_with_quad(ctx, stencilRb, - st->clear.enable_ds_separate)) - quad_buffers |= PIPE_CLEAR_STENCIL; - else - clear_buffers |= PIPE_CLEAR_STENCIL; - } - } - } - - /* - * If we're going to use clear_with_quad() for any reason, use it for - * everything possible. - */ - if (quad_buffers) { - quad_buffers |= clear_buffers; - clear_with_quad(ctx, - quad_buffers & PIPE_CLEAR_COLOR, - quad_buffers & PIPE_CLEAR_DEPTH, - quad_buffers & PIPE_CLEAR_STENCIL); - } else if (clear_buffers) { - /* driver cannot know it can clear everything if the buffer - * is a combined depth/stencil buffer but this wasn't actually - * required from the visual. Hence fix this up to avoid potential - * read-modify-write in the driver. - */ - float clearColor[4]; - - if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && - ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && - (depthRb == stencilRb) && - (ctx->DrawBuffer->Visual.depthBits == 0 || - ctx->DrawBuffer->Visual.stencilBits == 0)) - clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - - if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { - st_translate_color(ctx->Color.ClearColor, - ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, - clearColor); - } - - st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, - ctx->Depth.Clear, ctx->Stencil.Clear); - } - if (mask & BUFFER_BIT_ACCUM) - st_clear_accum_buffer(ctx, - ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); -} - - -void -st_init_clear_functions(struct dd_function_table *functions) -{ - functions->Clear = st_Clear; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + * Michel Dänzer + */ + +#include "main/glheader.h" +#include "main/formats.h" +#include "main/macros.h" +#include "program/prog_instruction.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_accum.h" +#include "st_cb_clear.h" +#include "st_cb_fbo.h" +#include "st_format.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" +#include "util/u_draw_quad.h" + +#include "cso_cache/cso_context.h" + + +/** + * Do per-context initialization for glClear. + */ +void +st_init_clear(struct st_context *st) +{ + struct pipe_screen *pscreen = st->pipe->screen; + + memset(&st->clear, 0, sizeof(st->clear)); + + st->clear.raster.gl_rasterization_rules = 1; + st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); +} + + +/** + * Free per-context state for glClear. + */ +void +st_destroy_clear(struct st_context *st) +{ + if (st->clear.fs) { + cso_delete_fragment_shader(st->cso_context, st->clear.fs); + st->clear.fs = NULL; + } + if (st->clear.vs) { + cso_delete_vertex_shader(st->cso_context, st->clear.vs); + st->clear.vs = NULL; + } + if (st->clear.vbuf) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf = NULL; + } +} + + +/** + * Helper function to set the fragment shaders. + */ +static INLINE void +set_fragment_shader(struct st_context *st) +{ + if (!st->clear.fs) + st->clear.fs = util_make_fragment_passthrough_shader(st->pipe); + + cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); +} + + +/** + * Helper function to set the vertex shader. + */ +static INLINE void +set_vertex_shader(struct st_context *st) +{ + /* vertex shader - still required to provide the linkage between + * fragment shader input semantics and vertex_element/buffers. + */ + if (!st->clear.vs) + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR }; + const uint semantic_indexes[] = { 0, 0 }; + st->clear.vs = util_make_vertex_passthrough_shader(st->pipe, 2, + semantic_names, + semantic_indexes); + } + + cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); +} + + +/** + * Draw a screen-aligned quadrilateral. + * Coords are clip coords with y=0=bottom. + */ +static void +draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, GLfloat z, + const GLfloat color[4]) +{ + struct pipe_context *pipe = st->pipe; + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ + GLuint i; + + if (st->clear.vbuf_slot >= max_slots) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; + } + + if (!st->clear.vbuf) { + st->clear.vbuf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + max_slots * sizeof(st->clear.vertices)); + } + + /* positions */ + st->clear.vertices[0][0][0] = x0; + st->clear.vertices[0][0][1] = y0; + + st->clear.vertices[1][0][0] = x1; + st->clear.vertices[1][0][1] = y0; + + st->clear.vertices[2][0][0] = x1; + st->clear.vertices[2][0][1] = y1; + + st->clear.vertices[3][0][0] = x0; + st->clear.vertices[3][0][1] = y1; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->clear.vertices[i][0][2] = z; + st->clear.vertices[i][0][3] = 1.0; + st->clear.vertices[i][1][0] = color[0]; + st->clear.vertices[i][1][1] = color[1]; + st->clear.vertices[i][1][2] = color[2]; + st->clear.vertices[i][1][3] = color[3]; + } + + /* put vertex data into vbuf */ + pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, + st->clear.vbuf_slot + * sizeof(st->clear.vertices), + sizeof(st->clear.vertices), + st->clear.vertices); + + /* draw */ + util_draw_vertex_buffer(pipe, + st->cso_context, + st->clear.vbuf, + st->clear.vbuf_slot * sizeof(st->clear.vertices), + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* Increment slot */ + st->clear.vbuf_slot++; +} + + + +/** + * Do glClear by drawing a quadrilateral. + * The vertices of the quad will be computed from the + * ctx->DrawBuffer->_X/Ymin/max fields. + */ +static void +clear_with_quad(struct gl_context *ctx, + GLboolean color, GLboolean depth, GLboolean stencil) +{ + struct st_context *st = st_context(ctx); + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; + const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; + const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; + float clearColor[4]; + + /* + printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, + color ? "color, " : "", + depth ? "depth, " : "", + stencil ? "stencil" : "", + x0, y0, + x1, y1); + */ + + cso_save_blend(st->cso_context); + cso_save_stencil_ref(st->cso_context); + cso_save_depth_stencil_alpha(st->cso_context); + cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + cso_save_clip(st->cso_context); + cso_save_fragment_shader(st->cso_context); + cso_save_vertex_shader(st->cso_context); + cso_save_vertex_elements(st->cso_context); + cso_save_vertex_buffers(st->cso_context); + + /* blend state: RGBA masking */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + if (color) { + if (ctx->Color.ColorMask[0][0]) + blend.rt[0].colormask |= PIPE_MASK_R; + if (ctx->Color.ColorMask[0][1]) + blend.rt[0].colormask |= PIPE_MASK_G; + if (ctx->Color.ColorMask[0][2]) + blend.rt[0].colormask |= PIPE_MASK_B; + if (ctx->Color.ColorMask[0][3]) + blend.rt[0].colormask |= PIPE_MASK_A; + if (st->ctx->Color.DitherFlag) + blend.dither = 1; + } + cso_set_blend(st->cso_context, &blend); + } + + /* depth_stencil state: always pass/set to ref value */ + { + struct pipe_depth_stencil_alpha_state depth_stencil; + memset(&depth_stencil, 0, sizeof(depth_stencil)); + if (depth) { + depth_stencil.depth.enabled = 1; + depth_stencil.depth.writemask = 1; + depth_stencil.depth.func = PIPE_FUNC_ALWAYS; + } + + if (stencil) { + struct pipe_stencil_ref stencil_ref; + memset(&stencil_ref, 0, sizeof(stencil_ref)); + depth_stencil.stencil[0].enabled = 1; + depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; + depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].valuemask = 0xff; + depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; + stencil_ref.ref_value[0] = ctx->Stencil.Clear; + cso_set_stencil_ref(st->cso_context, &stencil_ref); + } + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + } + + cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); + + cso_set_rasterizer(st->cso_context, &st->clear.raster); + + /* viewport state: viewport matching window dims */ + { + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * fb_width; + vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb_width; + vp.translate[1] = 0.5f * fb_height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(st->cso_context, &vp); + } + + cso_set_clip(st->cso_context, &st->clear.clip); + set_fragment_shader(st); + set_vertex_shader(st); + + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { + st_translate_color(ctx->Color.ClearColor, + ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, + clearColor); + } + + /* draw quad matching scissor rect */ + draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, clearColor); + + /* Restore pipe state */ + cso_restore_blend(st->cso_context); + cso_restore_stencil_ref(st->cso_context); + cso_restore_depth_stencil_alpha(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_clip(st->cso_context); + cso_restore_fragment_shader(st->cso_context); + cso_restore_vertex_shader(st->cso_context); + cso_restore_vertex_elements(st->cso_context); + cso_restore_vertex_buffers(st->cso_context); +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3]) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the combiend depth/stencil buffer by + * drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + const GLuint stencilMax = 0xff; + GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (maskStencil) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the stencil buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + const GLuint stencilMax = 0xff; + const GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (maskStencil) + return GL_TRUE; + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + /* This is correct, but it is necessary to look at the depth clear + * value held in the surface when it comes time to issue the clear, + * rather than taking depth and stencil clear values from the + * current state. + */ + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + + +/** + * Called when we need to flush. + */ +void +st_flush_clear(struct st_context *st) +{ + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; +} + + + +/** + * Called via ctx->Driver.Clear() + */ +static void +st_Clear(struct gl_context *ctx, GLbitfield mask) +{ + static const GLbitfield BUFFER_BITS_DS + = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + struct st_context *st = st_context(ctx); + struct gl_renderbuffer *depthRb + = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + struct gl_renderbuffer *stencilRb + = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + GLbitfield quad_buffers = 0x0; + GLbitfield clear_buffers = 0x0; + GLuint i; + + /* This makes sure the pipe has the latest scissor, etc values */ + st_validate_state( st ); + + if (mask & BUFFER_BITS_COLOR) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; + + if (mask & (1 << b)) { + struct gl_renderbuffer *rb + = ctx->DrawBuffer->Attachment[b].Renderbuffer; + struct st_renderbuffer *strb = st_renderbuffer(rb); + + if (!strb || !strb->surface) + continue; + + if (check_clear_color_with_quad( ctx, rb )) + quad_buffers |= PIPE_CLEAR_COLOR; + else + clear_buffers |= PIPE_CLEAR_COLOR; + } + } + } + + if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { + /* clearing combined depth + stencil */ + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_stencil_with_quad(ctx, depthRb)) + quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + else + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + } + } + else { + /* separate depth/stencil clears */ + /* I don't think truly separate buffers are actually possible in gallium or hw? */ + if (mask & BUFFER_BIT_DEPTH) { + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_with_quad(ctx, depthRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_DEPTH; + else + clear_buffers |= PIPE_CLEAR_DEPTH; + } + } + if (mask & BUFFER_BIT_STENCIL) { + struct st_renderbuffer *strb = st_renderbuffer(stencilRb); + + if (strb->surface) { + if (check_clear_stencil_with_quad(ctx, stencilRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_STENCIL; + else + clear_buffers |= PIPE_CLEAR_STENCIL; + } + } + } + + /* + * If we're going to use clear_with_quad() for any reason, use it for + * everything possible. + */ + if (quad_buffers) { + quad_buffers |= clear_buffers; + clear_with_quad(ctx, + quad_buffers & PIPE_CLEAR_COLOR, + quad_buffers & PIPE_CLEAR_DEPTH, + quad_buffers & PIPE_CLEAR_STENCIL); + } else if (clear_buffers) { + /* driver cannot know it can clear everything if the buffer + * is a combined depth/stencil buffer but this wasn't actually + * required from the visual. Hence fix this up to avoid potential + * read-modify-write in the driver. + */ + float clearColor[4]; + + if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + (depthRb == stencilRb) && + (ctx->DrawBuffer->Visual.depthBits == 0 || + ctx->DrawBuffer->Visual.stencilBits == 0)) + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + + if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { + st_translate_color(ctx->Color.ClearColor, + ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, + clearColor); + } + + st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, + ctx->Depth.Clear, ctx->Stencil.Clear); + } + if (mask & BUFFER_BIT_ACCUM) + st_clear_accum_buffer(ctx, + ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); +} + + +void +st_init_clear_functions(struct dd_function_table *functions) +{ + functions->Clear = st_Clear; +} diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index 8bdb3c801..1b824c0de 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -1530,7 +1530,7 @@ st_copy_texsubimage(struct gl_context *ctx, GLint srcY0, srcY1; struct pipe_surface surf_tmpl; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); - surf_tmpl.format = stImage->pt->format; + surf_tmpl.format = util_format_linear(stImage->pt->format); surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; surf_tmpl.u.tex.level = stImage->level; surf_tmpl.u.tex.first_layer = stImage->face + destZ; diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index 9b1322b6c..2c435041a 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -1,364 +1,409 @@ -/* - * Copyright © 2010 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -#ifndef PIXMAN_ARM_COMMON_H -#define PIXMAN_ARM_COMMON_H - -#include "pixman-fast-path.h" - -/* Define some macros which can expand into proxy functions between - * ARM assembly optimized functions and the rest of pixman fast path API. - * - * All the low level ARM assembly functions have to use ARM EABI - * calling convention and take up to 8 arguments: - * width, height, dst, dst_stride, src, src_stride, mask, mask_stride - * - * The arguments are ordered with the most important coming first (the - * first 4 arguments are passed to function in registers, the rest are - * on stack). The last arguments are optional, for example if the - * function is not using mask, then 'mask' and 'mask_stride' can be - * omitted when doing a function call. - * - * Arguments 'src' and 'mask' contain either a pointer to the top left - * pixel of the composited rectangle or a pixel color value depending - * on the function type. In the case of just a color value (solid source - * or mask), the corresponding stride argument is unused. - */ - -#define SKIP_ZERO_SRC 1 -#define SKIP_ZERO_MASK 2 - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - int32_t dst_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src, \ - int32_t unused, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - mask_type *mask_line; \ - int32_t dst_stride, mask_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src, 0, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - uint32_t mask); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - uint32_t mask; \ - \ - mask = _pixman_image_get_solid ( \ - imp, mask_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_MASK) && mask == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ - src_type, src_cnt, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - mask_type *mask_line; \ - int32_t dst_stride, src_stride, mask_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x);\ -} \ - \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, COVER) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, NONE) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, PAD) - -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - const uint8_t * mask); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ - dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x, \ - mask); \ -} \ - \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, PAD, TRUE, FALSE) - -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -#endif +/* + * Copyright © 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +#ifndef PIXMAN_ARM_COMMON_H +#define PIXMAN_ARM_COMMON_H + +#include "pixman-fast-path.h" + +/* Define some macros which can expand into proxy functions between + * ARM assembly optimized functions and the rest of pixman fast path API. + * + * All the low level ARM assembly functions have to use ARM EABI + * calling convention and take up to 8 arguments: + * width, height, dst, dst_stride, src, src_stride, mask, mask_stride + * + * The arguments are ordered with the most important coming first (the + * first 4 arguments are passed to function in registers, the rest are + * on stack). The last arguments are optional, for example if the + * function is not using mask, then 'mask' and 'mask_stride' can be + * omitted when doing a function call. + * + * Arguments 'src' and 'mask' contain either a pointer to the top left + * pixel of the composited rectangle or a pixel color value depending + * on the function type. In the case of just a color value (solid source + * or mask), the corresponding stride argument is unused. + */ + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src, \ + int32_t unused, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + mask_type *mask_line; \ + int32_t dst_stride, mask_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src, 0, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + uint32_t mask); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ + src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + mask_type *mask_line; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x);\ +} \ + \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FALSE, FALSE) + +#endif diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index c168e10c2..6cca0f2cc 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -1,2590 +1,2718 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains implementations of NEON optimized pixel processing - * functions. There is no full and detailed tutorial, but some functions - * (those which are exposing some new or interesting features) are - * extensively commented and can be used as examples. - * - * You may want to have a look at the comments for following functions: - * - pixman_composite_over_8888_0565_asm_neon - * - pixman_composite_over_n_8_0565_asm_neon - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ - .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ - .arm - .altmacro - -#include "pixman-arm-neon-asm.h" - -/* Global configuration options and preferences */ - -/* - * The code can optionally make use of unaligned memory accesses to improve - * performance of handling leading/trailing pixels for each scanline. - * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for - * example in linux if unaligned memory accesses are not configured to - * generate.exceptions. - */ -.set RESPECT_STRICT_ALIGNMENT, 1 - -/* - * Set default prefetch type. There is a choice between the following options: - * - * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work - * as NOP to workaround some HW bugs or for whatever other reason) - * - * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where - * advanced prefetch intruduces heavy overhead) - * - * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 - * which can run ARM and NEON instructions simultaneously so that extra ARM - * instructions do not add (many) extra cycles, but improve prefetch efficiency) - * - * Note: some types of function can't support advanced prefetch and fallback - * to simple one (those which handle 24bpp pixels) - */ -.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED - -/* Prefetch distance in pixels for simple prefetch */ -.set PREFETCH_DISTANCE_SIMPLE, 64 - -/* - * Implementation of pixman_composite_over_8888_0565_asm_neon - * - * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and - * performs OVER compositing operation. Function fast_composite_over_8888_0565 - * from pixman-fast-path.c does the same in C and can be used as a reference. - * - * First we need to have some NEON assembly code which can do the actual - * operation on the pixels and provide it to the template macro. - * - * Template macro quite conveniently takes care of emitting all the necessary - * code for memory reading and writing (including quite tricky cases of - * handling unaligned leading/trailing pixels), so we only need to deal with - * the data in NEON registers. - * - * NEON registers allocation in general is recommented to be the following: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) - * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) - * d28, d29, d30, d31 - place for storing the result (destination pixels) - * - * As can be seen above, four 64-bit NEON registers are used for keeping - * intermediate pixel data and up to 8 pixels can be processed in one step - * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). - * - * This particular function uses the following registers allocation: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5 - contain loaded destination pixels (they are needed) - * d28, d29 - place for storing the result (destination pixels) - */ - -/* - * Step one. We need to have some code to do some arithmetics on pixel data. - * This is implemented as a pair of macros: '*_head' and '*_tail'. When used - * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, - * perform all the needed calculations and write the result to {d28, d29}. - * The rationale for having two macros and not just one will be explained - * later. In practice, any single monolitic function which does the work can - * be split into two parts in any arbitrary way without affecting correctness. - * - * There is one special trick here too. Common template macro can optionally - * make our life a bit easier by doing R, G, B, A color components - * deinterleaving for 32bpp pixel formats (and this feature is used in - * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that - * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we - * actually use d0 register for blue channel (a vector of eight 8-bit - * values), d1 register for green, d2 for red and d3 for alpha. This - * simple conversion can be also done with a few NEON instructions: - * - * Packed to planar conversion: - * vuzp.8 d0, d1 - * vuzp.8 d2, d3 - * vuzp.8 d1, d3 - * vuzp.8 d0, d2 - * - * Planar to packed conversion: - * vzip.8 d0, d2 - * vzip.8 d1, d3 - * vzip.8 d2, d3 - * vzip.8 d0, d1 - * - * But pixel can be loaded directly in planar format using VLD4.8 NEON - * instruction. It is 1 cycle slower than VLD1.32, so this is not always - * desirable, that's why deinterleaving is optional. - * - * But anyway, here is the code: - */ -.macro pixman_composite_over_8888_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vmvn.8 d3, d3 /* invert source alpha */ - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_8888_0565_process_pixblock_tail - /* ... continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* - * OK, now we got almost everything that we need. Using the above two - * macros, the work can be done right. But now we want to optimize - * it a bit. ARM Cortex-A8 is an in-order core, and benefits really - * a lot from good code scheduling and software pipelining. - * - * Let's construct some code, which will run in the core main loop. - * Some pseudo-code of the main loop will look like this: - * head - * while (...) { - * tail - * head - * } - * tail - * - * It may look a bit weird, but this setup allows to hide instruction - * latencies better and also utilize dual-issue capability more - * efficiently (make pairs of load-store and ALU instructions). - * - * So what we need now is a '*_tail_head' macro, which will be used - * in the core main loop. A trivial straightforward implementation - * of this macro would look like this: - * - * pixman_composite_over_8888_0565_process_pixblock_tail - * vst1.16 {d28, d29}, [DST_W, :128]! - * vld1.16 {d4, d5}, [DST_R, :128]! - * vld4.32 {d0, d1, d2, d3}, [SRC]! - * pixman_composite_over_8888_0565_process_pixblock_head - * cache_preload 8, 8 - * - * Now it also got some VLD/VST instructions. We simply can't move from - * processing one block of pixels to the other one with just arithmetics. - * The previously processed data needs to be written to memory and new - * data needs to be fetched. Fortunately, this main loop does not deal - * with partial leading/trailing pixels and can load/store a full block - * of pixels in a bulk. Additionally, destination buffer is already - * 16 bytes aligned here (which is good for performance). - * - * New things here are DST_R, DST_W, SRC and MASK identifiers. These - * are the aliases for ARM registers which are used as pointers for - * accessing data. We maintain separate pointers for reading and writing - * destination buffer (DST_R and DST_W). - * - * Another new thing is 'cache_preload' macro. It is used for prefetching - * data into CPU L2 cache and improve performance when dealing with large - * images which are far larger than cache size. It uses one argument - * (actually two, but they need to be the same here) - number of pixels - * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some - * details about this macro. Moreover, if good performance is needed - * the code from this macro needs to be copied into '*_tail_head' macro - * and mixed with the rest of code for optimal instructions scheduling. - * We are actually doing it below. - * - * Now after all the explanations, here is the optimized code. - * Different instruction streams (originaling from '*_head', '*_tail' - * and 'cache_preload' macro) use different indentation levels for - * better readability. Actually taking the code from one of these - * indentation levels and ignoring a few VLD/VST instructions would - * result in exactly the code from '*_head', '*_tail' or 'cache_preload' - * macro! - */ - -#if 1 - -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - vqadd.u8 d16, d2, d20 - vld1.16 {d4, d5}, [DST_R, :128]! - vqadd.u8 q9, q0, q11 - vshrn.u16 d6, q2, #8 - fetch_src_pixblock - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vshll.u8 q14, d16, #8 - PF add PF_X, PF_X, #8 - vshll.u8 q8, d19, #8 - PF tst PF_CTL, #0xF - vsri.u8 d6, d6, #5 - PF addne PF_X, PF_X, #8 - vmvn.8 d3, d3 - PF subne PF_CTL, PF_CTL, #1 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - vmull.u8 q10, d3, d6 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vsri.u16 q14, q8, #5 - PF cmp PF_X, ORIG_W - vshll.u8 q9, d18, #8 - vrshr.u16 q13, q10, #8 - PF subge PF_X, PF_X, ORIG_W - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - PF subges PF_CTL, PF_CTL, #0x10 - vsri.u16 q14, q9, #11 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vraddhn.u16 d22, q12, q15 - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -#else - -/* If we did not care much about the performance, we would just use this... */ -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - pixman_composite_over_8888_0565_process_pixblock_tail - vst1.16 {d28, d29}, [DST_W, :128]! - vld1.16 {d4, d5}, [DST_R, :128]! - fetch_src_pixblock - pixman_composite_over_8888_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -#endif - -/* - * And now the final part. We are using 'generate_composite_function' macro - * to put all the stuff together. We are specifying the name of the function - * which we want to get, number of bits per pixel for the source, mask and - * destination (0 if unused, like mask in this case). Next come some bit - * flags: - * FLAG_DST_READWRITE - tells that the destination buffer is both read - * and written, for write-only buffer we would use - * FLAG_DST_WRITEONLY flag instead - * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data - * and separate color channels for 32bpp format. - * The next things are: - * - the number of pixels processed per iteration (8 in this case, because - * that's the maximum what can fit into four 64-bit NEON registers). - * - prefetch distance, measured in pixel blocks. In this case it is 5 times - * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal - * prefetch distance can be selected by running some benchmarks. - * - * After that we specify some macros, these are 'default_init', - * 'default_cleanup' here which are empty (but it is possible to have custom - * init/cleanup macros to be able to save/restore some extra NEON registers - * like d8-d15 or do anything else) followed by - * 'pixman_composite_over_8888_0565_process_pixblock_head', - * 'pixman_composite_over_8888_0565_process_pixblock_tail' and - * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' - * which we got implemented above. - * - * The last part is the NEON registers allocation scheme. - */ -generate_composite_function \ - pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_n_0565_process_pixblock_tail - /* ... continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_0565_process_pixblock_tail_head - pixman_composite_over_n_0565_process_pixblock_tail - vld1.16 {d4, d5}, [DST_R, :128]! - vst1.16 {d28, d29}, [DST_W, :128]! - pixman_composite_over_n_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] - vmvn.8 d3, d3 /* invert source alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_0565_init, \ - default_cleanup, \ - pixman_composite_over_n_0565_process_pixblock_head, \ - pixman_composite_over_n_0565_process_pixblock_tail, \ - pixman_composite_over_n_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_0565_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q14, d2, #8 - vshll.u8 q9, d0, #8 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail_head - vsri.u16 q14, q8, #5 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - fetch_src_pixblock - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vsri.u16 q14, q9, #11 - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vshll.u8 q14, d2, #8 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vshll.u8 q9, d0, #8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_src_0565_8888_process_pixblock_head - vshrn.u16 d30, q0, #8 - vshrn.u16 d29, q0, #3 - vsli.u16 q0, q0, #5 - vmov.u8 d31, #255 - vsri.u8 d30, d30, #5 - vsri.u8 d29, d29, #6 - vshrn.u16 d28, q0, #2 -.endm - -.macro pixman_composite_src_0565_8888_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_src_0565_8888_process_pixblock_tail_head - pixman_composite_src_0565_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - fetch_src_pixblock - pixman_composite_src_0565_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_process_pixblock_head - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #32 - PF tst PF_CTL, #0xF - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #32 - PF subne PF_CTL, PF_CTL, #1 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head - vmvn.8 d24, d3 /* get inverted alpha */ - /* do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8888_process_pixblock_head - pixman_composite_out_reverse_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8888_process_pixblock_tail_head - pixman_composite_over_8888_8888_process_pixblock_tail - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - pixman_composite_over_8888_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -.macro pixman_composite_over_reverse_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d7[0]}, [DUMMY] - vdup.8 d4, d7[0] - vdup.8 d5, d7[1] - vdup.8 d6, d7[2] - vdup.8 d7, d7[3] -.endm - -generate_composite_function \ - pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_reverse_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 4, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8_0565_process_pixblock_head - vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vmull.u8 q8, d3, d6 /* now do alpha blending */ - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail - /* 3 cycle bubble (after vmull.u8) */ - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - vqadd.u8 d16, d2, d16 - /* 1 cycle bubble */ - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 /* convert to 16bpp */ - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - /* 1 cycle bubble */ - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head - vld1.16 {d4, d5}, [DST_R, :128]! - vshrn.u16 d6, q2, #8 - fetch_mask_pixblock - vshrn.u16 d7, q2, #3 - fetch_src_pixblock - vmull.u8 q6, d24, d10 - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - vqadd.u8 d16, d2, d16 - vmull.u8 q1, d24, d9 - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 - vmull.u8 q0, d24, d8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vmull.u8 q7, d24, d11 - vsri.u16 q14, q9, #11 - - cache_preload 8, 8 - - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vst1.16 {d28, d29}, [DST_W, :128]! - vmull.u8 q8, d3, d6 - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -/* - * This function needs a special initialization of solid mask. - * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET - * offset, split into color components and replicated in d8-d11 - * registers. Additionally, this function needs all the NEON registers, - * so it has to save d8-d15 registers which are callee saved according - * to ABI. These registers are restored from 'cleanup' macro. All the - * other NEON registers are caller saved, so can be clobbered freely - * without introducing any problems. - */ -.macro pixman_composite_over_n_8_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_0565_init, \ - pixman_composite_over_n_8_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d24[0]}, [DUMMY] - vdup.8 d24, d24[3] -.endm - -.macro pixman_composite_over_8888_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_0565_init, \ - pixman_composite_over_8888_n_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0565_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 16, 16 -.endm - -generate_composite_function \ - pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_0565_process_pixblock_head, \ - pixman_composite_src_0565_0565_process_pixblock_tail, \ - pixman_composite_src_0565_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail_head - vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #8 - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8_init, \ - pixman_composite_src_n_8_cleanup, \ - pixman_composite_src_n_8_process_pixblock_head, \ - pixman_composite_src_n_8_process_pixblock_tail, \ - pixman_composite_src_n_8_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_0565_init, \ - pixman_composite_src_n_0565_cleanup, \ - pixman_composite_src_n_0565_process_pixblock_head, \ - pixman_composite_src_n_0565_process_pixblock_tail, \ - pixman_composite_src_n_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8888_init, \ - pixman_composite_src_n_8888_cleanup, \ - pixman_composite_src_n_8888_process_pixblock_head, \ - pixman_composite_src_n_8888_process_pixblock_tail, \ - pixman_composite_src_n_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_8888_process_pixblock_head, \ - pixman_composite_src_8888_8888_process_pixblock_tail, \ - pixman_composite_src_8888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_x888_8888_process_pixblock_head - vorr q0, q0, q2 - vorr q1, q1, q2 -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - vorr q0, q0, q2 - vorr q1, q1, q2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_x888_8888_init - vmov.u8 q2, #0xFF - vshl.u32 q2, q2, #24 -.endm - -generate_composite_function \ - pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_x888_8888_init, \ - default_cleanup, \ - pixman_composite_src_x888_8888_process_pixblock_head, \ - pixman_composite_src_x888_8888_process_pixblock_tail, \ - pixman_composite_src_x888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8888_process_pixblock_head - /* expecting deinterleaved source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24 (d25, d26, d27 are unused) */ - - /* in */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 d24, d3 /* get inverted alpha */ - /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ - /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head - pixman_composite_over_n_8_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - cache_preload 8, 8 - pixman_composite_over_n_8_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8888_init, \ - pixman_composite_over_n_8_8888_cleanup, \ - pixman_composite_over_n_8_8888_process_pixblock_head, \ - pixman_composite_over_n_8_8888_process_pixblock_tail, \ - pixman_composite_over_n_8_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8_process_pixblock_head - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d8 - vmull.u8 q6, d26, d8 - vmull.u8 q7, d27, d8 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 q12, q0 - vmvn.8 q13, q1 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8_process_pixblock_tail_head - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_n_8_8_process_pixblock_tail - fetch_mask_pixblock - cache_preload 32, 32 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - pixman_composite_over_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d8[0]}, [DUMMY] - vdup.8 d8, d8[3] -.endm - -.macro pixman_composite_over_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8_init, \ - pixman_composite_over_n_8_8_cleanup, \ - pixman_composite_over_n_8_8_process_pixblock_head, \ - pixman_composite_over_n_8_8_process_pixblock_tail, \ - pixman_composite_over_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {d8, d9, d10, d11} - * dest in {d4, d5, d6, d7 } - * mask in {d24, d25, d26, d27} - * output: updated src in {d0, d1, d2, d3 } - * updated mask in {d24, d25, d26, d3 } - */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d9 - vmull.u8 q6, d26, d10 - vmull.u8 q7, d27, d11 - vmull.u8 q9, d11, d25 - vmull.u8 q12, d11, d24 - vmull.u8 q13, d11, d26 - vrshr.u16 q8, q0, #8 - vrshr.u16 q10, q1, #8 - vrshr.u16 q11, q6, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q10 - vraddhn.u16 d2, q6, q11 - vrshr.u16 q11, q12, #8 - vrshr.u16 q8, q9, #8 - vrshr.u16 q6, q13, #8 - vrshr.u16 q10, q7, #8 - vraddhn.u16 d24, q12, q11 - vraddhn.u16 d25, q9, q8 - vraddhn.u16 d26, q13, q6 - vraddhn.u16 d3, q7, q10 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in {d28, d29, d30, d31} - */ - vmvn.8 d24, d24 - vmvn.8 d25, d25 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmvn.8 d26, d26 - vmvn.8 d27, d3 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - fetch_mask_pixblock - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - cache_preload 8, 8 - pixman_composite_over_n_8888_8888_ca_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_n_8888_8888_ca_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8888_8888_ca_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_8888_ca_init, \ - pixman_composite_over_n_8888_8888_ca_cleanup, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_in_n_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* and destination data in {d4, d5, d6, d7} */ - vmull.u8 q8, d4, d3 - vmull.u8 q9, d5, d3 - vmull.u8 q10, d6, d3 - vmull.u8 q11, d7, d3 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q8, q14 - vraddhn.u16 d29, q9, q15 - vraddhn.u16 d30, q10, q12 - vraddhn.u16 d31, q11, q13 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail_head - pixman_composite_in_n_8_process_pixblock_tail - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - cache_preload 32, 32 - pixman_composite_in_n_8_process_pixblock_head - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_in_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_in_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_in_n_8_init, \ - pixman_composite_in_n_8_cleanup, \ - pixman_composite_in_n_8_process_pixblock_head, \ - pixman_composite_in_n_8_process_pixblock_tail, \ - pixman_composite_in_n_8_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -.macro pixman_composite_add_n_8_8_process_pixblock_head - /* expecting source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24, d25, d26, d27 */ - vmull.u8 q0, d24, d11 - vmull.u8 q1, d25, d11 - vmull.u8 q6, d26, d11 - vmull.u8 q7, d27, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_n_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_n_8_8_process_pixblock_tail_head - pixman_composite_add_n_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - cache_preload 32, 32 - pixman_composite_add_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_add_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8_init, \ - pixman_composite_add_n_8_8_cleanup, \ - pixman_composite_add_n_8_8_process_pixblock_head, \ - pixman_composite_add_n_8_8_process_pixblock_tail, \ - pixman_composite_add_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d24, d0 - vmull.u8 q9, d25, d1 - vmull.u8 q10, d26, d2 - vmull.u8 q11, d27, d3 - vrshr.u16 q0, q8, #8 - vrshr.u16 q1, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q12, q10 - vraddhn.u16 d3, q13, q11 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_8_8_8_process_pixblock_tail_head - pixman_composite_add_8_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - fetch_src_pixblock - cache_preload 32, 32 - pixman_composite_add_8_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_8_8_8_init -.endm - -.macro pixman_composite_add_8_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8_8_8_init, \ - pixman_composite_add_8_8_8_cleanup, \ - pixman_composite_add_8_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d27, d0 - vmull.u8 q9, d27, d1 - vmull.u8 q10, d27, d2 - vmull.u8 q11, d27, d3 - /* 1 cycle bubble */ - vrsra.u16 q8, q8, #8 - vrsra.u16 q9, q9, #8 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail - /* 2 cycle bubble */ - vrshrn.u16 d28, q8, #8 - vrshrn.u16 d29, q9, #8 - vrshrn.u16 d30, q10, #8 - vrshrn.u16 d31, q11, #8 - vqadd.u8 q14, q2, q14 - /* 1 cycle bubble */ - vqadd.u8 q15, q3, q15 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - vrshrn.u16 d28, q8, #8 - fetch_mask_pixblock - vrshrn.u16 d29, q9, #8 - vmull.u8 q8, d27, d0 - vrshrn.u16 d30, q10, #8 - vmull.u8 q9, d27, d1 - vrshrn.u16 d31, q11, #8 - vmull.u8 q10, d27, d2 - vqadd.u8 q14, q2, q14 - vmull.u8 q11, d27, d3 - vqadd.u8 q15, q3, q15 - vrsra.u16 q8, q8, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrsra.u16 q9, q9, #8 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vrsra.u16 q10, q10, #8 - - cache_preload 8, 8 - - vrsra.u16 q11, q11, #8 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -generate_composite_function \ - pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_add_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8888_init, \ - pixman_composite_add_n_8_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_8888_n_8888_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vld1.32 {d27[0]}, [DUMMY] - vdup.8 d27, d27[3] -.endm - -.macro pixman_composite_add_8888_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8888_n_8888_init, \ - pixman_composite_add_8888_n_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* solid mask is in d15 */ - - /* 'in' */ - vmull.u8 q8, d15, d3 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q13, q8, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d3, q8, q13 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 - vmvn.8 d24, d3 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_8888_process_pixblock_head - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_over_8888_n_8888_init - add DUMMY, sp, #48 - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_8888_n_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_8888_init, \ - pixman_composite_over_8888_n_8888_cleanup, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0888_process_pixblock_head -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail_head - vst3.8 {d0, d1, d2}, [DST_W]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0888_process_pixblock_head, \ - pixman_composite_src_0888_0888_process_pixblock_tail, \ - pixman_composite_src_0888_0888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_head - vswp d0, d2 -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head - vst4.8 {d0, d1, d2, d3}, [DST_W]! - fetch_src_pixblock - vswp d0, d2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_0888_8888_rev_init - veor d3, d3, d3 -.endm - -generate_composite_function \ - pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_0888_8888_rev_init, \ - default_cleanup, \ - pixman_composite_src_0888_8888_rev_process_pixblock_head, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q9, d2, #8 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail - vshll.u8 q14, d0, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head - vshll.u8 q14, d0, #8 - fetch_src_pixblock - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! - vshll.u8 q9, d2, #8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0565_rev_process_pixblock_head, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d30, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d30, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endm - -generate_composite_function \ - pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_pixbuf_8888_process_pixblock_head, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d28, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d28, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endm - -generate_composite_function \ - pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmvn.8 d7, d15 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vmull.u8 q8, d7, d4 - vmull.u8 q9, d7, d5 - vmull.u8 q13, d7, d6 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q13, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q13 - vqadd.u8 q0, q0, q14 - vqadd.u8 q1, q1, q15 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_over_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_over_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_0565_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_0565_n_0565_init, \ - pixman_composite_over_0565_n_0565_cleanup, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail - vqadd.u8 q0, q0, q2 - vqadd.u8 q1, q1, q3 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_add_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_add_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_add_0565_8_0565_process_pixblock_head, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q5, d6, d5, d4 - /* destination pixel data is in {d4, d5, d6, xx} */ - vmvn.8 d24, d15 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 -.endm - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vraddhn.u16 d0, q14, q8 - vraddhn.u16 d1, q15, q9 - vraddhn.u16 d2, q12, q10 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_0565_process_pixblock_tail - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_out_reverse_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8_0565_process_pixblock_head, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 15, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - -.macro bilinear_interpolate_last_pixel - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d0}, [TMP1] - vshr.u16 d30, d24, #8 - vld1.32 {d1}, [TMP2] - vmull.u8 q1, d0, d28 - vmlal.u8 q1, d1, d29 - /* 5 cycles bubble */ - vshll.u16 q0, d2, #8 - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - /* 5 cycles bubble */ - vshrn.u32 d0, q0, #16 - /* 3 cycles bubble */ - vmovn.u16 d0, q0 - /* 1 cycle bubble */ - vst1.32 {d0[0]}, [OUT, :32]! -.endm - -.macro bilinear_interpolate_two_pixels - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d0}, [TMP1] - vld1.32 {d1}, [TMP2] - vmull.u8 q1, d0, d28 - vmlal.u8 q1, d1, d29 - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d20}, [TMP1] - vld1.32 {d21}, [TMP2] - vmull.u8 q11, d20, d28 - vmlal.u8 q11, d21, d29 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 - vshll.u16 q0, d2, #8 - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - vshrn.u32 d30, q0, #16 - vshrn.u32 d31, q10, #16 - vmovn.u16 d0, q15 - vst1.32 {d0}, [OUT]! -.endm - -.macro bilinear_interpolate_four_pixels - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d0}, [TMP1] - vld1.32 {d1}, [TMP2] - vmull.u8 q1, d0, d28 - vmlal.u8 q1, d1, d29 - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d20}, [TMP1] - vld1.32 {d21}, [TMP2] - vmull.u8 q11, d20, d28 - vmlal.u8 q11, d21, d29 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 - vshll.u16 q0, d2, #8 - vmlsl.u16 q0, d2, d30 - vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 - vmlsl.u16 q10, d22, d31 - vmlal.u16 q10, d23, d31 - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d4}, [TMP1] - vld1.32 {d5}, [TMP2] - vmull.u8 q3, d4, d28 - vmlal.u8 q3, d5, d29 - mov TMP1, X, asr #16 - mov TMP2, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP1, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {d16}, [TMP1] - vld1.32 {d17}, [TMP2] - vmull.u8 q9, d16, d28 - vmlal.u8 q9, d17, d29 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 - vshll.u16 q2, d6, #8 - vmlsl.u16 q2, d6, d30 - vmlal.u16 q2, d7, d30 - vshll.u16 q8, d18, #8 - vmlsl.u16 q8, d18, d31 - vmlal.u16 q8, d19, d31 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q10, #16 - vshrn.u32 d4, q2, #16 - vshrn.u32 d5, q8, #16 - vmovn.u16 d0, q0 - vmovn.u16 d1, q2 - vst1.32 {d0, d1}, [OUT]! -.endm - - -/* - * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out, - * const uint32_t * top, - * const uint32_t * bottom, - * int wt, - * int wb, - * pixman_fixed_t x, - * pixman_fixed_t ux, - * int width) - */ - -pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon - OUT .req r0 - TOP .req r1 - BOTTOM .req r2 - WT .req r3 - WB .req r4 - X .req r5 - UX .req r6 - WIDTH .req ip - TMP1 .req r3 - TMP2 .req r4 - - mov ip, sp - push {r4, r5, r6, r7} - ldmia ip, {WB, X, UX, WIDTH} - - cmp WIDTH, #0 - ble 3f - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - vadd.u16 q13, q13, q13 - - subs WIDTH, WIDTH, #4 - blt 1f -0: - bilinear_interpolate_four_pixels - subs WIDTH, WIDTH, #4 - bge 0b -1: - tst WIDTH, #2 - beq 2f - bilinear_interpolate_two_pixels -2: - tst WIDTH, #1 - beq 3f - bilinear_interpolate_last_pixel -3: - pop {r4, r5, r6, r7} - bx lr - - .unreq OUT - .unreq TOP - .unreq BOTTOM - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq TMP2 -.endfunc +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains implementations of NEON optimized pixel processing + * functions. There is no full and detailed tutorial, but some functions + * (those which are exposing some new or interesting features) are + * extensively commented and can be used as examples. + * + * You may want to have a look at the comments for following functions: + * - pixman_composite_over_8888_0565_asm_neon + * - pixman_composite_over_n_8_0565_asm_neon + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ + .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ + .arm + .altmacro + +#include "pixman-arm-neon-asm.h" + +/* Global configuration options and preferences */ + +/* + * The code can optionally make use of unaligned memory accesses to improve + * performance of handling leading/trailing pixels for each scanline. + * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for + * example in linux if unaligned memory accesses are not configured to + * generate.exceptions. + */ +.set RESPECT_STRICT_ALIGNMENT, 1 + +/* + * Set default prefetch type. There is a choice between the following options: + * + * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work + * as NOP to workaround some HW bugs or for whatever other reason) + * + * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where + * advanced prefetch intruduces heavy overhead) + * + * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 + * which can run ARM and NEON instructions simultaneously so that extra ARM + * instructions do not add (many) extra cycles, but improve prefetch efficiency) + * + * Note: some types of function can't support advanced prefetch and fallback + * to simple one (those which handle 24bpp pixels) + */ +.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED + +/* Prefetch distance in pixels for simple prefetch */ +.set PREFETCH_DISTANCE_SIMPLE, 64 + +/* + * Implementation of pixman_composite_over_8888_0565_asm_neon + * + * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and + * performs OVER compositing operation. Function fast_composite_over_8888_0565 + * from pixman-fast-path.c does the same in C and can be used as a reference. + * + * First we need to have some NEON assembly code which can do the actual + * operation on the pixels and provide it to the template macro. + * + * Template macro quite conveniently takes care of emitting all the necessary + * code for memory reading and writing (including quite tricky cases of + * handling unaligned leading/trailing pixels), so we only need to deal with + * the data in NEON registers. + * + * NEON registers allocation in general is recommented to be the following: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) + * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) + * d28, d29, d30, d31 - place for storing the result (destination pixels) + * + * As can be seen above, four 64-bit NEON registers are used for keeping + * intermediate pixel data and up to 8 pixels can be processed in one step + * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). + * + * This particular function uses the following registers allocation: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5 - contain loaded destination pixels (they are needed) + * d28, d29 - place for storing the result (destination pixels) + */ + +/* + * Step one. We need to have some code to do some arithmetics on pixel data. + * This is implemented as a pair of macros: '*_head' and '*_tail'. When used + * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, + * perform all the needed calculations and write the result to {d28, d29}. + * The rationale for having two macros and not just one will be explained + * later. In practice, any single monolitic function which does the work can + * be split into two parts in any arbitrary way without affecting correctness. + * + * There is one special trick here too. Common template macro can optionally + * make our life a bit easier by doing R, G, B, A color components + * deinterleaving for 32bpp pixel formats (and this feature is used in + * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that + * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we + * actually use d0 register for blue channel (a vector of eight 8-bit + * values), d1 register for green, d2 for red and d3 for alpha. This + * simple conversion can be also done with a few NEON instructions: + * + * Packed to planar conversion: + * vuzp.8 d0, d1 + * vuzp.8 d2, d3 + * vuzp.8 d1, d3 + * vuzp.8 d0, d2 + * + * Planar to packed conversion: + * vzip.8 d0, d2 + * vzip.8 d1, d3 + * vzip.8 d2, d3 + * vzip.8 d0, d1 + * + * But pixel can be loaded directly in planar format using VLD4.8 NEON + * instruction. It is 1 cycle slower than VLD1.32, so this is not always + * desirable, that's why deinterleaving is optional. + * + * But anyway, here is the code: + */ +.macro pixman_composite_over_8888_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vmvn.8 d3, d3 /* invert source alpha */ + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_8888_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* + * OK, now we got almost everything that we need. Using the above two + * macros, the work can be done right. But now we want to optimize + * it a bit. ARM Cortex-A8 is an in-order core, and benefits really + * a lot from good code scheduling and software pipelining. + * + * Let's construct some code, which will run in the core main loop. + * Some pseudo-code of the main loop will look like this: + * head + * while (...) { + * tail + * head + * } + * tail + * + * It may look a bit weird, but this setup allows to hide instruction + * latencies better and also utilize dual-issue capability more + * efficiently (make pairs of load-store and ALU instructions). + * + * So what we need now is a '*_tail_head' macro, which will be used + * in the core main loop. A trivial straightforward implementation + * of this macro would look like this: + * + * pixman_composite_over_8888_0565_process_pixblock_tail + * vst1.16 {d28, d29}, [DST_W, :128]! + * vld1.16 {d4, d5}, [DST_R, :128]! + * vld4.32 {d0, d1, d2, d3}, [SRC]! + * pixman_composite_over_8888_0565_process_pixblock_head + * cache_preload 8, 8 + * + * Now it also got some VLD/VST instructions. We simply can't move from + * processing one block of pixels to the other one with just arithmetics. + * The previously processed data needs to be written to memory and new + * data needs to be fetched. Fortunately, this main loop does not deal + * with partial leading/trailing pixels and can load/store a full block + * of pixels in a bulk. Additionally, destination buffer is already + * 16 bytes aligned here (which is good for performance). + * + * New things here are DST_R, DST_W, SRC and MASK identifiers. These + * are the aliases for ARM registers which are used as pointers for + * accessing data. We maintain separate pointers for reading and writing + * destination buffer (DST_R and DST_W). + * + * Another new thing is 'cache_preload' macro. It is used for prefetching + * data into CPU L2 cache and improve performance when dealing with large + * images which are far larger than cache size. It uses one argument + * (actually two, but they need to be the same here) - number of pixels + * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some + * details about this macro. Moreover, if good performance is needed + * the code from this macro needs to be copied into '*_tail_head' macro + * and mixed with the rest of code for optimal instructions scheduling. + * We are actually doing it below. + * + * Now after all the explanations, here is the optimized code. + * Different instruction streams (originaling from '*_head', '*_tail' + * and 'cache_preload' macro) use different indentation levels for + * better readability. Actually taking the code from one of these + * indentation levels and ignoring a few VLD/VST instructions would + * result in exactly the code from '*_head', '*_tail' or 'cache_preload' + * macro! + */ + +#if 1 + +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + vqadd.u8 d16, d2, d20 + vld1.16 {d4, d5}, [DST_R, :128]! + vqadd.u8 q9, q0, q11 + vshrn.u16 d6, q2, #8 + fetch_src_pixblock + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vshll.u8 q14, d16, #8 + PF add PF_X, PF_X, #8 + vshll.u8 q8, d19, #8 + PF tst PF_CTL, #0xF + vsri.u8 d6, d6, #5 + PF addne PF_X, PF_X, #8 + vmvn.8 d3, d3 + PF subne PF_CTL, PF_CTL, #1 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + vmull.u8 q10, d3, d6 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vsri.u16 q14, q8, #5 + PF cmp PF_X, ORIG_W + vshll.u8 q9, d18, #8 + vrshr.u16 q13, q10, #8 + PF subge PF_X, PF_X, ORIG_W + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + PF subges PF_CTL, PF_CTL, #0x10 + vsri.u16 q14, q9, #11 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vraddhn.u16 d22, q12, q15 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +#else + +/* If we did not care much about the performance, we would just use this... */ +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + pixman_composite_over_8888_0565_process_pixblock_tail + vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! + fetch_src_pixblock + pixman_composite_over_8888_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +#endif + +/* + * And now the final part. We are using 'generate_composite_function' macro + * to put all the stuff together. We are specifying the name of the function + * which we want to get, number of bits per pixel for the source, mask and + * destination (0 if unused, like mask in this case). Next come some bit + * flags: + * FLAG_DST_READWRITE - tells that the destination buffer is both read + * and written, for write-only buffer we would use + * FLAG_DST_WRITEONLY flag instead + * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data + * and separate color channels for 32bpp format. + * The next things are: + * - the number of pixels processed per iteration (8 in this case, because + * that's the maximum what can fit into four 64-bit NEON registers). + * - prefetch distance, measured in pixel blocks. In this case it is 5 times + * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal + * prefetch distance can be selected by running some benchmarks. + * + * After that we specify some macros, these are 'default_init', + * 'default_cleanup' here which are empty (but it is possible to have custom + * init/cleanup macros to be able to save/restore some extra NEON registers + * like d8-d15 or do anything else) followed by + * 'pixman_composite_over_8888_0565_process_pixblock_head', + * 'pixman_composite_over_8888_0565_process_pixblock_tail' and + * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' + * which we got implemented above. + * + * The last part is the NEON registers allocation scheme. + */ +generate_composite_function \ + pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_n_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_0565_process_pixblock_tail_head + pixman_composite_over_n_0565_process_pixblock_tail + vld1.16 {d4, d5}, [DST_R, :128]! + vst1.16 {d28, d29}, [DST_W, :128]! + pixman_composite_over_n_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d3, d3 /* invert source alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_0565_init, \ + default_cleanup, \ + pixman_composite_over_n_0565_process_pixblock_head, \ + pixman_composite_over_n_0565_process_pixblock_tail, \ + pixman_composite_over_n_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_0565_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q14, d2, #8 + vshll.u8 q9, d0, #8 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail_head + vsri.u16 q14, q8, #5 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + fetch_src_pixblock + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vsri.u16 q14, q9, #11 + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vshll.u8 q14, d2, #8 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vshll.u8 q9, d0, #8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_src_0565_8888_process_pixblock_head + vshrn.u16 d30, q0, #8 + vshrn.u16 d29, q0, #3 + vsli.u16 q0, q0, #5 + vmov.u8 d31, #255 + vsri.u8 d30, d30, #5 + vsri.u8 d29, d29, #6 + vshrn.u16 d28, q0, #2 +.endm + +.macro pixman_composite_src_0565_8888_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_src_0565_8888_process_pixblock_tail_head + pixman_composite_src_0565_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + fetch_src_pixblock + pixman_composite_src_0565_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_process_pixblock_head + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #32 + PF tst PF_CTL, #0xF + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #32 + PF subne PF_CTL, PF_CTL, #1 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head + vmvn.8 d24, d3 /* get inverted alpha */ + /* do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8888_process_pixblock_head + pixman_composite_out_reverse_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + pixman_composite_over_8888_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_8888_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +.macro pixman_composite_over_reverse_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d7[0]}, [DUMMY] + vdup.8 d4, d7[0] + vdup.8 d5, d7[1] + vdup.8 d6, d7[2] + vdup.8 d7, d7[3] +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_reverse_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 4, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8_0565_process_pixblock_head + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vmull.u8 q8, d3, d6 /* now do alpha blending */ + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail + /* 3 cycle bubble (after vmull.u8) */ + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + /* 1 cycle bubble */ + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 /* convert to 16bpp */ + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + /* 1 cycle bubble */ + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! + vshrn.u16 d6, q2, #8 + fetch_mask_pixblock + vshrn.u16 d7, q2, #3 + fetch_src_pixblock + vmull.u8 q6, d24, d10 + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + vmull.u8 q1, d24, d9 + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 + vmull.u8 q0, d24, d8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vmull.u8 q7, d24, d11 + vsri.u16 q14, q9, #11 + + cache_preload 8, 8 + + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vst1.16 {d28, d29}, [DST_W, :128]! + vmull.u8 q8, d3, d6 + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +/* + * This function needs a special initialization of solid mask. + * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET + * offset, split into color components and replicated in d8-d11 + * registers. Additionally, this function needs all the NEON registers, + * so it has to save d8-d15 registers which are callee saved according + * to ABI. These registers are restored from 'cleanup' macro. All the + * other NEON registers are caller saved, so can be clobbered freely + * without introducing any problems. + */ +.macro pixman_composite_over_n_8_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_0565_init, \ + pixman_composite_over_n_8_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d24[0]}, [DUMMY] + vdup.8 d24, d24[3] +.endm + +.macro pixman_composite_over_8888_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_0565_init, \ + pixman_composite_over_8888_n_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0565_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 16, 16 +.endm + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_0565_process_pixblock_head, \ + pixman_composite_src_0565_0565_process_pixblock_tail, \ + pixman_composite_src_0565_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail_head + vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #8 + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8_init, \ + pixman_composite_src_n_8_cleanup, \ + pixman_composite_src_n_8_process_pixblock_head, \ + pixman_composite_src_n_8_process_pixblock_tail, \ + pixman_composite_src_n_8_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_0565_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_0565_init, \ + pixman_composite_src_n_0565_cleanup, \ + pixman_composite_src_n_0565_process_pixblock_head, \ + pixman_composite_src_n_0565_process_pixblock_tail, \ + pixman_composite_src_n_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8888_init, \ + pixman_composite_src_n_8888_cleanup, \ + pixman_composite_src_n_8888_process_pixblock_head, \ + pixman_composite_src_n_8888_process_pixblock_tail, \ + pixman_composite_src_n_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_8888_process_pixblock_head, \ + pixman_composite_src_8888_8888_process_pixblock_tail, \ + pixman_composite_src_8888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_x888_8888_process_pixblock_head + vorr q0, q0, q2 + vorr q1, q1, q2 +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + vorr q0, q0, q2 + vorr q1, q1, q2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_x888_8888_init + vmov.u8 q2, #0xFF + vshl.u32 q2, q2, #24 +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_x888_8888_init, \ + default_cleanup, \ + pixman_composite_src_x888_8888_process_pixblock_head, \ + pixman_composite_src_x888_8888_process_pixblock_tail, \ + pixman_composite_src_x888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 d24, d3 /* get inverted alpha */ + /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ + /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head + pixman_composite_over_n_8_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + cache_preload 8, 8 + pixman_composite_over_n_8_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8888_init, \ + pixman_composite_over_n_8_8888_cleanup, \ + pixman_composite_over_n_8_8888_process_pixblock_head, \ + pixman_composite_over_n_8_8888_process_pixblock_tail, \ + pixman_composite_over_n_8_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d8 + vmull.u8 q6, d26, d8 + vmull.u8 q7, d27, d8 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 q12, q0 + vmvn.8 q13, q1 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8_process_pixblock_tail_head + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_n_8_8_process_pixblock_tail + fetch_mask_pixblock + cache_preload 32, 32 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d8[0]}, [DUMMY] + vdup.8 d8, d8[3] +.endm + +.macro pixman_composite_over_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8_init, \ + pixman_composite_over_n_8_8_cleanup, \ + pixman_composite_over_n_8_8_process_pixblock_head, \ + pixman_composite_over_n_8_8_process_pixblock_tail, \ + pixman_composite_over_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} + * dest in {d4, d5, d6, d7 } + * mask in {d24, d25, d26, d27} + * output: updated src in {d0, d1, d2, d3 } + * updated mask in {d24, d25, d26, d3 } + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q7, d27, d11 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vrshr.u16 q10, q7, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + vraddhn.u16 d26, q13, q6 + vraddhn.u16 d3, q7, q10 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in {d28, d29, d30, d31} + */ + vmvn.8 d24, d24 + vmvn.8 d25, d25 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmvn.8 d26, d26 + vmvn.8 d27, d3 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + fetch_mask_pixblock + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + cache_preload 8, 8 + pixman_composite_over_n_8888_8888_ca_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_8888_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_8888_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_8888_ca_init, \ + pixman_composite_over_n_8888_8888_ca_cleanup, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_in_n_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* and destination data in {d4, d5, d6, d7} */ + vmull.u8 q8, d4, d3 + vmull.u8 q9, d5, d3 + vmull.u8 q10, d6, d3 + vmull.u8 q11, d7, d3 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q8, q14 + vraddhn.u16 d29, q9, q15 + vraddhn.u16 d30, q10, q12 + vraddhn.u16 d31, q11, q13 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail_head + pixman_composite_in_n_8_process_pixblock_tail + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 32, 32 + pixman_composite_in_n_8_process_pixblock_head + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_in_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_in_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_in_n_8_init, \ + pixman_composite_in_n_8_cleanup, \ + pixman_composite_in_n_8_process_pixblock_head, \ + pixman_composite_in_n_8_process_pixblock_tail, \ + pixman_composite_in_n_8_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +.macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24, d25, d26, d27 */ + vmull.u8 q0, d24, d11 + vmull.u8 q1, d25, d11 + vmull.u8 q6, d26, d11 + vmull.u8 q7, d27, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_n_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_n_8_8_process_pixblock_tail_head + pixman_composite_add_n_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + cache_preload 32, 32 + pixman_composite_add_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_add_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8_init, \ + pixman_composite_add_n_8_8_cleanup, \ + pixman_composite_add_n_8_8_process_pixblock_head, \ + pixman_composite_add_n_8_8_process_pixblock_tail, \ + pixman_composite_add_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d25, d1 + vmull.u8 q10, d26, d2 + vmull.u8 q11, d27, d3 + vrshr.u16 q0, q8, #8 + vrshr.u16 q1, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q12, q10 + vraddhn.u16 d3, q13, q11 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_8_8_8_process_pixblock_tail_head + pixman_composite_add_8_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + fetch_src_pixblock + cache_preload 32, 32 + pixman_composite_add_8_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_8_8_8_init +.endm + +.macro pixman_composite_add_8_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8_8_8_init, \ + pixman_composite_add_8_8_8_cleanup, \ + pixman_composite_add_8_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d27, d0 + vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 + /* 1 cycle bubble */ + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail + /* 2 cycle bubble */ + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 + vqadd.u8 q14, q2, q14 + /* 1 cycle bubble */ + vqadd.u8 q15, q3, q15 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + vrshrn.u16 d28, q8, #8 + fetch_mask_pixblock + vrshrn.u16 d29, q9, #8 + vmull.u8 q8, d27, d0 + vrshrn.u16 d30, q10, #8 + vmull.u8 q9, d27, d1 + vrshrn.u16 d31, q11, #8 + vmull.u8 q10, d27, d2 + vqadd.u8 q14, q2, q14 + vmull.u8 q11, d27, d3 + vqadd.u8 q15, q3, q15 + vrsra.u16 q8, q8, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrsra.u16 q9, q9, #8 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q10, q10, #8 + + cache_preload 8, 8 + + vrsra.u16 q11, q11, #8 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +generate_composite_function \ + pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_add_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8888_init, \ + pixman_composite_add_n_8_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_8888_n_8888_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vld1.32 {d27[0]}, [DUMMY] + vdup.8 d27, d27[3] +.endm + +.macro pixman_composite_add_8888_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8888_n_8888_init, \ + pixman_composite_add_8888_n_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* solid mask is in d15 */ + + /* 'in' */ + vmull.u8 q8, d15, d3 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q13, q8, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d3, q8, q13 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 + vmvn.8 d24, d3 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_8888_process_pixblock_head + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_8888_n_8888_init + add DUMMY, sp, #48 + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_8888_n_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_8888_init, \ + pixman_composite_over_8888_n_8888_cleanup, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0888_process_pixblock_head +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail_head + vst3.8 {d0, d1, d2}, [DST_W]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0888_process_pixblock_head, \ + pixman_composite_src_0888_0888_process_pixblock_tail, \ + pixman_composite_src_0888_0888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_head + vswp d0, d2 +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head + vst4.8 {d0, d1, d2, d3}, [DST_W]! + fetch_src_pixblock + vswp d0, d2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_0888_8888_rev_init + veor d3, d3, d3 +.endm + +generate_composite_function \ + pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_0888_8888_rev_init, \ + default_cleanup, \ + pixman_composite_src_0888_8888_rev_process_pixblock_head, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q9, d2, #8 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail + vshll.u8 q14, d0, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head + vshll.u8 q14, d0, #8 + fetch_src_pixblock + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + vshll.u8 q9, d2, #8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0565_rev_process_pixblock_head, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d30, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d30, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_pixbuf_8888_process_pixblock_head, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d28, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d28, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmvn.8 d7, d15 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vmull.u8 q8, d7, d4 + vmull.u8 q9, d7, d5 + vmull.u8 q13, d7, d6 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q13, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q13 + vqadd.u8 q0, q0, q14 + vqadd.u8 q1, q1, q15 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_over_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_over_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_0565_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_0565_n_0565_init, \ + pixman_composite_over_0565_n_0565_cleanup, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail + vqadd.u8 q0, q0, q2 + vqadd.u8 q1, q1, q3 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_add_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_add_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_add_0565_8_0565_process_pixblock_head, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q5, d6, d5, d4 + /* destination pixel data is in {d4, d5, d6, xx} */ + vmvn.8 d24, d15 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 +.endm + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vraddhn.u16 d0, q14, q8 + vraddhn.u16 d1, q15, q9 + vraddhn.u16 d2, q12, q10 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_0565_process_pixblock_tail + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8_0565_process_pixblock_head, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 15, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used + * as the basic building blocks for constructing bilinear scanline functions. + */ + +.macro bilinear_load_8888 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #2 + add TMP2, BOTTOM, TMP2, asl #2 + vld1.32 {reg1}, [TMP1] + vld1.32 {reg2}, [TMP2] +.endm + +.macro bilinear_load_0565 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + vld1.32 {reg2[0]}, [TMP1] + vld1.32 {reg2[1]}, [TMP2] + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp +.endm + +.macro bilinear_load_and_vertical_interpolate_two_8888 \ + acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 + + bilinear_load_8888 reg1, reg2, tmp1 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + bilinear_load_8888 reg3, reg4, tmp2 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + bilinear_load_and_vertical_interpolate_two_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi + bilinear_load_and_vertical_interpolate_two_8888 \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi +.endm + +.macro bilinear_load_and_vertical_interpolate_two_0565 \ + acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {acc2lo[0]}, [TMP1] + vld1.32 {acc2hi[0]}, [TMP3] + vld1.32 {acc2lo[1]}, [TMP2] + vld1.32 {acc2hi[1]}, [TMP4] + convert_0565_to_x888 acc2, reg3, reg2, reg1 + vzip.u8 reg1, reg3 + vzip.u8 reg2, reg4 + vzip.u8 reg3, reg4 + vzip.u8 reg1, reg2 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_0565 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1] + vld1.32 {xacc2hi[0]}, [TMP3] + vld1.32 {xacc2lo[1]}, [TMP2] + vld1.32 {xacc2hi[1]}, [TMP4] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1] + vzip.u8 xreg1, xreg3 + vld1.32 {yacc2hi[0]}, [TMP3] + vzip.u8 xreg2, xreg4 + vld1.32 {yacc2lo[1]}, [TMP2] + vzip.u8 xreg3, xreg4 + vld1.32 {yacc2hi[1]}, [TMP4] + vzip.u8 xreg1, xreg2 + convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 + vmull.u8 xacc1, xreg1, d28 + vzip.u8 yreg1, yreg3 + vmlal.u8 xacc1, xreg2, d29 + vzip.u8 yreg2, yreg4 + vmull.u8 xacc2, xreg3, d28 + vzip.u8 yreg3, yreg4 + vmlal.u8 xacc2, xreg4, d29 + vzip.u8 yreg1, yreg2 + vmull.u8 yacc1, yreg1, d28 + vmlal.u8 yacc1, yreg2, d29 + vmull.u8 yacc2, yreg3, d28 + vmlal.u8 yacc2, yreg4, d29 +.endm + +.macro bilinear_store_8888 numpix, tmp1, tmp2 +.if numpix == 4 + vst1.32 {d0, d1}, [OUT]! +.elseif numpix == 2 + vst1.32 {d0}, [OUT]! +.elseif numpix == 1 + vst1.32 {d0[0]}, [OUT, :32]! +.else + .error bilinear_store_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_store_0565 numpix, tmp1, tmp2 + vuzp.u8 d0, d1 + vuzp.u8 d2, d3 + vuzp.u8 d1, d3 + vuzp.u8 d0, d2 + convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 +.if numpix == 4 + vst1.16 {d2}, [OUT]! +.elseif numpix == 2 + vst1.32 {d2[0]}, [OUT]! +.elseif numpix == 1 + vst1.16 {d2[0]}, [OUT]! +.else + .error bilinear_store_0565 numpix is unsupported +.endif +.endm + +.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt + bilinear_load_&src_fmt d0, d1, d2 + vmull.u8 q1, d0, d28 + vmlal.u8 q1, d1, d29 + vshr.u16 d30, d24, #8 + /* 4 cycles bubble */ + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + /* 5 cycles bubble */ + vshrn.u32 d0, q0, #16 + /* 3 cycles bubble */ + vmovn.u16 d0, q0 + /* 1 cycle bubble */ + bilinear_store_&dst_fmt 1, q2, q3 +.endm + +.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt + bilinear_load_and_vertical_interpolate_two_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshrn.u32 d30, q0, #16 + vshrn.u32 d31, q10, #16 + vmovn.u16 d0, q15 + bilinear_store_&dst_fmt 2, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt + bilinear_load_and_vertical_interpolate_four_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 \ + q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshr.u16 q15, q12, #8 + vshll.u16 q2, d6, #8 + vmlsl.u16 q2, d6, d30 + vmlal.u16 q2, d7, d30 + vshll.u16 q8, d18, #8 + pld [TMP2, PF_OFFS] + vmlsl.u16 q8, d18, d31 + vmlal.u16 q8, d19, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q10, #16 + vshrn.u32 d4, q2, #16 + vshrn.u32 d5, q8, #16 + vmovn.u16 d0, q0 + vmovn.u16 d1, q2 + bilinear_store_&dst_fmt 4, q2, q3 +.endm + +/* + * Main template macro for generating NEON optimized bilinear scanline + * functions. + * + * TODO: use software pipelining and aligned writes to the destination buffer + * in order to improve performance + * + * Bilinear scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * src_fmt - source color format (8888 or 0565) + * dst_fmt - destination color format (8888 or 0565) + * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + */ + +.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \ + bpp_shift, prefetch_distance + +pixman_asm_function fname + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + vadd.u16 q13, q13, q13 + + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) +0: + bilinear_interpolate_four_pixels src_fmt, dst_fmt + subs WIDTH, WIDTH, #4 + bge 0b +1: + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, dst_fmt +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, dst_fmt +3: + pop {r4, r5, r6, r7, r8, r9} + bx lr + + .unreq OUT + .unreq TOP + .unreq BOTTOM + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 +.endfunc + +.endm + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28 diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h index 6e3d583f5..0ba67d05f 100644 --- a/pixman/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman/pixman-arm-neon-asm.h @@ -1158,3 +1158,20 @@ fname: vsri.u16 out, tmp1, #5 vsri.u16 out, tmp2, #11 .endm + +/* + * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels + * returned in (out0, out1) registers pair. Requires one temporary + * 64-bit register (tmp). 'out1' and 'in' may overlap, the original + * value from 'in' is lost + */ +.macro convert_four_0565_to_x888_packed in, out0, out1, tmp + vshl.u16 out0, in, #5 /* G top 6 bits */ + vshl.u16 tmp, in, #11 /* B top 5 bits */ + vsri.u16 in, in, #5 /* R is ready in top bits */ + vsri.u16 out0, out0, #6 /* G is ready in top bits */ + vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ + vshr.u16 out1, in, #8 /* R is in place */ + vsri.u16 out0, tmp, #8 /* G & B is in place */ + vzip.u16 out0, out1 /* everything is in place */ +.endm diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index c7c025418..5213a2007 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -1,486 +1,460 @@ -/* - * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of ARM Ltd not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. ARM Ltd makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ian Rickards (ian.rickards@arm.com) - * Author: Jonathan Morton (jonathan.morton@movial.com) - * Author: Markku Vire (markku.vire@movial.com) - * - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include "pixman-private.h" -#include "pixman-arm-common.h" - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, - uint8_t, 3, uint8_t, 3) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, - uint16_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, - uint8_t, 3, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, - uint8_t, 3, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, - uint8_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, - uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, - uint8_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, - uint8_t, 1, uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, - uint32_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, - uint16_t, uint32_t) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, - OVER, uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, - OVER, uint16_t, uint16_t) - -void -pixman_composite_src_n_8_asm_neon (int32_t w, - int32_t h, - uint8_t *dst, - int32_t dst_stride, - uint8_t src); - -void -pixman_composite_src_n_0565_asm_neon (int32_t w, - int32_t h, - uint16_t *dst, - int32_t dst_stride, - uint16_t src); - -void -pixman_composite_src_n_8888_asm_neon (int32_t w, - int32_t h, - uint32_t *dst, - int32_t dst_stride, - uint32_t src); - -static pixman_bool_t -pixman_fill_neon (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor) -{ - /* stride is always multiple of 32bit units in pixman */ - uint32_t byte_stride = stride * sizeof(uint32_t); - - switch (bpp) - { - case 8: - pixman_composite_src_n_8_asm_neon ( - width, - height, - (uint8_t *)(((char *) bits) + y * byte_stride + x), - byte_stride, - _xor & 0xff); - return TRUE; - case 16: - pixman_composite_src_n_0565_asm_neon ( - width, - height, - (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), - byte_stride / 2, - _xor & 0xffff); - return TRUE; - case 32: - pixman_composite_src_n_8888_asm_neon ( - width, - height, - (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), - byte_stride / 4, - _xor); - return TRUE; - default: - return FALSE; - } -} - -static pixman_bool_t -pixman_blt_neon (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - if (src_bpp != dst_bpp) - return FALSE; - - switch (src_bpp) - { - case 16: - pixman_composite_src_0565_0565_asm_neon ( - width, height, - (uint16_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, - (uint16_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 2), src_stride * 2); - return TRUE; - case 32: - pixman_composite_src_8888_8888_asm_neon ( - width, height, - (uint32_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 4), dst_stride, - (uint32_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 4), src_stride); - return TRUE; - default: - return FALSE; - } -} - -void -pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out, - const uint32_t * top, - const uint32_t * bottom, - int wt, - int wb, - pixman_fixed_t x, - pixman_fixed_t ux, - int width); - -static force_inline void -scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst, - const uint32_t * mask, - const uint32_t * src_top, - const uint32_t * src_bottom, - int32_t w, - int wt, - int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) -{ - pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, - src_bottom, wt, wb, - vx, unit_x, w); -} - -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC, - scaled_bilinear_scanline_neon_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - COVER, FALSE, FALSE) -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC, - scaled_bilinear_scanline_neon_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - PAD, FALSE, FALSE) -FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC, - scaled_bilinear_scanline_neon_8888_8888_SRC, - uint32_t, uint32_t, uint32_t, - NONE, FALSE, FALSE) - -static const pixman_fast_path_t arm_neon_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), - /* Note: NONE repeat is not supported yet */ - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), - - { PIXMAN_OP_NONE }, -}; - -static pixman_bool_t -arm_neon_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - if (!pixman_blt_neon ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) - - { - return _pixman_implementation_blt ( - imp->delegate, - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); - } - - return TRUE; -} - -static pixman_bool_t -arm_neon_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) - return TRUE; - - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); -} - -#define BIND_COMBINE_U(name) \ -void \ -pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src, \ - const uint32_t *mask); \ - \ -void \ -pixman_composite_scanline_##name##_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src); \ - \ -static void \ -neon_combine_##name##_u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ -{ \ - if (mask) \ - pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ - src, mask); \ - else \ - pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ -} - -BIND_COMBINE_U (over) -BIND_COMBINE_U (add) -BIND_COMBINE_U (out_reverse) - -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, arm_neon_fast_paths); - - imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; - imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; - - imp->blt = arm_neon_blt; - imp->fill = arm_neon_fill; - - return imp; -} +/* + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of ARM Ltd not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. ARM Ltd makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include "pixman-private.h" +#include "pixman-arm-common.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, + uint8_t, 3, uint8_t, 3) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, + uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, + uint32_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, + uint16_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, + uint16_t, uint16_t) + +void +pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +void +pixman_composite_src_n_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +static pixman_bool_t +pixman_fill_neon (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_neon ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_neon ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_neon ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +pixman_blt_neon (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_neon_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), + + { PIXMAN_OP_NONE }, +}; + +static pixman_bool_t +arm_neon_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!pixman_blt_neon ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) + + { + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); + } + + return TRUE; +} + +static pixman_bool_t +arm_neon_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; + + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); +} + +#define BIND_COMBINE_U(name) \ +void \ +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src, \ + const uint32_t *mask); \ + \ +void \ +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src); \ + \ +static void \ +neon_combine_##name##_u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ +{ \ + if (mask) \ + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ + src, mask); \ + else \ + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ +} + +BIND_COMBINE_U (over) +BIND_COMBINE_U (add) +BIND_COMBINE_U (out_reverse) + +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, arm_neon_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-arm-simd-asm.S b/pixman/pixman/pixman-arm-simd-asm.S index d97545c1b..e00836b6c 100644 --- a/pixman/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman/pixman-arm-simd-asm.S @@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 .endfunc /* - * Note: This function is only using armv4t instructions (not even armv6), + * Note: This code is only using armv5te instructions (not even armv6), * but is scheduled for ARM Cortex-A8 pipeline. So it might need to * be split into a few variants, tuned for each microarchitecture. * * TODO: In order to get good performance on ARM9/ARM11 cores (which don't * have efficient write combining), it needs to be changed to use 16-byte * aligned writes using STM instruction. + * + * Nearest scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes + * t - type suffix for LDR/STR instructions + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + * prefetch_braking_distance - stop prefetching when that many pixels are + * remaining before the end of scanline */ -pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + +.macro generate_nearest_scanline_func fname, bpp_shift, t, \ + prefetch_distance, \ + prefetch_braking_distance + +pixman_asm_function fname W .req r0 DST .req r1 SRC .req r2 @@ -348,30 +362,46 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 TMP1 .req r4 TMP2 .req r5 VXMASK .req r6 + PF_OFFS .req r7 ldr UNIT_X, [sp] push {r4, r5, r6, r7} - mvn VXMASK, #1 + mvn VXMASK, #((1 << bpp_shift) - 1) /* define helper macro */ .macro scale_2_pixels - ldrh TMP1, [SRC, TMP1] - and TMP2, VXMASK, VX, lsr #15 + ldr&t TMP1, [SRC, TMP1] + and TMP2, VXMASK, VX, lsr #(16 - bpp_shift) add VX, VX, UNIT_X - strh TMP1, [DST], #2 + str&t TMP1, [DST], #(1 << bpp_shift) - ldrh TMP2, [SRC, TMP2] - and TMP1, VXMASK, VX, lsr #15 + ldr&t TMP2, [SRC, TMP2] + and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) add VX, VX, UNIT_X - strh TMP2, [DST], #2 + str&t TMP2, [DST], #(1 << bpp_shift) .endm /* now do the scaling */ - and TMP1, VXMASK, VX, lsr #15 + and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) add VX, VX, UNIT_X - subs W, #4 + subs W, W, #(8 + prefetch_braking_distance) blt 2f -1: /* main loop, process 4 pixels per iteration */ + /* calculate prefetch offset */ + mov PF_OFFS, #prefetch_distance + mla PF_OFFS, UNIT_X, PF_OFFS, VX +1: /* main loop, process 8 pixels per iteration with prefetch */ + subs W, W, #8 + add PF_OFFS, UNIT_X, lsl #3 + scale_2_pixels + scale_2_pixels + scale_2_pixels + scale_2_pixels + pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)] + bge 1b +2: + subs W, W, #(4 - 8 - prefetch_braking_distance) + blt 2f +1: /* process the remaining pixels */ scale_2_pixels scale_2_pixels subs W, W, #4 @@ -382,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 scale_2_pixels 2: tst W, #1 - ldrneh TMP1, [SRC, TMP1] - strneh TMP1, [DST], #2 + ldrne&t TMP1, [SRC, TMP1] + strne&t TMP1, [DST] /* cleanup helper macro */ .purgem scale_2_pixels .unreq DST @@ -394,7 +424,15 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 .unreq TMP1 .unreq TMP2 .unreq VXMASK + .unreq PF_OFFS /* return */ pop {r4, r5, r6, r7} bx lr .endfunc +.endm + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 diff --git a/pixman/pixman/pixman-arm-simd.c b/pixman/pixman/pixman-arm-simd.c index 6bbc1094d..45981a6b0 100644 --- a/pixman/pixman/pixman-arm-simd.c +++ b/pixman/pixman/pixman-arm-simd.c @@ -1,423 +1,432 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" -#include "pixman-arm-common.h" -#include "pixman-fast-path.h" - -#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ - -void -pixman_composite_add_8_8_asm_armv6 (int32_t width, - int32_t height, - uint8_t *dst_line, - int32_t dst_stride, - uint8_t *src_line, - int32_t src_stride) -{ - uint8_t *dst, *src; - int32_t w; - uint8_t s, d; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - /* ensure both src and dst are properly aligned before doing 32 bit reads - * we'll stay in this loop if src and dst have differing alignments - */ - while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; - - dst++; - src++; - w--; - } - - while (w >= 4) - { - asm ("uqadd8 %0, %1, %2" - : "=r" (*(uint32_t*)dst) - : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; - - dst++; - src++; - w--; - } - } - -} - -void -pixman_composite_over_8888_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride) -{ - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t upper_component_mask = 0xff00ff00; - uint32_t alpha_mask = 0xff; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - "ldr r4, [%[dest]] \n\t" - -#else - "ldr r4, [%[dest]] \n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" -#endif - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* multiply by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - /* recombine the 0xff00ff00 bytes of r6 and r7 */ - "and r7, r7, %[upper_component_mask]\n\t" - "uxtab16 r6, r7, r6, ror #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); - } -} - -void -pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride, - uint32_t mask) -{ - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t alpha_mask = 0xff; - - mask = (mask) >> 24; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - "uxtb16 r6, r5\n\t" - "uxtb16 r7, r5, ror #8\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, %[mask_alpha], %[component_half]\n\t" - "mla r7, r7, %[mask_alpha], %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [mask_alpha] "r" (mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" - ); - } -} - -void -pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t src, - int32_t unused, - uint8_t *mask_line, - int32_t mask_stride) -{ - uint32_t srca; - uint32_t *dst; - uint8_t *mask; - int32_t w; - - srca = src >> 24; - - uint32_t component_mask = 0xff00ff; - uint32_t component_half = 0x800080; - - uint32_t src_hi = (src >> 8) & component_mask; - uint32_t src_lo = src & component_mask; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load mask */ - "ldrb r5, [%[mask]], #1\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, %[src_lo], r5, %[component_half]\n\t" - "mla r7, %[src_hi], r5, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* we could simplify this to use 'sub' if we were - * willing to give up a register for alpha_mask - */ - "mvn r8, r5\n\t" - "mov r8, r8, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) - : [component_half] "r" (component_half), - [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); - } -} - -#endif - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, - uint16_t, uint16_t) - -static const pixman_fast_path_t arm_simd_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), - - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), - - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), - - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); - - return imp; -} +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" +#include "pixman-arm-common.h" +#include "pixman-fast-path.h" + +#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ + +void +pixman_composite_add_8_8_asm_armv6 (int32_t width, + int32_t height, + uint8_t *dst_line, + int32_t dst_stride, + uint8_t *src_line, + int32_t src_stride) +{ + uint8_t *dst, *src; + int32_t w; + uint8_t s, d; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* ensure both src and dst are properly aligned before doing 32 bit reads + * we'll stay in this loop if src and dst have differing alignments + */ + while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + + while (w >= 4) + { + asm ("uqadd8 %0, %1, %2" + : "=r" (*(uint32_t*)dst) + : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + } + +} + +void +pixman_composite_over_8888_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t upper_component_mask = 0xff00ff00; + uint32_t alpha_mask = 0xff; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + "ldr r4, [%[dest]] \n\t" + +#else + "ldr r4, [%[dest]] \n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" +#endif + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} + +void +pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride, + uint32_t mask) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t alpha_mask = 0xff; + + mask = (mask) >> 24; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); + } +} + +void +pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t src, + int32_t unused, + uint8_t *mask_line, + int32_t mask_stride) +{ + uint32_t srca; + uint32_t *dst; + uint8_t *mask; + int32_t w; + + srca = src >> 24; + + uint32_t component_mask = 0xff00ff; + uint32_t component_half = 0x800080; + + uint32_t src_hi = (src >> 8) & component_mask; + uint32_t src_lo = src & component_mask; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask + */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); + } +} + +#endif + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, + uint32_t, uint32_t) + +static const pixman_fast_path_t arm_simd_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); + + return imp; +} diff --git a/xorg-server/xkeyboard-config/rules/base.lists.part b/xorg-server/xkeyboard-config/rules/base.lists.part index a0fecdff1..64319d376 100644 --- a/xorg-server/xkeyboard-config/rules/base.lists.part +++ b/xorg-server/xkeyboard-config/rules/base.lists.part @@ -9,7 +9,7 @@ ! $pcmodels = pc101 pc102 pc104 pc105 // Microsoft models (using MS geometry) -! $msmodels = microsoft microsoft7000 microsoftpro microsoftprousb microsoftprose +! $msmodels = microsoft microsoft4000 microsoft7000 microsoftpro microsoftprousb microsoftprose // Nokia devices and keyboards ! $nokiamodels = nokiasu8w nokiarx44 nokiarx51 @@ -57,8 +57,8 @@ logiinkse logiinkseusb logiitc logiik \ logitech_base itouch logiultrax \ logitech_g15 \ - logidinovo logidinovoedge \ - microsoft7000 microsoftinet microsoftprousb microsoftprooem microsoftprose \ + logidinovo logidinovoedge \ + microsoft4000 microsoft7000 microsoftinet microsoftprousb microsoftprooem microsoftprose \ microsoftoffice microsoftmult \ mx1998 mx2500 mx2750 \ oretec \ diff --git a/xorg-server/xkeyboard-config/rules/base.xml.in b/xorg-server/xkeyboard-config/rules/base.xml.in index 00c315e21..1a3d528d1 100644 --- a/xorg-server/xkeyboard-config/rules/base.xml.in +++ b/xorg-server/xkeyboard-config/rules/base.xml.in @@ -1,6202 +1,6209 @@ - - - - - - - pc101 - <_description>Generic 101-key PC - Generic - - - - - pc102 - <_description>Generic 102-key (Intl) PC - Generic - - - - - pc104 - <_description>Generic 104-key PC - Generic - - - - - pc105 - <_description>Generic 105-key (Intl) PC - Generic - - - - - dell101 - <_description>Dell 101-key PC - Dell - - - - - latitude - <_description>Dell Latitude series laptop - Dell - - - - - dellm65 - <_description>Dell Precision M65 - Dell - - - - - everex - <_description>Everex STEPnote - Everex - - - - - flexpro - <_description>Keytronic FlexPro - Keytronic - - - - - microsoft - <_description>Microsoft Natural - Microsoft Inc. - - - - - omnikey101 - <_description>Northgate OmniKey 101 - Northgate - - - - - winbook - <_description>Winbook Model XP5 - Generic - - - - - pc98 - <_description>PC-98xx Series - Generic - - - - - a4techKB21 - <_description>A4Tech KB-21 - A4Tech - - - - - a4techKBS8 - <_description>A4Tech KBS-8 - A4Tech - - - - - a4_rfkb23 - <_description>A4Tech Wireless Desktop RFKB-23 - A4Tech - - - - - airkey - <_description>Acer AirKey V - Acer - - - - - azonaRF2300 - <_description>Azona RF2300 wireless Internet Keyboard - Azona - - - - - scorpius - <_description>Advance Scorpius KI - Scorpius - - - - - brother - <_description>Brother Internet Keyboard - Brother - - - - - btc5113rf - <_description>BTC 5113RF Multimedia - BTC - - - - - btc5126t - <_description>BTC 5126T - BTC - - - - - btc6301urf - <_description>BTC 6301URF - BTC - - - - - btc9000 - <_description>BTC 9000 - BTC - - - - - btc9000a - <_description>BTC 9000A - BTC - - - - - btc9001ah - <_description>BTC 9001AH - BTC - - - - - btc5090 - <_description>BTC 5090 - BTC - - - - - btc9019u - <_description>BTC 9019U - BTC - - - - - btc9116u - <_description>BTC 9116U Mini Wireless Internet and Gaming - - - - - cherryblue - <_description>Cherry Blue Line CyBo@rd - - - - - cherryblueb - <_description>Cherry CyMotion Master XPress - Cherry - - - - - cherrybluea - <_description>Cherry Blue Line CyBo@rd (alternate option) - Cherry - - - - - cherrycyboard - <_description>Cherry CyBo@rd USB-Hub - Cherry - - - - - cherrycmexpert - <_description>Cherry CyMotion Expert - Cherry - - - - - cherrybunlim - <_description>Cherry B.UNLIMITED - Cherry - - - - - chicony - <_description>Chicony Internet Keyboard - Chicony - - - - - chicony0108 - <_description>Chicony KU-0108 - Chicony - - - - - chicony0420 - <_description>Chicony KU-0420 - Chicony - - - - - chicony9885 - <_description>Chicony KB-9885 - Chicony - - - - - compaqeak8 - <_description>Compaq Easy Access Keyboard - Compaq - - - - - compaqik7 - <_description>Compaq Internet Keyboard (7 keys) - Compaq - - - - - compaqik13 - <_description>Compaq Internet Keyboard (13 keys) - Compaq - - - - - compaqik18 - <_description>Compaq Internet Keyboard (18 keys) - Compaq - - - - - cymotionlinux - <_description>Cherry CyMotion Master Linux - Cherry - - - - - armada - <_description>Laptop/notebook Compaq (eg. Armada) Laptop Keyboard - Compaq - - - - - presario - <_description>Laptop/notebook Compaq (eg. Presario) Internet Keyboard - Compaq - - - - - ipaq - <_description>Compaq iPaq Keyboard - Compaq - - - - - dell - <_description>Dell - Dell - - - - - dellsk8125 - <_description>Dell SK-8125 - Dell - - - - - dellsk8135 - <_description>Dell SK-8135 - Dell - - - - - dellusbmm - <_description>Dell USB Multimedia Keyboard - Dell - - - - - inspiron - <_description>Dell Laptop/notebook Inspiron 6xxx/8xxx - Dell - - - - - precision_m - <_description>Dell Laptop/notebook Precision M series - Dell - - - - - dexxa - <_description>Dexxa Wireless Desktop Keyboard - Dexxa - - - - - diamond - <_description>Diamond 9801 / 9802 series - Diamond - - - - - dtk2000 - <_description>DTK2000 - - - - - ennyah_dkb1008 - <_description>Ennyah DKB-1008 - Ennyah - - - - - fscaa1667g - <_description>Fujitsu-Siemens Computers AMILO laptop - Fujitsu-Siemens - - - - - genius - <_description>Genius Comfy KB-16M / Genius MM Keyboard KWD-910 - Genius - - - - - geniuscomfy - <_description>Genius Comfy KB-12e - Genius - - - - - geniuscomfy2 - <_description>Genius Comfy KB-21e-Scroll - Genius - - - - - geniuskb19e - <_description>Genius KB-19e NB - Genius - - - - - geniuskkb2050hs - <_description>Genius KKB-2050HS - Genius - - - - - gyration - <_description>Gyration - Gyration - - - - - htcdream - <_description>HTC Dream - HTC - - - - - kinesis - <_description>Kinesis - Kinesis - - - - - logitech_base - <_description>Logitech Generic Keyboard - Logitech - - - - - logitech_g15 - <_description>Logitech G15 extra keys via G15daemon - Logitech - - - - - hpi6 - <_description>Hewlett-Packard Internet Keyboard - Hewlett-Packard - - - - - hp250x - <_description>Hewlett-Packard SK-250x Multimedia Keyboard - Hewlett-Packard - - - - - hpxe3gc - <_description>Hewlett-Packard Omnibook XE3 GC - Hewlett-Packard - - - - - hpxe3gf - <_description>Hewlett-Packard Omnibook XE3 GF - Hewlett-Packard - - - - - hpxt1000 - <_description>Hewlett-Packard Omnibook XT1000 - Hewlett-Packard - - - - - hpdv5 - <_description>Hewlett-Packard Pavilion dv5 - Hewlett-Packard - - - - - hpzt11xx - <_description>Hewlett-Packard Pavilion ZT11xx - Hewlett-Packard - - - - - hp500fa - <_description>Hewlett-Packard Omnibook 500 FA - Hewlett-Packard - - - - - hp5xx - <_description>Hewlett-Packard Omnibook 5xx - Hewlett-Packard - - - - - hpnx9020 - <_description>Hewlett-Packard nx9020 - Hewlett-Packard - - - - - hp6000 - <_description>Hewlett-Packard Omnibook 6000/6100 - Hewlett-Packard - - - - - honeywell_euroboard - <_description>Honeywell Euroboard - Hewlett-Packard - - - - - hpmini110 - <_description>Hewlett-Packard Mini 110 Notebook - Hewlett-Packard - - - - - rapidaccess - <_description>IBM Rapid Access - Lenovo (previously IBM) - - - - - rapidaccess2 - <_description>IBM Rapid Access II - Lenovo (previously IBM) - - - - - thinkpad - <_description>IBM ThinkPad 560Z/600/600E/A22E - Lenovo (previously IBM) - - - - - thinkpad60 - <_description>IBM ThinkPad R60/T60/R61/T61 - Lenovo (previously IBM) - - - - - thinkpadz60 - <_description>IBM ThinkPad Z60m/Z60t/Z61m/Z61t - Lenovo (previously IBM) - - - - - ibm_spacesaver - <_description>IBM Space Saver - Lenovo (previously IBM) - - - - - logiaccess - <_description>Logitech Access Keyboard - Logitech - - - - - logiclx300 - <_description>Logitech Cordless Desktop LX-300 - Logitech - - - - - logii350 - <_description>Logitech Internet 350 Keyboard - Logitech - - - - - logimel - <_description>Logitech Media Elite Keyboard - Logitech - - - - - logicd - <_description>Logitech Cordless Desktop - Logitech - - - - - logicd_it - <_description>Logitech Cordless Desktop iTouch - Logitech - - - - - logicd_nav - <_description>Logitech Cordless Desktop Navigator - Logitech - - - - - logicd_opt - <_description>Logitech Cordless Desktop Optical - Logitech - - - - - logicda - <_description>Logitech Cordless Desktop (alternate option) - Logitech - - - - - logicdpa2 - <_description>Logitech Cordless Desktop Pro (alternate option 2) - Logitech - - - - - logicfn - <_description>Logitech Cordless Freedom/Desktop Navigator - Logitech - - - - - logicdn - <_description>Logitech Cordless Desktop Navigator - Logitech - - - - - logiitc - <_description>Logitech iTouch Cordless Keyboard (model Y-RB6) - Logitech - - - - - logiik - <_description>Logitech Internet Keyboard - Logitech - - - - - itouch - <_description>Logitech iTouch - Logitech - - - - - logicink - <_description>Logitech Internet Navigator Keyboard - Logitech - - - - - logiex110 - <_description>Logitech Cordless Desktop EX110 - Logitech - - - - - logiinkse - <_description>Logitech iTouch Internet Navigator Keyboard SE - Logitech - - - - - logiinkseusb - <_description>Logitech iTouch Internet Navigator Keyboard SE (USB) - Logitech - - - - - logiultrax - <_description>Logitech Ultra-X Keyboard - Logitech - - - - - logiultraxc - <_description>Logitech Ultra-X Cordless Media Desktop Keyboard - Logitech - - - - - logidinovo - <_description>Logitech diNovo Keyboard - Logitech - - - - - logidinovoedge - <_description>Logitech diNovo Edge Keyboard - Logitech - - - - - mx1998 - <_description>Memorex MX1998 - Memorex - - - - - mx2500 - <_description>Memorex MX2500 EZ-Access Keyboard - Memorex - - - - - mx2750 - <_description>Memorex MX2750 - Memorex - - - - - microsoft7000 - <_description>Microsoft Natural Wireless Ergonomic Keyboard 7000 - Microsoft Inc. - - - - - microsoftinet - <_description>Microsoft Internet Keyboard - Microsoft Inc. - - - - - microsoftpro - <_description>Microsoft Natural Keyboard Pro / Microsoft Internet Keyboard Pro - Microsoft Inc. - - - - - microsoftprousb - <_description>Microsoft Natural Keyboard Pro USB / Microsoft Internet Keyboard Pro - Microsoft Inc. - - - - - microsoftprooem - <_description>Microsoft Natural Keyboard Pro OEM - Microsoft Inc. - - - - - vsonku306 - <_description>ViewSonic KU-306 Internet Keyboard - ViewSonic - - - - - microsoftprose - <_description>Microsoft Internet Keyboard Pro, Swedish - Microsoft Inc. - - - - - microsoftoffice - <_description>Microsoft Office Keyboard - Microsoft Inc. - - - - - microsoftmult - <_description>Microsoft Wireless Multimedia Keyboard 1.0A - Microsoft Inc. - - - - - microsoftelite - <_description>Microsoft Natural Keyboard Elite - Microsoft Inc. - - - - - microsoftccurve2k - <_description>Microsoft Comfort Curve Keyboard 2000 - Microsoft Inc. - - - - - oretec - <_description>Ortek MCK-800 MM/Internet keyboard - Ortek - - - - - propeller - <_description>Propeller Voyager (KTEZ-1000) - KeyTronic - - - - - qtronix - <_description>QTronix Scorpius 98N+ - QTronix - - - - - samsung4500 - <_description>Samsung SDM 4500P - Samsung - - - - - samsung4510 - <_description>Samsung SDM 4510P - Samsung - - - - - sanwaskbkg3 - <_description>Sanwa Supply SKB-KG3 - Sanwa Supply Inc. - - - - - sk1300 - <_description>SK-1300 - NEC - - - - - sk2500 - <_description>SK-2500 - NEC - - - - - sk6200 - <_description>SK-6200 - NEC - - - - - sk7100 - <_description>SK-7100 - NEC - - - - - sp_inet - <_description>Super Power Multimedia Keyboard - Generic - - - - - sven - <_description>SVEN Ergonomic 2500 - SVEN - - - - - sven303 - <_description>SVEN Slim 303 - SVEN - - - - - symplon - <_description>Symplon PaceBook (tablet PC) - Symplon - - - - - toshiba_s3000 - <_description>Toshiba Satellite S3000 - Toshiba - - - - - trust - <_description>Trust Wireless Keyboard Classic - Trust - - - - - trustda - <_description>Trust Direct Access Keyboard - Trust - - - - - trust_slimline - <_description>Trust Slimline - Trust - - - - - tm2020 - <_description>TypeMatrix EZ-Reach 2020 - TypeMatrix - - - - - tm2030PS2 - <_description>TypeMatrix EZ-Reach 2030 PS2 - TypeMatrix - - - - - tm2030USB - <_description>TypeMatrix EZ-Reach 2030 USB - TypeMatrix - - - - - tm2030USB-102 - <_description>TypeMatrix EZ-Reach 2030 USB (102/105:EU mode) - TypeMatrix - - - - - tm2030USB-106 - <_description>TypeMatrix EZ-Reach 2030 USB (106:JP mode) - TypeMatrix - - - - - yahoo - <_description>Yahoo! Internet Keyboard - Yahoo! - - - - - macbook78 - <_description>MacBook/MacBook Pro - Apple - - - - - macbook79 - <_description>MacBook/MacBook Pro (Intl) - Apple - - - - - macintosh - <_description>Macintosh - Apple - - - - - macintosh_old - <_description>Macintosh Old - Apple - - - - - macintosh_hhk - <_description>Happy Hacking Keyboard for Mac - Fujitsu - - - - - acer_c300 - <_description>Acer C300 - Acer - - - - - acer_ferrari4k - <_description>Acer Ferrari 4000 - Acer - - - - - acer_laptop - <_description>Acer Laptop - Acer - - - - - asus_laptop - <_description>Asus Laptop - Asus - - - - - apple - <_description>Apple - Apple - - - - - apple_laptop - <_description>Apple Laptop - Apple - - - - - applealu_ansi - <_description>Apple Aluminium Keyboard (ANSI) - Apple - - - - - applealu_iso - <_description>Apple Aluminium Keyboard (ISO) - Apple - - - - - applealu_jis - <_description>Apple Aluminium Keyboard (JIS) - Apple - - - - - silvercrest - <_description>SILVERCREST Multimedia Wireless Keyboard - Silvercrest - - - - - emachines - <_description>Laptop/notebook eMachines m68xx - eMachines - - - - - benqx - <_description>BenQ X-Touch - BenQ - - - - - benqx730 - <_description>BenQ X-Touch 730 - BenQ - - - - - benqx800 - <_description>BenQ X-Touch 800 - BenQ - - - - - hhk - <_description>Happy Hacking Keyboard - Fujitsu - - - - - classmate - <_description>Classmate PC - Intel - - - - - olpc - <_description>OLPC - OLPC - - - - - sun6 - <_description>Sun Type 5/6 - Sun Microsystems - - - - - targa_v811 - <_description>Targa Visionary 811 - Targa - - - - - unitekkb1925 - <_description>Unitek KB-1925 - Unitek Group - - - - - compalfl90 - <_description>FL90 - Compal Electronics Inc. - - - - - creativedw7000 - <_description>Creative Desktop Wireless 7000 - Creative - - - - - htcdream - <_description>Htc Dream phone - htc - - - - - - - us - <_shortDescription>USA - <_description>USA - - eng - - - - - - chr - <_description>USA - Cherokee - - chr - - - - - - euro - <_description>USA - With EuroSign on 5 - - - - - intl - <_description>USA - International (with dead keys) - - - - - alt-intl - <_description>USA - Alternative international - - - - - colemak - <_description>USA - Colemak - - - - - dvorak - <_description>USA - Dvorak - - - - - dvorak-intl - <_description>USA - Dvorak international (with dead keys) - - - - - dvorak-alt-intl - <_description>USA - Dvorak alternative international (no dead keys) - - - - - dvorak-l - <_description>USA - Left handed Dvorak - - - - - dvorak-r - <_description>USA - Right handed Dvorak - - - - - dvorak-classic - <_description>USA - Classic Dvorak - - - - - dvp - <_description>USA - Programmer Dvorak - - - - - rus - <_description>USA - Russian phonetic - - rus - - - - - - mac - <_description>USA - Macintosh - - - - - altgr-intl - <_description>USA - International (AltGr dead keys) - eng - fra - ger - - - - - olpc2 - <_description>USA - Layout toggle on multiply/divide key - - - - - hbs - <_description>USA - Serbo-Croatian - eng - bos - hbs - hrv - srp - - - - - - - ad - <_shortDescription>And - <_description>Andorra - - cat - - - - - - - af - <_shortDescription>Afg - <_description>Afghanistan - - - - - ps - <_description>Afghanistan - Pashto - - pus - - - - - - uz - <_description>Afghanistan - Southern Uzbek - - uzb - - - - - - olpc-ps - <_description>Afghanistan - OLPC Pashto - - pus - - - - - - fa-olpc - <_description>Afghanistan - OLPC Dari - - - - - uz-olpc - <_description>Afghanistan - OLPC Southern Uzbek - - uzb - - - - - - - - ara - <_shortDescription>Ara - <_description>Arabic - - AE - BH - DZ - EG - EH - JO - KW - LB - LY - MA - MR - OM - PS - QA - SA - SD - SY - TN - YE - - - ara - - - - - - azerty - <_description>Arabic - azerty - - - - - azerty_digits - <_description>Arabic - azerty/digits - - - - - digits - <_description>Arabic - digits - - - - - qwerty - <_description>Arabic - qwerty - - - - - qwerty_digits - <_description>Arabic - qwerty/digits - - - - - buckwalter - <_description>Arabic - Buckwalter - - - - - - - al - <_shortDescription>Alb - <_description>Albania - - alb - - - - - - - am - <_shortDescription>Arm - <_description>Armenia - - hye - - - - - - phonetic - <_description>Armenia - Phonetic - - - - - phonetic-alt - <_description>Armenia - Alternative Phonetic - - - - - eastern - <_description>Armenia - Eastern - - - - - western - <_description>Armenia - Western - - - - - eastern-alt - <_description>Armenia - Alternative Eastern - - - - - - - at - <_shortDescription>Aut - <_description>Austria - - ger - - - - - - nodeadkeys - <_description>Austria - Eliminate dead keys - - - - - sundeadkeys - <_description>Austria - Sun dead keys - - - - - mac - <_description>Austria - Macintosh - - - - - - - az - <_shortDescription>Aze - <_description>Azerbaijan - - aze - - - - - - cyrillic - <_description>Azerbaijan - Cyrillic - - - - - - - by - <_shortDescription>Blr - <_description>Belarus - - bel - - - - - - legacy - <_description>Belarus - Legacy - - - - - latin - <_description>Belarus - Latin - - - - - - - be - <_shortDescription>Bel - <_description>Belgium - ger - nld - fra - - - - - oss - <_description>Belgium - Alternative - - - - - oss_latin9 - <_description>Belgium - Alternative, latin-9 only - - - - - oss_sundeadkeys - <_description>Belgium - Alternative, Sun dead keys - - - - - iso-alternate - <_description>Belgium - ISO Alternate - - - - - nodeadkeys - <_description>Belgium - Eliminate dead keys - - - - - sundeadkeys - <_description>Belgium - Sun dead keys - - - - - wang - <_description>Belgium - Wang model 724 azerty - - - - - - - bd - <_shortDescription>Bgd - <_description>Bangladesh - - ben - - - - - - probhat - <_description>Bangladesh - Probhat - - - - - - - in - <_shortDescription>Ind - <_description>India - - - - - ben - <_description>India - Bengali - - ben - - - - - - ben_probhat - <_description>India - Bengali Probhat - - ben - - - - - - guj - <_description>India - Gujarati - - guj - - - - - - guru - <_description>India - Gurmukhi - - pan - - - - - - jhelum - <_description>India - Gurmukhi Jhelum - - pan - - - - - - kan - <_description>India - Kannada - - kan - - - - - - mal - <_description>India - Malayalam - - mal - - - - - - mal_lalitha - <_description>India - Malayalam Lalitha - - mal - - - - - - mal_enhanced - <_description>India - Malayalam enhanced INSCRIPT with Rupee Sign - mal - - - - - ori - <_description>India - Oriya - - ori - - - - - - tam_unicode - <_description>India - Tamil Unicode - - tam - - - - - - tam_keyboard_with_numerals - <_description>India - Tamil Keyboard with Numerals - - tam - - - - - - tam_TAB - <_description>India - Tamil TAB Typewriter - - tam - - - - - - tam_TSCII - <_description>India - Tamil TSCII Typewriter - - tam - - - - - - tam - <_description>India - Tamil - - tam - - - - - - tel - <_description>India - Telugu - - tel - - - - - - urd-phonetic - <_description>India - Urdu, Phonetic - - urd - - - - - - urd-phonetic3 - <_description>India - Urdu, Alternative phonetic - - urd - - - - - - urd-winkeys - <_description>India - Urdu, Winkeys - - urd - - - - - - bolnagri - <_description>India - Hindi Bolnagri - - hin - - - - - - hin-wx - <_description>India - Hindi Wx - - hin - - - - - - eng - <_description>India - English with RupeeSign - - eng - - - - - - - - ba - <_shortDescription>Bih - <_description>Bosnia and Herzegovina - - bos - - - - - - alternatequotes - <_description>Bosnia and Herzegovina - Use guillemets for quotes - - - - - unicode - <_description>Bosnia and Herzegovina - Use Bosnian digraphs - - - - - unicodeus - <_description>Bosnia and Herzegovina - US keyboard with Bosnian digraphs - - - - - us - <_description>Bosnia and Herzegovina - US keyboard with Bosnian letters - - - - - - - br - <_shortDescription>Bra - <_description>Brazil - - por - - - - - - nodeadkeys - <_description>Brazil - Eliminate dead keys - - - - - dvorak - <_description>Brazil - Dvorak - - - - - nativo - <_description>Brazil - Nativo - - - - - nativo-us - <_description>Brazil - Nativo for USA keyboards - - - - - nativo-epo - <_description>Brazil - Nativo for Esperanto - - epo - - - - - - - - bg - <_shortDescription>Bgr - <_description>Bulgaria - - bul - - - - - - phonetic - <_description>Bulgaria - Traditional phonetic - - - - - bas_phonetic - <_description>Bulgaria - New phonetic - - - - - - - ma - <_description>Morocco - - - - - french - <_description>Morocco - French - - fra - - - - - - tifinagh - <_description>Morocco - Tifinagh - - ber - - - - - - tifinagh-alt - <_description>Morocco - Tifinagh alternative - - ber - - - - - - tifinagh-alt-phonetic - <_description>Morocco - Tifinagh alternative phonetic - - ber - - - - - - tifinagh-extended - <_description>Morocco - Tifinagh extended - - ber - - - - - - tifinagh-phonetic - <_description>Morocco - Tifinagh phonetic - - ber - - - - - - tifinagh-extended-phonetic - <_description>Morocco - Tifinagh extended phonetic - - ber - - - - - - - - mm - <_shortDescription>Mmr - <_description>Myanmar - - mya - - - - - - - ca - <_shortDescription>Can - <_description>Canada - - fra - - - - - - fr-dvorak - <_description>Canada - French Dvorak - - - - - fr-legacy - <_description>Canada - French (legacy) - - - - - multix - <_description>Canada - Multilingual - - - - - multi - <_description>Canada - Multilingual, first part - - - - - multi-2gr - <_description>Canada - Multilingual, second part - - - - - ike - <_description>Canada - Inuktitut - - iku - - - - - - shs - <_description>Canada - Secwepemctsin - - - - - kut - <_description>Canada - Ktunaxa - - - - - eng - <_description>Canada - English - - eng - - - - - - - - cd - <_shortDescription>COD - <_description>Congo, Democratic Republic of the - - fra - - - - - - - cn - <_shortDescription>Chn - <_description>China - - chi - - - - - - tib - <_description>China - Tibetan - - tib - - - - - - tib_asciinum - <_description>China - Tibetan (with ASCII numerals) - - tib - - - - - - uig - <_description>China - Uyghur - - uig - - - - - - - - hr - <_shortDescription>Hrv - <_description>Croatia - - scr - - - - - - alternatequotes - <_description>Croatia - Use guillemets for quotes - - - - - unicode - <_description>Croatia - Use Croatian digraphs - - - - - unicodeus - <_description>Croatia - US keyboard with Croatian digraphs - - - - - us - <_description>Croatia - US keyboard with Croatian letters - - - - - - - cz - <_shortDescription>Cze - <_description>Czechia - - cze - - - - - - bksl - <_description>Czechia - With <\|> key - - - - - qwerty - <_description>Czechia - qwerty - - - - - qwerty_bksl - <_description>Czechia - qwerty, extended Backslash - - - - - ucw - <_description>Czechia - UCW layout (accented letters only) - - - - - dvorak-ucw - <_description>Czechia - US Dvorak with CZ UCW support - - - - - - - dk - <_shortDescription>Dnk - <_description>Denmark - - dan - - - - - - nodeadkeys - <_description>Denmark - Eliminate dead keys - - - - - mac - <_description>Denmark - Macintosh - - - - - mac_nodeadkeys - <_description>Denmark - Macintosh, eliminate dead keys - - - - - dvorak - <_description>Denmark - Dvorak - - - - - - - nl - <_shortDescription>Nld - <_description>Netherlands - - nld - - - - - - sundeadkeys - <_description>Netherlands - Sun dead keys - - - - - mac - <_description>Netherlands - Macintosh - - - - - std - <_description>Netherlands - Standard - - - - - - - bt - <_shortDescription>Btn - <_description>Bhutan - - dzo - - - - - - ee - <_shortDescription>Est - <_description>Estonia - - est - - - - - - nodeadkeys - <_description>Estonia - Eliminate dead keys - - - - - dvorak - <_description>Estonia - Dvorak - - - - - us - <_description>Estonia - US keyboard with Estonian letters - - - - - - - ir - <_shortDescription>Irn - <_description>Iran - - per - - - - - - pes_keypad - <_description>Iran - Persian, with Persian Keypad - - - - - ku - <_description>Iran - Kurdish, Latin Q - - kur - - - - - - ku_f - <_description>Iran - Kurdish, (F) - - kur - - - - - - ku_alt - <_description>Iran - Kurdish, Latin Alt-Q - - kur - - - - - - ku_ara - <_description>Iran - Kurdish, Arabic-Latin - - kur - - - - - - - - iq - <_shortDescription>Irq - <_description>Iraq - ara - kur - - - - - ku - <_description>Iraq - Kurdish, Latin Q - - kur - - - - - - ku_f - <_description>Iraq - Kurdish, (F) - - kur - - - - - - ku_alt - <_description>Iraq - Kurdish, Latin Alt-Q - - kur - - - - - - ku_ara - <_description>Iraq - Kurdish, Arabic-Latin - - kur - - - - - - - - fo - <_shortDescription>Fro - <_description>Faroe Islands - - fao - - - - - - nodeadkeys - <_description>Faroe Islands - Eliminate dead keys - - - - - - - fi - <_shortDescription>Fin - <_description>Finland - - fin - - - - - - classic - <_description>Finland - Classic - - - - - nodeadkeys - <_description>Finland - Classic, eliminate dead keys - - - - - smi - <_description>Finland - Northern Saami - smi - sme - - - - - mac - <_description>Finland - Macintosh - - - - - - - fr - <_shortDescription>Fra - <_description>France - - fra - - - - - - nodeadkeys - <_description>France - Eliminate dead keys - - - - - sundeadkeys - <_description>France - Sun dead keys - - - - - oss - <_description>France - Alternative - - - - - oss_latin9 - <_description>France - Alternative, latin-9 only - - - - - oss_nodeadkeys - <_description>France - Alternative, eliminate dead keys - - - - - oss_sundeadkeys - <_description>France - Alternative, Sun dead keys - - - - - latin9 - <_description>France - (Legacy) Alternative - - - - - latin9_nodeadkeys - <_description>France - (Legacy) Alternative, eliminate dead keys - - - - - latin9_sundeadkeys - <_description>France - (Legacy) Alternative, Sun dead keys - - - - - bepo - <_description>France - Bepo, ergonomic, Dvorak way - - - - - bepo_latin9 - <_description>France - Bepo, ergonomic, Dvorak way, latin-9 only - - - - - dvorak - <_description>France - Dvorak - - - - - mac - <_description>France - Macintosh - - - - - bre - <_description>France - Breton - - - - - oci - <_description>France - Occitan - - oci - - - - - - geo - <_description>France - Georgian AZERTY Tskapo - - geo - - - - - - - - gh - <_shortDescription>Gha - <_description>Ghana - - eng - - - - - - generic - <_description>Ghana - Multilingual - - - - - akan - <_description>Ghana - Akan - - aka - - - - - - ewe - <_description>Ghana - Ewe - - ewe - - - - - - fula - <_description>Ghana - Fula - - ful - - - - - - ga - <_description>Ghana - Ga - - gaa - - - - - - hausa - <_description>Ghana - Hausa - - hau - - - - - - avn - <_description>Ghana - Avatime - - avn - - - - - - gillbt - <_description>Ghana - GILLBT - - - - - - - gn - <_shortDescription>Gin - <_description>Guinea - - fra - - - - - - - ge - <_shortDescription>Geo - <_description>Georgia - - geo - - - - - - ergonomic - <_description>Georgia - Ergonomic - - - - - mess - <_description>Georgia - MESS - - - - - ru - <_description>Georgia - Russian - - rus - - - - - - os - <_description>Georgia - Ossetian - - oss - - - - - - - - de - <_shortDescription>Deu - <_description>Germany - - ger - - - - - - deadacute - <_description>Germany - Dead acute - - - - - deadgraveacute - <_description>Germany - Dead grave acute - - - - - nodeadkeys - <_description>Germany - Eliminate dead keys - - - - - ro - <_description>Germany - Romanian keyboard with German letters - - - - - ro_nodeadkeys - <_description>Germany - Romanian keyboard with German letters, eliminate dead keys - - - - - dvorak - <_description>Germany - Dvorak - - - - - sundeadkeys - <_description>Germany - Sun dead keys - - - - - neo - <_description>Germany - Neo 2 - - - - - mac - <_description>Germany - Macintosh - - - - - mac_nodeadkeys - <_description>Germany - Macintosh, eliminate dead keys - - - - - dsb - <_description>Germany - Lower Sorbian - - dsb - - - - - - dsb_qwertz - <_description>Germany - Lower Sorbian (qwertz) - - dsb - - - - - - qwerty - <_description>Germany - qwerty - - - - - ru - <_description>Germany - Russian phonetic - - rus - - - - - - - - gr - <_shortDescription>Grc - <_description>Greece - - gre - - - - - - simple - <_description>Greece - Simple - - - - - extended - <_description>Greece - Extended - - - - - nodeadkeys - <_description>Greece - Eliminate dead keys - - - - - polytonic - <_description>Greece - Polytonic - - - - - - - hu - <_shortDescription>Hun - <_description>Hungary - - hun - - - - - - standard - <_description>Hungary - Standard - - - - - nodeadkeys - <_description>Hungary - Eliminate dead keys - - - - - qwerty - <_description>Hungary - qwerty - - - - - 101_qwertz_comma_dead - <_description>Hungary - 101/qwertz/comma/Dead keys - - - - - 101_qwertz_comma_nodead - <_description>Hungary - 101/qwertz/comma/Eliminate dead keys - - - - - 101_qwertz_dot_dead - <_description>Hungary - 101/qwertz/dot/Dead keys - - - - - 101_qwertz_dot_nodead - <_description>Hungary - 101/qwertz/dot/Eliminate dead keys - - - - - 101_qwerty_comma_dead - <_description>Hungary - 101/qwerty/comma/Dead keys - - - - - 101_qwerty_comma_nodead - <_description>Hungary - 101/qwerty/comma/Eliminate dead keys - - - - - 101_qwerty_dot_dead - <_description>Hungary - 101/qwerty/dot/Dead keys - - - - - 101_qwerty_dot_nodead - <_description>Hungary - 101/qwerty/dot/Eliminate dead keys - - - - - 102_qwertz_comma_dead - <_description>Hungary - 102/qwertz/comma/Dead keys - - - - - 102_qwertz_comma_nodead - <_description>Hungary - 102/qwertz/comma/Eliminate dead keys - - - - - 102_qwertz_dot_dead - <_description>Hungary - 102/qwertz/dot/Dead keys - - - - - 102_qwertz_dot_nodead - <_description>Hungary - 102/qwertz/dot/Eliminate dead keys - - - - - 102_qwerty_comma_dead - <_description>Hungary - 102/qwerty/comma/Dead keys - - - - - 102_qwerty_comma_nodead - <_description>Hungary - 102/qwerty/comma/Eliminate dead keys - - - - - 102_qwerty_dot_dead - <_description>Hungary - 102/qwerty/dot/Dead keys - - - - - 102_qwerty_dot_nodead - <_description>Hungary - 102/qwerty/dot/Eliminate dead keys - - - - - - - is - <_shortDescription>Isl - <_description>Iceland - - ice - - - - - - Sundeadkeys - <_description>Iceland - Sun dead keys - - - - - nodeadkeys - <_description>Iceland - Eliminate dead keys - - - - - mac - <_description>Iceland - Macintosh - - - - - dvorak - <_description>Iceland - Dvorak - - - - - - - il - <_shortDescription>Isr - <_description>Israel - - heb - - - - - - lyx - <_description>Israel - lyx - - - - - phonetic - <_description>Israel - Phonetic - - - - - biblical - <_description>Israel - Biblical Hebrew (Tiro) - - - - - - - it - <_shortDescription>Ita - <_description>Italy - - ita - - - - - - nodeadkeys - <_description>Italy - Eliminate dead keys - - - - - mac - <_description>Italy - Macintosh - - - - - us - <_description>Italy - US keyboard with Italian letters - - - - - geo - <_description>Italy - Georgian - - geo - - - - - - - - jp - <_shortDescription>Jpn - <_description>Japan - - jpn - - - - - - kana - <_description>Japan - Kana - - - - - kana86 - <_description>Japan - Kana 86 - - - - - OADG109A - <_description>Japan - OADG 109A - - - - - mac - <_description>Japan - Macintosh - - - - - - - kg - <_shortDescription>Kgz - <_description>Kyrgyzstan - - kir - - - - - - phonetic - <_description>Kyrgyzstan - Phonetic - - - - - - - kh - <_shortDescription>Khm - <_description>Cambodia - - khm - - - - - - - kz - <_shortDescription>Kaz - <_description>Kazakhstan - - kaz - - - - - - ruskaz - <_description>Kazakhstan - Russian with Kazakh - kaz - rus - - - - - kazrus - <_description>Kazakhstan - Kazakh with Russian - kaz - rus - - - - - - - la - <_shortDescription>Lao - <_description>Laos - - lao - - - - - - stea - <_description>Laos - STEA (proposed standard layout) - lao - - - - - - - - latam - <_shortDescription>Esp - <_description>Latin American - - AR - BO - CL - CO - CR - CU - DO - EC - GT - HN - HT - MX - NI - PA - PE - PR - PY - SV - US - UY - VE - - - spa - - - - - - nodeadkeys - <_description>Latin American - Eliminate dead keys - - - - - deadtilde - <_description>Latin American - Include dead tilde - - - - - sundeadkeys - <_description>Latin American - Sun dead keys - - - - - - - lt - <_shortDescription>Ltu - <_description>Lithuania - - lit - - - - - - std - <_description>Lithuania - Standard - - - - - us - <_description>Lithuania - US keyboard with Lithuanian letters - - - - - ibm - <_description>Lithuania - IBM (LST 1205-92) - - - - - lekp - <_description>Lithuania - LEKP - - - - - lekpa - <_description>Lithuania - LEKPa - - - - - - - lv - <_shortDescription>Lva - <_description>Latvia - - lav - - - - - - apostrophe - <_description>Latvia - Apostrophe (') variant - - - - - tilde - <_description>Latvia - Tilde (~) variant - - - - - fkey - <_description>Latvia - F-letter (F) variant - - - - - - - mao - <_shortDescription>Mao - <_description>Maori - - mao - - - - - - - me - <_shortDescription>MNE - <_description>Montenegro - - srp - - - - - - cyrillic - <_description>Montenegro - Cyrillic - - - - - cyrillicyz - <_description>Montenegro - Cyrillic, Z and ZHE swapped - - - - - latinunicode - <_description>Montenegro - Latin unicode - - - - - latinyz - <_description>Montenegro - Latin qwerty - - - - - latinunicodeyz - <_description>Montenegro - Latin unicode qwerty - - - - - cyrillicalternatequotes - <_description>Montenegro - Cyrillic with guillemets - - - - - latinalternatequotes - <_description>Montenegro - Latin with guillemets - - - - - - - mk - <_shortDescription>Mkd - <_description>Macedonia - - mkd - - - - - - nodeadkeys - <_description>Macedonia - Eliminate dead keys - - - - - - - mt - <_shortDescription>Mlt - <_description>Malta - - mlt - - - - - - us - <_description>Malta - Maltese keyboard with US layout - - - - - - - mn - <_shortDescription>Mng - <_description>Mongolia - - mng - - - - - - - no - <_shortDescription>Nor - <_description>Norway - - nor - - - - - - nodeadkeys - <_description>Norway - Eliminate dead keys - - - - - dvorak - <_description>Norway - Dvorak - - - - - smi - <_description>Norway - Northern Saami - - sme - - - - - - smi_nodeadkeys - <_description>Norway - Northern Saami, eliminate dead keys - - sme - - - - - - mac - <_description>Norway - Macintosh - - - - - mac_nodeadkeys - <_description>Norway - Macintosh, eliminate dead keys - - - - - - - pl - <_shortDescription>Pol - <_description>Poland - - pol - - - - - - qwertz - <_description>Poland - qwertz - - - - - dvorak - <_description>Poland - Dvorak - - - - - dvorak_quotes - <_description>Poland - Dvorak, Polish quotes on quotemark key - - - - - dvorak_altquotes - <_description>Poland - Dvorak, Polish quotes on key 1 - - - - - csb - <_description>Poland - Kashubian - - csb - - - - - - ru_phonetic_dvorak - <_description>Poland - Russian phonetic Dvorak - - rus - - - - - - dvp - <_description>Poland - Programmer Dvorak - - - - - - - pt - <_shortDescription>Prt - <_description>Portugal - - por - - - - - - nodeadkeys - <_description>Portugal - Eliminate dead keys - - - - - sundeadkeys - <_description>Portugal - Sun dead keys - - - - - mac - <_description>Portugal - Macintosh - - - - - mac_nodeadkeys - <_description>Portugal - Macintosh, eliminate dead keys - - - - - mac_sundeadkeys - <_description>Portugal - Macintosh, Sun dead keys - - - - - nativo - <_description>Portugal - Nativo - - - - - nativo-us - <_description>Portugal - Nativo for USA keyboards - - - - - nativo-epo - <_description>Portugal - Nativo for Esperanto - - epo - - - - - - - - ro - <_shortDescription>Rou - <_description>Romania - - rum - - - - - - cedilla - <_description>Romania - Cedilla - - - - - std - <_description>Romania - Standard - - - - - std_cedilla - <_description>Romania - Standard (Cedilla) - - - - - winkeys - <_description>Romania - Winkeys - - - - - crh_f - <_description>Romania - Crimean Tatar (Turkish F) - - crh - - - - - - crh_alt - <_description>Romania - Crimean Tatar (Turkish Alt-Q) - - crh - - - - - - crh_dobruca1 - <_description>Romania - Crimean Tatar (Dobruca-1 Q) - - crh - - - - - - crh_dobruca2 - <_description>Romania - Crimean Tatar (Dobruca-2 Q) - - crh - - - - - - - - ru - <_shortDescription>Rus - <_description>Russia - - rus - - - - - - phonetic - <_description>Russia - Phonetic - - - - - phonetic_winkeys - <_description>Russia - Phonetic Winkeys - - - - - typewriter - <_description>Russia - Typewriter - - - - - legacy - <_description>Russia - Legacy - - - - - typewriter-legacy - <_description>Russia - Typewriter, legacy - - - - - tt - <_description>Russia - Tatar - - tat - - - - - - os_legacy - <_description>Russia - Ossetian, legacy - - oss - - - - - - os_winkeys - <_description>Russia - Ossetian, Winkeys - - oss - - - - - - cv - <_description>Russia - Chuvash - - chv - - - - - - cv_latin - <_description>Russia - Chuvash Latin - - chv - - - - - - udm - <_description>Russia - Udmurt - - udm - - - - - - kom - <_description>Russia - Komi - - kom - - - - - - sah - <_description>Russia - Yakut - - sah - - - - - - xal - <_description>Russia - Kalmyk - - xal - - - - - - dos - <_description>Russia - DOS - - - - - srp - <_description>Russia - Serbian - rus - srp - - - - - bak - <_description>Russia - Bashkirian - - bak - - - - - - chm - <_description>Russia - Mari - - chm - - - - - - - - rs - <_shortDescription>SRB - <_description>Serbia - - srp - - - - - - yz - <_description>Serbia - Z and ZHE swapped - - - - - latin - <_description>Serbia - Latin - - - - - latinunicode - <_description>Serbia - Latin Unicode - - - - - latinyz - <_description>Serbia - Latin qwerty - - - - - latinunicodeyz - <_description>Serbia - Latin Unicode qwerty - - - - - alternatequotes - <_description>Serbia - With guillemets - - - - - latinalternatequotes - <_description>Serbia - Latin with guillemets - - - - - rue - <_description>Serbia - Pannonian Rusyn Homophonic - - rue - - - - - - - - si - <_shortDescription>Svn - <_description>Slovenia - - slv - - - - - - alternatequotes - <_description>Slovenia - Use guillemets for quotes - - - - - us - <_description>Slovenia - US keyboard with Slovenian letters - - - - - - - sk - <_shortDescription>Svk - <_description>Slovakia - - slo - - - - - - bksl - <_description>Slovakia - Extended Backslash - - - - - qwerty - <_description>Slovakia - qwerty - - - - - qwerty_bksl - <_description>Slovakia - qwerty, extended Backslash - - - - - - - es - <_shortDescription>Esp - <_description>Spain - - spa - - - - - - nodeadkeys - <_description>Spain - Eliminate dead keys - - - - - deadtilde - <_description>Spain - Include dead tilde - - - - - sundeadkeys - <_description>Spain - Sun dead keys - - - - - dvorak - <_description>Spain - Dvorak - - - - - ast - <_description>Spain - Asturian variant with bottom-dot H and bottom-dot L - - ast - - - - - - cat - <_description>Spain - Catalan variant with middle-dot L - - cat - - - - - - mac - <_description>Spain - Macintosh - - - - - - - se - <_shortDescription>Swe - <_description>Sweden - - swe - - - - - - nodeadkeys - <_description>Sweden - Eliminate dead keys - - - - - dvorak - <_description>Sweden - Dvorak - - - - - rus - <_description>Sweden - Russian phonetic - - rus - - - - - - rus_nodeadkeys - <_description>Sweden - Russian phonetic, eliminate dead keys - - rus - - - - - - smi - <_description>Sweden - Northern Saami - - sme - - - - - - mac - <_description>Sweden - Macintosh - - - - - svdvorak - <_description>Sweden - Svdvorak - - - - - - - ch - <_shortDescription>Che - <_description>Switzerland - ger - gsw - - - - - legacy - <_description>Switzerland - Legacy - - - - - de_nodeadkeys - <_description>Switzerland - German, eliminate dead keys - - - - - de_sundeadkeys - <_description>Switzerland - German, Sun dead keys - - - - - fr - <_description>Switzerland - French - - fra - - - - - - fr_nodeadkeys - <_description>Switzerland - French, eliminate dead keys - - fra - - - - - - fr_sundeadkeys - <_description>Switzerland - French, Sun dead keys - - fra - - - - - - fr_mac - <_description>Switzerland - French (Macintosh) - - fra - - - - - - de_mac - <_description>Switzerland - German (Macintosh) - - - - - - - sy - <_shortDescription>Syr - <_description>Syria - - syr - - - - - - syc - <_description>Syria - Syriac - - - - - syc_phonetic - <_description>Syria - Syriac phonetic - - - - - ku - <_description>Syria - Kurdish, Latin Q - - kur - - - - - - ku_f - <_description>Syria - Kurdish, (F) - - kur - - - - - - ku_alt - <_description>Syria - Kurdish, Latin Alt-Q - - kur - - - - - - - - tj - <_shortDescription>Tjk - <_description>Tajikistan - - tgk - - - - - - legacy - <_description>Tajikistan - Legacy - - - - - - - lk - <_shortDescription>Lka - <_description>Sri Lanka - - sin - - - - - - tam_unicode - <_description>Sri Lanka - Tamil Unicode - - tam - - - - - - tam_TAB - <_description>Sri Lanka - Tamil TAB Typewriter - - tam - - - - - - - - th - <_shortDescription>Tha - <_description>Thailand - - tha - - - - - - tis - <_description>Thailand - TIS-820.2538 - - - - - pat - <_description>Thailand - Pattachote - - - - - - - tr - <_shortDescription>Tur - <_description>Turkey - - tur - - - - - - f - <_description>Turkey - (F) - - - - - alt - <_description>Turkey - Alt-Q - - - - - sundeadkeys - <_description>Turkey - Sun dead keys - - - - - ku - <_description>Turkey - Kurdish, Latin Q - - kur - - - - - - ku_f - <_description>Turkey - Kurdish, (F) - - kur - - - - - - ku_alt - <_description>Turkey - Kurdish, Latin Alt-Q - - kur - - - - - - intl - <_description>Turkey - International (with dead keys) - - - - - crh - <_description>Turkey - Crimean Tatar (Turkish Q) - - crh - - - - - - crh_f - <_description>Turkey - Crimean Tatar (Turkish F) - - crh - - - - - - crh_alt - <_description>Turkey - Crimean Tatar (Turkish Alt-Q) - - crh - - - - - - - - tw - <_shortDescription>Twn - <_description>Taiwan - - trv - - - - - - indigenous - <_description>Taiwan - Indigenous - - ami - tay - bnn - ckv - pwn - pyu - dru - ais - ssf - tao - tsu - - - - - - saisiyat - <_description>Taiwan - Saisiyat - - xsf - - - - - - - - ua - <_shortDescription>Ukr - <_description>Ukraine - - ukr - - - - - - phonetic - <_description>Ukraine - Phonetic - - - - - typewriter - <_description>Ukraine - Typewriter - - - - - winkeys - <_description>Ukraine - Winkeys - - - - - legacy - <_description>Ukraine - Legacy - - - - - rstu - <_description>Ukraine - Standard RSTU - - - - - rstu_ru - <_description>Ukraine - Standard RSTU on Russian layout - - - - - homophonic - <_description>Ukraine - Homophonic - - - - - crh - <_description>Ukraine - Crimean Tatar (Turkish Q) - - crh - - - - - - crh_f - <_description>Ukraine - Crimean Tatar (Turkish F) - - crh - - - - - - crh_alt - <_description>Ukraine - Crimean Tatar (Turkish Alt-Q) - - crh - - - - - - - - gb - <_shortDescription>GBr - <_description>United Kingdom - - eng - - - - - - extd - <_description>United Kingdom - Extended - Winkeys - - - - - intl - <_description>United Kingdom - International (with dead keys) - - - - - dvorak - <_description>United Kingdom - Dvorak - - - - - dvorakukp - <_description>United Kingdom - Dvorak (UK Punctuation) - - - - - mac - <_description>United Kingdom - Macintosh - - - - - mac_intl - <_description>United Kingdom - Macintosh (International) - - - - - colemak - <_description>United Kingdom - Colemak - - - - - - - uz - <_shortDescription>Uzb - <_description>Uzbekistan - - uzb - - - - - - latin - <_description>Uzbekistan - Latin - - - - - crh - <_description>Uzbekistan - Crimean Tatar (Turkish Q) - - crh - - - - - - crh_f - <_description>Uzbekistan - Crimean Tatar (Turkish F) - - crh - - - - - - crh_alt - <_description>Uzbekistan - Crimean Tatar (Turkish Alt-Q) - - crh - - - - - - - - vn - <_shortDescription>Vnm - <_description>Vietnam - - vie - - - - - - - kr - <_shortDescription>Kor - <_description>Korea, Republic of - - kor - - - - - - kr104 - <_description>Korea, Republic of - 101/104 key Compatible - - - - - - - nec_vndr/jp - <_shortDescription>Jpn - <_description>Japan (PC-98xx Series) - - JP - - - jpn - - - - - - - ie - <_shortDescription>Irl - <_description>Ireland - - eng - - - - - - CloGaelach - <_description>Ireland - CloGaelach - - gla - - - - - - UnicodeExpert - <_description>Ireland - UnicodeExpert - - - - - ogam - <_description>Ireland - Ogham - - - - - ogam_is434 - <_description>Ireland - Ogham IS434 - - - - - - - pk - <_shortDescription>Pak - <_description>Pakistan - - urd - - - - - - urd-crulp - <_description>Pakistan - CRULP - - urd - - - - - - urd-nla - <_description>Pakistan - NLA - - urd - - - - - - ara - <_description>Pakistan - Arabic - - ara - - - - - - snd - <_description>Pakistan - Sindhi - - sd - - - - - - - - mv - <_shortDescription>Mdv - <_description>Maldives - - div - - - - - - - za - <_shortDescription>Zaf - <_description>South Africa - - eng - - - - - - epo - <_shortDescription>Epo - <_description>Esperanto - - epo - - - - - - legacy - <_description>Esperanto - displaced semicolon and quote (obsolete) - - - - - - - np - <_shortDescription>Npl - <_description>Nepal - - nep - - - - - - ng - <_shortDescription>Nga - <_description>Nigeria - - eng - - - - - - igbo - <_description>Nigeria - Igbo - - ibo - - - - - - yoruba - <_description>Nigeria - Yoruba - - yor - - - - - - hausa - <_description>Nigeria - Hausa - - hau - - - - - - - - et - <_shortDescription>Eth - <_description>Ethiopia - - amh - - - - - - - sn - <_shortDescription>Sen - <_description>Senegal - - wol - - - - - - - brai - <_shortDescription>Brl - <_description>Braille - - - - - left_hand - <_description>Braille - Left hand - - - - - right_hand - <_description>Braille - Right hand - - - - - - - tm - <_shortDescription>Tkm - <_description>Turkmenistan - - tuk - - - - - - alt - <_description>Turkmenistan - Alt-Q - - - - - - - ml - <_shortDescription>Mli - <_description>Mali - - bam - - - - - - fr-oss - <_description>Mali - Français (France Alternative) - - - - - us-mac - <_description>Mali - English (USA Macintosh) - - - - - us-intl - <_description>Mali - English (USA International) - - - - - - - tz - <_shortDescription>Tza - <_description>Tanzania - - swa - - - - - - ke - <_shortDescription>Ken - <_description>Kenya - - swa - - - - - - kik - <_description>Kenya - Kikuyu - - kik - - - - - - - - bw - <_shortDescription>Bwa - <_description>Botswana - - tsn - - - - - - ph - <_shortDescription>Phi - <_description>Philippines - eng - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - qwerty-bay - <_description>Philippines - QWERTY (Baybayin) - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - capewell-dvorak - <_description>Philippines - Capewell-Dvorak (Latin) - - - - - capewell-dvorak-bay - <_description>Philippines - Capewell-Dvorak (Baybayin) - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - capewell-qwerf2k6 - <_description>Philippines - Capewell-QWERF 2006 (Latin) - - - - - capewell-qwerf2k6-bay - <_description>Philippines - Capewell-QWERF 2006 (Baybayin) - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - colemak - <_description>Philippines - Colemak (Latin) - - - - - colemak-bay - <_description>Philippines - Colemak (Baybayin) - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - dvorak - <_description>Philippines - Dvorak (Latin) - - - - - dvorak-bay - <_description>Philippines - Dvorak (Baybayin) - bik - ceb - fil - hil - ilo - pam - pag - phi - tgl - war - - - - - - - - - - grp - <_description>Key(s) to change layout - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lv3 - <_description>Key to choose 3rd level - - - - - - - - - - - - - - - - - - - - - - - ctrl - <_description>Ctrl key position - - - - - - - - - - - - - grp_led - <_description>Use keyboard LED to show alternative layout - - - - - - - - - keypad - <_description>Numeric keypad layout selection - - - - - - - - - - - - - - - kpdl - <_description>Numeric keypad delete key behaviour - - - - - - - - - - - - - - caps - <_description>Caps Lock key behavior - - - - - - - - - - - - - - - - - - - - altwin - <_description>Alt/Win key behavior - - - - - - - - - - - - - - - Compose key - <_description>Compose key position - - - - - - - - - - - - - - - - compat - <_description>Miscellaneous compatibility options - - - - - - - - - - - - - - - - - - eurosign - <_description>Adding currency signs to certain keys - - - - - - - - - - lv5 - <_description>Key to choose 5th level - - - - - - - - - - - - - - - - nbsp - <_description>Using space key to input non-breakable space character - - - - - - - - - - - - - - - - - - - - - japan - <_description>Japanese keyboard options - - - - - - - esperanto - <_description>Adding Esperanto circumflexes (supersigno) - - - - - - - terminate - <_description>Key sequence to kill the X server - - - - - + + + + + + + pc101 + <_description>Generic 101-key PC + Generic + + + + + pc102 + <_description>Generic 102-key (Intl) PC + Generic + + + + + pc104 + <_description>Generic 104-key PC + Generic + + + + + pc105 + <_description>Generic 105-key (Intl) PC + Generic + + + + + dell101 + <_description>Dell 101-key PC + Dell + + + + + latitude + <_description>Dell Latitude series laptop + Dell + + + + + dellm65 + <_description>Dell Precision M65 + Dell + + + + + everex + <_description>Everex STEPnote + Everex + + + + + flexpro + <_description>Keytronic FlexPro + Keytronic + + + + + microsoft + <_description>Microsoft Natural + Microsoft Inc. + + + + + omnikey101 + <_description>Northgate OmniKey 101 + Northgate + + + + + winbook + <_description>Winbook Model XP5 + Generic + + + + + pc98 + <_description>PC-98xx Series + Generic + + + + + a4techKB21 + <_description>A4Tech KB-21 + A4Tech + + + + + a4techKBS8 + <_description>A4Tech KBS-8 + A4Tech + + + + + a4_rfkb23 + <_description>A4Tech Wireless Desktop RFKB-23 + A4Tech + + + + + airkey + <_description>Acer AirKey V + Acer + + + + + azonaRF2300 + <_description>Azona RF2300 wireless Internet Keyboard + Azona + + + + + scorpius + <_description>Advance Scorpius KI + Scorpius + + + + + brother + <_description>Brother Internet Keyboard + Brother + + + + + btc5113rf + <_description>BTC 5113RF Multimedia + BTC + + + + + btc5126t + <_description>BTC 5126T + BTC + + + + + btc6301urf + <_description>BTC 6301URF + BTC + + + + + btc9000 + <_description>BTC 9000 + BTC + + + + + btc9000a + <_description>BTC 9000A + BTC + + + + + btc9001ah + <_description>BTC 9001AH + BTC + + + + + btc5090 + <_description>BTC 5090 + BTC + + + + + btc9019u + <_description>BTC 9019U + BTC + + + + + btc9116u + <_description>BTC 9116U Mini Wireless Internet and Gaming + + + + + cherryblue + <_description>Cherry Blue Line CyBo@rd + + + + + cherryblueb + <_description>Cherry CyMotion Master XPress + Cherry + + + + + cherrybluea + <_description>Cherry Blue Line CyBo@rd (alternate option) + Cherry + + + + + cherrycyboard + <_description>Cherry CyBo@rd USB-Hub + Cherry + + + + + cherrycmexpert + <_description>Cherry CyMotion Expert + Cherry + + + + + cherrybunlim + <_description>Cherry B.UNLIMITED + Cherry + + + + + chicony + <_description>Chicony Internet Keyboard + Chicony + + + + + chicony0108 + <_description>Chicony KU-0108 + Chicony + + + + + chicony0420 + <_description>Chicony KU-0420 + Chicony + + + + + chicony9885 + <_description>Chicony KB-9885 + Chicony + + + + + compaqeak8 + <_description>Compaq Easy Access Keyboard + Compaq + + + + + compaqik7 + <_description>Compaq Internet Keyboard (7 keys) + Compaq + + + + + compaqik13 + <_description>Compaq Internet Keyboard (13 keys) + Compaq + + + + + compaqik18 + <_description>Compaq Internet Keyboard (18 keys) + Compaq + + + + + cymotionlinux + <_description>Cherry CyMotion Master Linux + Cherry + + + + + armada + <_description>Laptop/notebook Compaq (eg. Armada) Laptop Keyboard + Compaq + + + + + presario + <_description>Laptop/notebook Compaq (eg. Presario) Internet Keyboard + Compaq + + + + + ipaq + <_description>Compaq iPaq Keyboard + Compaq + + + + + dell + <_description>Dell + Dell + + + + + dellsk8125 + <_description>Dell SK-8125 + Dell + + + + + dellsk8135 + <_description>Dell SK-8135 + Dell + + + + + dellusbmm + <_description>Dell USB Multimedia Keyboard + Dell + + + + + inspiron + <_description>Dell Laptop/notebook Inspiron 6xxx/8xxx + Dell + + + + + precision_m + <_description>Dell Laptop/notebook Precision M series + Dell + + + + + dexxa + <_description>Dexxa Wireless Desktop Keyboard + Dexxa + + + + + diamond + <_description>Diamond 9801 / 9802 series + Diamond + + + + + dtk2000 + <_description>DTK2000 + + + + + ennyah_dkb1008 + <_description>Ennyah DKB-1008 + Ennyah + + + + + fscaa1667g + <_description>Fujitsu-Siemens Computers AMILO laptop + Fujitsu-Siemens + + + + + genius + <_description>Genius Comfy KB-16M / Genius MM Keyboard KWD-910 + Genius + + + + + geniuscomfy + <_description>Genius Comfy KB-12e + Genius + + + + + geniuscomfy2 + <_description>Genius Comfy KB-21e-Scroll + Genius + + + + + geniuskb19e + <_description>Genius KB-19e NB + Genius + + + + + geniuskkb2050hs + <_description>Genius KKB-2050HS + Genius + + + + + gyration + <_description>Gyration + Gyration + + + + + htcdream + <_description>HTC Dream + HTC + + + + + kinesis + <_description>Kinesis + Kinesis + + + + + logitech_base + <_description>Logitech Generic Keyboard + Logitech + + + + + logitech_g15 + <_description>Logitech G15 extra keys via G15daemon + Logitech + + + + + hpi6 + <_description>Hewlett-Packard Internet Keyboard + Hewlett-Packard + + + + + hp250x + <_description>Hewlett-Packard SK-250x Multimedia Keyboard + Hewlett-Packard + + + + + hpxe3gc + <_description>Hewlett-Packard Omnibook XE3 GC + Hewlett-Packard + + + + + hpxe3gf + <_description>Hewlett-Packard Omnibook XE3 GF + Hewlett-Packard + + + + + hpxt1000 + <_description>Hewlett-Packard Omnibook XT1000 + Hewlett-Packard + + + + + hpdv5 + <_description>Hewlett-Packard Pavilion dv5 + Hewlett-Packard + + + + + hpzt11xx + <_description>Hewlett-Packard Pavilion ZT11xx + Hewlett-Packard + + + + + hp500fa + <_description>Hewlett-Packard Omnibook 500 FA + Hewlett-Packard + + + + + hp5xx + <_description>Hewlett-Packard Omnibook 5xx + Hewlett-Packard + + + + + hpnx9020 + <_description>Hewlett-Packard nx9020 + Hewlett-Packard + + + + + hp6000 + <_description>Hewlett-Packard Omnibook 6000/6100 + Hewlett-Packard + + + + + honeywell_euroboard + <_description>Honeywell Euroboard + Hewlett-Packard + + + + + hpmini110 + <_description>Hewlett-Packard Mini 110 Notebook + Hewlett-Packard + + + + + rapidaccess + <_description>IBM Rapid Access + Lenovo (previously IBM) + + + + + rapidaccess2 + <_description>IBM Rapid Access II + Lenovo (previously IBM) + + + + + thinkpad + <_description>IBM ThinkPad 560Z/600/600E/A22E + Lenovo (previously IBM) + + + + + thinkpad60 + <_description>IBM ThinkPad R60/T60/R61/T61 + Lenovo (previously IBM) + + + + + thinkpadz60 + <_description>IBM ThinkPad Z60m/Z60t/Z61m/Z61t + Lenovo (previously IBM) + + + + + ibm_spacesaver + <_description>IBM Space Saver + Lenovo (previously IBM) + + + + + logiaccess + <_description>Logitech Access Keyboard + Logitech + + + + + logiclx300 + <_description>Logitech Cordless Desktop LX-300 + Logitech + + + + + logii350 + <_description>Logitech Internet 350 Keyboard + Logitech + + + + + logimel + <_description>Logitech Media Elite Keyboard + Logitech + + + + + logicd + <_description>Logitech Cordless Desktop + Logitech + + + + + logicd_it + <_description>Logitech Cordless Desktop iTouch + Logitech + + + + + logicd_nav + <_description>Logitech Cordless Desktop Navigator + Logitech + + + + + logicd_opt + <_description>Logitech Cordless Desktop Optical + Logitech + + + + + logicda + <_description>Logitech Cordless Desktop (alternate option) + Logitech + + + + + logicdpa2 + <_description>Logitech Cordless Desktop Pro (alternate option 2) + Logitech + + + + + logicfn + <_description>Logitech Cordless Freedom/Desktop Navigator + Logitech + + + + + logicdn + <_description>Logitech Cordless Desktop Navigator + Logitech + + + + + logiitc + <_description>Logitech iTouch Cordless Keyboard (model Y-RB6) + Logitech + + + + + logiik + <_description>Logitech Internet Keyboard + Logitech + + + + + itouch + <_description>Logitech iTouch + Logitech + + + + + logicink + <_description>Logitech Internet Navigator Keyboard + Logitech + + + + + logiex110 + <_description>Logitech Cordless Desktop EX110 + Logitech + + + + + logiinkse + <_description>Logitech iTouch Internet Navigator Keyboard SE + Logitech + + + + + logiinkseusb + <_description>Logitech iTouch Internet Navigator Keyboard SE (USB) + Logitech + + + + + logiultrax + <_description>Logitech Ultra-X Keyboard + Logitech + + + + + logiultraxc + <_description>Logitech Ultra-X Cordless Media Desktop Keyboard + Logitech + + + + + logidinovo + <_description>Logitech diNovo Keyboard + Logitech + + + + + logidinovoedge + <_description>Logitech diNovo Edge Keyboard + Logitech + + + + + mx1998 + <_description>Memorex MX1998 + Memorex + + + + + mx2500 + <_description>Memorex MX2500 EZ-Access Keyboard + Memorex + + + + + mx2750 + <_description>Memorex MX2750 + Memorex + + + + + microsoft4000 + <_description>Microsoft Natural Wireless Ergonomic Keyboard 4000 + Microsoft Inc. + + + + + microsoft7000 + <_description>Microsoft Natural Wireless Ergonomic Keyboard 7000 + Microsoft Inc. + + + + + microsoftinet + <_description>Microsoft Internet Keyboard + Microsoft Inc. + + + + + microsoftpro + <_description>Microsoft Natural Keyboard Pro / Microsoft Internet Keyboard Pro + Microsoft Inc. + + + + + microsoftprousb + <_description>Microsoft Natural Keyboard Pro USB / Microsoft Internet Keyboard Pro + Microsoft Inc. + + + + + microsoftprooem + <_description>Microsoft Natural Keyboard Pro OEM + Microsoft Inc. + + + + + vsonku306 + <_description>ViewSonic KU-306 Internet Keyboard + ViewSonic + + + + + microsoftprose + <_description>Microsoft Internet Keyboard Pro, Swedish + Microsoft Inc. + + + + + microsoftoffice + <_description>Microsoft Office Keyboard + Microsoft Inc. + + + + + microsoftmult + <_description>Microsoft Wireless Multimedia Keyboard 1.0A + Microsoft Inc. + + + + + microsoftelite + <_description>Microsoft Natural Keyboard Elite + Microsoft Inc. + + + + + microsoftccurve2k + <_description>Microsoft Comfort Curve Keyboard 2000 + Microsoft Inc. + + + + + oretec + <_description>Ortek MCK-800 MM/Internet keyboard + Ortek + + + + + propeller + <_description>Propeller Voyager (KTEZ-1000) + KeyTronic + + + + + qtronix + <_description>QTronix Scorpius 98N+ + QTronix + + + + + samsung4500 + <_description>Samsung SDM 4500P + Samsung + + + + + samsung4510 + <_description>Samsung SDM 4510P + Samsung + + + + + sanwaskbkg3 + <_description>Sanwa Supply SKB-KG3 + Sanwa Supply Inc. + + + + + sk1300 + <_description>SK-1300 + NEC + + + + + sk2500 + <_description>SK-2500 + NEC + + + + + sk6200 + <_description>SK-6200 + NEC + + + + + sk7100 + <_description>SK-7100 + NEC + + + + + sp_inet + <_description>Super Power Multimedia Keyboard + Generic + + + + + sven + <_description>SVEN Ergonomic 2500 + SVEN + + + + + sven303 + <_description>SVEN Slim 303 + SVEN + + + + + symplon + <_description>Symplon PaceBook (tablet PC) + Symplon + + + + + toshiba_s3000 + <_description>Toshiba Satellite S3000 + Toshiba + + + + + trust + <_description>Trust Wireless Keyboard Classic + Trust + + + + + trustda + <_description>Trust Direct Access Keyboard + Trust + + + + + trust_slimline + <_description>Trust Slimline + Trust + + + + + tm2020 + <_description>TypeMatrix EZ-Reach 2020 + TypeMatrix + + + + + tm2030PS2 + <_description>TypeMatrix EZ-Reach 2030 PS2 + TypeMatrix + + + + + tm2030USB + <_description>TypeMatrix EZ-Reach 2030 USB + TypeMatrix + + + + + tm2030USB-102 + <_description>TypeMatrix EZ-Reach 2030 USB (102/105:EU mode) + TypeMatrix + + + + + tm2030USB-106 + <_description>TypeMatrix EZ-Reach 2030 USB (106:JP mode) + TypeMatrix + + + + + yahoo + <_description>Yahoo! Internet Keyboard + Yahoo! + + + + + macbook78 + <_description>MacBook/MacBook Pro + Apple + + + + + macbook79 + <_description>MacBook/MacBook Pro (Intl) + Apple + + + + + macintosh + <_description>Macintosh + Apple + + + + + macintosh_old + <_description>Macintosh Old + Apple + + + + + macintosh_hhk + <_description>Happy Hacking Keyboard for Mac + Fujitsu + + + + + acer_c300 + <_description>Acer C300 + Acer + + + + + acer_ferrari4k + <_description>Acer Ferrari 4000 + Acer + + + + + acer_laptop + <_description>Acer Laptop + Acer + + + + + asus_laptop + <_description>Asus Laptop + Asus + + + + + apple + <_description>Apple + Apple + + + + + apple_laptop + <_description>Apple Laptop + Apple + + + + + applealu_ansi + <_description>Apple Aluminium Keyboard (ANSI) + Apple + + + + + applealu_iso + <_description>Apple Aluminium Keyboard (ISO) + Apple + + + + + applealu_jis + <_description>Apple Aluminium Keyboard (JIS) + Apple + + + + + silvercrest + <_description>SILVERCREST Multimedia Wireless Keyboard + Silvercrest + + + + + emachines + <_description>Laptop/notebook eMachines m68xx + eMachines + + + + + benqx + <_description>BenQ X-Touch + BenQ + + + + + benqx730 + <_description>BenQ X-Touch 730 + BenQ + + + + + benqx800 + <_description>BenQ X-Touch 800 + BenQ + + + + + hhk + <_description>Happy Hacking Keyboard + Fujitsu + + + + + classmate + <_description>Classmate PC + Intel + + + + + olpc + <_description>OLPC + OLPC + + + + + sun6 + <_description>Sun Type 5/6 + Sun Microsystems + + + + + targa_v811 + <_description>Targa Visionary 811 + Targa + + + + + unitekkb1925 + <_description>Unitek KB-1925 + Unitek Group + + + + + compalfl90 + <_description>FL90 + Compal Electronics Inc. + + + + + creativedw7000 + <_description>Creative Desktop Wireless 7000 + Creative + + + + + htcdream + <_description>Htc Dream phone + htc + + + + + + + us + <_shortDescription>USA + <_description>USA + + eng + + + + + + chr + <_description>USA - Cherokee + + chr + + + + + + euro + <_description>USA - With EuroSign on 5 + + + + + intl + <_description>USA - International (with dead keys) + + + + + alt-intl + <_description>USA - Alternative international + + + + + colemak + <_description>USA - Colemak + + + + + dvorak + <_description>USA - Dvorak + + + + + dvorak-intl + <_description>USA - Dvorak international (with dead keys) + + + + + dvorak-alt-intl + <_description>USA - Dvorak alternative international (no dead keys) + + + + + dvorak-l + <_description>USA - Left handed Dvorak + + + + + dvorak-r + <_description>USA - Right handed Dvorak + + + + + dvorak-classic + <_description>USA - Classic Dvorak + + + + + dvp + <_description>USA - Programmer Dvorak + + + + + rus + <_description>USA - Russian phonetic + + rus + + + + + + mac + <_description>USA - Macintosh + + + + + altgr-intl + <_description>USA - International (AltGr dead keys) + eng + fra + ger + + + + + olpc2 + <_description>USA - Layout toggle on multiply/divide key + + + + + hbs + <_description>USA - Serbo-Croatian + eng + bos + hbs + hrv + srp + + + + + + + ad + <_shortDescription>And + <_description>Andorra + + cat + + + + + + + af + <_shortDescription>Afg + <_description>Afghanistan + + + + + ps + <_description>Afghanistan - Pashto + + pus + + + + + + uz + <_description>Afghanistan - Southern Uzbek + + uzb + + + + + + olpc-ps + <_description>Afghanistan - OLPC Pashto + + pus + + + + + + fa-olpc + <_description>Afghanistan - OLPC Dari + + + + + uz-olpc + <_description>Afghanistan - OLPC Southern Uzbek + + uzb + + + + + + + + ara + <_shortDescription>Ara + <_description>Arabic + + AE + BH + DZ + EG + EH + JO + KW + LB + LY + MA + MR + OM + PS + QA + SA + SD + SY + TN + YE + + + ara + + + + + + azerty + <_description>Arabic - azerty + + + + + azerty_digits + <_description>Arabic - azerty/digits + + + + + digits + <_description>Arabic - digits + + + + + qwerty + <_description>Arabic - qwerty + + + + + qwerty_digits + <_description>Arabic - qwerty/digits + + + + + buckwalter + <_description>Arabic - Buckwalter + + + + + + + al + <_shortDescription>Alb + <_description>Albania + + alb + + + + + + + am + <_shortDescription>Arm + <_description>Armenia + + hye + + + + + + phonetic + <_description>Armenia - Phonetic + + + + + phonetic-alt + <_description>Armenia - Alternative Phonetic + + + + + eastern + <_description>Armenia - Eastern + + + + + western + <_description>Armenia - Western + + + + + eastern-alt + <_description>Armenia - Alternative Eastern + + + + + + + at + <_shortDescription>Aut + <_description>Austria + + ger + + + + + + nodeadkeys + <_description>Austria - Eliminate dead keys + + + + + sundeadkeys + <_description>Austria - Sun dead keys + + + + + mac + <_description>Austria - Macintosh + + + + + + + az + <_shortDescription>Aze + <_description>Azerbaijan + + aze + + + + + + cyrillic + <_description>Azerbaijan - Cyrillic + + + + + + + by + <_shortDescription>Blr + <_description>Belarus + + bel + + + + + + legacy + <_description>Belarus - Legacy + + + + + latin + <_description>Belarus - Latin + + + + + + + be + <_shortDescription>Bel + <_description>Belgium + ger + nld + fra + + + + + oss + <_description>Belgium - Alternative + + + + + oss_latin9 + <_description>Belgium - Alternative, latin-9 only + + + + + oss_sundeadkeys + <_description>Belgium - Alternative, Sun dead keys + + + + + iso-alternate + <_description>Belgium - ISO Alternate + + + + + nodeadkeys + <_description>Belgium - Eliminate dead keys + + + + + sundeadkeys + <_description>Belgium - Sun dead keys + + + + + wang + <_description>Belgium - Wang model 724 azerty + + + + + + + bd + <_shortDescription>Bgd + <_description>Bangladesh + + ben + + + + + + probhat + <_description>Bangladesh - Probhat + + + + + + + in + <_shortDescription>Ind + <_description>India + + + + + ben + <_description>India - Bengali + + ben + + + + + + ben_probhat + <_description>India - Bengali Probhat + + ben + + + + + + guj + <_description>India - Gujarati + + guj + + + + + + guru + <_description>India - Gurmukhi + + pan + + + + + + jhelum + <_description>India - Gurmukhi Jhelum + + pan + + + + + + kan + <_description>India - Kannada + + kan + + + + + + mal + <_description>India - Malayalam + + mal + + + + + + mal_lalitha + <_description>India - Malayalam Lalitha + + mal + + + + + + mal_enhanced + <_description>India - Malayalam enhanced INSCRIPT with Rupee Sign + mal + + + + + ori + <_description>India - Oriya + + ori + + + + + + tam_unicode + <_description>India - Tamil Unicode + + tam + + + + + + tam_keyboard_with_numerals + <_description>India - Tamil Keyboard with Numerals + + tam + + + + + + tam_TAB + <_description>India - Tamil TAB Typewriter + + tam + + + + + + tam_TSCII + <_description>India - Tamil TSCII Typewriter + + tam + + + + + + tam + <_description>India - Tamil + + tam + + + + + + tel + <_description>India - Telugu + + tel + + + + + + urd-phonetic + <_description>India - Urdu, Phonetic + + urd + + + + + + urd-phonetic3 + <_description>India - Urdu, Alternative phonetic + + urd + + + + + + urd-winkeys + <_description>India - Urdu, Winkeys + + urd + + + + + + bolnagri + <_description>India - Hindi Bolnagri + + hin + + + + + + hin-wx + <_description>India - Hindi Wx + + hin + + + + + + eng + <_description>India - English with RupeeSign + + eng + + + + + + + + ba + <_shortDescription>Bih + <_description>Bosnia and Herzegovina + + bos + + + + + + alternatequotes + <_description>Bosnia and Herzegovina - Use guillemets for quotes + + + + + unicode + <_description>Bosnia and Herzegovina - Use Bosnian digraphs + + + + + unicodeus + <_description>Bosnia and Herzegovina - US keyboard with Bosnian digraphs + + + + + us + <_description>Bosnia and Herzegovina - US keyboard with Bosnian letters + + + + + + + br + <_shortDescription>Bra + <_description>Brazil + + por + + + + + + nodeadkeys + <_description>Brazil - Eliminate dead keys + + + + + dvorak + <_description>Brazil - Dvorak + + + + + nativo + <_description>Brazil - Nativo + + + + + nativo-us + <_description>Brazil - Nativo for USA keyboards + + + + + nativo-epo + <_description>Brazil - Nativo for Esperanto + + epo + + + + + + + + bg + <_shortDescription>Bgr + <_description>Bulgaria + + bul + + + + + + phonetic + <_description>Bulgaria - Traditional phonetic + + + + + bas_phonetic + <_description>Bulgaria - New phonetic + + + + + + + ma + <_description>Morocco + + + + + french + <_description>Morocco - French + + fra + + + + + + tifinagh + <_description>Morocco - Tifinagh + + ber + + + + + + tifinagh-alt + <_description>Morocco - Tifinagh alternative + + ber + + + + + + tifinagh-alt-phonetic + <_description>Morocco - Tifinagh alternative phonetic + + ber + + + + + + tifinagh-extended + <_description>Morocco - Tifinagh extended + + ber + + + + + + tifinagh-phonetic + <_description>Morocco - Tifinagh phonetic + + ber + + + + + + tifinagh-extended-phonetic + <_description>Morocco - Tifinagh extended phonetic + + ber + + + + + + + + mm + <_shortDescription>Mmr + <_description>Myanmar + + mya + + + + + + + ca + <_shortDescription>Can + <_description>Canada + + fra + + + + + + fr-dvorak + <_description>Canada - French Dvorak + + + + + fr-legacy + <_description>Canada - French (legacy) + + + + + multix + <_description>Canada - Multilingual + + + + + multi + <_description>Canada - Multilingual, first part + + + + + multi-2gr + <_description>Canada - Multilingual, second part + + + + + ike + <_description>Canada - Inuktitut + + iku + + + + + + shs + <_description>Canada - Secwepemctsin + + + + + kut + <_description>Canada - Ktunaxa + + + + + eng + <_description>Canada - English + + eng + + + + + + + + cd + <_shortDescription>COD + <_description>Congo, Democratic Republic of the + + fra + + + + + + + cn + <_shortDescription>Chn + <_description>China + + chi + + + + + + tib + <_description>China - Tibetan + + tib + + + + + + tib_asciinum + <_description>China - Tibetan (with ASCII numerals) + + tib + + + + + + uig + <_description>China - Uyghur + + uig + + + + + + + + hr + <_shortDescription>Hrv + <_description>Croatia + + scr + + + + + + alternatequotes + <_description>Croatia - Use guillemets for quotes + + + + + unicode + <_description>Croatia - Use Croatian digraphs + + + + + unicodeus + <_description>Croatia - US keyboard with Croatian digraphs + + + + + us + <_description>Croatia - US keyboard with Croatian letters + + + + + + + cz + <_shortDescription>Cze + <_description>Czechia + + cze + + + + + + bksl + <_description>Czechia - With <\|> key + + + + + qwerty + <_description>Czechia - qwerty + + + + + qwerty_bksl + <_description>Czechia - qwerty, extended Backslash + + + + + ucw + <_description>Czechia - UCW layout (accented letters only) + + + + + dvorak-ucw + <_description>Czechia - US Dvorak with CZ UCW support + + + + + + + dk + <_shortDescription>Dnk + <_description>Denmark + + dan + + + + + + nodeadkeys + <_description>Denmark - Eliminate dead keys + + + + + mac + <_description>Denmark - Macintosh + + + + + mac_nodeadkeys + <_description>Denmark - Macintosh, eliminate dead keys + + + + + dvorak + <_description>Denmark - Dvorak + + + + + + + nl + <_shortDescription>Nld + <_description>Netherlands + + nld + + + + + + sundeadkeys + <_description>Netherlands - Sun dead keys + + + + + mac + <_description>Netherlands - Macintosh + + + + + std + <_description>Netherlands - Standard + + + + + + + bt + <_shortDescription>Btn + <_description>Bhutan + + dzo + + + + + + ee + <_shortDescription>Est + <_description>Estonia + + est + + + + + + nodeadkeys + <_description>Estonia - Eliminate dead keys + + + + + dvorak + <_description>Estonia - Dvorak + + + + + us + <_description>Estonia - US keyboard with Estonian letters + + + + + + + ir + <_shortDescription>Irn + <_description>Iran + + per + + + + + + pes_keypad + <_description>Iran - Persian, with Persian Keypad + + + + + ku + <_description>Iran - Kurdish, Latin Q + + kur + + + + + + ku_f + <_description>Iran - Kurdish, (F) + + kur + + + + + + ku_alt + <_description>Iran - Kurdish, Latin Alt-Q + + kur + + + + + + ku_ara + <_description>Iran - Kurdish, Arabic-Latin + + kur + + + + + + + + iq + <_shortDescription>Irq + <_description>Iraq + ara + kur + + + + + ku + <_description>Iraq - Kurdish, Latin Q + + kur + + + + + + ku_f + <_description>Iraq - Kurdish, (F) + + kur + + + + + + ku_alt + <_description>Iraq - Kurdish, Latin Alt-Q + + kur + + + + + + ku_ara + <_description>Iraq - Kurdish, Arabic-Latin + + kur + + + + + + + + fo + <_shortDescription>Fro + <_description>Faroe Islands + + fao + + + + + + nodeadkeys + <_description>Faroe Islands - Eliminate dead keys + + + + + + + fi + <_shortDescription>Fin + <_description>Finland + + fin + + + + + + classic + <_description>Finland - Classic + + + + + nodeadkeys + <_description>Finland - Classic, eliminate dead keys + + + + + smi + <_description>Finland - Northern Saami + smi + sme + + + + + mac + <_description>Finland - Macintosh + + + + + + + fr + <_shortDescription>Fra + <_description>France + + fra + + + + + + nodeadkeys + <_description>France - Eliminate dead keys + + + + + sundeadkeys + <_description>France - Sun dead keys + + + + + oss + <_description>France - Alternative + + + + + oss_latin9 + <_description>France - Alternative, latin-9 only + + + + + oss_nodeadkeys + <_description>France - Alternative, eliminate dead keys + + + + + oss_sundeadkeys + <_description>France - Alternative, Sun dead keys + + + + + latin9 + <_description>France - (Legacy) Alternative + + + + + latin9_nodeadkeys + <_description>France - (Legacy) Alternative, eliminate dead keys + + + + + latin9_sundeadkeys + <_description>France - (Legacy) Alternative, Sun dead keys + + + + + bepo + <_description>France - Bepo, ergonomic, Dvorak way + + + + + bepo_latin9 + <_description>France - Bepo, ergonomic, Dvorak way, latin-9 only + + + + + dvorak + <_description>France - Dvorak + + + + + mac + <_description>France - Macintosh + + + + + bre + <_description>France - Breton + + + + + oci + <_description>France - Occitan + + oci + + + + + + geo + <_description>France - Georgian AZERTY Tskapo + + geo + + + + + + + + gh + <_shortDescription>Gha + <_description>Ghana + + eng + + + + + + generic + <_description>Ghana - Multilingual + + + + + akan + <_description>Ghana - Akan + + aka + + + + + + ewe + <_description>Ghana - Ewe + + ewe + + + + + + fula + <_description>Ghana - Fula + + ful + + + + + + ga + <_description>Ghana - Ga + + gaa + + + + + + hausa + <_description>Ghana - Hausa + + hau + + + + + + avn + <_description>Ghana - Avatime + + avn + + + + + + gillbt + <_description>Ghana - GILLBT + + + + + + + gn + <_shortDescription>Gin + <_description>Guinea + + fra + + + + + + + ge + <_shortDescription>Geo + <_description>Georgia + + geo + + + + + + ergonomic + <_description>Georgia - Ergonomic + + + + + mess + <_description>Georgia - MESS + + + + + ru + <_description>Georgia - Russian + + rus + + + + + + os + <_description>Georgia - Ossetian + + oss + + + + + + + + de + <_shortDescription>Deu + <_description>Germany + + ger + + + + + + deadacute + <_description>Germany - Dead acute + + + + + deadgraveacute + <_description>Germany - Dead grave acute + + + + + nodeadkeys + <_description>Germany - Eliminate dead keys + + + + + ro + <_description>Germany - Romanian keyboard with German letters + + + + + ro_nodeadkeys + <_description>Germany - Romanian keyboard with German letters, eliminate dead keys + + + + + dvorak + <_description>Germany - Dvorak + + + + + sundeadkeys + <_description>Germany - Sun dead keys + + + + + neo + <_description>Germany - Neo 2 + + + + + mac + <_description>Germany - Macintosh + + + + + mac_nodeadkeys + <_description>Germany - Macintosh, eliminate dead keys + + + + + dsb + <_description>Germany - Lower Sorbian + + dsb + + + + + + dsb_qwertz + <_description>Germany - Lower Sorbian (qwertz) + + dsb + + + + + + qwerty + <_description>Germany - qwerty + + + + + ru + <_description>Germany - Russian phonetic + + rus + + + + + + + + gr + <_shortDescription>Grc + <_description>Greece + + gre + + + + + + simple + <_description>Greece - Simple + + + + + extended + <_description>Greece - Extended + + + + + nodeadkeys + <_description>Greece - Eliminate dead keys + + + + + polytonic + <_description>Greece - Polytonic + + + + + + + hu + <_shortDescription>Hun + <_description>Hungary + + hun + + + + + + standard + <_description>Hungary - Standard + + + + + nodeadkeys + <_description>Hungary - Eliminate dead keys + + + + + qwerty + <_description>Hungary - qwerty + + + + + 101_qwertz_comma_dead + <_description>Hungary - 101/qwertz/comma/Dead keys + + + + + 101_qwertz_comma_nodead + <_description>Hungary - 101/qwertz/comma/Eliminate dead keys + + + + + 101_qwertz_dot_dead + <_description>Hungary - 101/qwertz/dot/Dead keys + + + + + 101_qwertz_dot_nodead + <_description>Hungary - 101/qwertz/dot/Eliminate dead keys + + + + + 101_qwerty_comma_dead + <_description>Hungary - 101/qwerty/comma/Dead keys + + + + + 101_qwerty_comma_nodead + <_description>Hungary - 101/qwerty/comma/Eliminate dead keys + + + + + 101_qwerty_dot_dead + <_description>Hungary - 101/qwerty/dot/Dead keys + + + + + 101_qwerty_dot_nodead + <_description>Hungary - 101/qwerty/dot/Eliminate dead keys + + + + + 102_qwertz_comma_dead + <_description>Hungary - 102/qwertz/comma/Dead keys + + + + + 102_qwertz_comma_nodead + <_description>Hungary - 102/qwertz/comma/Eliminate dead keys + + + + + 102_qwertz_dot_dead + <_description>Hungary - 102/qwertz/dot/Dead keys + + + + + 102_qwertz_dot_nodead + <_description>Hungary - 102/qwertz/dot/Eliminate dead keys + + + + + 102_qwerty_comma_dead + <_description>Hungary - 102/qwerty/comma/Dead keys + + + + + 102_qwerty_comma_nodead + <_description>Hungary - 102/qwerty/comma/Eliminate dead keys + + + + + 102_qwerty_dot_dead + <_description>Hungary - 102/qwerty/dot/Dead keys + + + + + 102_qwerty_dot_nodead + <_description>Hungary - 102/qwerty/dot/Eliminate dead keys + + + + + + + is + <_shortDescription>Isl + <_description>Iceland + + ice + + + + + + Sundeadkeys + <_description>Iceland - Sun dead keys + + + + + nodeadkeys + <_description>Iceland - Eliminate dead keys + + + + + mac + <_description>Iceland - Macintosh + + + + + dvorak + <_description>Iceland - Dvorak + + + + + + + il + <_shortDescription>Isr + <_description>Israel + + heb + + + + + + lyx + <_description>Israel - lyx + + + + + phonetic + <_description>Israel - Phonetic + + + + + biblical + <_description>Israel - Biblical Hebrew (Tiro) + + + + + + + it + <_shortDescription>Ita + <_description>Italy + + ita + + + + + + nodeadkeys + <_description>Italy - Eliminate dead keys + + + + + mac + <_description>Italy - Macintosh + + + + + us + <_description>Italy - US keyboard with Italian letters + + + + + geo + <_description>Italy - Georgian + + geo + + + + + + + + jp + <_shortDescription>Jpn + <_description>Japan + + jpn + + + + + + kana + <_description>Japan - Kana + + + + + kana86 + <_description>Japan - Kana 86 + + + + + OADG109A + <_description>Japan - OADG 109A + + + + + mac + <_description>Japan - Macintosh + + + + + + + kg + <_shortDescription>Kgz + <_description>Kyrgyzstan + + kir + + + + + + phonetic + <_description>Kyrgyzstan - Phonetic + + + + + + + kh + <_shortDescription>Khm + <_description>Cambodia + + khm + + + + + + + kz + <_shortDescription>Kaz + <_description>Kazakhstan + + kaz + + + + + + ruskaz + <_description>Kazakhstan - Russian with Kazakh + kaz + rus + + + + + kazrus + <_description>Kazakhstan - Kazakh with Russian + kaz + rus + + + + + + + la + <_shortDescription>Lao + <_description>Laos + + lao + + + + + + stea + <_description>Laos - STEA (proposed standard layout) + lao + + + + + + + + latam + <_shortDescription>Esp + <_description>Latin American + + AR + BO + CL + CO + CR + CU + DO + EC + GT + HN + HT + MX + NI + PA + PE + PR + PY + SV + US + UY + VE + + + spa + + + + + + nodeadkeys + <_description>Latin American - Eliminate dead keys + + + + + deadtilde + <_description>Latin American - Include dead tilde + + + + + sundeadkeys + <_description>Latin American - Sun dead keys + + + + + + + lt + <_shortDescription>Ltu + <_description>Lithuania + + lit + + + + + + std + <_description>Lithuania - Standard + + + + + us + <_description>Lithuania - US keyboard with Lithuanian letters + + + + + ibm + <_description>Lithuania - IBM (LST 1205-92) + + + + + lekp + <_description>Lithuania - LEKP + + + + + lekpa + <_description>Lithuania - LEKPa + + + + + + + lv + <_shortDescription>Lva + <_description>Latvia + + lav + + + + + + apostrophe + <_description>Latvia - Apostrophe (') variant + + + + + tilde + <_description>Latvia - Tilde (~) variant + + + + + fkey + <_description>Latvia - F-letter (F) variant + + + + + + + mao + <_shortDescription>Mao + <_description>Maori + + mao + + + + + + + me + <_shortDescription>MNE + <_description>Montenegro + + srp + + + + + + cyrillic + <_description>Montenegro - Cyrillic + + + + + cyrillicyz + <_description>Montenegro - Cyrillic, Z and ZHE swapped + + + + + latinunicode + <_description>Montenegro - Latin unicode + + + + + latinyz + <_description>Montenegro - Latin qwerty + + + + + latinunicodeyz + <_description>Montenegro - Latin unicode qwerty + + + + + cyrillicalternatequotes + <_description>Montenegro - Cyrillic with guillemets + + + + + latinalternatequotes + <_description>Montenegro - Latin with guillemets + + + + + + + mk + <_shortDescription>Mkd + <_description>Macedonia + + mkd + + + + + + nodeadkeys + <_description>Macedonia - Eliminate dead keys + + + + + + + mt + <_shortDescription>Mlt + <_description>Malta + + mlt + + + + + + us + <_description>Malta - Maltese keyboard with US layout + + + + + + + mn + <_shortDescription>Mng + <_description>Mongolia + + mng + + + + + + + no + <_shortDescription>Nor + <_description>Norway + + nor + + + + + + nodeadkeys + <_description>Norway - Eliminate dead keys + + + + + dvorak + <_description>Norway - Dvorak + + + + + smi + <_description>Norway - Northern Saami + + sme + + + + + + smi_nodeadkeys + <_description>Norway - Northern Saami, eliminate dead keys + + sme + + + + + + mac + <_description>Norway - Macintosh + + + + + mac_nodeadkeys + <_description>Norway - Macintosh, eliminate dead keys + + + + + + + pl + <_shortDescription>Pol + <_description>Poland + + pol + + + + + + qwertz + <_description>Poland - qwertz + + + + + dvorak + <_description>Poland - Dvorak + + + + + dvorak_quotes + <_description>Poland - Dvorak, Polish quotes on quotemark key + + + + + dvorak_altquotes + <_description>Poland - Dvorak, Polish quotes on key 1 + + + + + csb + <_description>Poland - Kashubian + + csb + + + + + + ru_phonetic_dvorak + <_description>Poland - Russian phonetic Dvorak + + rus + + + + + + dvp + <_description>Poland - Programmer Dvorak + + + + + + + pt + <_shortDescription>Prt + <_description>Portugal + + por + + + + + + nodeadkeys + <_description>Portugal - Eliminate dead keys + + + + + sundeadkeys + <_description>Portugal - Sun dead keys + + + + + mac + <_description>Portugal - Macintosh + + + + + mac_nodeadkeys + <_description>Portugal - Macintosh, eliminate dead keys + + + + + mac_sundeadkeys + <_description>Portugal - Macintosh, Sun dead keys + + + + + nativo + <_description>Portugal - Nativo + + + + + nativo-us + <_description>Portugal - Nativo for USA keyboards + + + + + nativo-epo + <_description>Portugal - Nativo for Esperanto + + epo + + + + + + + + ro + <_shortDescription>Rou + <_description>Romania + + rum + + + + + + cedilla + <_description>Romania - Cedilla + + + + + std + <_description>Romania - Standard + + + + + std_cedilla + <_description>Romania - Standard (Cedilla) + + + + + winkeys + <_description>Romania - Winkeys + + + + + crh_f + <_description>Romania - Crimean Tatar (Turkish F) + + crh + + + + + + crh_alt + <_description>Romania - Crimean Tatar (Turkish Alt-Q) + + crh + + + + + + crh_dobruca1 + <_description>Romania - Crimean Tatar (Dobruca-1 Q) + + crh + + + + + + crh_dobruca2 + <_description>Romania - Crimean Tatar (Dobruca-2 Q) + + crh + + + + + + + + ru + <_shortDescription>Rus + <_description>Russia + + rus + + + + + + phonetic + <_description>Russia - Phonetic + + + + + phonetic_winkeys + <_description>Russia - Phonetic Winkeys + + + + + typewriter + <_description>Russia - Typewriter + + + + + legacy + <_description>Russia - Legacy + + + + + typewriter-legacy + <_description>Russia - Typewriter, legacy + + + + + tt + <_description>Russia - Tatar + + tat + + + + + + os_legacy + <_description>Russia - Ossetian, legacy + + oss + + + + + + os_winkeys + <_description>Russia - Ossetian, Winkeys + + oss + + + + + + cv + <_description>Russia - Chuvash + + chv + + + + + + cv_latin + <_description>Russia - Chuvash Latin + + chv + + + + + + udm + <_description>Russia - Udmurt + + udm + + + + + + kom + <_description>Russia - Komi + + kom + + + + + + sah + <_description>Russia - Yakut + + sah + + + + + + xal + <_description>Russia - Kalmyk + + xal + + + + + + dos + <_description>Russia - DOS + + + + + srp + <_description>Russia - Serbian + rus + srp + + + + + bak + <_description>Russia - Bashkirian + + bak + + + + + + chm + <_description>Russia - Mari + + chm + + + + + + + + rs + <_shortDescription>SRB + <_description>Serbia + + srp + + + + + + yz + <_description>Serbia - Z and ZHE swapped + + + + + latin + <_description>Serbia - Latin + + + + + latinunicode + <_description>Serbia - Latin Unicode + + + + + latinyz + <_description>Serbia - Latin qwerty + + + + + latinunicodeyz + <_description>Serbia - Latin Unicode qwerty + + + + + alternatequotes + <_description>Serbia - With guillemets + + + + + latinalternatequotes + <_description>Serbia - Latin with guillemets + + + + + rue + <_description>Serbia - Pannonian Rusyn Homophonic + + rue + + + + + + + + si + <_shortDescription>Svn + <_description>Slovenia + + slv + + + + + + alternatequotes + <_description>Slovenia - Use guillemets for quotes + + + + + us + <_description>Slovenia - US keyboard with Slovenian letters + + + + + + + sk + <_shortDescription>Svk + <_description>Slovakia + + slo + + + + + + bksl + <_description>Slovakia - Extended Backslash + + + + + qwerty + <_description>Slovakia - qwerty + + + + + qwerty_bksl + <_description>Slovakia - qwerty, extended Backslash + + + + + + + es + <_shortDescription>Esp + <_description>Spain + + spa + + + + + + nodeadkeys + <_description>Spain - Eliminate dead keys + + + + + deadtilde + <_description>Spain - Include dead tilde + + + + + sundeadkeys + <_description>Spain - Sun dead keys + + + + + dvorak + <_description>Spain - Dvorak + + + + + ast + <_description>Spain - Asturian variant with bottom-dot H and bottom-dot L + + ast + + + + + + cat + <_description>Spain - Catalan variant with middle-dot L + + cat + + + + + + mac + <_description>Spain - Macintosh + + + + + + + se + <_shortDescription>Swe + <_description>Sweden + + swe + + + + + + nodeadkeys + <_description>Sweden - Eliminate dead keys + + + + + dvorak + <_description>Sweden - Dvorak + + + + + rus + <_description>Sweden - Russian phonetic + + rus + + + + + + rus_nodeadkeys + <_description>Sweden - Russian phonetic, eliminate dead keys + + rus + + + + + + smi + <_description>Sweden - Northern Saami + + sme + + + + + + mac + <_description>Sweden - Macintosh + + + + + svdvorak + <_description>Sweden - Svdvorak + + + + + + + ch + <_shortDescription>Che + <_description>Switzerland + ger + gsw + + + + + legacy + <_description>Switzerland - Legacy + + + + + de_nodeadkeys + <_description>Switzerland - German, eliminate dead keys + + + + + de_sundeadkeys + <_description>Switzerland - German, Sun dead keys + + + + + fr + <_description>Switzerland - French + + fra + + + + + + fr_nodeadkeys + <_description>Switzerland - French, eliminate dead keys + + fra + + + + + + fr_sundeadkeys + <_description>Switzerland - French, Sun dead keys + + fra + + + + + + fr_mac + <_description>Switzerland - French (Macintosh) + + fra + + + + + + de_mac + <_description>Switzerland - German (Macintosh) + + + + + + + sy + <_shortDescription>Syr + <_description>Syria + + syr + + + + + + syc + <_description>Syria - Syriac + + + + + syc_phonetic + <_description>Syria - Syriac phonetic + + + + + ku + <_description>Syria - Kurdish, Latin Q + + kur + + + + + + ku_f + <_description>Syria - Kurdish, (F) + + kur + + + + + + ku_alt + <_description>Syria - Kurdish, Latin Alt-Q + + kur + + + + + + + + tj + <_shortDescription>Tjk + <_description>Tajikistan + + tgk + + + + + + legacy + <_description>Tajikistan - Legacy + + + + + + + lk + <_shortDescription>Lka + <_description>Sri Lanka + + sin + + + + + + tam_unicode + <_description>Sri Lanka - Tamil Unicode + + tam + + + + + + tam_TAB + <_description>Sri Lanka - Tamil TAB Typewriter + + tam + + + + + + + + th + <_shortDescription>Tha + <_description>Thailand + + tha + + + + + + tis + <_description>Thailand - TIS-820.2538 + + + + + pat + <_description>Thailand - Pattachote + + + + + + + tr + <_shortDescription>Tur + <_description>Turkey + + tur + + + + + + f + <_description>Turkey - (F) + + + + + alt + <_description>Turkey - Alt-Q + + + + + sundeadkeys + <_description>Turkey - Sun dead keys + + + + + ku + <_description>Turkey - Kurdish, Latin Q + + kur + + + + + + ku_f + <_description>Turkey - Kurdish, (F) + + kur + + + + + + ku_alt + <_description>Turkey - Kurdish, Latin Alt-Q + + kur + + + + + + intl + <_description>Turkey - International (with dead keys) + + + + + crh + <_description>Turkey - Crimean Tatar (Turkish Q) + + crh + + + + + + crh_f + <_description>Turkey - Crimean Tatar (Turkish F) + + crh + + + + + + crh_alt + <_description>Turkey - Crimean Tatar (Turkish Alt-Q) + + crh + + + + + + + + tw + <_shortDescription>Twn + <_description>Taiwan + + trv + + + + + + indigenous + <_description>Taiwan - Indigenous + + ami + tay + bnn + ckv + pwn + pyu + dru + ais + ssf + tao + tsu + + + + + + saisiyat + <_description>Taiwan - Saisiyat + + xsf + + + + + + + + ua + <_shortDescription>Ukr + <_description>Ukraine + + ukr + + + + + + phonetic + <_description>Ukraine - Phonetic + + + + + typewriter + <_description>Ukraine - Typewriter + + + + + winkeys + <_description>Ukraine - Winkeys + + + + + legacy + <_description>Ukraine - Legacy + + + + + rstu + <_description>Ukraine - Standard RSTU + + + + + rstu_ru + <_description>Ukraine - Standard RSTU on Russian layout + + + + + homophonic + <_description>Ukraine - Homophonic + + + + + crh + <_description>Ukraine - Crimean Tatar (Turkish Q) + + crh + + + + + + crh_f + <_description>Ukraine - Crimean Tatar (Turkish F) + + crh + + + + + + crh_alt + <_description>Ukraine - Crimean Tatar (Turkish Alt-Q) + + crh + + + + + + + + gb + <_shortDescription>GBr + <_description>United Kingdom + + eng + + + + + + extd + <_description>United Kingdom - Extended - Winkeys + + + + + intl + <_description>United Kingdom - International (with dead keys) + + + + + dvorak + <_description>United Kingdom - Dvorak + + + + + dvorakukp + <_description>United Kingdom - Dvorak (UK Punctuation) + + + + + mac + <_description>United Kingdom - Macintosh + + + + + mac_intl + <_description>United Kingdom - Macintosh (International) + + + + + colemak + <_description>United Kingdom - Colemak + + + + + + + uz + <_shortDescription>Uzb + <_description>Uzbekistan + + uzb + + + + + + latin + <_description>Uzbekistan - Latin + + + + + crh + <_description>Uzbekistan - Crimean Tatar (Turkish Q) + + crh + + + + + + crh_f + <_description>Uzbekistan - Crimean Tatar (Turkish F) + + crh + + + + + + crh_alt + <_description>Uzbekistan - Crimean Tatar (Turkish Alt-Q) + + crh + + + + + + + + vn + <_shortDescription>Vnm + <_description>Vietnam + + vie + + + + + + + kr + <_shortDescription>Kor + <_description>Korea, Republic of + + kor + + + + + + kr104 + <_description>Korea, Republic of - 101/104 key Compatible + + + + + + + nec_vndr/jp + <_shortDescription>Jpn + <_description>Japan (PC-98xx Series) + + JP + + + jpn + + + + + + + ie + <_shortDescription>Irl + <_description>Ireland + + eng + + + + + + CloGaelach + <_description>Ireland - CloGaelach + + gla + + + + + + UnicodeExpert + <_description>Ireland - UnicodeExpert + + + + + ogam + <_description>Ireland - Ogham + + + + + ogam_is434 + <_description>Ireland - Ogham IS434 + + + + + + + pk + <_shortDescription>Pak + <_description>Pakistan + + urd + + + + + + urd-crulp + <_description>Pakistan - CRULP + + urd + + + + + + urd-nla + <_description>Pakistan - NLA + + urd + + + + + + ara + <_description>Pakistan - Arabic + + ara + + + + + + snd + <_description>Pakistan - Sindhi + + sd + + + + + + + + mv + <_shortDescription>Mdv + <_description>Maldives + + div + + + + + + + za + <_shortDescription>Zaf + <_description>South Africa + + eng + + + + + + epo + <_shortDescription>Epo + <_description>Esperanto + + epo + + + + + + legacy + <_description>Esperanto - displaced semicolon and quote (obsolete) + + + + + + + np + <_shortDescription>Npl + <_description>Nepal + + nep + + + + + + ng + <_shortDescription>Nga + <_description>Nigeria + + eng + + + + + + igbo + <_description>Nigeria - Igbo + + ibo + + + + + + yoruba + <_description>Nigeria - Yoruba + + yor + + + + + + hausa + <_description>Nigeria - Hausa + + hau + + + + + + + + et + <_shortDescription>Eth + <_description>Ethiopia + + amh + + + + + + + sn + <_shortDescription>Sen + <_description>Senegal + + wol + + + + + + + brai + <_shortDescription>Brl + <_description>Braille + + + + + left_hand + <_description>Braille - Left hand + + + + + right_hand + <_description>Braille - Right hand + + + + + + + tm + <_shortDescription>Tkm + <_description>Turkmenistan + + tuk + + + + + + alt + <_description>Turkmenistan - Alt-Q + + + + + + + ml + <_shortDescription>Mli + <_description>Mali + + bam + + + + + + fr-oss + <_description>Mali - Français (France Alternative) + + + + + us-mac + <_description>Mali - English (USA Macintosh) + + + + + us-intl + <_description>Mali - English (USA International) + + + + + + + tz + <_shortDescription>Tza + <_description>Tanzania + + swa + + + + + + ke + <_shortDescription>Ken + <_description>Kenya + + swa + + + + + + kik + <_description>Kenya - Kikuyu + + kik + + + + + + + + bw + <_shortDescription>Bwa + <_description>Botswana + + tsn + + + + + + ph + <_shortDescription>Phi + <_description>Philippines + eng + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + qwerty-bay + <_description>Philippines - QWERTY (Baybayin) + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + capewell-dvorak + <_description>Philippines - Capewell-Dvorak (Latin) + + + + + capewell-dvorak-bay + <_description>Philippines - Capewell-Dvorak (Baybayin) + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + capewell-qwerf2k6 + <_description>Philippines - Capewell-QWERF 2006 (Latin) + + + + + capewell-qwerf2k6-bay + <_description>Philippines - Capewell-QWERF 2006 (Baybayin) + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + colemak + <_description>Philippines - Colemak (Latin) + + + + + colemak-bay + <_description>Philippines - Colemak (Baybayin) + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + dvorak + <_description>Philippines - Dvorak (Latin) + + + + + dvorak-bay + <_description>Philippines - Dvorak (Baybayin) + bik + ceb + fil + hil + ilo + pam + pag + phi + tgl + war + + + + + + + + + + grp + <_description>Key(s) to change layout + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lv3 + <_description>Key to choose 3rd level + + + + + + + + + + + + + + + + + + + + + + + ctrl + <_description>Ctrl key position + + + + + + + + + + + + + grp_led + <_description>Use keyboard LED to show alternative layout + + + + + + + + + keypad + <_description>Numeric keypad layout selection + + + + + + + + + + + + + + + kpdl + <_description>Numeric keypad delete key behaviour + + + + + + + + + + + + + + caps + <_description>Caps Lock key behavior + + + + + + + + + + + + + + + + + + + + altwin + <_description>Alt/Win key behavior + + + + + + + + + + + + + + + Compose key + <_description>Compose key position + + + + + + + + + + + + + + + + compat + <_description>Miscellaneous compatibility options + + + + + + + + + + + + + + + + + + eurosign + <_description>Adding currency signs to certain keys + + + + + + + + + + lv5 + <_description>Key to choose 5th level + + + + + + + + + + + + + + + + nbsp + <_description>Using space key to input non-breakable space character + + + + + + + + + + + + + + + + + + + + + japan + <_description>Japanese keyboard options + + + + + + + esperanto + <_description>Adding Esperanto circumflexes (supersigno) + + + + + + + terminate + <_description>Key sequence to kill the X server + + + + + diff --git a/xorg-server/xkeyboard-config/symbols/inet b/xorg-server/xkeyboard-config/symbols/inet index 8f1151768..5fbc8a7a7 100644 --- a/xorg-server/xkeyboard-config/symbols/inet +++ b/xorg-server/xkeyboard-config/symbols/inet @@ -1266,6 +1266,19 @@ xkb_symbols "mx2750" { // Microsoft +// Microsoft Natural Wireless Ergonomic Keyboard 4000 +partial alphanumeric_keys +xkb_symbols "microsoft4000" { + include "inet(media_nav_common)" + key { [ XF86Launch1 ] }; + key { [ XF86Launch2 ] }; + key { [ XF86Launch3 ] }; + key { [ XF86Launch4 ] }; + key { [ XF86Launch5 ] }; +// Missing because of lack of support from kbd driver: Zoom in and +// slider. +}; + // Microsoft Natural Wireless Ergonomic Keyboard 7000 partial alphanumeric_keys xkb_symbols "microsoft7000" { -- cgit v1.2.3