From aaa4964a2277a1e265da4e7b91d5b1e3c3faa84e Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 17 Jan 2011 18:27:35 +0000 Subject: cvs update pthreads --- pthreads/ptw32_InterlockedCompareExchange.c | 610 ++++++++++++++-------------- 1 file changed, 307 insertions(+), 303 deletions(-) (limited to 'pthreads/ptw32_InterlockedCompareExchange.c') diff --git a/pthreads/ptw32_InterlockedCompareExchange.c b/pthreads/ptw32_InterlockedCompareExchange.c index 0094635f6..34ebfce22 100644 --- a/pthreads/ptw32_InterlockedCompareExchange.c +++ b/pthreads/ptw32_InterlockedCompareExchange.c @@ -1,303 +1,307 @@ -/* - * ptw32_InterlockedCompareExchange.c - * - * Description: - * This translation unit implements routines which are private to - * the implementation and may be used throughout it. - * - * -------------------------------------------------------------------------- - * - * Pthreads-win32 - POSIX Threads Library for Win32 - * Copyright(C) 1998 John E. Bossom - * Copyright(C) 1999,2005 Pthreads-win32 contributors - * - * Contact Email: rpj@callisto.canberra.edu.au - * - * The current list of contributors is contained - * in the file CONTRIBUTORS included with the source - * code distribution. The list can also be seen at the - * following World Wide Web location: - * http://sources.redhat.com/pthreads-win32/contributors.html - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library in the file COPYING.LIB; - * if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "pthread.h" -#include "implement.h" - - -/* - * ptw32_InterlockedCompareExchange -- - * - * Originally needed because W9x doesn't support InterlockedCompareExchange. - * We now use this version wherever possible so we can inline it. - */ - -PTW32_INTERLOCKED_LONG WINAPI -ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location, - PTW32_INTERLOCKED_LONG value, - PTW32_INTERLOCKED_LONG comparand) -{ - -#if defined(__WATCOMC__) -/* Don't report that result is not assigned a value before being referenced */ -#pragma disable_message (200) -#endif - - PTW32_INTERLOCKED_LONG result; - - /* - * Using the LOCK prefix on uni-processor machines is significantly slower - * and it is not necessary. The overhead of the conditional below is - * negligible in comparison. Since an optimised DLL will inline this - * routine, this will be faster than calling the system supplied - * Interlocked routine, which appears to avoid the LOCK prefix on - * uniprocessor systems. So one DLL works for all systems. - */ - if (ptw32_smp_system) - -/* *INDENT-OFF* */ - -#if defined(_M_IX86) || defined(_X86_) - -#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)) -#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG - { - _asm { - PUSH ecx - PUSH edx - MOV ecx,dword ptr [location] - MOV edx,dword ptr [value] - MOV eax,dword ptr [comparand] - LOCK CMPXCHG dword ptr [ecx],edx - MOV dword ptr [result], eax - POP edx - POP ecx - } - } - else - { - _asm { - PUSH ecx - PUSH edx - MOV ecx,dword ptr [location] - MOV edx,dword ptr [value] - MOV eax,dword ptr [comparand] - CMPXCHG dword ptr [ecx],edx - MOV dword ptr [result], eax - POP edx - POP ecx - } - } - -#elif defined(__GNUC__) -#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG - - { - __asm__ __volatile__ - ( - "lock\n\t" - "cmpxchgl %2,%1" /* if (EAX == [location]) */ - /* [location] = value */ - /* else */ - /* EAX = [location] */ - :"=a" (result) - :"m" (*location), "r" (value), "a" (comparand)); - } - else - { - __asm__ __volatile__ - ( - "cmpxchgl %2,%1" /* if (EAX == [location]) */ - /* [location] = value */ - /* else */ - /* EAX = [location] */ - :"=a" (result) - :"m" (*location), "r" (value), "a" (comparand)); - } - -#endif - -#else - - /* - * If execution gets to here then we're running on a currently - * unsupported processor or compiler. - */ - - result = 0; - -#endif - -/* *INDENT-ON* */ - - return result; - -#if defined(__WATCOMC__) -#pragma enable_message (200) -#endif - -} - -/* - * ptw32_InterlockedExchange -- - * - * We now use this version wherever possible so we can inline it. - */ - -LONG WINAPI -ptw32_InterlockedExchange (LPLONG location, - LONG value) -{ - -#if defined(__WATCOMC__) -/* Don't report that result is not assigned a value before being referenced */ -#pragma disable_message (200) -#endif - - LONG result; - - /* - * The XCHG instruction always locks the bus with or without the - * LOCKED prefix. This makes it significantly slower than CMPXCHG on - * uni-processor machines. The Windows InterlockedExchange function - * is nearly 3 times faster than the XCHG instruction, so this routine - * is not yet very useful for speeding up pthreads. - */ - if (ptw32_smp_system) - -/* *INDENT-OFF* */ - -#if defined(_M_IX86) || defined(_X86_) - -#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)) -#define HAVE_INLINABLE_INTERLOCKED_XCHG - - { - _asm { - PUSH ecx - MOV ecx,dword ptr [location] - MOV eax,dword ptr [value] - XCHG dword ptr [ecx],eax - MOV dword ptr [result], eax - POP ecx - } - } - else - { - /* - * Faster version of XCHG for uni-processor systems because - * it doesn't lock the bus. If an interrupt or context switch - * occurs between the MOV and the CMPXCHG then the value in - * 'location' may have changed, in which case we will loop - * back to do the MOV again. - * - * FIXME! Need memory barriers for the MOV+CMPXCHG combo? - * - * Tests show that this routine has almost identical timing - * to Win32's InterlockedExchange(), which is much faster than - * using the inlined 'xchg' instruction above, so it's probably - * doing something similar to this (on UP systems). - * - * Can we do without the PUSH/POP instructions? - */ - _asm { - PUSH ecx - PUSH edx - MOV ecx,dword ptr [location] - MOV edx,dword ptr [value] -L1: MOV eax,dword ptr [ecx] - CMPXCHG dword ptr [ecx],edx - JNZ L1 - MOV dword ptr [result], eax - POP edx - POP ecx - } - } - -#elif defined(__GNUC__) -#define HAVE_INLINABLE_INTERLOCKED_XCHG - - { - __asm__ __volatile__ - ( - "xchgl %2,%1" - :"=r" (result) - :"m" (*location), "0" (value)); - } - else - { - /* - * Faster version of XCHG for uni-processor systems because - * it doesn't lock the bus. If an interrupt or context switch - * occurs between the movl and the cmpxchgl then the value in - * 'location' may have changed, in which case we will loop - * back to do the movl again. - * - * FIXME! Need memory barriers for the MOV+CMPXCHG combo? - * - * Tests show that this routine has almost identical timing - * to Win32's InterlockedExchange(), which is much faster than - * using the an inlined 'xchg' instruction, so it's probably - * doing something similar to this (on UP systems). - */ - __asm__ __volatile__ - ( - "0:\n\t" - "movl %1,%%eax\n\t" - "cmpxchgl %2,%1\n\t" - "jnz 0b" - :"=&a" (result) - :"m" (*location), "r" (value)); - } - -#endif - -#else - - /* - * If execution gets to here then we're running on a currently - * unsupported processor or compiler. - */ - - result = 0; - -#endif - -/* *INDENT-ON* */ - - return result; - -#if defined(__WATCOMC__) -#pragma enable_message (200) -#endif - -} - - -#if 1 - -#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG) -#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE -#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange -#endif - -#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG) -#undef PTW32_INTERLOCKED_EXCHANGE -#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange -#endif - -#endif +/* + * ptw32_InterlockedCompareExchange.c + * + * Description: + * This translation unit implements routines which are private to + * the implementation and may be used throughout it. + * + * -------------------------------------------------------------------------- + * + * Pthreads-win32 - POSIX Threads Library for Win32 + * Copyright(C) 1998 John E. Bossom + * Copyright(C) 1999,2005 Pthreads-win32 contributors + * + * Contact Email: rpj@callisto.canberra.edu.au + * + * The current list of contributors is contained + * in the file CONTRIBUTORS included with the source + * code distribution. The list can also be seen at the + * following World Wide Web location: + * http://sources.redhat.com/pthreads-win32/contributors.html + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library in the file COPYING.LIB; + * if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#ifndef _WIN64 + +#include "pthread.h" +#include "implement.h" + + +/* + * ptw32_InterlockedCompareExchange -- + * + * Originally needed because W9x doesn't support InterlockedCompareExchange. + * We now use this version wherever possible so we can inline it. + */ + +PTW32_INTERLOCKED_LONG WINAPI +ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location, + PTW32_INTERLOCKED_LONG value, + PTW32_INTERLOCKED_LONG comparand) +{ + +#if defined(__WATCOMC__) +/* Don't report that result is not assigned a value before being referenced */ +#pragma disable_message (200) +#endif + + PTW32_INTERLOCKED_LONG result; + + /* + * Using the LOCK prefix on uni-processor machines is significantly slower + * and it is not necessary. The overhead of the conditional below is + * negligible in comparison. Since an optimised DLL will inline this + * routine, this will be faster than calling the system supplied + * Interlocked routine, which appears to avoid the LOCK prefix on + * uniprocessor systems. So one DLL works for all systems. + */ + if (ptw32_smp_system) + +/* *INDENT-OFF* */ + +#if defined(_M_IX86) || defined(_X86_) + +#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)) +#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG + { + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] + MOV eax,dword ptr [comparand] + LOCK CMPXCHG dword ptr [ecx],edx + MOV dword ptr [result], eax + POP edx + POP ecx + } + } + else + { + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] + MOV eax,dword ptr [comparand] + CMPXCHG dword ptr [ecx],edx + MOV dword ptr [result], eax + POP edx + POP ecx + } + } + +#elif defined(__GNUC__) +#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG + + { + __asm__ __volatile__ + ( + "lock\n\t" + "cmpxchgl %2,%1" /* if (EAX == [location]) */ + /* [location] = value */ + /* else */ + /* EAX = [location] */ + :"=a" (result) + :"m" (*location), "r" (value), "a" (comparand)); + } + else + { + __asm__ __volatile__ + ( + "cmpxchgl %2,%1" /* if (EAX == [location]) */ + /* [location] = value */ + /* else */ + /* EAX = [location] */ + :"=a" (result) + :"m" (*location), "r" (value), "a" (comparand)); + } + +#endif + +#else + + /* + * If execution gets to here then we're running on a currently + * unsupported processor or compiler. + */ + +#error Unsupported platform or compiler! + +#endif + +/* *INDENT-ON* */ + + return result; + +#if defined(__WATCOMC__) +#pragma enable_message (200) +#endif + +} + +/* + * ptw32_InterlockedExchange -- + * + * We now use this version wherever possible so we can inline it. + */ + +LONG WINAPI +ptw32_InterlockedExchange (LPLONG location, + LONG value) +{ + +#if defined(__WATCOMC__) +/* Don't report that result is not assigned a value before being referenced */ +#pragma disable_message (200) +#endif + + LONG result; + + /* + * The XCHG instruction always locks the bus with or without the + * LOCKED prefix. This makes it significantly slower than CMPXCHG on + * uni-processor machines. The Windows InterlockedExchange function + * is nearly 3 times faster than the XCHG instruction, so this routine + * is not yet very useful for speeding up pthreads. + */ + if (ptw32_smp_system) + +/* *INDENT-OFF* */ + +#if defined(_M_IX86) || defined(_X86_) + +#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)) +#define HAVE_INLINABLE_INTERLOCKED_XCHG + + { + _asm { + PUSH ecx + MOV ecx,dword ptr [location] + MOV eax,dword ptr [value] + XCHG dword ptr [ecx],eax + MOV dword ptr [result], eax + POP ecx + } + } + else + { + /* + * Faster version of XCHG for uni-processor systems because + * it doesn't lock the bus. If an interrupt or context switch + * occurs between the MOV and the CMPXCHG then the value in + * 'location' may have changed, in which case we will loop + * back to do the MOV again. + * + * FIXME! Need memory barriers for the MOV+CMPXCHG combo? + * + * Tests show that this routine has almost identical timing + * to Win32's InterlockedExchange(), which is much faster than + * using the inlined 'xchg' instruction above, so it's probably + * doing something similar to this (on UP systems). + * + * Can we do without the PUSH/POP instructions? + */ + _asm { + PUSH ecx + PUSH edx + MOV ecx,dword ptr [location] + MOV edx,dword ptr [value] +L1: MOV eax,dword ptr [ecx] + CMPXCHG dword ptr [ecx],edx + JNZ L1 + MOV dword ptr [result], eax + POP edx + POP ecx + } + } + +#elif defined(__GNUC__) +#define HAVE_INLINABLE_INTERLOCKED_XCHG + + { + __asm__ __volatile__ + ( + "xchgl %2,%1" + :"=r" (result) + :"m" (*location), "0" (value)); + } + else + { + /* + * Faster version of XCHG for uni-processor systems because + * it doesn't lock the bus. If an interrupt or context switch + * occurs between the movl and the cmpxchgl then the value in + * 'location' may have changed, in which case we will loop + * back to do the movl again. + * + * FIXME! Need memory barriers for the MOV+CMPXCHG combo? + * + * Tests show that this routine has almost identical timing + * to Win32's InterlockedExchange(), and is much faster than + * using an inlined 'xchg' instruction, so Win32 is probably + * doing something similar to this (on UP systems). + */ + __asm__ __volatile__ + ( + "0:\n\t" + "movl %1,%%eax\n\t" + "cmpxchgl %2,%1\n\t" + "jnz 0b" + :"=&a" (result) + :"m" (*location), "r" (value)); + } + +#endif + +#else + + /* + * If execution gets to here then we're running on a currently + * unsupported processor or compiler. + */ + +#error Unsupported platform or compiler! + +#endif + +/* *INDENT-ON* */ + + return result; + +#if defined(__WATCOMC__) +#pragma enable_message (200) +#endif + +} + + +#if 1 + +#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG) +#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE +#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange +#endif + +#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG) +#undef PTW32_INTERLOCKED_EXCHANGE +#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange +#endif + +#endif + +#endif -- cgit v1.2.3