aboutsummaryrefslogtreecommitdiff
path: root/pthreads/ptw32_InterlockedCompareExchange.c
diff options
context:
space:
mode:
Diffstat (limited to 'pthreads/ptw32_InterlockedCompareExchange.c')
-rw-r--r--pthreads/ptw32_InterlockedCompareExchange.c610
1 files changed, 307 insertions, 303 deletions
diff --git a/pthreads/ptw32_InterlockedCompareExchange.c b/pthreads/ptw32_InterlockedCompareExchange.c
index 0094635f6..34ebfce22 100644
--- a/pthreads/ptw32_InterlockedCompareExchange.c
+++ b/pthreads/ptw32_InterlockedCompareExchange.c
@@ -1,303 +1,307 @@
-/*
- * ptw32_InterlockedCompareExchange.c
- *
- * Description:
- * This translation unit implements routines which are private to
- * the implementation and may be used throughout it.
- *
- * --------------------------------------------------------------------------
- *
- * Pthreads-win32 - POSIX Threads Library for Win32
- * Copyright(C) 1998 John E. Bossom
- * Copyright(C) 1999,2005 Pthreads-win32 contributors
- *
- * Contact Email: rpj@callisto.canberra.edu.au
- *
- * The current list of contributors is contained
- * in the file CONTRIBUTORS included with the source
- * code distribution. The list can also be seen at the
- * following World Wide Web location:
- * http://sources.redhat.com/pthreads-win32/contributors.html
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library in the file COPYING.LIB;
- * if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-
-#include "pthread.h"
-#include "implement.h"
-
-
-/*
- * ptw32_InterlockedCompareExchange --
- *
- * Originally needed because W9x doesn't support InterlockedCompareExchange.
- * We now use this version wherever possible so we can inline it.
- */
-
-PTW32_INTERLOCKED_LONG WINAPI
-ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
- PTW32_INTERLOCKED_LONG value,
- PTW32_INTERLOCKED_LONG comparand)
-{
-
-#if defined(__WATCOMC__)
-/* Don't report that result is not assigned a value before being referenced */
-#pragma disable_message (200)
-#endif
-
- PTW32_INTERLOCKED_LONG result;
-
- /*
- * Using the LOCK prefix on uni-processor machines is significantly slower
- * and it is not necessary. The overhead of the conditional below is
- * negligible in comparison. Since an optimised DLL will inline this
- * routine, this will be faster than calling the system supplied
- * Interlocked routine, which appears to avoid the LOCK prefix on
- * uniprocessor systems. So one DLL works for all systems.
- */
- if (ptw32_smp_system)
-
-/* *INDENT-OFF* */
-
-#if defined(_M_IX86) || defined(_X86_)
-
-#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
-#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
- {
- _asm {
- PUSH ecx
- PUSH edx
- MOV ecx,dword ptr [location]
- MOV edx,dword ptr [value]
- MOV eax,dword ptr [comparand]
- LOCK CMPXCHG dword ptr [ecx],edx
- MOV dword ptr [result], eax
- POP edx
- POP ecx
- }
- }
- else
- {
- _asm {
- PUSH ecx
- PUSH edx
- MOV ecx,dword ptr [location]
- MOV edx,dword ptr [value]
- MOV eax,dword ptr [comparand]
- CMPXCHG dword ptr [ecx],edx
- MOV dword ptr [result], eax
- POP edx
- POP ecx
- }
- }
-
-#elif defined(__GNUC__)
-#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
-
- {
- __asm__ __volatile__
- (
- "lock\n\t"
- "cmpxchgl %2,%1" /* if (EAX == [location]) */
- /* [location] = value */
- /* else */
- /* EAX = [location] */
- :"=a" (result)
- :"m" (*location), "r" (value), "a" (comparand));
- }
- else
- {
- __asm__ __volatile__
- (
- "cmpxchgl %2,%1" /* if (EAX == [location]) */
- /* [location] = value */
- /* else */
- /* EAX = [location] */
- :"=a" (result)
- :"m" (*location), "r" (value), "a" (comparand));
- }
-
-#endif
-
-#else
-
- /*
- * If execution gets to here then we're running on a currently
- * unsupported processor or compiler.
- */
-
- result = 0;
-
-#endif
-
-/* *INDENT-ON* */
-
- return result;
-
-#if defined(__WATCOMC__)
-#pragma enable_message (200)
-#endif
-
-}
-
-/*
- * ptw32_InterlockedExchange --
- *
- * We now use this version wherever possible so we can inline it.
- */
-
-LONG WINAPI
-ptw32_InterlockedExchange (LPLONG location,
- LONG value)
-{
-
-#if defined(__WATCOMC__)
-/* Don't report that result is not assigned a value before being referenced */
-#pragma disable_message (200)
-#endif
-
- LONG result;
-
- /*
- * The XCHG instruction always locks the bus with or without the
- * LOCKED prefix. This makes it significantly slower than CMPXCHG on
- * uni-processor machines. The Windows InterlockedExchange function
- * is nearly 3 times faster than the XCHG instruction, so this routine
- * is not yet very useful for speeding up pthreads.
- */
- if (ptw32_smp_system)
-
-/* *INDENT-OFF* */
-
-#if defined(_M_IX86) || defined(_X86_)
-
-#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
-#define HAVE_INLINABLE_INTERLOCKED_XCHG
-
- {
- _asm {
- PUSH ecx
- MOV ecx,dword ptr [location]
- MOV eax,dword ptr [value]
- XCHG dword ptr [ecx],eax
- MOV dword ptr [result], eax
- POP ecx
- }
- }
- else
- {
- /*
- * Faster version of XCHG for uni-processor systems because
- * it doesn't lock the bus. If an interrupt or context switch
- * occurs between the MOV and the CMPXCHG then the value in
- * 'location' may have changed, in which case we will loop
- * back to do the MOV again.
- *
- * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
- *
- * Tests show that this routine has almost identical timing
- * to Win32's InterlockedExchange(), which is much faster than
- * using the inlined 'xchg' instruction above, so it's probably
- * doing something similar to this (on UP systems).
- *
- * Can we do without the PUSH/POP instructions?
- */
- _asm {
- PUSH ecx
- PUSH edx
- MOV ecx,dword ptr [location]
- MOV edx,dword ptr [value]
-L1: MOV eax,dword ptr [ecx]
- CMPXCHG dword ptr [ecx],edx
- JNZ L1
- MOV dword ptr [result], eax
- POP edx
- POP ecx
- }
- }
-
-#elif defined(__GNUC__)
-#define HAVE_INLINABLE_INTERLOCKED_XCHG
-
- {
- __asm__ __volatile__
- (
- "xchgl %2,%1"
- :"=r" (result)
- :"m" (*location), "0" (value));
- }
- else
- {
- /*
- * Faster version of XCHG for uni-processor systems because
- * it doesn't lock the bus. If an interrupt or context switch
- * occurs between the movl and the cmpxchgl then the value in
- * 'location' may have changed, in which case we will loop
- * back to do the movl again.
- *
- * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
- *
- * Tests show that this routine has almost identical timing
- * to Win32's InterlockedExchange(), which is much faster than
- * using the an inlined 'xchg' instruction, so it's probably
- * doing something similar to this (on UP systems).
- */
- __asm__ __volatile__
- (
- "0:\n\t"
- "movl %1,%%eax\n\t"
- "cmpxchgl %2,%1\n\t"
- "jnz 0b"
- :"=&a" (result)
- :"m" (*location), "r" (value));
- }
-
-#endif
-
-#else
-
- /*
- * If execution gets to here then we're running on a currently
- * unsupported processor or compiler.
- */
-
- result = 0;
-
-#endif
-
-/* *INDENT-ON* */
-
- return result;
-
-#if defined(__WATCOMC__)
-#pragma enable_message (200)
-#endif
-
-}
-
-
-#if 1
-
-#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG)
-#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
-#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
-#endif
-
-#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG)
-#undef PTW32_INTERLOCKED_EXCHANGE
-#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
-#endif
-
-#endif
+/*
+ * ptw32_InterlockedCompareExchange.c
+ *
+ * Description:
+ * This translation unit implements routines which are private to
+ * the implementation and may be used throughout it.
+ *
+ * --------------------------------------------------------------------------
+ *
+ * Pthreads-win32 - POSIX Threads Library for Win32
+ * Copyright(C) 1998 John E. Bossom
+ * Copyright(C) 1999,2005 Pthreads-win32 contributors
+ *
+ * Contact Email: rpj@callisto.canberra.edu.au
+ *
+ * The current list of contributors is contained
+ * in the file CONTRIBUTORS included with the source
+ * code distribution. The list can also be seen at the
+ * following World Wide Web location:
+ * http://sources.redhat.com/pthreads-win32/contributors.html
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library in the file COPYING.LIB;
+ * if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#ifndef _WIN64
+
+#include "pthread.h"
+#include "implement.h"
+
+
+/*
+ * ptw32_InterlockedCompareExchange --
+ *
+ * Originally needed because W9x doesn't support InterlockedCompareExchange.
+ * We now use this version wherever possible so we can inline it.
+ */
+
+PTW32_INTERLOCKED_LONG WINAPI
+ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
+ PTW32_INTERLOCKED_LONG value,
+ PTW32_INTERLOCKED_LONG comparand)
+{
+
+#if defined(__WATCOMC__)
+/* Don't report that result is not assigned a value before being referenced */
+#pragma disable_message (200)
+#endif
+
+ PTW32_INTERLOCKED_LONG result;
+
+ /*
+ * Using the LOCK prefix on uni-processor machines is significantly slower
+ * and it is not necessary. The overhead of the conditional below is
+ * negligible in comparison. Since an optimised DLL will inline this
+ * routine, this will be faster than calling the system supplied
+ * Interlocked routine, which appears to avoid the LOCK prefix on
+ * uniprocessor systems. So one DLL works for all systems.
+ */
+ if (ptw32_smp_system)
+
+/* *INDENT-OFF* */
+
+#if defined(_M_IX86) || defined(_X86_)
+
+#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
+#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
+ {
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+ MOV eax,dword ptr [comparand]
+ LOCK CMPXCHG dword ptr [ecx],edx
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
+ else
+ {
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+ MOV eax,dword ptr [comparand]
+ CMPXCHG dword ptr [ecx],edx
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
+
+#elif defined(__GNUC__)
+#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
+
+ {
+ __asm__ __volatile__
+ (
+ "lock\n\t"
+ "cmpxchgl %2,%1" /* if (EAX == [location]) */
+ /* [location] = value */
+ /* else */
+ /* EAX = [location] */
+ :"=a" (result)
+ :"m" (*location), "r" (value), "a" (comparand));
+ }
+ else
+ {
+ __asm__ __volatile__
+ (
+ "cmpxchgl %2,%1" /* if (EAX == [location]) */
+ /* [location] = value */
+ /* else */
+ /* EAX = [location] */
+ :"=a" (result)
+ :"m" (*location), "r" (value), "a" (comparand));
+ }
+
+#endif
+
+#else
+
+ /*
+ * If execution gets to here then we're running on a currently
+ * unsupported processor or compiler.
+ */
+
+#error Unsupported platform or compiler!
+
+#endif
+
+/* *INDENT-ON* */
+
+ return result;
+
+#if defined(__WATCOMC__)
+#pragma enable_message (200)
+#endif
+
+}
+
+/*
+ * ptw32_InterlockedExchange --
+ *
+ * We now use this version wherever possible so we can inline it.
+ */
+
+LONG WINAPI
+ptw32_InterlockedExchange (LPLONG location,
+ LONG value)
+{
+
+#if defined(__WATCOMC__)
+/* Don't report that result is not assigned a value before being referenced */
+#pragma disable_message (200)
+#endif
+
+ LONG result;
+
+ /*
+ * The XCHG instruction always locks the bus with or without the
+ * LOCKED prefix. This makes it significantly slower than CMPXCHG on
+ * uni-processor machines. The Windows InterlockedExchange function
+ * is nearly 3 times faster than the XCHG instruction, so this routine
+ * is not yet very useful for speeding up pthreads.
+ */
+ if (ptw32_smp_system)
+
+/* *INDENT-OFF* */
+
+#if defined(_M_IX86) || defined(_X86_)
+
+#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
+#define HAVE_INLINABLE_INTERLOCKED_XCHG
+
+ {
+ _asm {
+ PUSH ecx
+ MOV ecx,dword ptr [location]
+ MOV eax,dword ptr [value]
+ XCHG dword ptr [ecx],eax
+ MOV dword ptr [result], eax
+ POP ecx
+ }
+ }
+ else
+ {
+ /*
+ * Faster version of XCHG for uni-processor systems because
+ * it doesn't lock the bus. If an interrupt or context switch
+ * occurs between the MOV and the CMPXCHG then the value in
+ * 'location' may have changed, in which case we will loop
+ * back to do the MOV again.
+ *
+ * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
+ *
+ * Tests show that this routine has almost identical timing
+ * to Win32's InterlockedExchange(), which is much faster than
+ * using the inlined 'xchg' instruction above, so it's probably
+ * doing something similar to this (on UP systems).
+ *
+ * Can we do without the PUSH/POP instructions?
+ */
+ _asm {
+ PUSH ecx
+ PUSH edx
+ MOV ecx,dword ptr [location]
+ MOV edx,dword ptr [value]
+L1: MOV eax,dword ptr [ecx]
+ CMPXCHG dword ptr [ecx],edx
+ JNZ L1
+ MOV dword ptr [result], eax
+ POP edx
+ POP ecx
+ }
+ }
+
+#elif defined(__GNUC__)
+#define HAVE_INLINABLE_INTERLOCKED_XCHG
+
+ {
+ __asm__ __volatile__
+ (
+ "xchgl %2,%1"
+ :"=r" (result)
+ :"m" (*location), "0" (value));
+ }
+ else
+ {
+ /*
+ * Faster version of XCHG for uni-processor systems because
+ * it doesn't lock the bus. If an interrupt or context switch
+ * occurs between the movl and the cmpxchgl then the value in
+ * 'location' may have changed, in which case we will loop
+ * back to do the movl again.
+ *
+ * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
+ *
+ * Tests show that this routine has almost identical timing
+ * to Win32's InterlockedExchange(), and is much faster than
+ * using an inlined 'xchg' instruction, so Win32 is probably
+ * doing something similar to this (on UP systems).
+ */
+ __asm__ __volatile__
+ (
+ "0:\n\t"
+ "movl %1,%%eax\n\t"
+ "cmpxchgl %2,%1\n\t"
+ "jnz 0b"
+ :"=&a" (result)
+ :"m" (*location), "r" (value));
+ }
+
+#endif
+
+#else
+
+ /*
+ * If execution gets to here then we're running on a currently
+ * unsupported processor or compiler.
+ */
+
+#error Unsupported platform or compiler!
+
+#endif
+
+/* *INDENT-ON* */
+
+ return result;
+
+#if defined(__WATCOMC__)
+#pragma enable_message (200)
+#endif
+
+}
+
+
+#if 1
+
+#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG)
+#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
+#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
+#endif
+
+#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG)
+#undef PTW32_INTERLOCKED_EXCHANGE
+#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
+#endif
+
+#endif
+
+#endif