@@ -234,7 +234,23 @@ spin_delay(void)
234234#endif /* __x86_64__ */
235235
236236
237- #if defined(__ia64__ ) || defined(__ia64 )/* Intel Itanium */
237+ #if defined(__ia64__ ) || defined(__ia64 )
238+ /*
239+ * Intel Itanium, gcc or Intel's compiler.
240+ *
241+ * Itanium has weak memory ordering, but we rely on the compiler to enforce
242+ * strict ordering of accesses to volatile data. In particular, while the
243+ * xchg instruction implicitly acts as a memory barrier with 'acquire'
244+ * semantics, we do not have an explicit memory fence instruction in the
245+ * S_UNLOCK macro. We use a regular assignment to clear the spinlock, and
246+ * trust that the compiler marks the generated store instruction with the
247+ * ".rel" completer (i.e. emits st.rel, a store with 'release' semantics).
248+ *
249+ * Testing shows that assumption to hold on gcc, although I could not find
250+ * any explicit statement on that in the gcc manual. In Intel's compiler,
251+ * the -m[no-]serialize-volatile option controls that, and testing shows that
252+ * it is enabled by default.
253+ */
238254#define HAS_TEST_AND_SET
239255
240256typedef unsigned int slock_t ;
@@ -785,7 +801,19 @@ tas(volatile slock_t *lock)
785801
786802
787803#if defined(__hpux ) && defined(__ia64 ) && !defined(__GNUC__ )
788-
804+ /*
805+ * HP-UX on Itanium, non-gcc compiler
806+ *
807+ * We assume that the compiler enforces strict ordering of loads/stores on
808+ * volatile data (see comments on the gcc-version earlier in this file).
809+ * Note that this assumption does *not* hold if you use the
810+ * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that.
811+ *
812+ * See also "Implementing Spinlocks on the Intel Itanium Architecture and
813+ * PA-RISC", by Tor Ekqvist and David Graves, for more information. As of
814+ * this writing, version 1.0 of the manual is available at:
815+ * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
816+ */
789817#define HAS_TEST_AND_SET
790818
791819typedef unsigned int slock_t ;
0 commit comments