[MIPS] Cleanup memory barriers for weakly ordered systems.

Also, the R4000 / R4600 LL/SC instructions imply a sync, so no explicit sync
is needed.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 3947e5d..4d64960 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1277,6 +1277,7 @@
 	select CPU_SUPPORTS_32BIT_KERNEL
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select CPU_SUPPORTS_HIGHMEM
+	select WEAK_ORDERING
 
 config CPU_SB1
 	bool "SB1"
@@ -1285,6 +1286,7 @@
 	select CPU_SUPPORTS_32BIT_KERNEL
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select CPU_SUPPORTS_HIGHMEM
+	select WEAK_ORDERING
 
 endchoice
 
@@ -1345,6 +1347,8 @@
 config SYS_HAS_CPU_SB1
 	bool
 
+config WEAK_ORDERING
+	bool
 endmenu
 
 #
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 49db516..f2a8701 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -172,7 +172,7 @@
 
 	spin_lock(&smp_call_lock);
 	call_data = &data;
-	mb();
+	smp_mb();
 
 	/* Send a message to all other CPUs and wait for them to respond */
 	for_each_online_cpu(i)
@@ -204,7 +204,7 @@
 	 * Notify initiating CPU that I've grabbed the data and am
 	 * about to execute the function.
 	 */
-	mb();
+	smp_mb();
 	atomic_inc(&call_data->started);
 
 	/*
@@ -215,7 +215,7 @@
 	irq_exit();
 
 	if (wait) {
-		mb();
+		smp_mb();
 		atomic_inc(&call_data->finished);
 	}
 }
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 3657670..c1a2409 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -15,6 +15,7 @@
 #define _ASM_ATOMIC_H
 
 #include <linux/irqflags.h>
+#include <asm/barrier.h>
 #include <asm/cpu-features.h>
 #include <asm/war.h>
 
@@ -130,6 +131,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -140,7 +143,6 @@
 		"	sc	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -155,7 +157,6 @@
 		"	sc	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -170,6 +171,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
@@ -177,6 +180,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -187,7 +192,6 @@
 		"	sc	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -202,7 +206,6 @@
 		"	sc	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -217,6 +220,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
@@ -232,6 +237,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -245,7 +252,6 @@
 		"	beqzl	%0, 1b					\n"
 		"	 subu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
-		"	sync						\n"
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
@@ -264,7 +270,6 @@
 		"	beqz	%0, 1b					\n"
 		"	 subu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
-		"	sync						\n"
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
@@ -281,6 +286,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
@@ -484,6 +491,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -494,7 +503,6 @@
 		"	scd	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -509,7 +517,6 @@
 		"	scd	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	addu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -524,6 +531,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
@@ -531,6 +540,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -541,7 +552,6 @@
 		"	scd	%0, %2					\n"
 		"	beqzl	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -556,7 +566,6 @@
 		"	scd	%0, %2					\n"
 		"	beqz	%0, 1b					\n"
 		"	subu	%0, %1, %3				\n"
-		"	sync						\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -571,6 +580,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
@@ -586,6 +597,8 @@
 {
 	unsigned long result;
 
+	smp_mb();
+
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		unsigned long temp;
 
@@ -599,7 +612,6 @@
 		"	beqzl	%0, 1b					\n"
 		"	 dsubu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
-		"	sync						\n"
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
@@ -618,7 +630,6 @@
 		"	beqz	%0, 1b					\n"
 		"	 dsubu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
-		"	sync						\n"
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
@@ -635,6 +646,8 @@
 		local_irq_restore(flags);
 	}
 
+	smp_mb();
+
 	return result;
 }
 
diff --git a/include/asm-mips/barrier.h b/include/asm-mips/barrier.h
new file mode 100644
index 0000000..ed82631
--- /dev/null
+++ b/include/asm-mips/barrier.h
@@ -0,0 +1,132 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
+ */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+/*
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
+
+#define read_barrier_depends()		do { } while(0)
+#define smp_read_barrier_depends()	do { } while(0)
+
+#ifdef CONFIG_CPU_HAS_SYNC
+#define __sync()				\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		".set	noreorder\n\t"		\
+		".set	mips2\n\t"		\
+		"sync\n\t"			\
+		".set	pop"			\
+		: /* no output */		\
+		: /* no input */		\
+		: "memory")
+#else /* !CONFIG_CPU_HAS_SYNC */
+#define __sync()	do { } while(0)
+#endif /* !CONFIG_CPU_HAS_SYNC */
+
+#define __fast_iob()				\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		".set	noreorder\n\t"		\
+		"lw	$0,%0\n\t"		\
+		"nop\n\t"			\
+		".set	pop"			\
+		: /* no output */		\
+		: "m" (*(int *)CKSEG1)		\
+		: "memory")
+
+#define fast_wmb()	__sync()
+#define fast_rmb()	__sync()
+#define fast_mb()	__sync()
+#define fast_iob()				\
+	do {					\
+		__sync();			\
+		__fast_iob();			\
+	} while (0)
+
+#ifdef CONFIG_CPU_HAS_WB
+
+#include <asm/wbflush.h>
+
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define mb()		wbflush()
+#define iob()		wbflush()
+
+#else /* !CONFIG_CPU_HAS_WB */
+
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define mb()		fast_mb()
+#define iob()		fast_iob()
+
+#endif /* !CONFIG_CPU_HAS_WB */
+
+#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
+#define __WEAK_ORDERING_MB	"       sync	\n"	/* emit a sync instruction */
+#else /* !(CONFIG_WEAK_ORDERING && CONFIG_SMP) */
+#define __WEAK_ORDERING_MB	"		\n"	/* no instruction emitted */
+#endif /* !(CONFIG_WEAK_ORDERING && CONFIG_SMP) */
+
+#define smp_mb()	__asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
+#define smp_rmb()	__asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
+#define smp_wmb()	__asm__ __volatile__(__WEAK_ORDERING_MB : : :"memory")
+
+#define set_mb(var, value) \
+	do { var = value; smp_mb(); } while (0)
+
+#endif /* __ASM_BARRIER_H */
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index b900741..06445de 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 1994 - 1997, 1999, 2000  Ralf Baechle (ralf@gnu.org)
+ * Copyright (c) 1994 - 1997, 1999, 2000, 06  Ralf Baechle (ralf@linux-mips.org)
  * Copyright (c) 1999, 2000  Silicon Graphics, Inc.
  */
 #ifndef _ASM_BITOPS_H
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/types.h>
+#include <asm/barrier.h>
 #include <asm/bug.h>
 #include <asm/byteorder.h>		/* sigh ... */
 #include <asm/cpu-features.h>
@@ -204,9 +205,6 @@
 		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -226,9 +224,6 @@
 		"	" __SC	"%2, %1					\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -250,6 +245,8 @@
 
 		return retval;
 	}
+
+	smp_mb();
 }
 
 /*
@@ -275,9 +272,6 @@
 		"	" __SC 	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -298,9 +292,6 @@
 		"	" __SC 	"%2, %1					\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -322,6 +313,8 @@
 
 		return retval;
 	}
+
+	smp_mb();
 }
 
 /*
@@ -346,9 +339,6 @@
 		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -368,9 +358,6 @@
 		"	" __SC	"\t%2, %1				\n"
 		"	beqz	%2, 1b					\n"
 		"	 and	%2, %0, %3				\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -391,6 +378,8 @@
 
 		return retval;
 	}
+
+	smp_mb();
 }
 
 #include <asm-generic/bitops/non-atomic.h>
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index ed023ea..927a216 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -1,19 +1,21 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2006  Ralf Baechle (ralf@linux-mips.org)
+ */
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
 #ifdef __KERNEL__
 
 #include <linux/futex.h>
+#include <asm/barrier.h>
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 #include <asm/war.h>
 
-#ifdef CONFIG_SMP
-#define __FUTEX_SMP_SYNC "	sync					\n"
-#else
-#define __FUTEX_SMP_SYNC
-#endif
-
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)		\
 {									\
 	if (cpu_has_llsc && R10000_LLSC_WAR) {				\
@@ -27,7 +29,7 @@
 		"	.set	mips3				\n"	\
 		"2:	sc	$1, %2				\n"	\
 		"	beqzl	$1, 1b				\n"	\
-		__FUTEX_SMP_SYNC					\
+		__WEAK_ORDERING_MB					\
 		"3:						\n"	\
 		"	.set	pop				\n"	\
 		"	.set	mips0				\n"	\
@@ -53,7 +55,7 @@
 		"	.set	mips3				\n"	\
 		"2:	sc	$1, %2				\n"	\
 		"	beqz	$1, 1b				\n"	\
-		__FUTEX_SMP_SYNC					\
+		__WEAK_ORDERING_MB					\
 		"3:						\n"	\
 		"	.set	pop				\n"	\
 		"	.set	mips0				\n"	\
@@ -150,7 +152,7 @@
 		"	.set	mips3					\n"
 		"2:	sc	$1, %1					\n"
 		"	beqzl	$1, 1b					\n"
-		__FUTEX_SMP_SYNC
+		__WEAK_ORDERING_MB
 		"3:							\n"
 		"	.set	pop					\n"
 		"	.section .fixup,\"ax\"				\n"
@@ -177,7 +179,7 @@
 		"	.set	mips3					\n"
 		"2:	sc	$1, %1					\n"
 		"	beqz	$1, 1b					\n"
-		__FUTEX_SMP_SYNC
+		__WEAK_ORDERING_MB
 		"3:							\n"
 		"	.set	pop					\n"
 		"	.section .fixup,\"ax\"				\n"
diff --git a/include/asm-mips/spinlock.h b/include/asm-mips/spinlock.h
index c8d5587..fc3217f 100644
--- a/include/asm-mips/spinlock.h
+++ b/include/asm-mips/spinlock.h
@@ -3,12 +3,13 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000, 06 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
 #ifndef _ASM_SPINLOCK_H
 #define _ASM_SPINLOCK_H
 
+#include <asm/barrier.h>
 #include <asm/war.h>
 
 /*
@@ -40,7 +41,6 @@
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
 		"	 nop						\n"
-		"	sync						\n"
 		"	.set	reorder					\n"
 		: "=m" (lock->lock), "=&r" (tmp)
 		: "m" (lock->lock)
@@ -53,19 +53,22 @@
 		"	 li	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
 		: "=m" (lock->lock), "=&r" (tmp)
 		: "m" (lock->lock)
 		: "memory");
 	}
+
+	smp_mb();
 }
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
+	smp_mb();
+
 	__asm__ __volatile__(
 	"	.set	noreorder	# __raw_spin_unlock	\n"
-	"	sync						\n"
 	"	sw	$0, %0					\n"
 	"	.set\treorder					\n"
 	: "=m" (lock->lock)
@@ -86,7 +89,6 @@
 		"	beqzl	%2, 1b					\n"
 		"	 nop						\n"
 		"	andi	%2, %0, 1				\n"
-		"	sync						\n"
 		"	.set	reorder"
 		: "=&r" (temp), "=m" (lock->lock), "=&r" (res)
 		: "m" (lock->lock)
@@ -99,13 +101,14 @@
 		"	sc	%2, %1					\n"
 		"	beqz	%2, 1b					\n"
 		"	 andi	%2, %0, 1				\n"
-		"	sync						\n"
 		"	.set	reorder"
 		: "=&r" (temp), "=m" (lock->lock), "=&r" (res)
 		: "m" (lock->lock)
 		: "memory");
 	}
 
+	smp_mb();
+
 	return res == 0;
 }
 
@@ -143,7 +146,6 @@
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
 		"	 nop						\n"
-		"	sync						\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -156,12 +158,14 @@
 		"	 addu	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
 		: "memory");
 	}
+
+	smp_mb();
 }
 
 /* Note the use of sub, not subu which will make the kernel die with an
@@ -171,13 +175,14 @@
 {
 	unsigned int tmp;
 
+	smp_mb();
+
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
 		"1:	ll	%1, %2		# __raw_read_unlock	\n"
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
-		"	sync						\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
 		: "memory");
@@ -188,7 +193,7 @@
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -208,7 +213,7 @@
 		"	 lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -221,18 +226,22 @@
 		"	 lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
 		: "memory");
 	}
+
+	smp_mb();
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
+	smp_mb();
+
 	__asm__ __volatile__(
-	"	sync			# __raw_write_unlock	\n"
+	"				# __raw_write_unlock	\n"
 	"	sw	$0, %0					\n"
 	: "=m" (rw->lock)
 	: "m" (rw->lock)
@@ -252,11 +261,10 @@
 		"	bnez	%1, 2f					\n"
 		"	 addu	%1, 1					\n"
 		"	sc	%1, %0					\n"
-		"	beqzl	%1, 1b					\n"
 		"	.set	reorder					\n"
-#ifdef CONFIG_SMP
-		"	 sync						\n"
-#endif
+		"	beqzl	%1, 1b					\n"
+		"	 nop						\n"
+		__WEAK_ORDERING_MB
 		"	li	%2, 1					\n"
 		"2:							\n"
 		: "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
@@ -271,10 +279,9 @@
 		"	 addu	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
+		"	 nop						\n"
 		"	.set	reorder					\n"
-#ifdef CONFIG_SMP
-		"	 sync						\n"
-#endif
+		__WEAK_ORDERING_MB
 		"	li	%2, 1					\n"
 		"2:							\n"
 		: "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
@@ -299,7 +306,8 @@
 		"	 lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
+		__WEAK_ORDERING_MB
 		"	li	%2, 1					\n"
 		"	.set	reorder					\n"
 		"2:							\n"
@@ -315,7 +323,8 @@
 		"	lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
+		"	 nop						\n"
+		__WEAK_ORDERING_MB
 		"	li	%2, 1					\n"
 		"	.set	reorder					\n"
 		"2:							\n"
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 3056fee..9428057 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003 by Ralf Baechle
+ * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle
  * Copyright (C) 1996 by Paul M. Antoine
  * Copyright (C) 1999 Silicon Graphics
  * Kevin D. Kissell, kevink@mips.org and Carsten Langgaard, carstenl@mips.com
@@ -16,132 +16,12 @@
 #include <linux/irqflags.h>
 
 #include <asm/addrspace.h>
+#include <asm/barrier.h>
 #include <asm/cpu-features.h>
 #include <asm/dsp.h>
 #include <asm/ptrace.h>
 #include <asm/war.h>
 
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
-#define read_barrier_depends()	do { } while(0)
-
-#ifdef CONFIG_CPU_HAS_SYNC
-#define __sync()				\
-	__asm__ __volatile__(			\
-		".set	push\n\t"		\
-		".set	noreorder\n\t"		\
-		".set	mips2\n\t"		\
-		"sync\n\t"			\
-		".set	pop"			\
-		: /* no output */		\
-		: /* no input */		\
-		: "memory")
-#else
-#define __sync()	do { } while(0)
-#endif
-
-#define __fast_iob()				\
-	__asm__ __volatile__(			\
-		".set	push\n\t"		\
-		".set	noreorder\n\t"		\
-		"lw	$0,%0\n\t"		\
-		"nop\n\t"			\
-		".set	pop"			\
-		: /* no output */		\
-		: "m" (*(int *)CKSEG1)		\
-		: "memory")
-
-#define fast_wmb()	__sync()
-#define fast_rmb()	__sync()
-#define fast_mb()	__sync()
-#define fast_iob()				\
-	do {					\
-		__sync();			\
-		__fast_iob();			\
-	} while (0)
-
-#ifdef CONFIG_CPU_HAS_WB
-
-#include <asm/wbflush.h>
-
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
-#define mb()		wbflush()
-#define iob()		wbflush()
-
-#else /* !CONFIG_CPU_HAS_WB */
-
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
-#define mb()		fast_mb()
-#define iob()		fast_iob()
-
-#endif /* !CONFIG_CPU_HAS_WB */
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
-#else
-#define smp_mb()	barrier()
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
-#endif
-
-#define set_mb(var, value) \
-do { var = value; mb(); } while (0)
 
 /*
  * switch_to(n) should switch tasks to task nr n, first
@@ -217,9 +97,6 @@
 		"	.set	mips3					\n"
 		"	sc	%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -235,9 +112,6 @@
 		"	.set	mips3					\n"
 		"	sc	%2, %1					\n"
 		"	beqz	%2, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -251,6 +125,8 @@
 		local_irq_restore(flags);	/* implies memory barrier  */
 	}
 
+	smp_mb();
+
 	return retval;
 }
 
@@ -268,9 +144,6 @@
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -284,9 +157,6 @@
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
 		"	beqz	%2, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -300,6 +170,8 @@
 		local_irq_restore(flags);	/* implies memory barrier  */
 	}
 
+	smp_mb();
+
 	return retval;
 }
 #else
@@ -345,9 +217,6 @@
 		"	.set	mips3					\n"
 		"	sc	$1, %1					\n"
 		"	beqzl	$1, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"2:							\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
@@ -365,9 +234,6 @@
 		"	.set	mips3					\n"
 		"	sc	$1, %1					\n"
 		"	beqz	$1, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"2:							\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
@@ -383,6 +249,8 @@
 		local_irq_restore(flags);	/* implies memory barrier  */
 	}
 
+	smp_mb();
+
 	return retval;
 }
 
@@ -402,9 +270,6 @@
 		"	move	$1, %z4					\n"
 		"	scd	$1, %1					\n"
 		"	beqzl	$1, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"2:							\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
@@ -420,9 +285,6 @@
 		"	move	$1, %z4					\n"
 		"	scd	$1, %1					\n"
 		"	beqz	$1, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif
 		"2:							\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
@@ -438,6 +300,8 @@
 		local_irq_restore(flags);	/* implies memory barrier  */
 	}
 
+	smp_mb();
+
 	return retval;
 }
 #else