powerpc/fsl-corenet: work around erratum A004510

Erratum A004510 says that under certain load conditions, modified
cache lines can be discarded, causing data corruption.

To work around this, several CCSR and DCSR register updates need to be
made in a careful manner, so that there is no other transaction in
corenet when the update is made.

The update is made from a locked cacheline, with a delay before to flush
any previous activity, and a delay after to flush the CCSR/DCSR update.
We can't use a readback because that would be another corenet
transaction, which is not allowed.

We lock the subsequent cacheline to prevent it from being fetched while
we're executing the previous cacheline.  It is filled with nops so that a
branch doesn't cause us to fetch another cacheline.

Ordinarily we are running in a cache-inhibited mapping at this point, so
we temporarily change that.  We make it guarded so that we should never
see a speculative load, and we never do an explicit load.  Thus, only the
I-cache should ever fill from this mapping, and we flush/unlock it
afterward.  Thus we should avoid problems from any potential cache
aliasing between inhibited and non-inhibited mappings.

NOTE that if PAMU is used with this patch, it will need to use a
dedicated LAW as described in the erratum.  This is the responsibility
of the OS that sets up PAMU.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Andy Fleming <afleming@freescale.com>
diff --git a/README b/README
index 81ee7d4..8197235 100644
--- a/README
+++ b/README
@@ -383,6 +383,31 @@
 		symbol should be set to the TLB1 entry to be used for this
 		purpose.
 
+		CONFIG_SYS_FSL_ERRATUM_A004510
+
+		Enables a workaround for erratum A004510.  If set,
+		then CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV and
+		CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY must be set.
+
+		CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV
+		CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2 (optional)
+
+		Defines one or two SoC revisions (low 8 bits of SVR)
+		for which the A004510 workaround should be applied.
+
+		The rest of SVR is either not relevant to the decision
+		of whether the erratum is present (e.g. p2040 versus
+		p2041) or is implied by the build target, which controls
+		whether CONFIG_SYS_FSL_ERRATUM_A004510 is set.
+
+		See Freescale App Note 4493 for more information about
+		this erratum.
+
+		CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY
+
+		This is the value to write into CCSR offset 0x18600
+		according to the A004510 workaround.
+
 - Generic CPU options:
 		CONFIG_SYS_BIG_ENDIAN, CONFIG_SYS_LITTLE_ENDIAN
 
diff --git a/arch/powerpc/cpu/mpc85xx/cmd_errata.c b/arch/powerpc/cpu/mpc85xx/cmd_errata.c
index 858b3f8..e8989bd 100644
--- a/arch/powerpc/cpu/mpc85xx/cmd_errata.c
+++ b/arch/powerpc/cpu/mpc85xx/cmd_errata.c
@@ -124,6 +124,18 @@
 	if ((SVR_MAJ(svr) == 1) || IS_SVR_REV(svr, 2, 0))
 		puts("Work-around for Erratum NMG ETSEC129 enabled\n");
 #endif
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510
+	/*
+	 * The boot code applies this workaround only when the low 8 bits of
+	 * SVR match one of the configured revisions, so report it only then.
+	 */
+	if ((svr & 0xff) == CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+	    || (svr & 0xff) == CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+#endif
+	    )
+		puts("Work-around for Erratum A004510 enabled\n");
+#endif
 	return 0;
 }
 
diff --git a/arch/powerpc/cpu/mpc85xx/release.S b/arch/powerpc/cpu/mpc85xx/release.S
index 1555a9b..043d0ff 100644
--- a/arch/powerpc/cpu/mpc85xx/release.S
+++ b/arch/powerpc/cpu/mpc85xx/release.S
@@ -74,6 +74,33 @@
 	mtspr	977,r3
 #endif
 
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510
+	mfspr	r3,SPRN_SVR	/* A004510: is this SoC revision affected? */
+	rlwinm	r3,r3,0,0xff	/* keep only the low 8 bits of SVR */
+	li	r4,CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV
+	cmpw	r3,r4
+	beq	1f		/* matches the first configured revision */

+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+	li	r4,CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+	cmpw	r3,r4
+	beq	1f		/* matches the optional second revision */
+#endif

+	/* Not a supported revision affected by erratum */
+	b	2f

+1:	/* Erratum says set bits 55:60 to 001001 */
+	msync
+	isync
+	mfspr	r3,976
+	li	r4,0x48		/* 001001 placed within the 0x1f8 mask */
+	rlwimi	r3,r4,0,0x1f8	/* insert under mask; other bits of r3 kept */
+	mtspr	976,r3
+	isync
+2:
+#endif
+
 	/* Enable branch prediction */
 	lis	r3,BUCSR_ENABLE@h
 	ori	r3,r3,BUCSR_ENABLE@l
diff --git a/arch/powerpc/cpu/mpc85xx/start.S b/arch/powerpc/cpu/mpc85xx/start.S
index b1998b2..66e8eb8 100644
--- a/arch/powerpc/cpu/mpc85xx/start.S
+++ b/arch/powerpc/cpu/mpc85xx/start.S
@@ -86,6 +86,35 @@
 	li	r1,MSR_DE
 	mtmsr 	r1
 
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510
+	mfspr	r3,SPRN_SVR	/* A004510: is this SoC revision affected? */
+	rlwinm	r3,r3,0,0xff	/* keep only the low 8 bits of SVR */
+	li	r4,CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV
+	cmpw	r3,r4
+	beq	1f		/* matches the first configured revision */

+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+	li	r4,CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2
+	cmpw	r3,r4
+	beq	1f		/* matches the optional second revision */
+#endif

+	/* Not a supported revision affected by erratum */
+	li	r27,0		/* r27 = 0: later A004510 blocks are skipped */
+	b	2f

+1:	li	r27,1	/* Remember for later that we have the erratum */
+	/* Erratum says set bits 55:60 to 001001 */
+	msync
+	isync
+	mfspr	r3,976
+	li	r4,0x48		/* 001001 placed within the 0x1f8 mask */
+	rlwimi	r3,r4,0,0x1f8	/* insert under mask; other bits of r3 kept */
+	mtspr	976,r3
+	isync
+2:
+#endif
+#endif
+
 #if defined(CONFIG_SECURE_BOOT) && defined(CONFIG_E500MC)
 	/* ISBC uses L2 as stack.
 	 * Disable L2 cache here so that u-boot can enable it later
@@ -440,6 +469,14 @@
 	mfspr	r2, MAS2
 	andc	r2, r2, r3
 	or	r2, r2, r1
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510
+	cmpwi	r27,0		/* r27 == 0: erratum absent, leave MAS2 as is */
+	beq	1f
+	andi.	r15, r2, MAS2_I|MAS2_G /* save the old I/G for later */
+	rlwinm	r2, r2, 0, ~MAS2_I	/* clear cache-inhibit on this mapping */
+	ori	r2, r2, MAS2_G	/* and mark it guarded */
+1:
+#endif
 	mtspr	MAS2, r2	/* Set the EPN to our PC base address */
 
 	mfspr	r2, MAS3
@@ -719,6 +756,253 @@
 	tlbwe
 #endif /* #if (CONFIG_SYS_CCSRBAR_DEFAULT != CONFIG_SYS_CCSRBAR_PHYS) */
 
+#ifdef CONFIG_SYS_FSL_ERRATUM_A004510
+#define DCSR_LAWBARH0	(CONFIG_SYS_CCSRBAR + 0x1000)	/* NOTE(review): not referenced below; the LAW stores use 0xc00(r7) - confirm */
+#define LAW_SIZE_1M	0x13
+#define DCSRBAR_LAWAR	(LAW_EN | (0x1d << 20) | LAW_SIZE_1M)

+	cmpwi	r27,0		/* r27 == 0: erratum not present on this revision */
+	beq	9f		/* skip the whole workaround sequence */

+	/*
+	 * Create a TLB entry for CCSR
+	 *
+	 * We're executing out of TLB1 entry in r14, and that's the only
+	 * TLB entry that exists.  To allocate some TLB entries for our
+	 * own use, flip a bit high enough that we won't flip it again
+	 * via incrementing.
+	 */

+	xori	r8, r14, 32	/* r8 = TLB1 entry index used for CCSR */
+	lis	r0, MAS0_TLBSEL(1)@h
+	rlwimi	r0, r8, 16, MAS0_ESEL_MSK
+	lis	r1, FSL_BOOKE_MAS1(1, 1, 0, 0, BOOKE_PAGESZ_16M)@h
+	ori	r1, r1, FSL_BOOKE_MAS1(1, 1, 0, 0, BOOKE_PAGESZ_16M)@l
+	lis	r7, CONFIG_SYS_CCSRBAR@h
+	ori	r7, r7, CONFIG_SYS_CCSRBAR@l	/* r7 = CCSR base, kept for the rest of this block */
+	ori	r2, r7, MAS2_I|MAS2_G	/* cache-inhibited and guarded */
+	lis	r3, FSL_BOOKE_MAS3(CONFIG_SYS_CCSRBAR_PHYS_LOW, 0, (MAS3_SW|MAS3_SR))@h
+	ori	r3, r3, FSL_BOOKE_MAS3(CONFIG_SYS_CCSRBAR_PHYS_LOW, 0, (MAS3_SW|MAS3_SR))@l
+	lis	r4, CONFIG_SYS_CCSRBAR_PHYS_HIGH@h
+	ori	r4, r4, CONFIG_SYS_CCSRBAR_PHYS_HIGH@l
+	mtspr	MAS0, r0
+	mtspr	MAS1, r1
+	mtspr	MAS2, r2
+	mtspr	MAS3, r3
+	mtspr	MAS7, r4
+	isync
+	tlbwe
+	isync
+	msync

+	/* Map DCSR temporarily to physical address zero */
+	li	r0, 0
+	lis	r3, DCSRBAR_LAWAR@h
+	ori	r3, r3, DCSRBAR_LAWAR@l

+	stw	r0, 0xc00(r7)	/* LAWBARH0 */
+	stw	r0, 0xc04(r7)	/* LAWBARL0 */
+	sync
+	stw	r3, 0xc08(r7)	/* LAWAR0 */

+	/* Read back from LAWAR to ensure the update is complete. */
+	lwz	r3, 0xc08(r7)	/* LAWAR0 */
+	isync

+	/* Create a TLB entry for DCSR at zero */

+	addi	r9, r8, 1	/* r9 = TLB1 entry index used for DCSR */
+	lis	r0, MAS0_TLBSEL(1)@h
+	rlwimi	r0, r9, 16, MAS0_ESEL_MSK
+	lis	r1, FSL_BOOKE_MAS1(1, 1, 0, 0, BOOKE_PAGESZ_1M)@h
+	ori	r1, r1, FSL_BOOKE_MAS1(1, 1, 0, 0, BOOKE_PAGESZ_1M)@l
+	li	r6, 0	/* DCSR effective address */
+	ori	r2, r6, MAS2_I|MAS2_G	/* cache-inhibited and guarded */
+	li	r3, MAS3_SW|MAS3_SR
+	li	r4, 0
+	mtspr	MAS0, r0
+	mtspr	MAS1, r1
+	mtspr	MAS2, r2
+	mtspr	MAS3, r3
+	mtspr	MAS7, r4
+	isync
+	tlbwe
+	isync
+	msync

+	/* enable the timebase */
+#define CTBENR	0xe2084
+	li	r3, 1
+	addis	r4, r7, CTBENR@ha
+	stw	r3, CTBENR@l(r4)
+	lwz	r3, CTBENR@l(r4)	/* read back the register just written */
+	twi	0,r3,0		/* TO=0 never traps; presumably makes isync wait for the load - confirm */
+	isync

+	.macro	erratum_set_ccsr offset value	/* r3 = r7 + offset, r4 = value, then call erratum_set_value */
+	addis	r3, r7, \offset@ha
+	lis	r4, \value@h
+	addi	r3, r3, \offset@l
+	ori	r4, r4, \value@l
+	bl	erratum_set_value
+	.endm

+	.macro	erratum_set_dcsr offset value	/* same as above, relative to r6 (DCSR at zero) */
+	addis	r3, r6, \offset@ha
+	lis	r4, \value@h
+	addi	r3, r3, \offset@l
+	ori	r4, r4, \value@l
+	bl	erratum_set_value
+	.endm

+	erratum_set_dcsr 0xb0e08 0xe0201800
+	erratum_set_dcsr 0xb0e18 0xe0201800
+	erratum_set_dcsr 0xb0e38 0xe0400000
+	erratum_set_dcsr 0xb0008 0x00900000
+	erratum_set_dcsr 0xb0e40 0xe00a0000
+	erratum_set_ccsr 0x18600 CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY
+	erratum_set_ccsr 0x10f00 0x415e5000
+	erratum_set_ccsr 0x11f00 0x415e5000

+	/* Make temp mapping uncacheable again, if it was initially */
+	bl	2f
+2:	mflr	r3		/* r3 = current PC, used to look up our own mapping */
+	tlbsx	0, r3
+	mfspr	r4, MAS2
+	rlwimi	r4, r15, 0, MAS2_I	/* restore the I bit saved in r15 */
+	rlwimi	r4, r15, 0, MAS2_G	/* restore the G bit saved in r15 */
+	mtspr	MAS2, r4
+	isync
+	tlbwe
+	isync
+	msync

+	/* Clear the cache */
+	lis	r3,(L1CSR1_ICFI|L1CSR1_ICLFR)@h
+	ori	r3,r3,(L1CSR1_ICFI|L1CSR1_ICLFR)@l
+	sync
+	isync
+	mtspr	SPRN_L1CSR1,r3	/* flash invalidate and lock flash reset */
+	isync
+2:	sync
+	mfspr	r4,SPRN_L1CSR1
+	and.	r4,r4,r3
+	bne	2b		/* loop until both request bits read back as zero */

+	lis	r3,(L1CSR1_CPE|L1CSR1_ICE)@h
+	ori	r3,r3,(L1CSR1_CPE|L1CSR1_ICE)@l
+	sync
+	isync
+	mtspr	SPRN_L1CSR1,r3	/* I-cache enable with parity enable */
+	isync
+2:	sync
+	mfspr	r4,SPRN_L1CSR1
+	and.	r4,r4,r3
+	beq	2b		/* loop while neither CPE nor ICE reads back set */

+	/* Remove temporary mappings */
+	lis	r0, MAS0_TLBSEL(1)@h
+	rlwimi	r0, r9, 16, MAS0_ESEL_MSK	/* select the DCSR entry (r9) */
+	li	r3, 0
+	mtspr	MAS0, r0
+	mtspr	MAS1, r3	/* MAS1 = 0 */
+	isync
+	tlbwe
+	isync
+	msync

+	li	r3, 0
+	stw	r3, 0xc08(r7)	/* LAWAR0 */
+	lwz	r3, 0xc08(r7)	/* read back, as done when the window was set up */
+	isync

+	lis	r0, MAS0_TLBSEL(1)@h
+	rlwimi	r0, r8, 16, MAS0_ESEL_MSK	/* select the CCSR entry (r8) */
+	li	r3, 0
+	mtspr	MAS0, r0
+	mtspr	MAS1, r3	/* MAS1 = 0 */
+	isync
+	tlbwe
+	isync
+	msync

+	b	9f		/* jump over the erratum_set_value subroutine */
+
+	/* r3 = addr, r4 = value; clobbers r5, r11, r12 and CR0; LR is saved in r12 and restored */
+erratum_set_value:
+	/* Lock two cache lines into I-Cache */
+	sync
+	mfspr	r11, SPRN_L1CSR1
+	rlwinm	r11, r11, 0, ~L1CSR1_ICUL	/* clear ICUL before locking */
+	sync
+	isync
+	mtspr	SPRN_L1CSR1, r11
+	isync

+	mflr	r12		/* save the return address; bl below overwrites LR */
+	bl	5f
+5:	mflr	r5		/* r5 = address of label 5 */
+	addi	r5, r5, 2f - 5b	/* r5 = address of the aligned block at label 2 */
+	icbtls	0, 0, r5	/* lock the first line */
+	addi	r5, r5, 64

+	sync
+	mfspr	r11, SPRN_L1CSR1
+3:	andi.	r11, r11, L1CSR1_ICUL	/* NOTE(review): no re-read in the loop, so this spins forever if ICUL is set; presumably a deliberate hang - confirm */
+	bne	3b

+	icbtls	0, 0, r5	/* lock the second line */
+	addi	r5, r5, 64

+	sync
+	mfspr	r11, SPRN_L1CSR1
+3:	andi.	r11, r11, L1CSR1_ICUL	/* same check as after the first icbtls */
+	bne	3b

+	b	2f
+	.align	6		/* 64-byte boundary, same step as the icbtls addresses above */
+	/* Inside a locked cacheline, wait a while, write, then wait a while */
+2:	sync

+	mfspr	r5, SPRN_TBRL
+	addis	r11, r5, 0x10000@h /* wait 65536 timebase ticks */
+4:	mfspr	r5, SPRN_TBRL
+	subf.	r5, r5, r11	/* r5 = deadline - now */
+	bgt	4b		/* keep waiting while the difference is positive */

+	stw	r4, 0(r3)	/* the single store this routine exists for */

+	mfspr	r5, SPRN_TBRL
+	addis	r11, r5, 0x10000@h /* wait 65536 timebase ticks */
+4:	mfspr	r5, SPRN_TBRL
+	subf.	r5, r5, r11	/* r5 = deadline - now */
+	bgt	4b

+	sync

+	/*
+	 * Fill the rest of this cache line and the next with nops so that
+	 * nothing outside the locked area is fetched due to a branch:
+	 * 13 instructions above + 19 nops = 32 = two 64-byte lines.
+	 */
+	.rept 19
+	nop
+	.endr

+	sync
+	mfspr	r11, SPRN_L1CSR1
+	rlwinm	r11, r11, 0, ~L1CSR1_ICUL	/* clear ICUL again before returning */
+	sync
+	isync
+	mtspr	SPRN_L1CSR1, r11
+	isync

+	mtlr	r12		/* restore the caller's return address */
+	blr

+9:
+#endif
+
 create_init_ram_area:
 	lis     r6,FSL_BOOKE_MAS0(1, 15, 0)@h
 	ori     r6,r6,FSL_BOOKE_MAS0(1, 15, 0)@l
diff --git a/arch/powerpc/include/asm/config_mpc85xx.h b/arch/powerpc/include/asm/config_mpc85xx.h
index 950ec2f..8c7489d 100644
--- a/arch/powerpc/include/asm/config_mpc85xx.h
+++ b/arch/powerpc/include/asm/config_mpc85xx.h
@@ -330,6 +330,10 @@
 #define CONFIG_SYS_FSL_SRIO_MAX_PORTS	2
 #define CONFIG_SYS_FSL_SRIO_OB_WIN_NUM	9
 #define CONFIG_SYS_FSL_SRIO_IB_WIN_NUM	5
+#define CONFIG_SYS_FSL_ERRATUM_A004510	/* build in the A004510 workaround */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV	0x10	/* affected revision (low 8 bits of SVR) */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2	0x11	/* second affected revision */
+#define CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY 0xf0000000	/* written to CCSR offset 0x18600 */
 
 #elif defined(CONFIG_PPC_P3041)
 #define CONFIG_MAX_CPUS			4
@@ -355,6 +359,10 @@
 #define CONFIG_SYS_FSL_SRIO_MAX_PORTS	2
 #define CONFIG_SYS_FSL_SRIO_OB_WIN_NUM	9
 #define CONFIG_SYS_FSL_SRIO_IB_WIN_NUM	5
+#define CONFIG_SYS_FSL_ERRATUM_A004510	/* build in the A004510 workaround */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV	0x10	/* affected revision (low 8 bits of SVR) */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV2	0x11	/* second affected revision */
+#define CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY 0xf0000000	/* written to CCSR offset 0x18600 */
 
 #elif defined(CONFIG_PPC_P4080) /* also supports P4040 */
 #define CONFIG_MAX_CPUS			8
@@ -391,6 +399,9 @@
 #define CONFIG_SYS_FSL_SRIO_IB_WIN_NUM	5
 #define CONFIG_SYS_FSL_RMU
 #define CONFIG_SYS_FSL_SRIO_MSG_UNIT_NUM	2
+#define CONFIG_SYS_FSL_ERRATUM_A004510	/* build in the A004510 workaround */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV	0x20	/* affected revision (low 8 bits of SVR) */
+#define CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY 0xff000000	/* written to CCSR offset 0x18600 */
 
 #elif defined(CONFIG_PPC_P5020) /* also supports P5010 */
 #define CONFIG_MAX_CPUS			2
@@ -414,6 +425,9 @@
 #define CONFIG_SYS_FSL_SRIO_MAX_PORTS	2
 #define CONFIG_SYS_FSL_SRIO_OB_WIN_NUM	9
 #define CONFIG_SYS_FSL_SRIO_IB_WIN_NUM	5
+#define CONFIG_SYS_FSL_ERRATUM_A004510	/* build in the A004510 workaround */
+#define CONFIG_SYS_FSL_ERRATUM_A004510_SVR_REV	0x10	/* affected revision (low 8 bits of SVR) */
+#define CONFIG_SYS_FSL_CORENET_SNOOPVEC_COREONLY 0xc0000000	/* written to CCSR offset 0x18600 */
 
 #elif defined(CONFIG_BSC9131)
 #define CONFIG_MAX_CPUS			1
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c942bec..a8ffedb 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -486,11 +486,13 @@
 #define SPRN_L2CFG0	0x207	/* L2 Cache Configuration Register 0 */
 #define SPRN_L1CSR0	0x3f2	/* L1 Data Cache Control and Status Register 0 */
 #define   L1CSR0_CPE		0x00010000	/* Data Cache Parity Enable */
+#define   L1CSR0_CUL		0x00000400	/* (D-)Cache Unable to Lock */
 #define   L1CSR0_DCLFR		0x00000100	/* D-Cache Lock Flash Reset */
 #define   L1CSR0_DCFI		0x00000002	/* Data Cache Flash Invalidate */
 #define   L1CSR0_DCE		0x00000001	/* Data Cache Enable */
 #define SPRN_L1CSR1	0x3f3	/* L1 Instruction Cache Control and Status Register 1 */
 #define   L1CSR1_CPE		0x00010000	/* Instruction Cache Parity Enable */
+#define   L1CSR1_ICUL		0x00000400	/* I-Cache Unable to Lock */
 #define   L1CSR1_ICLFR		0x00000100	/* I-Cache Lock Flash Reset */
 #define   L1CSR1_ICFI		0x00000002	/* Instruction Cache Flash Invalidate */
 #define   L1CSR1_ICE		0x00000001	/* Instruction Cache Enable */