Kyle Swenson | 8d8f654 | 2021-03-15 11:02:55 -0600 | [diff] [blame^] | 1 | /* |
| 2 | * TLB Exception Handling for ARC |
| 3 | * |
| 4 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License version 2 as |
| 8 | * published by the Free Software Foundation. |
| 9 | * |
| 10 | * Vineetg: April 2011 : |
| 11 | * -MMU v1: moved out legacy code into a separate file |
| 12 | * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, |
| 13 | * helps avoid a shift when preparing PD0 from PTE |
| 14 | * |
| 15 | * Vineetg: July 2009 |
| 16 | * -For MMU V2, we need not do heuristics at the time of committing a D-TLB |
| 17 | * entry, so that it doesn't knock out its I-TLB entry |
| 18 | * -Some more fine tuning: |
| 19 | * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc |
| 20 | * |
| 21 | * Vineetg: July 2009 |
| 22 | * -Practically rewrote the I/D TLB Miss handlers |
| 23 | * Now 40 and 135 instructions apiece as compared to 131 and 449 resp. |
| 24 | * Hence Leaner by 1.5 K |
| 25 | * Used Conditional arithmetic to replace excessive branching |
| 26 | * Also used short instructions wherever possible |
| 27 | * |
| 28 | * Vineetg: Aug 13th 2008 |
| 29 | * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing |
| 30 | * more information in case of a Fatality |
| 31 | * |
| 32 | * Vineetg: March 25th Bug #92690 |
| 33 | * -Added Debug Code to check if sw-ASID == hw-ASID |
| 34 | |
| 35 | * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 |
| 36 | */ |
| 37 | |
| 38 | #include <linux/linkage.h> |
| 39 | #include <asm/entry.h> |
| 40 | #include <asm/mmu.h> |
| 41 | #include <asm/pgtable.h> |
| 42 | #include <asm/arcregs.h> |
| 43 | #include <asm/cache.h> |
| 44 | #include <asm/processor.h> |
| 45 | #include <asm/tlb-mmu1.h> |
| 46 | |
| 47 | #ifdef CONFIG_ISA_ARCOMPACT |
| 48 | ;----------------------------------------------------------------- |
| 49 | ; ARC700 Exception Handling doesn't auto-switch stack and it only provides |
| 50 | ; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" |
| 51 | ; |
| 52 | ; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a |
| 53 | ; "global" is used to free-up FIRST core reg to be able to code the rest of |
| 54 | ; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). |
| 55 | ; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 |
| 56 | ; need to be saved as well by extending the "global" to be 4 words. Hence |
| 57 | ; ".size ex_saved_reg1, 16" |
| 58 | ; [All of this dance is to avoid stack switching for each TLB Miss, since we |
| 59 | ; only need to save a handful of regs, as opposed to complete reg file] |
| 60 | ; |
| 61 | ; For ARC700 SMP, the "global" obviously can't be used to free up the FIRST |
| 62 | ; core reg as it will not be SMP safe. |
| 63 | ; Thus scratch AUX reg is used (and no longer used to cache task PGD). |
| 64 | ; To save the rest of 3 regs - per cpu, the global is made "per-cpu". |
| 65 | ; Epilogue thus has to locate the "per-cpu" storage for regs. |
| 66 | ; To avoid cache line bouncing the per-cpu global is aligned/sized per |
| 67 | ; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence |
| 68 | ; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" |
| 69 | |
| 70 | ; As simple as that.... |
| 71 | ;-------------------------------------------------------------------------- |
| 72 | |
; Scratch memory used by the TLB refill handlers to park core regs:
;   SMP:  CONFIG_NR_CPUS slots, each one L1 cache line wide
;   !SMP: a single 16 byte (4 word) slot
ARCFP_DATA ex_saved_reg1
	.align	1 << L1_CACHE_SHIFT
	.type	ex_saved_reg1, @object
ex_saved_reg1:
#ifdef CONFIG_SMP
	.zero	(CONFIG_NR_CPUS << L1_CACHE_SHIFT)
	.size	ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
#else
	.zero	16
	.size	ex_saved_reg1, 16
#endif
| 86 | |
;-----------------------------------------------------------------
; TLBMISS_FREEUP_REGS (ARCompact): fast path prologue, frees up r0-r3
;
;   SMP:  r0 is parked in the scratch AUX reg; r1-r3 are stored at
;         offsets 4/8/12 of this CPU's slot in ex_saved_reg1
;   !SMP: r0 is stored at offset 0 of ex_saved_reg1; r1-r3 at 4/8/12
.macro TLBMISS_FREEUP_REGS
#ifdef CONFIG_SMP
	sr	r0, [ARC_REG_SCRATCH_DATA0]	; r0 now free to code with
	GET_CPU_ID	r0			; locate the per cpu scratch mem:
	asl	r0, r0, L1_CACHE_SHIFT		;  each slot is a cache line wide
	add	r0, @ex_saved_reg1, r0		; r0 = base of this cpu's slot
#else
	st	r0, [@ex_saved_reg1]		; word 0 holds r0
	mov_s	r0, @ex_saved_reg1		; r0 = base of the save area
#endif
	; the three stores are independent of each other
	st_s	r3, [r0, 12]
	st_s	r2, [r0, 8]
	st_s	r1, [r0, 4]

	; Cross-check the ASID in the MMU PID reg against the one kept in
	; Linux data structures (expands to nothing unless
	; CONFIG_ARC_DBG_TLB_PARANOIA is enabled)
	tlb_paranoid_check_asm
.endm
| 106 | |
;-----------------------------------------------------------------
; TLBMISS_RESTORE_REGS (ARCompact): fast path epilogue, the inverse of
; TLBMISS_FREEUP_REGS. Reloads r1-r3 from the save area and finally r0
; (from the scratch AUX reg on SMP, from word 0 of the area otherwise).
.macro TLBMISS_RESTORE_REGS
	; Step 1: r0 = base of the save area
#ifdef CONFIG_SMP
	GET_CPU_ID	r0			; locate the per cpu scratch mem:
	asl	r0, r0, L1_CACHE_SHIFT		;  each slot is a cache line wide
	add	r0, @ex_saved_reg1, r0
#else
	mov_s	r0, @ex_saved_reg1
#endif

	; Step 2: r1-r3 live at the same offsets in both configurations
	ld_s	r3, [r0, 12]
	ld_s	r2, [r0, 8]
	ld_s	r1, [r0, 4]

	; Step 3: r0 itself goes last as it is the base pointer until now
#ifdef CONFIG_SMP
	lr	r0, [ARC_REG_SCRATCH_DATA0]
#else
	ld_s	r0, [r0]
#endif
.endm
| 124 | |
| 125 | #else /* ARCv2 */ |
| 126 | |
; TLBMISS_FREEUP_REGS (ARCv2): frees up r0-r3 using the PUSH helper
; (defined outside this file, presumably in asm/entry.h).
; The order must mirror the POPs in TLBMISS_RESTORE_REGS.
.macro TLBMISS_FREEUP_REGS
	PUSH	r0
	PUSH	r1
	PUSH	r2
	PUSH	r3
.endm
| 133 | |
; TLBMISS_RESTORE_REGS (ARCv2): restores r0-r3 using the POP helper
; (defined outside this file, presumably in asm/entry.h), in the exact
; reverse order of the PUSHes done by TLBMISS_FREEUP_REGS.
.macro TLBMISS_RESTORE_REGS
	POP	r3
	POP	r2
	POP	r1
	POP	r0
.endm
| 140 | |
| 141 | #endif |
| 142 | |
| 143 | ;============================================================================ |
| 144 | ; Troubleshooting Stuff |
| 145 | ;============================================================================ |
| 146 | |
| 147 | ; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid |
| 148 | ; When Creating TLB Entries, instead of doing 3 dependent loads from memory, |
| 149 | ; we use the MMU PID Reg to get current ASID. |
| 150 | ; In bizarre scenarios SW and HW ASID can get out-of-sync which is trouble. |
| 151 | ; So we try to detect this in TLB Miss handler |
| 152 | |
; tlb_paranoid_check_asm: debug-only sanity check of s/w vs h/w ASID.
; Expands to nothing unless CONFIG_ARC_DBG_TLB_PARANOIA is enabled.
; Uses r0-r3 as scratch, so it may only be invoked after they are saved.
.macro tlb_paranoid_check_asm

#ifdef CONFIG_ARC_DBG_TLB_PARANOIA

	; r0 = ASID recorded in the current task's active_mm context
	GET_CURR_TASK_ON_CPU	r3
	ld	r0, [r3, TASK_ACT_MM]
	ld	r0, [r0, MM_CTXT+MM_CTXT_ASID]
	breq	r0, 0, 55f		; Error if no ASID allocated

	; Only the low 8 bits (MMU PID bits) take part in the comparison
	and	r2, r0, 0xFF		; r2 = s/w ASID
	lr	r1, [ARC_REG_PID]
	and	r1, r1, 0xFF		; r1 = h/w ASID
	breq	r1, r2, 5f

55:
	; H/w and S/w ASID differ (or none allocated): this is only treated
	; as an error when the STATUS_U_BIT of erstatus is set, i.e. NOT if
	; the miss happened in kernel mode
	lr	r2, [erstatus]
	bbit0	r2, STATUS_U_BIT, 5f

	; Troubled waters: flag the error, which first needs a switch to
	; the kernel mode stack to be able to call the error routine
	GET_TSK_STACK_BASE	r3, sp

	; Call printk to shout it out loud
	mov	r2, 1
	j	print_asid_mismatch

5:	; ASIDs match (or kernel mode) so proceed normally
	nop

#endif

.endm
| 187 | |
| 188 | ;============================================================================ |
| 189 | ;TLB Miss handling Code |
| 190 | ;============================================================================ |
| 191 | |
| 192 | ;----------------------------------------------------------------------------- |
| 193 | ; This macro does the page-table lookup for the faulting address. |
| 194 | ; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address |
; LOAD_FAULT_PTE: walk the page tables for the address in the EFA reg.
;
; Branches to do_slow_path_pf if the PGD entry for the address is empty.
; Otherwise falls through with:
;   r0 = PTE (lower word only for PAE40), r1 = ptr to PTE, r2 = EFA
; Clobbers r3 and the flags.
.macro LOAD_FAULT_PTE

	lr	r2, [efa]			; r2 = faulting V-address

#ifndef CONFIG_SMP
	lr	r1, [ARC_REG_SCRATCH_DATA0]	; current pgd
#else
	GET_CURR_TASK_ON_CPU	r1
	ld	r1, [r1, TASK_ACT_MM]
	ld	r1, [r1, MM_PGD]
#endif

	lsr	r0, r2, PGDIR_SHIFT	; Bits for indexing into PGD
	ld.as	r3, [r1, r0]		; PGD entry corresp to faulting addr
	tst	r3, r3
	bz	do_slow_path_pf		; if no Page Table, do page fault

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	and.f	0, r3, _PAGE_HW_SZ	; Is this Huge PMD (thp)
	add2.nz	r1, r1, r0		; r1 = ptr to the PGD entry itself
	bnz.d	2f		; YES: PGD == PMD has THP PTE: stop pgd walk
	mov.nz	r0, r3			; (delay slot) r0 = the huge PTE

#endif
	and	r1, r3, PAGE_MASK	; r1 = base of the page table

	; Get the PTE entry: The idea is
	; (1) x = addr >> PAGE_SHIFT	-> masks page-off bits from @fault-addr
	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
	; (3) z = (pgtbl + y * PTE size), PTE size being (1 << PTE_SIZE_LOG)

#ifdef CONFIG_ARC_HAS_PAE40
#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
#else
#define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
#endif

	; multiply in step (3) above avoided by shifting lesser in step (1)
	lsr	r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
	and	r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
	ld.aw	r0, [r1, r0]		; r0: PTE (lower word only for PAE40)
					; r1: PTE ptr

2:

#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	; Debug statistics: count the PTEs found NOT present.
	; and.f sets Z when _PAGE_PRESENT is clear, so the increment must be
	; skipped on NZ (present). The earlier "bz" had this inverted and
	; ended up counting the present PTEs instead.
	and.f	0, r0, _PAGE_PRESENT
	bnz	1f
	ld	r3, [num_pte_not_present]
	add	r3, r3, 1
	st	r3, [num_pte_not_present]
1:
#endif

.endm
| 250 | |
| 251 | ;----------------------------------------------------------------- |
| 252 | ; Convert Linux PTE entry into TLB entry |
| 253 | ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu |
| 254 | ; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) |
| 255 | ; IN: r0 = PTE, r1 = ptr to PTE |
| 256 | |
; CONV_PTE_TO_TLB: program the MMU PD1 (and PD1HI for PAE40) and PD0 regs
; from the Linux PTE.
;   IN:  r0 = PTE, r1 = ptr to PTE
;   Clobbers r2, r3 and the flags; r0 and r1 are left untouched.
.macro CONV_PTE_TO_TLB
	;--- PD1: permissions ---
	and	r3, r0, PTE_BITS_RWX	;          r  w  x
	asl	r2, r3, 3		; Kr Kw Kx 0  0  0  (GLOBAL, kernel only)
	and.f	0,  r0, _PAGE_GLOBAL
	or.z	r2, r2, r3		; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page)

	;--- PD1: PFN + cache bits taken over from the PTE ---
	and	r3, r0, PTE_BITS_NON_RWX_IN_PD1
	or	r3, r3, r2

	sr	r3, [ARC_REG_TLBPD1]	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
#ifdef CONFIG_ARC_HAS_PAE40
	ld	r3, [r1, 4]		; upper PTE word: paddr[39..32]
	sr	r3, [ARC_REG_TLBPD1HI]
#endif

	;--- PD0: MMU has already prepared it with vaddr and asid ---
	lr	r3, [ARC_REG_TLBPD0]
	and	r2, r0, PTE_BITS_IN_PD0	; remaining PTE flags: (V)alid, (G)lb

	or	r3, r3, r2		; S | vaddr | {sasid|asid}
	sr	r3, [ARC_REG_TLBPD0]	; rewrite PD0
.endm
| 279 | |
| 280 | ;----------------------------------------------------------------- |
| 281 | ; Commit the TLB entry into MMU |
| 282 | |
; COMMIT_ENTRY_TO_MMU: issue the TLB command(s) which write the entry
; set up in the PD regs into the MMU. The command(s) used depend on
; CONFIG_ARC_MMU_VER.
.macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER >= 4)

	/* MMU v4 onwards: a single command */
	sr	TLBInsertEntry, [ARC_REG_TLBCOMMAND]

#else

	/* Get free TLB slot: Set = computed from vaddr, way = random */
	sr	TLBGetIndex, [ARC_REG_TLBCOMMAND]

	/* Commit the Write */
#if (CONFIG_ARC_MMU_VER >= 2)
	sr	TLBWriteNI, [ARC_REG_TLBCOMMAND]	/* introduced in v2 */
#else
	sr	TLBWrite, [ARC_REG_TLBCOMMAND]
#endif

#endif
.endm
| 300 | |
| 301 | |
ARCFP_CODE	;Fast Path Code, candidate for ICCM

;-----------------------------------------------------------------------------
; I-TLB Miss Exception Handler
;
; Fast path: walk the page tables for the faulting address, check that the
; PTE permits execution, mark it accessed, load it into the MMU and return
; from the exception. Anything else is handed over to do_slow_path_pf.
;-----------------------------------------------------------------------------

ENTRY(EV_TLBMissI)

	TLBMISS_FREEUP_REGS

#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	; debug statistics: numitlb++
	ld	r0, [@numitlb]
	add	r0, r0, 1
	st	r0, [@numitlb]
#endif

	;----------------------------------------------------------------
	; Look up the PTE for the V-addr accessed:
	; r0 = PTE, r1 = ptr to PTE, r2 = EFA
	LOAD_FAULT_PTE

	;----------------------------------------------------------------
	; VERIFY_PTE: r2 = flags the PTE must have for code to be executed.
	; _PAGE_GLOBAL is required in addition if EFA >= VMALLOC_START.
	; (cmp_s must read r2 before mov_s overwrites it; or.hs then acts
	;  on the flags set by cmp_s)
	cmp_s	r2, VMALLOC_START
	mov_s	r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
	or.hs	r2, r2, _PAGE_GLOBAL

	and	r3, r0, r2		; keep only the flags under test
	xor.f	r3, r3, r2		; zero iff ((pte & flags_test) == flags_test)
	bnz	do_slow_path_pf

	;----------------------------------------------------------------
	; UPDATE_PTE: let Linux VM know that the page was accessed
	or	r0, r0, _PAGE_ACCESSED
	st_s	r0, [r1]		; write back PTE

	CONV_PTE_TO_TLB
	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
EV_TLBMissI_fast_ret:	; additional label for VDK OS-kit instrumentation
	rtie

END(EV_TLBMissI)
| 343 | |
| 344 | ;----------------------------------------------------------------------------- |
| 345 | ; D-TLB Miss Exception Handler |
| 346 | ;----------------------------------------------------------------------------- |
| 347 | |
; D-TLB Miss fast path: walk the page tables for the faulting address, check
; that the PTE permits the kind of access recorded in ECR (read and/or write),
; mark it accessed (and dirty for a write), load it into the MMU and return
; from the exception. Anything else is handed over to do_slow_path_pf, which
; is also the slow path entry used by the I-TLB Miss handler.
ENTRY(EV_TLBMissD)

	TLBMISS_FREEUP_REGS

#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
	; debug statistics: numdtlb++
	ld	r0, [@numdtlb]
	add	r0, r0, 1
	st	r0, [@numdtlb]
#endif

	;----------------------------------------------------------------
	; Look up the PTE for the V-addr accessed.
	; If a PTE exists: r0 = PTE, r1 = ptr to PTE, r2 = EFA
	LOAD_FAULT_PTE

	;----------------------------------------------------------------
	; VERIFY_PTE: r2 = flags the PTE must have for this data access
	; (R/W/R+W)

	cmp_s	r2, VMALLOC_START
	mov_s	r2, _PAGE_PRESENT	; common bit for K/U PTE
	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only

	; Linux PTE [RWX] bits are semantically overloaded:
	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
	; -Otherwise they are user-mode permissions, and those are exactly
	;  same for kernel mode as well (e.g. copy_(to|from)_user)

	lr	r3, [ecr]
	btst_s	r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
	or.nz	r2, r2, _PAGE_READ		;  -> PTE needs Read flag
	btst_s	r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
	or.nz	r2, r2, _PAGE_WRITE		;  -> PTE needs Write flag
	; Testing both bits in turn also covers XCHG access (both R and W)

	; r2 now holds all the flags to be checked in the PTE
	and	r3, r0, r2		; keep only the flags under test
	brne.d	r3, r2, do_slow_path_pf	; is ((pte & flags_test) == flags_test)

	;----------------------------------------------------------------
	; UPDATE_PTE: let Linux VM know that page was accessed/dirty
	; The lr below sits in the delay slot of the brne.d above, so it
	; runs on both paths; it re-reads ECR as r3 was reused for the test.
	lr	r3, [ecr]
	or	r0, r0, _PAGE_ACCESSED		; Accessed bit always
	btst_s	r3, ECR_C_BIT_DTLB_ST_MISS	; Was it a Write Access ?
	or.nz	r0, r0, _PAGE_DIRTY		; if so, set Dirty bit as well
	st_s	r0, [r1]			; write back PTE

	CONV_PTE_TO_TLB

#if (CONFIG_ARC_MMU_VER == 1)
	; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
	; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
	; But only for old MMU or one with Metal Fix
	TLB_WRITE_HEURISTICS
#endif

	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret:	; additional label for VDK OS-kit instrumentation
	rtie

;-------- Common routine to call Linux Page Fault Handler -----------
do_slow_path_pf:

	; Give back the 4 scratch regs saved by the fast path miss handler
	TLBMISS_RESTORE_REGS

	; Slow path TLB Miss is handled as a regular ARC Exception
	; (stack switching / save the complete reg-file).
	b	call_do_page_fault
END(EV_TLBMissD)