/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */
13
#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
18
/* Work around cpp -rob */
/* The section flags #alloc and #execinstr are spelled through these two
 * macros wherever a .section directive sits inside a function-like macro
 * (EX, EX2, EXO2, EXT). Presumably this is because cpp would take a
 * literal # in such a macro body as its stringify operator.
 */
#define ALLOC #alloc
#define EXECINSTR #execinstr
/* EX(x,y,a,b): emit the access "x,y" at local label 98 together with a
 * private stub at local label 99 in .fixup, and an __ex_table entry that
 * pairs 98 with 99. The stub branches to fixupretl and evaluates
 * "a, b, %g3" in the delay slot, so %g3 holds the value that fixupretl
 * returns as the count of bytes not copied.
 * Assembly returns to .text, 4-byte aligned, afterwards.
 */
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4
33
/* EX2(x,y,c,d,e,a,b): like EX, but the stub at label 99 first executes
 * the extra instruction "c, d, e" and only then branches to fixupretl
 * with "a, b, %g3" in the delay slot. Used where the residual count
 * needs two instructions to compute.
 */
#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4
46
/* EXO2(x,y): emit the access "x, y" at local label 98 and an __ex_table
 * entry that pairs it with the shared stub at label 97 in .fixup.
 * That stub copies %o2 into %g3 before falling into fixupretl, so this
 * macro is only suitable where %o2 still holds the number of bytes left.
 */
#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4
54
/* EXT(start,end,handler): emit a four-word __ex_table record
 * "start, 0, end, handler" covering the instructions from start up to
 * end. The handlers in .fixup expect %g2 to hold
 * (broken_insn - first_insn) >> 2 on entry (see the comment above
 * label 50). No code is emitted into .text.
 */
#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4
61
/* Please do not change the following macros unless you also change the
 * logic used in .fixup at the end of this file.
 */
65
/* Both these macros have to start with exactly the same insn */
/* (__copy_user branches to ldd_std + 4 with the first insn of
 * MOVE_BIGCHUNK sitting in the delay slot of that branch.)
 *
 * MOVE_BIGCHUNK: copy 32 bytes from [src + offset] to [dst + offset].
 * Four doubleword loads fill the register pairs t0/t1, t2/t3, t4/t5 and
 * t6/t7, then eight word stores write them out. The expansion is
 * exactly twelve instructions; the fixup at label 50 depends on that.
 */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];
80
/* MOVE_BIGALIGNCHUNK: copy 32 bytes from [src + offset] to
 * [dst + offset] with four doubleword loads and four doubleword stores.
 * The expansion is exactly eight instructions; the fixup at label 52
 * depends on that. Its first instruction is identical to the first
 * instruction of MOVE_BIGCHUNK and has to stay so.
 */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];
90
/* MOVE_LASTCHUNK: copy the 16 bytes that end (offset) bytes below src
 * and dst, i.e. [ptr - offset - 0x10, ptr - offset). Two doubleword
 * loads fill t0/t1 and t2/t3, four word stores write them out.
 * The expansion is exactly six instructions (24 bytes of code); both the
 * computed jump into copy_user_table and the fixup at label 51 depend on
 * that.
 */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];
98
/* MOVE_HALFCHUNK: copy 8 bytes from [src + offset] to [dst + offset]
 * as four halfword loads followed by four halfword stores.
 * The expansion is exactly eight instructions; the fixup at label 53
 * depends on that.
 */
#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];
108
/* MOVE_SHORTCHUNK: copy the 2 bytes that end (offset) bytes below src
 * and dst, i.e. [ptr - offset - 2, ptr - offset), as two byte loads
 * followed by two byte stores. A negative offset addresses bytes at or
 * above the pointer (byte_chunk uses this).
 * The expansion is exactly four instructions (16 bytes of code); the
 * computed jump in short_end and the fixups at labels 54 and 55 depend
 * on that.
 */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
114
115 .text
116 .align 4
117
118 .globl __copy_user_begin
119__copy_user_begin:
120
121 .globl __copy_user
122dword_align:
123 andcc %o1, 1, %g0
124 be 4f
125 andcc %o1, 2, %g0
126
127 EXO2(ldub [%o1], %g2)
128 add %o1, 1, %o1
129 EXO2(stb %g2, [%o0])
130 sub %o2, 1, %o2
131 bne 3f
132 add %o0, 1, %o0
133
134 EXO2(lduh [%o1], %g2)
135 add %o1, 2, %o1
136 EXO2(sth %g2, [%o0])
137 sub %o2, 2, %o2
138 b 3f
139 add %o0, 2, %o0
1404:
141 EXO2(lduh [%o1], %g2)
142 add %o1, 2, %o1
143 EXO2(sth %g2, [%o0])
144 sub %o2, 2, %o2
145 b 3f
146 add %o0, 2, %o0
147
/* __copy_user: %o0 = dst, %o1 = src, %o2 = len.
 * Returns 0 in %o0 on success, otherwise the count of bytes not copied.
 *
 * Dispatch:
 *   (dst ^ src) & 3 != 0   -> cannot_optimize (flags from "cmp %o2, 15",
 *                             %o5 = (dst ^ src) & 3)
 *   len <= 15              -> short_aligned_end (flags from
 *                             "andcc %o1, 3")
 *   src & 3 != 0           -> dword_align, which comes back to label 3
 *
 * From label 3 on the length lives in %g1, because %o2-%o5 and %g2-%g5
 * are used as scratch by the chunk macros. After an optional single
 * word copy src is 8-byte aligned; %g7 then counts the bytes in whole
 * 128-byte blocks. Those are moved by the loop at label 5, or by ldd_std
 * when dst is 8-byte aligned as well. The remaining 16-byte chunks are
 * handled by a computed jump into copy_user_table.
 */
__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0		/* delay slot, and re-entry point for dword_align */

	be	2f
	 mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	 andcc	%o0, 4, %g0

	/* The delay slot of this branch is the first ldd of MOVE_BIGCHUNK,
	 * which equals the first insn of ldd_std; hence the "+ 4".
	 */
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0		/* flags consumed at copy_user_table_end */

	/* %g7 bytes remain in 16-byte chunks. One MOVE_LASTCHUNK is 24
	 * bytes of code per 16 bytes of data, so jump back from
	 * copy_user_table_end by %g7 * 1.5, with src/dst advanced past
	 * the chunks first.
	 */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0
200
/* copy_user_table: seven 16-byte chunks addressed backwards from the
 * already advanced %o1/%o0. It is entered part-way through by the
 * computed jumps in __copy_user and ldd_std, so that exactly
 * (%g1 & 0x70) bytes are moved. Faults anywhere in the table go to the
 * handler at label 51.
 *
 * copy_user_table_end: the flags still come from "andcc %g1, 8, %g0"
 * executed before the table (the chunk macros do not touch them).
 * If bit 3 of the length is set, 8 more bytes are copied here.
 */
copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0		/* flags consumed at copy_user_last7 */

	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	/* first word already stored: report (%g1 & 0xf) - 4 */
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
218 EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/* copy_user_last7: copy the final (%g1 & 7) bytes as an optional word,
 * halfword and byte, then return 0.
 * On entry the flags must reflect "andcc %g1, 4, %g0"; each later test
 * is issued in the delay slot of the branch before it. The fixup values
 * passed to EX are the bytes still outstanding at that access.
 */
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0
244
/* ldd_std: 128-byte block loop using doubleword loads and doubleword
 * stores. __copy_user enters at ldd_std + 4 (the first ldd has already
 * run in the delay slot of that branch) when bit 2 of dst is clear.
 * %g7 counts the bytes left in whole 128-byte blocks, %g1 holds the
 * length. Faults inside the loop body go to the handler at label 52.
 * After the loop the same computed jump as in __copy_user dispatches
 * the remaining 16-byte chunks into copy_user_table.
 */
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0		/* flags consumed at copy_user_table_end */

	/* jump back %g7 * 1.5 bytes of code from copy_user_table_end */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0
268
/* cannot_optimize: src and dst differ in their low two address bits.
 * On entry the flags come from "cmp %o2, 15" and %o5 = (dst ^ src) & 3.
 *
 *   len <= 15        -> short_end
 *   %o5 != 2         -> byte_chunk, with %o3 = len & ~15
 *   %o5 == 2         -> halfword copies: one leading byte is copied if
 *                       src is odd, then 16 bytes per iteration at
 *                       label 10 while %o3 (len & ~15) is non-zero.
 *
 * The halfword loop finishes at label 2 inside short_end with
 * %o3 = len & 0xe. Faults inside the loop body go to the handler at
 * label 53.
 */
cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end		/* fewer than 16 bytes left now */
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3
298
/* byte_chunk: byte-wise loop moving 16 bytes per iteration while %o3
 * (len & ~15, set up in cannot_optimize) is non-zero. The negative
 * offsets make MOVE_SHORTCHUNK address src+0 .. src+15. Faults inside
 * the loop body go to the handler at label 54. Falls through into
 * short_end when the loop is done.
 */
byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0
314
/* short_end: copy the last (%o2 & 15) bytes one byte at a time.
 * %o3 = %o2 & 0xe is the even part. Label 2 is also entered from
 * cannot_optimize with %o3 already set. src/dst are advanced by %o3 and
 * a computed jump lands %o3 * 8 bytes of code before short_table_end,
 * i.e. one 4-insn MOVE_SHORTCHUNK per 2 bytes. Faults inside the table
 * at label 84 go to the handler at label 55.
 *
 * short_table_end: the flags come from "andcc %o2, 1, %g0" in the delay
 * slot of the jmpl; an odd length gets one more byte. Returns 0.
 */
short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0
342
/* short_aligned_end: len <= 15 and src/dst share their low two address
 * bits. On entry the flags come from "andcc %o1, 3, %g0".
 * If src is not word aligned the byte-wise short_end is used. Otherwise
 * 8 bytes are copied with two word loads/stores when bit 3 of the length
 * is set, and copy_user_last7 finishes the job with %g1 = %o2 and the
 * flags from "andcc %o2, 4, %g0".
 * %o2 is not decremented here, so the EXO2 accesses report the full
 * length and the second store reports len - 4.
 */
short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1
359
360 .section .fixup,#alloc,#execinstr
361 .align 4
36297:
363 mov %o2, %g3
364fixupretl:
365 sethi %hi(PAGE_OFFSET), %g1
366 cmp %o0, %g1
367 blu 1f
368 cmp %o1, %g1
369 bgeu 1f
370 ld [%g6 + TI_PREEMPT], %g1
371 cmp %g1, 0
372 bne 1f
373 nop
374 save %sp, -64, %sp
375 mov %i0, %o0
376 call __bzero
377 mov %g3, %o1
378 restore
3791: retl
380 mov %g3, %o0
381
/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * Each chunk is 12 insns (4 ldd, then 8 st) and moves 32 bytes.
 *	x = g2 % 12;
 *	g3 = (g1 & 0x7f) + g7
 *	     - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 *	o0 += (g2 / 12) * 32;
 *
 * Label 60 is shared with the handler at label 52. It expects
 * %g7 = bytes left counted from the faulting chunk, %g2 = bytes of that
 * chunk already stored, and %o0 advanced by the value %g7 had on entry
 * to the handler.
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f			/* flags still from "cmp %g2, 12" */
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f			/* fault on an ldd: nothing stored */
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
413 sub %g3, %g2, %g3
51:
/* Fault inside copy_user_table. Each chunk is 6 insns (2 ldd, then
 * 4 st) and moves 16 bytes; the table holds 42 insns.
 *	i = 41 - g2; j = i % 6;
 *	left = (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 *	g3 = (g1 & 15) + left;
 *	o0 -= left;
 * i counts the insns after the faulting one, j those within its chunk.
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0		/* cancelled by the final "sub %o0, %g3" */
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f			/* j >= 4: fault on an ldd, whole chunk left */
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
435 sub %o0, %g3, %o0
52:
/* Fault inside the ldd_std loop. Each chunk is 8 insns (4 ldd, then
 * 4 std) and moves 32 bytes.
 *	g3 = (g1 & 0x7f) + g7 - (g2 / 8) * 32
 *	     - ((g2 & 4) ? (g2 & 3) * 8 : 0);
 *	o0 += (g2 / 8) * 32;
 * The final arithmetic is done at label 60 in the handler for label 50.
 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0		/* set when an std faulted */
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
448 clr %g2
53:
/* Fault inside the halfword loop at label 10. Each MOVE_HALFCHUNK is
 * 8 insns (4 lduh, then 4 sth) and moves 8 bytes; two per iteration.
 *	g3 = o3 + (o2 & 15) - (g2 & 8)
 *	     - ((g2 & 4) ? (g2 & 3) * 2 : 0);
 *	o0 += (g2 & 8);
 */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0		/* set when an sth faulted */
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
462 sub %g3, %g2, %g3
54:
/* Fault inside the byte_chunk loop. Each MOVE_SHORTCHUNK is 4 insns
 * (2 ldub, then 2 stb) and moves 2 bytes.
 *	g3 = o3 + (o2 & 15) - (g2 / 4) * 2
 *	     - ((g2 & 2) ? (g2 & 1) : 0);
 *	o0 += (g2 / 4) * 2;
 * The last term is 1 only when the second stb of a chunk faulted.
 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5		/* bit 0 and bit 1 of the original %g2 */
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
476 add %o2, %o3, %g3
55:
/* Fault inside the short table at label 84. Each MOVE_SHORTCHUNK is
 * 4 insns (2 ldub, then 2 stb) and moves 2 bytes; the table holds
 * 28 insns and addresses its bytes backwards from the advanced %o0.
 *	i = 27 - g2;			insns after the faulting one
 *	left = (i / 4) * 2 + ((i & 3) ? 2 : 1);
 *	g3 = (o2 & 1) + left;
 *	o0 -= left;
 * Within the faulting chunk both bytes are still outstanding unless the
 * second stb faulted ((i & 3) == 0), in which case one byte is.
 * The previous code added 1 or 0 here instead of 2 or 1, which
 * under-reported the residue by one byte and could return 0 for a fault
 * on the very last stb.
 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	2, %g2
	add	%o5, %o5, %o5
	be,a	1f			/* delay slot runs only when taken */
	 mov	1, %g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3
493 add %g3, %o2, %g3
494
495 .globl __copy_user_end
496__copy_user_end: