Blame - arch/nios2/kernel/insnemu.S - codeaurora/cp-linux

blob: 1c6b651e770d34092aa98bca2a2f4da368e2647c [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame^]	1	/*
				2	* Copyright (C) 2003-2013 Altera Corporation
				3	* All rights reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
				9	*
				10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	* GNU General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU General Public License
				16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
				17	*/
				18
				19
				20	#include <linux/linkage.h>
				21	#include <asm/entry.h>
				22
				23	.set noat /* this file names r1 explicitly; see the note just below */
				24	.set nobreak /* NOTE(review): presumably the matching switch for the break-handler registers (bt/ba) - confirm in the GNU as Nios II docs */
				25
				26	/*
				27	* Explicitly allow the use of r1 (the assembler temporary register)
				28	* within this code. This register is normally reserved for the use of
				29	* the compiler.
				30	*/
				31
				32	ENTRY(instruction_trap)
				33	ldw r1, PT_R1(sp) /* Reload the trapped context saved at PT_*(sp) */
				34	ldw r2, PT_R2(sp)
				35	ldw r3, PT_R3(sp)
				36	ldw r4, PT_R4(sp)
				37	ldw r5, PT_R5(sp)
				38	ldw r6, PT_R6(sp)
				39	ldw r7, PT_R7(sp)
				40	ldw r8, PT_R8(sp)
				41	ldw r9, PT_R9(sp)
				42	ldw r10, PT_R10(sp)
				43	ldw r11, PT_R11(sp)
				44	ldw r12, PT_R12(sp)
				45	ldw r13, PT_R13(sp)
				46	ldw r14, PT_R14(sp)
				47	ldw r15, PT_R15(sp)
				48	ldw ra, PT_RA(sp)
				49	ldw fp, PT_FP(sp)
				50	ldw gp, PT_GP(sp)
				51	ldw et, PT_ESTATUS(sp) /* et is only a scratch register from here on */
				52	wrctl estatus, et
				53	ldw ea, PT_EA(sp)
				54	ldw et, PT_SP(sp) /* backup sp in et */
				55	/* Pop the PT_REGS_SIZE frame; et keeps the PT_SP value for later. */
				56	addi sp, sp, PT_REGS_SIZE
				57
				58	/* INSTRUCTION EMULATION
				59	* ---------------------
				60	*
				61	* Nios II processors generate exceptions for unimplemented instructions.
				62	* The routines below emulate these instructions. Depending on the
				63	* processor core, the only instructions that might need to be emulated
				64	* are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
				65	*
				66	* The emulations match the instructions, except for the following
				67	* limitations:
				68	*
				69	* 1) The emulation routines do not emulate the use of the exception
				70	* temporary register (et) as a source operand because the exception
				71	* handler already has modified it.
				72	*
				73	* 2) The routines do not emulate the use of the stack pointer (sp) or
				74	* the exception return address register (ea) as a destination because
				75	* modifying these registers crashes the exception handler or the
				76	* interrupted routine.
				77	*
				78	* Detailed Design
				79	* ---------------
				80	*
				81	* The emulation routines expect the contents of integer registers r0-r31
				82	* to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp). The
				83	* routines retrieve source operands from the stack and modify the
				84	* destination register's value on the stack prior to the end of the
				85	* exception handler. Then all registers except the destination register
				86	* are restored to their previous values.
				87	*
				88	* The instruction that causes the exception is found at address -4(ea).
				89	* The instruction's OP and OPX fields identify the operation to be
				90	* performed.
				91	*
				92	* One instruction, muli, is an I-type instruction that is identified by
				93	* an OP field of 0x24.
				94	*
				95	* muli AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
				96	* 27 22 6 0 <-- LSB of field
				97	*
				98	* The remaining emulated instructions are R-type and have an OP field
				99	* of 0x3a. Their OPX fields identify them.
				100	*
				101	* R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
				102	* 27 22 17 11 6 0 <-- LSB of field
				103	*
				104	*
				105	* Opcode Encoding. muli is identified by its OP value. Then OPX & 0x02
				106	* is used to differentiate between the division opcodes and the
				107	* remaining multiplication opcodes.
				108	*
				109	* Instruction OP OPX OPX & 0x02
				110	* ----------- ---- ---- ----------
				111	* muli 0x24
				112	* divu 0x3a 0x24 0
				113	* div 0x3a 0x25 0
				114	* mul 0x3a 0x27 != 0
				115	* mulxuu 0x3a 0x07 != 0
				116	* mulxsu 0x3a 0x17 != 0
				117	* mulxss 0x3a 0x1f != 0
				118	*/
				119
				120
				121	/*
				122	* Save everything on the stack to make it easy for the emulation
				123	* routines to retrieve the source register operands.
				124	*/
				125
				126	addi sp, sp, -128 /* 32 word slots; slot n (at 4*n(sp)) stands for rn */
				127	stw zero, 0(sp) /* Save zero on stack to avoid special case for r0. */
				128	stw r1, 4(sp)
				129	stw r2, 8(sp)
				130	stw r3, 12(sp)
				131	stw r4, 16(sp)
				132	stw r5, 20(sp)
				133	stw r6, 24(sp)
				134	stw r7, 28(sp)
				135	stw r8, 32(sp)
				136	stw r9, 36(sp)
				137	stw r10, 40(sp)
				138	stw r11, 44(sp)
				139	stw r12, 48(sp)
				140	stw r13, 52(sp)
				141	stw r14, 56(sp)
				142	stw r15, 60(sp)
				143	stw r16, 64(sp)
				144	stw r17, 68(sp)
				145	stw r18, 72(sp)
				146	stw r19, 76(sp)
				147	stw r20, 80(sp)
				148	stw r21, 84(sp)
				149	stw r22, 88(sp)
				150	stw r23, 92(sp)
				151	/* Don't bother to save et. It's already been changed. */
				152	rdctl r5, estatus
				153	stw r5, 100(sp) /* estatus copy is parked in slot 25 */
				154
				155	stw gp, 104(sp)
				156	stw et, 108(sp) /* et contains previous sp value. */
				157	stw fp, 112(sp)
				158	stw ea, 116(sp)
				159	stw ra, 124(sp) /* ra is r31, so it belongs in slot 31 (was 120(sp), slot 30) */
				160	/* Slots 24 (96(sp)) and 30 (120(sp)) are never written. NOTE(review): a */
				161	/* destination field of 25 would overwrite the estatus copy at 100(sp). */
				162	/*
				163	* Split the instruction into its fields. We need 4*A, 4*B, and 4*C as
				164	* offsets to the stack pointer for access to the stored register values.
				165	*/
				166	ldw r2,-4(ea) /* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
				167	roli r3, r2, 7 /* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
				168	roli r4, r3, 3 /* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
				169	roli r5, r4, 2 /* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
				170	srai r4, r4, 16 /* r4 = (sign-extended) IMM16 */
				171	roli r6, r5, 5 /* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
				172	andi r2, r2, 0x3f /* r2 = 00000000000000000000000000,PPPPPP */
				173	andi r3, r3, 0x7c /* r3 = 0000000000000000000000000,AAAAA,00 */
				174	andi r5, r5, 0x7c /* r5 = 0000000000000000000000000,BBBBB,00 */
				175	andi r6, r6, 0x7c /* r6 = 0000000000000000000000000,CCCCC,00 */
				176
				177	/* Now
				178	* r2 = OP
				179	* r3 = 4*A
				180	* r4 = IMM16 (sign extended)
				181	* r5 = 4*B
				182	* r6 = 4*C
				183	*/
				184
				185	/*
				186	* Get the operands.
				187	*
				188	* It is necessary to check for muli because it uses an I-type
				189	* instruction format, while the other instructions have an R-type
				190	* format.
				191	*
				192	* Prepare for either multiplication or division loop.
				193	* They both loop 32 times.
				194	*/
				195	movi r14, 32
				196
				197	add r3, r3, sp /* r3 = address of A-operand. */
				198	ldw r3, 0(r3) /* r3 = A-operand. */
				199	movi r7, 0x24 /* muli opcode (I-type instruction format) */
				200	beq r2, r7, mul_immed /* muli doesn't use the B register as a source */
				201
				202	add r5, r5, sp /* r5 = address of B-operand. */
				203	ldw r5, 0(r5) /* r5 = B-operand. */
				204	/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
				205	/* IMM16 not needed, align OPX portion */
				206	/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,NNNNN */
				207	srli r4, r4, 5 /* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
				208	andi r4, r4, 0x3f /* r4 = 00000000000000000000000000,-OPX-- */
				209
				210	/* Now
				211	* r2 = OP
				212	* r3 = src1
				213	* r5 = src2
				214	* r4 = OPX (no longer can be muli)
				215	* r6 = 4*C
				216	*/
				217
				218
				219	/*
				220	* Multiply or Divide?
				221	*/
				222	andi r7, r4, 0x02 /* For R-type multiply instructions,
				223	OPX & 0x02 != 0 */
				224	bne r7, zero, multiply
				225
				226
				227	/* DIVISION
				228	*
				229	* Divide an unsigned dividend by an unsigned divisor using
				230	* a shift-and-subtract algorithm. The example below shows
				231	* 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a
				232	* single register to store both the dividend and the quotient,
				233	* allowing both values to be shifted with a single instruction.
				234	*
				235	* remainder dividend:quotient
				236	* --------- -----------------
				237	* initialize 00000000 00101011:
				238	* shift 00000000 0101011:_
				239	* remainder >= divisor? no 00000000 0101011:0
				240	* shift 00000000 101011:0_
				241	* remainder >= divisor? no 00000000 101011:00
				242	* shift 00000001 01011:00_
				243	* remainder >= divisor? no 00000001 01011:000
				244	* shift 00000010 1011:000_
				245	* remainder >= divisor? no 00000010 1011:0000
				246	* shift 00000101 011:0000_
				247	* remainder >= divisor? no 00000101 011:00000
				248	* shift 00001010 11:00000_
				249	* remainder >= divisor? yes 00001010 11:000001
				250	* remainder -= divisor - 00000111
				251	* ----------
				252	* 00000011 11:000001
				253	* shift 00000111 1:000001_
				254	* remainder >= divisor? yes 00000111 1:0000011
				255	* remainder -= divisor - 00000111
				256	* ----------
				257	* 00000000 1:0000011
				258	* shift 00000001 :0000011_
				259	* remainder >= divisor? no 00000001 :00000110
				260	*
				261	* The quotient is 00000110.
				262	*/
				263
				264	divide:
				265	/*
				266	* Prepare for division by assuming the result
				267	* is unsigned, and storing its "sign" as 0.
				268	*/
				269	movi r17, 0
				270
				271
				272	/* Which division opcode? */
				273	xori r7, r4, 0x25 /* OPX of div */
				274	bne r7, zero, unsigned_division
				275
				276
				277	/*
				278	* OPX is div. Determine and store the sign of the quotient.
				279	* Then take the absolute value of both operands.
				280	*/
				281	xor r17, r3, r5 /* MSB contains sign of quotient */
				282	bge r3,zero,dividend_is_nonnegative
				283	sub r3, zero, r3 /* -r3 */
				284	dividend_is_nonnegative:
				285	bge r5, zero, divisor_is_nonnegative
				286	sub r5, zero, r5 /* -r5 */
				287	divisor_is_nonnegative:
				288
				289
				290	unsigned_division:
				291	/* Initialize the unsigned-division loop. */
				292	movi r13, 0 /* remainder = 0 */
				293
				294	/* Now
				295	* r3 = dividend : quotient
				296	* r4 = 0x25 for div, 0x24 for divu
				297	* r5 = divisor
				298	* r13 = remainder
				299	* r14 = loop counter (already initialized to 32)
				300	* r17 = MSB contains sign of quotient
				301	*/
				302
				303
				304	/*
				305	* for (count = 32; count > 0; --count)
				306	* {
				307	*/
				308	divide_loop:
				309
				310	/*
				311	* Division:
				312	*
				313	* (remainder:dividend:quotient) <<= 1;
				314	*/
				315	slli r13, r13, 1
				316	cmplt r7, r3, zero /* r7 = MSB of r3 */
				317	or r13, r13, r7
				318	slli r3, r3, 1
				319
				320
				321	/*
				322	* if (remainder >= divisor)
				323	* {
				324	* set LSB of quotient
				325	* remainder -= divisor;
				326	* }
				327	*/
				328	bltu r13, r5, div_skip
				329	ori r3, r3, 1
				330	sub r13, r13, r5
				331	div_skip:
				332
				333	/*
				334	* }
				335	*/
				336	subi r14, r14, 1
				337	bne r14, zero, divide_loop
				338
				339
				340	/* Now
				341	* r3 = quotient
				342	* r4 = 0x25 for div, 0x24 for divu
				343	* r6 = 4*C
				344	* r17 = MSB contains sign of quotient
				345	*/
				346
				347
				348	/*
				349	* Conditionally negate signed quotient. If quotient is unsigned,
				350	* the sign already is initialized to 0.
				351	*/
				352	bge r17, zero, quotient_is_nonnegative
				353	sub r3, zero, r3 /* -r3 */
				354	quotient_is_nonnegative:
				355
				356
				357	/*
				358	* Final quotient is in r3.
				359	*/
				360	add r6, r6, sp
				361	stw r3, 0(r6) /* write quotient to stack */
				362	br restore_registers
				363
				364
				365
				366
				367	/* MULTIPLICATION
				368	*
				369	* A "product" is the number that one gets by summing a "multiplicand"
				370	* several times. The "multiplier" specifies the number of copies of the
				371	* multiplicand that are summed.
				372	*
				373	* Actual multiplication algorithms don't use repeated addition, however.
				374	* Shift-and-add algorithms get the same answer as repeated addition, and
				375	* they are faster. To compute the lower half of a product (pppp below)
				376	* one shifts the product left before adding in each of the partial
				377	* products (a * mmmm) through (d * mmmm).
				378	*
				379	* To compute the upper half of a product (PPPP below), one adds in the
				380	* partial products (d * mmmm) through (a * mmmm), each time following
				381	* the add by a right shift of the product.
				382	*
				383	* mmmm
				384	* * abcd
				385	* ------
				386	* #### = d * mmmm
				387	* #### = c * mmmm
				388	* #### = b * mmmm
				389	* #### = a * mmmm
				390	* --------
				391	* PPPPpppp
				392	*
				393	* The example above shows 4 partial products. Computing actual Nios II
				394	* products requires 32 partials.
				395	*
				396	* It is possible to compute the result of mulxsu from the result of
				397	* mulxuu because the only difference between the results of these two
				398	* opcodes is the value of the partial product associated with the sign
				399	* bit of rA.
				400	*
				401	* mulxsu = mulxuu - ((rA < 0) ? rB : 0);
				402	*
				403	* It is possible to compute the result of mulxss from the result of
				404	* mulxsu because the only difference between the results of these two
				405	* opcodes is the value of the partial product associated with the sign
				406	* bit of rB.
				407	*
				408	* mulxss = mulxsu - ((rB < 0) ? rA : 0);
				409	*
				410	*/
				411
				412	mul_immed:
				413	/* Opcode is muli. Change it into mul for remainder of algorithm. */
				414	mov r6, r5 /* Field B is dest register, not field C. */
				415	mov r5, r4 /* Field IMM16 is src2, not field B. */
				416	movi r4, 0x27 /* OPX of mul is 0x27 */
				417
				418	multiply:
				419	/* Initialize the multiplication loop. */
				420	movi r9, 0 /* mul_product = 0 */
				421	movi r10, 0 /* mulxuu_product = 0 */
				422	mov r11, r5 /* save original multiplier for mulxsu and mulxss */
				423	mov r12, r5 /* mulxuu_multiplier (will be shifted) */
				424	movi r16, 1 /* used to create "rori B,A,1" from "ror B,A,r16" */
				425
				426	/* Now
				427	* r3 = multiplicand
				428	* r5 = mul_multiplier
				429	* r6 = 4 * dest_register (used later as offset to sp)
				430	* r7 = temp
				431	* r9 = mul_product
				432	* r10 = mulxuu_product
				433	* r11 = original multiplier
				434	* r12 = mulxuu_multiplier
				435	* r14 = loop counter (already initialized)
				436	* r16 = 1
				437	*/
				438
				439
				440	/*
				441	* for (count = 32; count > 0; --count)
				442	* {
				443	*/
				444	multiply_loop:
				445
				446	/*
				447	* mul_product <<= 1;
				448	* lsb = multiplier & 1;
				449	*/
				450	slli r9, r9, 1
				451	andi r7, r12, 1
				452
				453	/*
				454	* if (lsb == 1)
				455	* {
				456	* mulxuu_product += multiplicand;
				457	* }
				458	*/
				459	beq r7, zero, mulx_skip /* r7 is 0 here, so no carry is ORed in below */
				460	add r10, r10, r3
				461	cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
				462	ror r7, r7, r16 /* r7 = 0x80000000 on carry, or else 0x00000000 */
				463	mulx_skip:
				464
				465	/*
				466	* if (MSB of mul_multiplier == 1)
				467	* {
				468	* mul_product += multiplicand;
				469	* }
				470	*/
				471	bge r5, zero, mul_skip
				472	add r9, r9, r3
				473	mul_skip:
				474
				475	/*
				476	* mulxuu_product >>= 1; logical shift
				477	* mul_multiplier <<= 1; done with MSB
				478	* mulx_multiplier >>= 1; done with LSB
				479	*/
				480	srli r10, r10, 1
				481	or r10, r10, r7 /* OR in the saved carry bit. */
				482	slli r5, r5, 1
				483	srli r12, r12, 1
				484
				485
				486	/*
				487	* }
				488	*/
				489	subi r14, r14, 1
				490	bne r14, zero, multiply_loop
				491
				492
				493	/*
				494	* Multiply emulation loop done.
				495	*/
				496
				497	/* Now
				498	* r3 = multiplicand
				499	* r4 = OPX
				500	* r6 = 4 * dest_register (used later as offset to sp)
				501	* r7 = temp
				502	* r9 = mul_product
				503	* r10 = mulxuu_product
				504	* r11 = original multiplier
				505	*/
				506
				507
				508	/* Calculate address for result from 4 * dest_register */
				509	add r6, r6, sp
				510
				511
				512	/*
				513	* Select/compute the result based on OPX.
				514	*/
				515
				516
				517	/* OPX == mul? Then store. */
				518	xori r7, r4, 0x27
				519	beq r7, zero, store_product
				520
				521	/* It's one of the mulx.. opcodes. Move over the result. */
				522	mov r9, r10
				523
				524	/* OPX == mulxuu? Then store. */
				525	xori r7, r4, 0x07
				526	beq r7, zero, store_product
				527
				528	/* Compute mulxsu
				529	*
				530	* mulxsu = mulxuu - ((rA < 0) ? rB : 0);
				531	*/
				532	bge r3, zero, mulxsu_skip
				533	sub r9, r9, r11
				534	mulxsu_skip:
				535
				536	/* OPX == mulxsu? Then store. */
				537	xori r7, r4, 0x17
				538	beq r7, zero, store_product
				539
				540	/* Compute mulxss
				541	*
				542	* mulxss = mulxsu - ((rB < 0) ? rA : 0);
				543	*/
				544	bge r11,zero,mulxss_skip
				545	sub r9, r9, r3
				546	mulxss_skip:
				547	/* At this point, assume that OPX is mulxss, so store. */
				548
				549
				550	store_product:
				551	stw r9, 0(r6)
				552
				553
				554	restore_registers:
				555	/* No need to restore r0. */
				556	ldw r5, 100(sp) /* estatus copy stored in slot 25 during the save */
				557	wrctl estatus, r5
				558
				559	ldw r1, 4(sp)
				560	ldw r2, 8(sp)
				561	ldw r3, 12(sp)
				562	ldw r4, 16(sp)
				563	ldw r5, 20(sp)
				564	ldw r6, 24(sp)
				565	ldw r7, 28(sp)
				566	ldw r8, 32(sp)
				567	ldw r9, 36(sp)
				568	ldw r10, 40(sp)
				569	ldw r11, 44(sp)
				570	ldw r12, 48(sp)
				571	ldw r13, 52(sp)
				572	ldw r14, 56(sp)
				573	ldw r15, 60(sp)
				574	ldw r16, 64(sp)
				575	ldw r17, 68(sp)
				576	ldw r18, 72(sp)
				577	ldw r19, 76(sp)
				578	ldw r20, 80(sp)
				579	ldw r21, 84(sp)
				580	ldw r22, 88(sp)
				581	ldw r23, 92(sp)
				582	/* Does not need to restore et */
				583	ldw gp, 104(sp)
				584
				585	ldw fp, 112(sp)
				586	ldw ea, 116(sp)
				587	ldw ra, 124(sp) /* slot 31: picks up a result whose destination was ra */
				588	ldw sp, 108(sp) /* last restore sp */
				589	eret
				590
				591	.set at /* counterpart of the .set noat near the top of this file */
				592	.set break /* counterpart of the .set nobreak near the top of this file */