Blame - arch/alpha/lib/divide.S - codeaurora/cp-linux

blob: 2d1a0484a99e009e3e198f47f77b06194b50006d [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame^]	1	/*
				2	* arch/alpha/lib/divide.S
				3	*
				4	* (C) 1995 Linus Torvalds
				5	*
				6	* Alpha division..
				7	*/
				8
				9	/*
				10	* The alpha chip doesn't provide hardware division, so we have to do it
				11	* by hand. The compiler expects the functions
				12	*
				13	* __divqu: 64-bit unsigned long divide
				14	* __remqu: 64-bit unsigned long remainder
				15	* __divq/__remq: signed 64-bit
				16	* __divlu/__remlu: unsigned 32-bit
				17	* __divl/__reml: signed 32-bit
				18	*
				19	* These are not normal C functions: instead of the normal
				20	* calling sequence, these expect their arguments in registers
				21	* $24 and $25, and return the result in $27. Register $28 may
				22	* be clobbered (assembly temporary), anything else must be saved.
				23	*
				24	* In short: painful.
				25	*
				26	* This is a rather simple bit-at-a-time algorithm: it's very good
				27	* at dividing random 64-bit numbers, but the more usual case where
				28	* the divisor is small is handled better by the DEC algorithm
				29	* using lookup tables. This uses much less memory, though, and is
				30	* nicer on the cache.. Besides, I don't know the copyright status
				31	* of the DEC code.
				32	*/
				33
				34	/*
				35	* My temporaries:
				36	* $0 - current bit
				37	* $1 - shifted divisor
				38	* $2 - modulus/quotient
				39	*
				40	* $23 - return address
				41	* $24 - dividend
				42	* $25 - divisor
				43	*
				44	* $27 - quotient/modulus
				45	* $28 - compare status
				46	*/
				47
				48	#define halt .long 0
					/*
					* `halt` (defined just above) emits a 32-bit zero word.
					* NOTE(review): presumably this encodes the Alpha HALT PALcall -
					* confirm. It is not referenced in the visible part of this file.
					*/
				49
				50	/*
				51	* Select function type and registers
					*
					* mask    - single-bit marker for the quotient bit being tried
					* divisor - working (shifted) copy of the divisor
					* compare - result of the unsigned comparisons
					* tmp1    - candidate remainder (modulus - divisor)
					* tmp2    - candidate quotient (quotient + mask), DIV build only
					*
					* When DIV is defined this file builds the __div* entry points:
					* the quotient lives in the return register $27 and the running
					* remainder in $2. Otherwise it builds the __rem* entry points and
					* the two registers swap roles, so the remainder ends up in $27.
					*
					* DIV_ONLY(...) / MOD_ONLY(...) emit their arguments only in the
					* matching build and expand to nothing in the other one (MOD_ONLY
					* is not used in the visible part of this file).
					* GETSIGN(x) leaves in x a value whose sign bit is the sign the
					* result must have: $24 xor $25 for a quotient, a copy of the
					* dividend $24 for a remainder.
					* STACK is the frame size in bytes; the DIV build has one more
					* save slot (tmp2 at offset 32) than the REM build.
				52	*/
				53	#define mask $0
				54	#define divisor $1
				55	#define compare $28
				56	#define tmp1 $3
				57	#define tmp2 $4
				58
				59	#ifdef DIV
				60	#define DIV_ONLY(x,y...) x,##y
				61	#define MOD_ONLY(x,y...)
				62	#define func(x) __div##x
				63	#define modulus $2
				64	#define quotient $27
				65	#define GETSIGN(x) xor $24,$25,x
				66	#define STACK 48
				67	#else
				68	#define DIV_ONLY(x,y...)
				69	#define MOD_ONLY(x,y...) x,##y
				70	#define func(x) __rem##x
				71	#define modulus $27
				72	#define quotient $2
				73	#define GETSIGN(x) bis $24,$24,x
				74	#define STACK 32
				75	#endif
				76
				77	/*
				78	* For 32-bit operations, we need to extend to 64-bit
					*
					* With INTSIZE defined the entry points are func(lu) (unsigned) and
					* func(l) (signed); LONGIFY zero-extends the low 32 bits of x
					* (zapnot with byte mask 15 keeps bytes 0-3) and SLONGIFY
					* sign-extends them (32-bit add of 0). Without INTSIZE the entry
					* points are func(qu) and func(q) and both macros expand to
					* nothing.
				79	*/
				80	#ifdef INTSIZE
				81	#define ufunction func(lu)
				82	#define sfunction func(l)
				83	#define LONGIFY(x) zapnot x,15,x
				84	#define SLONGIFY(x) addl x,0,x
				85	#else
				86	#define ufunction func(qu)
				87	#define sfunction func(q)
				88	#define LONGIFY(x)
				89	#define SLONGIFY(x)
				90	#endif
				91
				92	.set noat
					/*
					* ufunction - unsigned divide (DIV build) or remainder (REM build).
					*
					* In:  $24 = dividend, $25 = divisor, $23 = return address
					* Out: $27 = quotient (DIV) or remainder (REM)
					*
					* $28 is overwritten (it is `compare`). $0, $1, $2, tmp1 and, in
					* the DIV build, tmp2 are saved in the frame and restored on exit.
					* $24 and $25 are only read.
					*
					* Division by zero is not trapped: the code branches straight to
					* the exit, so $27 holds 0 in the DIV build and the (LONGIFY'd)
					* dividend in the REM build.
					*
					* Method: shift the divisor and a one-bit mask left until the
					* divisor is no longer below the dividend (or, in the 64-bit
					* build, its top bit is set), then walk both back to the right,
					* subtracting the divisor wherever it fits and, for DIV, adding
					* the mask bit to the quotient.
					*/
				93	.align 3
				94	.globl ufunction
				95	.ent ufunction
				96	ufunction:
				97	subq $30,STACK,$30 /* allocate STACK bytes for the register save area */
				98	.frame $30,STACK,$23
				99	.prologue 0
				100
					/*
					* Label 7 is the common body: sfunction branches here (bge ..,7b)
					* after allocating a frame of the same size. The register saves
					* are interleaved with the setup; each register is stored before
					* the instruction that first overwrites it.
					*/
				101	7: stq $1, 0($30)
				102	bis $25,$25,divisor /* divisor = $25 */
				103	stq $2, 8($30)
				104	bis $24,$24,modulus /* running remainder starts out as the dividend */
				105	stq $0,16($30)
				106	bis $31,$31,quotient /* quotient = 0 */
				107	LONGIFY(divisor)
				108	stq tmp1,24($30)
				109	LONGIFY(modulus)
				110	bis $31,1,mask /* mask = 1 */
				111	DIV_ONLY(stq tmp2,32($30))
				112	beq divisor, 9f /* div by zero: skip both loops, $27 keeps its initial value */
				113
				114	#ifdef INTSIZE
				115	/*
				116	* shift divisor left, using 3-bit shifts for
				117	* 32-bit divides as we can't overflow. Three-bit
				118	* shifts will result in looping three times less
				119	* here, but can result in two loops more later.
				120	* Thus using a large shift isn't worth it (and
				121	* s8add pairs better than a sll..)
				122	*/
				123	1: cmpult divisor,modulus,compare /* compare = (divisor < modulus), unsigned */
				124	s8addq divisor,$31,divisor /* divisor *= 8 */
				125	s8addq mask,$31,mask /* mask *= 8 */
				126	bne compare,1b /* repeat while the pre-shift divisor was below modulus */
				127	#else
				128	1: cmpult divisor,modulus,compare /* compare = (divisor < modulus), unsigned */
				129	blt divisor, 2f /* top bit already set: doubling again would overflow */
				130	addq divisor,divisor,divisor /* divisor <<= 1 */
				131	addq mask,mask,mask /* mask <<= 1 */
				132	bne compare,1b /* repeat while the pre-shift divisor was below modulus */
				133	unop /* no-op filler */
				134	#endif
				135
				136	/* ok, start to go right again.. */
					/*
					* One iteration per mask position: tmp2/tmp1 hold the quotient and
					* remainder that apply if the divisor fits; the conditional moves
					* commit them only when compare is non-zero.
					*/
				137	2: DIV_ONLY(addq quotient,mask,tmp2)
				138	srl mask,1,mask /* advance to the next lower bit */
				139	cmpule divisor,modulus,compare /* compare = (divisor <= modulus), unsigned */
				140	subq modulus,divisor,tmp1 /* tmp1 = modulus - divisor */
				141	DIV_ONLY(cmovne compare,tmp2,quotient)
				142	srl divisor,1,divisor /* divisor >>= 1 for the next iteration */
				143	cmovne compare,tmp1,modulus /* divisor fitted: keep the reduced remainder */
				144	bne mask,2b /* finished once the mask bit has been shifted out */
				145
					/* Common exit: restore the saved registers and release the frame. */
				146	9: ldq $1, 0($30)
				147	ldq $2, 8($30)
				148	ldq $0,16($30)
				149	ldq tmp1,24($30)
				150	DIV_ONLY(ldq tmp2,32($30))
				151	addq $30,STACK,$30
				152	ret $31,($23),1 /* return through $23 */
				153	.end ufunction
				154
				155	/*
				156	* Uhh.. Ugly signed division. I'd rather not have it at all, but
				157	* it's needed in some circumstances. There are different ways to
				158	* handle this, really. This does:
				159	* -a / b = a / -b = -(a / b)
				160	* -a % b = -(a % b)
				161	* a % -b = a % b
				162	* which is probably not the best solution, but at least should
				163	* have the property that (x/y)*y + (x%y) = x.
				164	*/
				165	.align 3
					/*
					* sfunction - signed divide (DIV build) or remainder (REM build).
					*
					* In:  $24 = dividend, $25 = divisor, $23 = return address
					* Out: $27 = quotient (DIV) or remainder (REM)
					* $28 is overwritten; $24, $25, $23 and tmp1 are saved and restored
					* on the slow path.
					*
					* Fast path: if neither operand is negative, branch into the body
					* of ufunction (label 7), which reuses the frame allocated here
					* and returns directly to the caller.
					* Slow path: call ufunction on the absolute values, then negate
					* the result if GETSIGN says the result must be negative.
					*
					* NOTE(review): the cmovlt tests below look at the full 64-bit
					* register, so the INTSIZE build appears to rely on 32-bit
					* operands arriving sign-extended - confirm against the callers.
					*/
				166	.globl sfunction
				167	.ent sfunction
				168	sfunction:
				169	subq $30,STACK,$30 /* same frame size as ufunction, so label 7 can share it */
				170	.frame $30,STACK,$23
				171	.prologue 0
				172	bis $24,$25,$28 /* sign bit of $28 is set if either operand is negative */
				173	SLONGIFY($28)
				174	bge $28,7b /* both non-negative: continue in ufunction's body */
				175	stq $24,0($30)
				176	subq $31,$24,$28 /* $28 = -$24 */
				177	stq $25,8($30)
				178	cmovlt $24,$28,$24 /* abs($24) */
				179	stq $23,16($30) /* $23 is reused as the link register for the bsr below */
				180	subq $31,$25,$28 /* $28 = -$25 */
				181	stq tmp1,24($30)
				182	cmovlt $25,$28,$25 /* abs($25) */
				183	unop /* no-op filler */
				184	bsr $23,ufunction /* unsigned result of |$24| and |$25| comes back in $27 */
				185	ldq $24,0($30) /* reload the original signed operands */
				186	ldq $25,8($30)
				187	GETSIGN($28)
				188	subq $31,$27,tmp1 /* tmp1 = -$27 */
				189	SLONGIFY($28)
				190	ldq $23,16($30) /* restore the caller's return address */
				191	cmovlt $28,tmp1,$27 /* result must be negative: use the negated value */
				192	ldq tmp1,24($30)
				193	addq $30,STACK,$30
				194	ret $31,($23),1 /* return through $23 */
				195	.end sfunction