i386: make stack size optimization selectable, and allow i486 insns (bswap)

It's hard to imagine anyone still using a CPU that lacks the bswap instruction.

function                                             old     new   delta
xmalloc_optname_optval                               888     879      -9
write_leases                                         214     205      -9
write32                                               36      27      -9
update_status                                        612     603      -9
udhcpd_main                                         1468    1459      -9
udhcpc_main                                         2708    2699      -9
udhcp_run_script                                     804     795      -9
sha256_process_block64                               423     414      -9
sha1_process_block64                                 337     328      -9
sha1_end                                              80      71      -9
send_ACK                                             161     152      -9
select_lease_time                                     64      55      -9
rpm_getint                                           118     109      -9
readprofile_main                                    1719    1710      -9
read32                                                33      24      -9
rdate_main                                           236     227      -9
machtime                                              39      30      -9
inet_addr_match                                      103      94      -9
get_prefix                                           344     335      -9
f_write32                                             31      22      -9
f_read32                                              31      22      -9
dumpleases_main                                      620     611      -9
KeyExpansion                                         197     188      -9
udhcp_str2optset                                     536     518     -18
read_config                                          222     204     -18
lfp_to_d                                              55      37     -18
ipaddr_modify                                       1226    1208     -18
dnsd_main                                           1278    1260     -18
des_crypt                                           1344    1326     -18
d_to_lfp                                             106      88     -18
bb_bswap_64                                           29      11     -18
INET_setroute                                        827     809     -18
read_leases                                          330     309     -21
zcip_main                                           1256    1229     -27
send_offer                                           476     449     -27
ipcalc_main                                          534     507     -27
handle_incoming_and_exit                            2821    2794     -27
fmt_time_bernstein_25                                131     104     -27
common_traceroute_main                              3804    3768     -36
rpm_gettags                                          451     397     -54
parse_args                                          1412    1358     -54
volume_id_probe_hfs_hfsplus                          627     564     -63
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/42 up/down: 0/-732)          Total: -732 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/Config.in b/Config.in
index ae611f8..51ff01e 100644
--- a/Config.in
+++ b/Config.in
@@ -518,6 +518,14 @@
 	compiler other than gcc.
 	If you do use gcc, this option may needlessly increase code size.
 
+config STACK_OPTIMIZATION_386
+	bool "Use -mpreferred-stack-boundary=2 on i386 arch"
+	default y
+	help
+	This option makes for smaller code, but some libc versions
+	do not work with it (they use SSE instructions without
+	ensuring stack alignment).
+
 comment 'Installation Options ("make install" behavior)'
 
 choice
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index e6c99c6..425361f 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -2,6 +2,12 @@
 # Build system
 # ==========================================================================
 
+# Allow i486 instructions (in practice, this enables the bswap instruction).
+# Keep tuning at i386: tuning for 486+ might add padding.
+CFLAGS += $(call cc-option,-march=i486 -mtune=i386,)
+
+ifeq ($(CONFIG_STACK_OPTIMIZATION_386),y)
 # -mpreferred-stack-boundary=2 is essential in preventing gcc 4.2.x
 # from aligning stack to 16 bytes. (Which is gcc's way of supporting SSE).
-CFLAGS += $(call cc-option,-march=i386 -mpreferred-stack-boundary=2,)
+CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2,)
+endif