*: introduce and use FAST_FUNC: regparm on i386, otherwise no-op

   text    data     bss     dec     hex filename
 808035     611    6868  815514   c719a busybox_old
 804472     611    6868  811951   c63af busybox_unstripped

diff --git a/archival/ar.c b/archival/ar.c
index 0a95e5c..ddc1209 100644
--- a/archival/ar.c
+++ b/archival/ar.c
@@ -16,7 +16,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-static void header_verbose_list_ar(const file_header_t *file_header)
+static void FAST_FUNC header_verbose_list_ar(const file_header_t *file_header)
 {
 	const char *mode = bb_mode_string(file_header->mode);
 	char *mtime;
diff --git a/archival/bbunzip.c b/archival/bbunzip.c
index 09a4eb9..b2e8161 100644
--- a/archival/bbunzip.c
+++ b/archival/bbunzip.c
@@ -28,7 +28,7 @@
 	return 0;
 }
 
-int bbunpack(char **argv,
+int FAST_FUNC bbunpack(char **argv,
 	char* (*make_new_name)(char *filename),
 	USE_DESKTOP(long long) int (*unpacker)(void)
 )
diff --git a/archival/dpkg.c b/archival/dpkg.c
index ee5bd7a..34e5f80 100644
--- a/archival/dpkg.c
+++ b/archival/dpkg.c
@@ -1482,7 +1482,7 @@
 	return ar_handle->sub_archive->buffer;
 }
 
-static void data_extract_all_prefix(archive_handle_t *archive_handle)
+static void FAST_FUNC data_extract_all_prefix(archive_handle_t *archive_handle)
 {
 	char *name_ptr = archive_handle->file_header->name;
 
diff --git a/archival/libunarchive/archive_xread_all_eof.c b/archival/libunarchive/archive_xread_all_eof.c
index c93dfa2..f11a7fd 100644
--- a/archival/libunarchive/archive_xread_all_eof.c
+++ b/archival/libunarchive/archive_xread_all_eof.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-ssize_t archive_xread_all_eof(archive_handle_t *archive_handle,
+ssize_t FAST_FUNC archive_xread_all_eof(archive_handle_t *archive_handle,
 			unsigned char *buf, size_t count)
 {
 	ssize_t size;
diff --git a/archival/libunarchive/data_align.c b/archival/libunarchive/data_align.c
index d98dc57..9f2e843 100644
--- a/archival/libunarchive/data_align.c
+++ b/archival/libunarchive/data_align.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void data_align(archive_handle_t *archive_handle, unsigned boundary)
+void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary)
 {
 	unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary;
 
diff --git a/archival/libunarchive/data_extract_all.c b/archival/libunarchive/data_extract_all.c
index 29a224b..1b48767 100644
--- a/archival/libunarchive/data_extract_all.c
+++ b/archival/libunarchive/data_extract_all.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void data_extract_all(archive_handle_t *archive_handle)
+void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 {
 	file_header_t *file_header = archive_handle->file_header;
 	int dst_fd;
diff --git a/archival/libunarchive/data_extract_to_buffer.c b/archival/libunarchive/data_extract_to_buffer.c
index d8fcdf3..1d74e03 100644
--- a/archival/libunarchive/data_extract_to_buffer.c
+++ b/archival/libunarchive/data_extract_to_buffer.c
@@ -8,7 +8,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void data_extract_to_buffer(archive_handle_t *archive_handle)
+void FAST_FUNC data_extract_to_buffer(archive_handle_t *archive_handle)
 {
 	unsigned int size = archive_handle->file_header->size;
 
diff --git a/archival/libunarchive/data_extract_to_stdout.c b/archival/libunarchive/data_extract_to_stdout.c
index c8895ed..a3efea1 100644
--- a/archival/libunarchive/data_extract_to_stdout.c
+++ b/archival/libunarchive/data_extract_to_stdout.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void data_extract_to_stdout(archive_handle_t *archive_handle)
+void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle)
 {
 	bb_copyfd_exact_size(archive_handle->src_fd,
 			STDOUT_FILENO,
diff --git a/archival/libunarchive/data_skip.c b/archival/libunarchive/data_skip.c
index d9778da..438750f 100644
--- a/archival/libunarchive/data_skip.c
+++ b/archival/libunarchive/data_skip.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void data_skip(archive_handle_t *archive_handle)
+void FAST_FUNC data_skip(archive_handle_t *archive_handle)
 {
 	archive_handle->seek(archive_handle, archive_handle->file_header->size);
 }
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index f505044..106a08b 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -526,7 +526,7 @@
    are ignored, data is written to out_fd and return is RETVAL_OK or error.
 */
 
-int read_bunzip(bunzip_data *bd, char *outbuf, int len)
+int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 {
 	const unsigned *dbuf;
 	int pos, current, previous, gotcount;
@@ -643,7 +643,7 @@
    should work for NOFORK applets too, we must be extremely careful to not leak
    any allocations! */
 
-int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
+int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
 						int len)
 {
 	bunzip_data *bd;
@@ -699,7 +699,7 @@
 	return RETVAL_OK;
 }
 
-void dealloc_bunzip(bunzip_data *bd)
+void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
 {
 	free(bd->dbuf);
 	free(bd);
@@ -708,7 +708,7 @@
 
 /* Decompress src_fd to dst_fd.  Stops at end of bzip data, not end of file. */
 
-USE_DESKTOP(long long) int
+USE_DESKTOP(long long) int FAST_FUNC
 unpack_bz2_stream(int src_fd, int dst_fd)
 {
 	USE_DESKTOP(long long total_written = 0;)
diff --git a/archival/libunarchive/decompress_uncompress.c b/archival/libunarchive/decompress_uncompress.c
index 8c3c65d..1615700 100644
--- a/archival/libunarchive/decompress_uncompress.c
+++ b/archival/libunarchive/decompress_uncompress.c
@@ -70,7 +70,7 @@
  * be stored in the compressed file.
  */
 
-USE_DESKTOP(long long) int
+USE_DESKTOP(long long) int FAST_FUNC
 uncompress(int fd_in, int fd_out)
 {
 	USE_DESKTOP(long long total_written = 0;)
diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c
index c320400..79df701 100644
--- a/archival/libunarchive/decompress_unlzma.c
+++ b/archival/libunarchive/decompress_unlzma.c
@@ -229,7 +229,7 @@
 };
 
 
-USE_DESKTOP(long long) int
+USE_DESKTOP(long long) int FAST_FUNC
 unpack_lzma_stream(int src_fd, int dst_fd)
 {
 	USE_DESKTOP(long long total_written = 0;)
diff --git a/archival/libunarchive/decompress_unzip.c b/archival/libunarchive/decompress_unzip.c
index 9036fab..3b0ca84 100644
--- a/archival/libunarchive/decompress_unzip.c
+++ b/archival/libunarchive/decompress_unzip.c
@@ -1033,7 +1033,7 @@
 
 /* For unzip */
 
-USE_DESKTOP(long long) int
+USE_DESKTOP(long long) int FAST_FUNC
 inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out)
 {
 	USE_DESKTOP(long long) int n;
@@ -1176,7 +1176,7 @@
 	return 1;
 }
 
-USE_DESKTOP(long long) int
+USE_DESKTOP(long long) int FAST_FUNC
 unpack_gz_stream(int in, int out)
 {
 	uint32_t v32;
diff --git a/archival/libunarchive/filter_accept_all.c b/archival/libunarchive/filter_accept_all.c
index 47d771e..21f9c5c 100644
--- a/archival/libunarchive/filter_accept_all.c
+++ b/archival/libunarchive/filter_accept_all.c
@@ -9,7 +9,7 @@
 #include "unarchive.h"
 
 /* Accept any non-null name, its not really a filter at all */
-char filter_accept_all(archive_handle_t *archive_handle)
+char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle)
 {
 	if (archive_handle->file_header->name)
 		return EXIT_SUCCESS;
diff --git a/archival/libunarchive/filter_accept_list.c b/archival/libunarchive/filter_accept_list.c
index 6e571ad..afa0b4c 100644
--- a/archival/libunarchive/filter_accept_list.c
+++ b/archival/libunarchive/filter_accept_list.c
@@ -11,7 +11,7 @@
 /*
  * Accept names that are in the accept list, ignoring reject list.
  */
-char filter_accept_list(archive_handle_t *archive_handle)
+char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle)
 {
 	if (find_list_entry(archive_handle->accept, archive_handle->file_header->name))
 		return EXIT_SUCCESS;
diff --git a/archival/libunarchive/filter_accept_list_reassign.c b/archival/libunarchive/filter_accept_list_reassign.c
index 969dd1e..4f2d4cd 100644
--- a/archival/libunarchive/filter_accept_list_reassign.c
+++ b/archival/libunarchive/filter_accept_list_reassign.c
@@ -13,7 +13,7 @@
  * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz
  * or if its a .tar.bz2 make archive_handle->sub_archive handle that
  */
-char filter_accept_list_reassign(archive_handle_t *archive_handle)
+char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle)
 {
 	/* Check the file entry is in the accept list */
 	if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) {
diff --git a/archival/libunarchive/filter_accept_reject_list.c b/archival/libunarchive/filter_accept_reject_list.c
index 439ba20..aa601e1 100644
--- a/archival/libunarchive/filter_accept_reject_list.c
+++ b/archival/libunarchive/filter_accept_reject_list.c
@@ -11,7 +11,7 @@
 /*
  * Accept names that are in the accept list and not in the reject list
  */
-char filter_accept_reject_list(archive_handle_t *archive_handle)
+char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle)
 {
 	const char *key;
 	const llist_t *reject_entry;
diff --git a/archival/libunarchive/find_list_entry.c b/archival/libunarchive/find_list_entry.c
index 7540589..bc7bc64 100644
--- a/archival/libunarchive/find_list_entry.c
+++ b/archival/libunarchive/find_list_entry.c
@@ -10,7 +10,7 @@
 #include "unarchive.h"
 
 /* Find a string in a shell pattern list */
-const llist_t *find_list_entry(const llist_t *list, const char *filename)
+const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename)
 {
 	while (list) {
 		if (fnmatch(list->data, filename, 0) == 0) {
@@ -24,7 +24,7 @@
 /* Same, but compares only path components present in pattern
  * (extra trailing path components in filename are assumed to match)
  */
-const llist_t *find_list_entry2(const llist_t *list, const char *filename)
+const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename)
 {
 	char buf[PATH_MAX];
 	int pattern_slash_cnt;
diff --git a/archival/libunarchive/get_header_ar.c b/archival/libunarchive/get_header_ar.c
index 88c0220..0522299 100644
--- a/archival/libunarchive/get_header_ar.c
+++ b/archival/libunarchive/get_header_ar.c
@@ -7,7 +7,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-char get_header_ar(archive_handle_t *archive_handle)
+char FAST_FUNC get_header_ar(archive_handle_t *archive_handle)
 {
 	int err;
 	file_header_t *typed = archive_handle->file_header;
diff --git a/archival/libunarchive/get_header_cpio.c b/archival/libunarchive/get_header_cpio.c
index b97b53b..4ed18c6 100644
--- a/archival/libunarchive/get_header_cpio.c
+++ b/archival/libunarchive/get_header_cpio.c
@@ -17,7 +17,7 @@
 	char name[1];
 } hardlinks_t;
 
-char get_header_cpio(archive_handle_t *archive_handle)
+char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle)
 {
 	static hardlinks_t *saved_hardlinks = NULL;
 	static hardlinks_t *saved_hardlinks_created = NULL;
diff --git a/archival/libunarchive/get_header_tar.c b/archival/libunarchive/get_header_tar.c
index 187552c..a0a53c9 100644
--- a/archival/libunarchive/get_header_tar.c
+++ b/archival/libunarchive/get_header_tar.c
@@ -43,7 +43,7 @@
 #define GET_OCTAL(a) getOctal((a), sizeof(a))
 
 void BUG_tar_header_size(void);
-char get_header_tar(archive_handle_t *archive_handle)
+char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 {
 	static smallint end;
 #if ENABLE_FEATURE_TAR_AUTODETECT
@@ -133,7 +133,7 @@
 	     || memcmp(tar.magic, "\0\0\0\0", 5) != 0)
 	) {
 #if ENABLE_FEATURE_TAR_AUTODETECT
-		char (*get_header_ptr)(archive_handle_t *);
+		char FAST_FUNC (*get_header_ptr)(archive_handle_t *);
 
 		/* tar gz/bz autodetect: check for gz/bz2 magic.
 		 * If it is the very first block, and we see the magic,
diff --git a/archival/libunarchive/get_header_tar_bz2.c b/archival/libunarchive/get_header_tar_bz2.c
index c2cbaff..cfdc016 100644
--- a/archival/libunarchive/get_header_tar_bz2.c
+++ b/archival/libunarchive/get_header_tar_bz2.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-char get_header_tar_bz2(archive_handle_t *archive_handle)
+char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle)
 {
 	/* Can't lseek over pipes */
 	archive_handle->seek = seek_by_read;
diff --git a/archival/libunarchive/get_header_tar_gz.c b/archival/libunarchive/get_header_tar_gz.c
index 9772e33..33c6272 100644
--- a/archival/libunarchive/get_header_tar_gz.c
+++ b/archival/libunarchive/get_header_tar_gz.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-char get_header_tar_gz(archive_handle_t *archive_handle)
+char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle)
 {
 #if BB_MMU
 	unsigned char magic[2];
diff --git a/archival/libunarchive/get_header_tar_lzma.c b/archival/libunarchive/get_header_tar_lzma.c
index c859dcc..4ae125f 100644
--- a/archival/libunarchive/get_header_tar_lzma.c
+++ b/archival/libunarchive/get_header_tar_lzma.c
@@ -9,7 +9,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-char get_header_tar_lzma(archive_handle_t * archive_handle)
+char FAST_FUNC get_header_tar_lzma(archive_handle_t * archive_handle)
 {
 	/* Can't lseek over pipes */
 	archive_handle->seek = seek_by_read;
diff --git a/archival/libunarchive/header_list.c b/archival/libunarchive/header_list.c
index 8cb8f40..6ec2df3 100644
--- a/archival/libunarchive/header_list.c
+++ b/archival/libunarchive/header_list.c
@@ -5,7 +5,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void header_list(const file_header_t *file_header)
+void FAST_FUNC header_list(const file_header_t *file_header)
 {
 	puts(file_header->name);
 }
diff --git a/archival/libunarchive/header_skip.c b/archival/libunarchive/header_skip.c
index ef2172b..ba4c0ab 100644
--- a/archival/libunarchive/header_skip.c
+++ b/archival/libunarchive/header_skip.c
@@ -5,6 +5,6 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void header_skip(const file_header_t *file_header ATTRIBUTE_UNUSED)
+void FAST_FUNC header_skip(const file_header_t *file_header ATTRIBUTE_UNUSED)
 {
 }
diff --git a/archival/libunarchive/header_verbose_list.c b/archival/libunarchive/header_verbose_list.c
index ea623ed..f059dd9 100644
--- a/archival/libunarchive/header_verbose_list.c
+++ b/archival/libunarchive/header_verbose_list.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void header_verbose_list(const file_header_t *file_header)
+void FAST_FUNC header_verbose_list(const file_header_t *file_header)
 {
 	struct tm *mtime = localtime(&(file_header->mtime));
 
diff --git a/archival/libunarchive/init_handle.c b/archival/libunarchive/init_handle.c
index 309d329..ff7d484 100644
--- a/archival/libunarchive/init_handle.c
+++ b/archival/libunarchive/init_handle.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-archive_handle_t *init_handle(void)
+archive_handle_t* FAST_FUNC init_handle(void)
 {
 	archive_handle_t *archive_handle;
 
diff --git a/archival/libunarchive/open_transformer.c b/archival/libunarchive/open_transformer.c
index 86415c7..a6bc623 100644
--- a/archival/libunarchive/open_transformer.c
+++ b/archival/libunarchive/open_transformer.c
@@ -11,8 +11,8 @@
  * On MMU machine, the transform_prog is removed by macro magic
  * in include/unarchive.h. On NOMMU, transformer is removed.
  */
-int open_transformer(int src_fd,
-	USE_DESKTOP(long long) int (*transformer)(int src_fd, int dst_fd),
+int FAST_FUNC open_transformer(int src_fd,
+	USE_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd),
 	const char *transform_prog)
 {
 	struct fd_pair fd_pipe;
diff --git a/archival/libunarchive/seek_by_jump.c b/archival/libunarchive/seek_by_jump.c
index 5288c1d..031598e 100644
--- a/archival/libunarchive/seek_by_jump.c
+++ b/archival/libunarchive/seek_by_jump.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void seek_by_jump(const archive_handle_t *archive_handle, unsigned amount)
+void FAST_FUNC seek_by_jump(const archive_handle_t *archive_handle, unsigned amount)
 {
 	if (lseek(archive_handle->src_fd, (off_t) amount, SEEK_CUR) == (off_t) -1) {
 		if (errno == ESPIPE)
diff --git a/archival/libunarchive/seek_by_read.c b/archival/libunarchive/seek_by_read.c
index 1f2b805..2326a75 100644
--- a/archival/libunarchive/seek_by_read.c
+++ b/archival/libunarchive/seek_by_read.c
@@ -9,7 +9,7 @@
 /*  If we are reading through a pipe, or from stdin then we can't lseek,
  *  we must read and discard the data to skip over it.
  */
-void seek_by_read(const archive_handle_t *archive_handle, unsigned jump_size)
+void FAST_FUNC seek_by_read(const archive_handle_t *archive_handle, unsigned jump_size)
 {
 	if (jump_size)
 		bb_copyfd_exact_size(archive_handle->src_fd, -1, jump_size);
diff --git a/archival/libunarchive/unpack_ar_archive.c b/archival/libunarchive/unpack_ar_archive.c
index fc1820b..9c2f68b 100644
--- a/archival/libunarchive/unpack_ar_archive.c
+++ b/archival/libunarchive/unpack_ar_archive.c
@@ -6,7 +6,7 @@
 #include "libbb.h"
 #include "unarchive.h"
 
-void unpack_ar_archive(archive_handle_t *ar_archive)
+void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive)
 {
 	char magic[7];
 
diff --git a/archival/rpm.c b/archival/rpm.c
index 41b8c81..3d03dbc 100644
--- a/archival/rpm.c
+++ b/archival/rpm.c
@@ -190,7 +190,7 @@
 	archive_handle_t *archive_handle;
 	unsigned char magic[2];
 #if BB_MMU
-	USE_DESKTOP(long long) int (*xformer)(int src_fd, int dst_fd);
+	USE_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
 	enum { xformer_prog = 0 };
 #else
 	enum { xformer = 0 };
diff --git a/archival/tar.c b/archival/tar.c
index 0162e06..2a14018 100644
--- a/archival/tar.c
+++ b/archival/tar.c
@@ -396,7 +396,7 @@
 #define exclude_file(excluded_files, file) 0
 #endif
 
-static int writeFileToTarball(const char *fileName, struct stat *statbuf,
+static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statbuf,
 			void *userData, int depth ATTRIBUTE_UNUSED)
 {
 	struct TarBallInfo *tbInfo = (struct TarBallInfo *) userData;
@@ -680,7 +680,7 @@
 #endif
 
 #if ENABLE_FEATURE_TAR_COMPRESS
-static char get_header_tar_Z(archive_handle_t *archive_handle)
+static char FAST_FUNC get_header_tar_Z(archive_handle_t *archive_handle)
 {
 	/* Can't lseek over pipes */
 	archive_handle->seek = seek_by_read;
@@ -802,7 +802,7 @@
 int tar_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int tar_main(int argc ATTRIBUTE_UNUSED, char **argv)
 {
-	char (*get_header_ptr)(archive_handle_t *) = get_header_tar;
+	char FAST_FUNC (*get_header_ptr)(archive_handle_t *) = get_header_tar;
 	archive_handle_t *tar_handle;
 	char *base_dir = NULL;
 	const char *tar_filename = "-";
diff --git a/coreutils/chmod.c b/coreutils/chmod.c
index 1bd0bd5..994308c 100644
--- a/coreutils/chmod.c
+++ b/coreutils/chmod.c
@@ -34,7 +34,7 @@
  * symbolic links encountered during recursive directory traversals.
  */
 
-static int fileAction(const char *fileName, struct stat *statbuf, void* param, int depth)
+static int FAST_FUNC fileAction(const char *fileName, struct stat *statbuf, void* param, int depth)
 {
 	mode_t newmode;
 
diff --git a/coreutils/chown.c b/coreutils/chown.c
index 78377e6..c14c62d 100644
--- a/coreutils/chown.c
+++ b/coreutils/chown.c
@@ -41,7 +41,7 @@
 
 static struct bb_uidgid_t ugid = { -1, -1 };
 
-static int fileAction(const char *fileName, struct stat *statbuf,
+static int FAST_FUNC fileAction(const char *fileName, struct stat *statbuf,
 		void *cf, int depth ATTRIBUTE_UNUSED)
 {
 	uid_t u = (ugid.uid == (uid_t)-1) ? statbuf->st_uid : ugid.uid;
diff --git a/coreutils/uudecode.c b/coreutils/uudecode.c
index c067476..8b18b7a 100644
--- a/coreutils/uudecode.c
+++ b/coreutils/uudecode.c
@@ -152,7 +152,7 @@
 
 	/* Search for the start of the encoding */
 	while ((line = xmalloc_fgetline(src_stream)) != NULL) {
-		void (*decode_fn_ptr)(FILE * src, FILE * dst);
+		void (*decode_fn_ptr)(FILE *src, FILE *dst);
 		char *line_ptr;
 		FILE *dst_stream;
 		int mode;
diff --git a/debianutils/run_parts.c b/debianutils/run_parts.c
index c9b0907..bf278cc 100644
--- a/debianutils/run_parts.c
+++ b/debianutils/run_parts.c
@@ -77,7 +77,7 @@
 	return (option_mask32 & OPT_r) ? -r : r;
 }
 
-static int act(const char *file, struct stat *statbuf, void *args ATTRIBUTE_UNUSED, int depth)
+static int FAST_FUNC act(const char *file, struct stat *statbuf, void *args ATTRIBUTE_UNUSED, int depth)
 {
 	if (depth == 1)
 		return TRUE;
diff --git a/e2fsprogs/old_e2fsprogs/e2p/e2p.h b/e2fsprogs/old_e2fsprogs/e2p/e2p.h
index 4524700..bad2d6a 100644
--- a/e2fsprogs/old_e2fsprogs/e2p/e2p.h
+++ b/e2fsprogs/old_e2fsprogs/e2p/e2p.h
@@ -37,9 +37,9 @@
 /*void list_super(struct ext2_super_block * s);*/
 void list_super2(struct ext2_super_block * s, FILE *f);
 #define list_super(s) list_super2(s, stdout)
-void print_fs_errors (FILE * f, unsigned short errors);
-void print_flags (FILE * f, unsigned long flags, unsigned options);
-void print_fs_state (FILE * f, unsigned short state);
+void print_fs_errors (FILE *f, unsigned short errors);
+void print_flags (FILE *f, unsigned long flags, unsigned options);
+void print_fs_state (FILE *f, unsigned short state);
 int setflags (int fd, unsigned long flags);
 int setversion (int fd, unsigned long version);
 
diff --git a/e2fsprogs/old_e2fsprogs/e2p/pf.c b/e2fsprogs/old_e2fsprogs/e2p/pf.c
index 55d4bc4..02cbec7 100644
--- a/e2fsprogs/old_e2fsprogs/e2p/pf.c
+++ b/e2fsprogs/old_e2fsprogs/e2p/pf.c
@@ -48,7 +48,7 @@
 	{ 0, NULL, NULL }
 };
 
-void print_flags (FILE * f, unsigned long flags, unsigned options)
+void print_flags (FILE *f, unsigned long flags, unsigned options)
 {
 	int long_opt = (options & PFOPT_LONG);
 	const struct flags_name *fp;
diff --git a/editors/diff.c b/editors/diff.c
index ad089e2..d7eccfb 100644
--- a/editors/diff.c
+++ b/editors/diff.c
@@ -1163,7 +1163,7 @@
 
 #if ENABLE_FEATURE_DIFF_DIR
 /* This function adds a filename to dl, the directory listing. */
-static int add_to_dirlist(const char *filename,
+static int FAST_FUNC add_to_dirlist(const char *filename,
 		struct stat *sb ATTRIBUTE_UNUSED,
 		void *userdata,
 		int depth ATTRIBUTE_UNUSED)
diff --git a/editors/sed.c b/editors/sed.c
index bf01fc6..81cc195 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -23,7 +23,7 @@
   resulting sed_cmd_t structures are appended to a linked list
   (G.sed_cmd_head/G.sed_cmd_tail).
 
-  add_input_file() adds a FILE * to the list of input files.  We need to
+  add_input_file() adds a FILE* to the list of input files.  We need to
   know all input sources ahead of time to find the last line for the $ match.
 
   process_files() does actual sedding, reading data lines from each input FILE *
diff --git a/editors/vi.c b/editors/vi.c
index 7603c97..1b335d9 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -1659,12 +1659,12 @@
 		cmdcnt = 0;
 		end_cmd_q();	// stop adding to q
 		last_status_cksum = 0;	// force status update
-		if ((p > text) && (p[-1] != '\n')) {
+		if (p > text && p[-1] != '\n') {	// bounds test first: never read p[-1] when p is at text
 			p--;
 		}
 	} else if (c == erase_char || c == 8 || c == 127) { // Is this a BS
 		//     123456789
-		if ((p > text) && (p[-1] != '\n')) {
+		if (p > text && p[-1] != '\n') {	// bounds test first: never read p[-1] when p is at text
 			p--;
 			p = text_hole_delete(p, p);	// shrink buffer 1 char
 		}
diff --git a/findutils/find.c b/findutils/find.c
index f75bc9e..51d26c0 100644
--- a/findutils/find.c
+++ b/findutils/find.c
@@ -374,7 +374,7 @@
 #endif
 
 
-static int fileAction(const char *fileName,
+static int FAST_FUNC fileAction(const char *fileName,
 		struct stat *statbuf,
 		void *userData SKIP_FEATURE_FIND_MAXDEPTH(ATTRIBUTE_UNUSED),
 		int depth SKIP_FEATURE_FIND_MAXDEPTH(ATTRIBUTE_UNUSED))
diff --git a/findutils/grep.c b/findutils/grep.c
index 9792ce4..8d18f43 100644
--- a/findutils/grep.c
+++ b/findutils/grep.c
@@ -370,7 +370,7 @@
 	}
 }
 
-static int file_action_grep(const char *filename,
+static int FAST_FUNC file_action_grep(const char *filename,
 			struct stat *statbuf ATTRIBUTE_UNUSED,
 			void* matched,
 			int depth ATTRIBUTE_UNUSED)
diff --git a/include/dump.h b/include/dump.h
index 20c39c4..a060c14 100644
--- a/include/dump.h
+++ b/include/dump.h
@@ -44,9 +44,9 @@
 	int bcnt;
 } FS;
 
-extern void bb_dump_add(const char *fmt);
-extern int bb_dump_dump(char **argv);
-extern int bb_dump_size(FS * fs);
+extern void bb_dump_add(const char *fmt) FAST_FUNC;
+extern int bb_dump_dump(char **argv) FAST_FUNC;
+extern int bb_dump_size(FS * fs) FAST_FUNC;
 
 extern FS *bb_dump_fshead;		/* head of format strings */
 extern int bb_dump_blocksize;				/* data block size */
diff --git a/include/inet_common.h b/include/inet_common.h
index eb4cb73..f4374e5 100644
--- a/include/inet_common.h
+++ b/include/inet_common.h
@@ -12,15 +12,15 @@
 /* hostfirst!=0 If we expect this to be a hostname,
    try hostname database first
  */
-int INET_resolve(const char *name, struct sockaddr_in *s_in, int hostfirst);
+int INET_resolve(const char *name, struct sockaddr_in *s_in, int hostfirst) FAST_FUNC;
 
 /* numeric: & 0x8000: "default" instead of "*",
  *          & 0x4000: host instead of net,
  *          & 0x0fff: don't resolve
  */
 
-int INET6_resolve(const char *name, struct sockaddr_in6 *sin6);
+int INET6_resolve(const char *name, struct sockaddr_in6 *sin6) FAST_FUNC;
 
 /* These return malloced string */
-char *INET_rresolve(struct sockaddr_in *s_in, int numeric, uint32_t netmask);
-char *INET6_rresolve(struct sockaddr_in6 *sin6, int numeric);
+char *INET_rresolve(struct sockaddr_in *s_in, int numeric, uint32_t netmask) FAST_FUNC;
+char *INET6_rresolve(struct sockaddr_in6 *sin6, int numeric) FAST_FUNC;
diff --git a/include/libbb.h b/include/libbb.h
index e92dbc4..d74f802 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -237,18 +237,18 @@
 #define errno (*bb_errno)
 #endif
 
-unsigned long long monotonic_us(void);
-unsigned monotonic_sec(void);
+unsigned long long monotonic_us(void) FAST_FUNC;
+unsigned monotonic_sec(void) FAST_FUNC;
 
-extern void chomp(char *s);
-extern void trim(char *s);
-extern char *skip_whitespace(const char *);
-extern char *skip_non_whitespace(const char *);
-extern char *strrstr(const char *haystack, const char *needle);
+extern void chomp(char *s) FAST_FUNC;
+extern void trim(char *s) FAST_FUNC;
+extern char *skip_whitespace(const char *) FAST_FUNC;
+extern char *skip_non_whitespace(const char *) FAST_FUNC;
+extern char *strrstr(const char *haystack, const char *needle) FAST_FUNC;
 
 //TODO: supply a pointer to char[11] buffer (avoid statics)?
-extern const char *bb_mode_string(mode_t mode);
-extern int is_directory(const char *name, int followLinks, struct stat *statBuf);
+extern const char *bb_mode_string(mode_t mode) FAST_FUNC;
+extern int is_directory(const char *name, int followLinks, struct stat *statBuf) FAST_FUNC;
 enum {	/* DO NOT CHANGE THESE VALUES!  cp.c, mv.c, install.c depend on them. */
 	FILEUTILS_PRESERVE_STATUS = 1,
 	FILEUTILS_DEREFERENCE = 2,
@@ -264,12 +264,12 @@
 #endif
 };
 #define FILEUTILS_CP_OPTSTR "pdRfilsL" USE_SELINUX("c")
-extern int remove_file(const char *path, int flags);
+extern int remove_file(const char *path, int flags) FAST_FUNC;
 /* NB: without FILEUTILS_RECUR in flags, it will basically "cat"
  * the source, not copy (unless "source" is a directory).
  * This makes "cp /dev/null file" and "install /dev/null file" (!!!)
  * work coreutils-compatibly. */
-extern int copy_file(const char *source, const char *dest, int flags);
+extern int copy_file(const char *source, const char *dest, int flags) FAST_FUNC;
 
 enum {
 	ACTION_RECURSE        = (1 << 0),
@@ -279,49 +279,49 @@
 	/*ACTION_REVERSE      = (1 << 4), - unused */
 };
 extern int recursive_action(const char *fileName, unsigned flags,
-	int (*fileAction) (const char *fileName, struct stat* statbuf, void* userData, int depth),
-	int (*dirAction) (const char *fileName, struct stat* statbuf, void* userData, int depth),
-	void* userData, unsigned depth);
-extern int device_open(const char *device, int mode);
+	int FAST_FUNC (*fileAction)(const char *fileName, struct stat* statbuf, void* userData, int depth),
+	int FAST_FUNC (*dirAction)(const char *fileName, struct stat* statbuf, void* userData, int depth),
+	void* userData, unsigned depth) FAST_FUNC;
+extern int device_open(const char *device, int mode) FAST_FUNC;
 enum { GETPTY_BUFSIZE = 16 }; /* more than enough for "/dev/ttyXXX" */
-extern int xgetpty(char *line);
-extern int get_console_fd(void);
-extern void console_make_active(int fd, const int vt_num);
-extern char *find_block_device(const char *path);
+extern int xgetpty(char *line) FAST_FUNC;
+extern int get_console_fd(void) FAST_FUNC;
+extern void console_make_active(int fd, const int vt_num) FAST_FUNC;
+extern char *find_block_device(const char *path) FAST_FUNC;
 /* bb_copyfd_XX print read/write errors and return -1 if they occur */
-extern off_t bb_copyfd_eof(int fd1, int fd2);
-extern off_t bb_copyfd_size(int fd1, int fd2, off_t size);
-extern void bb_copyfd_exact_size(int fd1, int fd2, off_t size);
+extern off_t bb_copyfd_eof(int fd1, int fd2) FAST_FUNC;
+extern off_t bb_copyfd_size(int fd1, int fd2, off_t size) FAST_FUNC;
+extern void bb_copyfd_exact_size(int fd1, int fd2, off_t size) FAST_FUNC;
 /* "short" copy can be detected by return value < size */
 /* this helper yells "short read!" if param is not -1 */
-extern void complain_copyfd_and_die(off_t sz) ATTRIBUTE_NORETURN;
-extern char bb_process_escape_sequence(const char **ptr);
+extern void complain_copyfd_and_die(off_t sz) ATTRIBUTE_NORETURN FAST_FUNC;
+extern char bb_process_escape_sequence(const char **ptr) FAST_FUNC;
 /* xxxx_strip version can modify its parameter:
  * "/"        -> "/"
  * "abc"      -> "abc"
  * "abc/def"  -> "def"
  * "abc/def/" -> "def" !!
  */
-extern char *bb_get_last_path_component_strip(char *path);
+extern char *bb_get_last_path_component_strip(char *path) FAST_FUNC;
 /* "abc/def/" -> "" and it never modifies 'path' */
-extern char *bb_get_last_path_component_nostrip(const char *path);
+extern char *bb_get_last_path_component_nostrip(const char *path) FAST_FUNC;
 
-int ndelay_on(int fd);
-int ndelay_off(int fd);
-int close_on_exec_on(int fd);
-void xdup2(int, int);
-void xmove_fd(int, int);
+int ndelay_on(int fd) FAST_FUNC;
+int ndelay_off(int fd) FAST_FUNC;
+int close_on_exec_on(int fd) FAST_FUNC;
+void xdup2(int, int) FAST_FUNC;
+void xmove_fd(int, int) FAST_FUNC;
 
 
-DIR *xopendir(const char *path);
-DIR *warn_opendir(const char *path);
+DIR *xopendir(const char *path) FAST_FUNC;
+DIR *warn_opendir(const char *path) FAST_FUNC;
 
-/* UNUSED: char *xmalloc_realpath(const char *path); */
-char *xmalloc_readlink(const char *path);
-char *xmalloc_readlink_or_warn(const char *path);
-char *xrealloc_getcwd_or_warn(char *cwd);
+/* UNUSED: char *xmalloc_realpath(const char *path) FAST_FUNC; */
+char *xmalloc_readlink(const char *path) FAST_FUNC;
+char *xmalloc_readlink_or_warn(const char *path) FAST_FUNC;
+char *xrealloc_getcwd_or_warn(char *cwd) FAST_FUNC;
 
-char *xmalloc_follow_symlinks(const char *path);
+char *xmalloc_follow_symlinks(const char *path) FAST_FUNC;
 
 
 enum {
@@ -354,43 +354,43 @@
 		+ (1LL << SIGUSR2)
 		+ 0),
 };
-void bb_signals(int sigs, void (*f)(int));
+void bb_signals(int sigs, void (*f)(int)) FAST_FUNC;
 /* Unlike signal() and bb_signals, sets handler with sigaction()
  * and in a way that while signal handler is run, no other signals
  * will be blocked: */
-void bb_signals_recursive(int sigs, void (*f)(int));
+void bb_signals_recursive(int sigs, void (*f)(int)) FAST_FUNC;
 /* syscalls like read() will be interrupted with EINTR: */
-void signal_no_SA_RESTART_empty_mask(int sig, void (*handler)(int));
+void signal_no_SA_RESTART_empty_mask(int sig, void (*handler)(int)) FAST_FUNC;
 /* syscalls like read() won't be interrupted (though select/poll will be): */
-void signal_SA_RESTART_empty_mask(int sig, void (*handler)(int));
-void wait_for_any_sig(void);
-void kill_myself_with_sig(int sig) ATTRIBUTE_NORETURN;
-void sig_block(int sig);
-void sig_unblock(int sig);
+void signal_SA_RESTART_empty_mask(int sig, void (*handler)(int)) FAST_FUNC;
+void wait_for_any_sig(void) FAST_FUNC;
+void kill_myself_with_sig(int sig) ATTRIBUTE_NORETURN FAST_FUNC;
+void sig_block(int sig) FAST_FUNC;
+void sig_unblock(int sig) FAST_FUNC;
 /* Will do sigaction(signum, act, NULL): */
-int sigaction_set(int sig, const struct sigaction *act);
+int sigaction_set(int sig, const struct sigaction *act) FAST_FUNC;
 /* SIG_BLOCK/SIG_UNBLOCK all signals: */
-int sigprocmask_allsigs(int how);
+int sigprocmask_allsigs(int how) FAST_FUNC;
 
 
-void xsetgid(gid_t gid);
-void xsetuid(uid_t uid);
-void xchdir(const char *path);
-void xchroot(const char *path);
-void xsetenv(const char *key, const char *value);
-void xunlink(const char *pathname);
-void xstat(const char *pathname, struct stat *buf);
-int xopen(const char *pathname, int flags);
-int xopen3(const char *pathname, int flags, int mode);
-int open_or_warn(const char *pathname, int flags);
-int open3_or_warn(const char *pathname, int flags, int mode);
-int open_or_warn_stdin(const char *pathname);
-void xrename(const char *oldpath, const char *newpath);
-int rename_or_warn(const char *oldpath, const char *newpath);
-off_t xlseek(int fd, off_t offset, int whence);
-off_t fdlength(int fd);
+void xsetgid(gid_t gid) FAST_FUNC;
+void xsetuid(uid_t uid) FAST_FUNC;
+void xchdir(const char *path) FAST_FUNC;
+void xchroot(const char *path) FAST_FUNC;
+void xsetenv(const char *key, const char *value) FAST_FUNC;
+void xunlink(const char *pathname) FAST_FUNC;
+void xstat(const char *pathname, struct stat *buf) FAST_FUNC;
+int xopen(const char *pathname, int flags) FAST_FUNC;
+int xopen3(const char *pathname, int flags, int mode) FAST_FUNC;
+int open_or_warn(const char *pathname, int flags) FAST_FUNC;
+int open3_or_warn(const char *pathname, int flags, int mode) FAST_FUNC;
+int open_or_warn_stdin(const char *pathname) FAST_FUNC;
+void xrename(const char *oldpath, const char *newpath) FAST_FUNC;
+int rename_or_warn(const char *oldpath, const char *newpath) FAST_FUNC;
+off_t xlseek(int fd, off_t offset, int whence) FAST_FUNC;
+off_t fdlength(int fd) FAST_FUNC;
 
-void xpipe(int filedes[2]);
+void xpipe(int filedes[2]) FAST_FUNC;
 /* In this form code with pipes is much more readable */
 struct fd_pair { int rd; int wr; };
 #define piped_pair(pair)  pipe(&((pair).rd))
@@ -424,22 +424,22 @@
 };
 
 
-int xsocket(int domain, int type, int protocol);
-void xbind(int sockfd, struct sockaddr *my_addr, socklen_t addrlen);
-void xlisten(int s, int backlog);
-void xconnect(int s, const struct sockaddr *s_addr, socklen_t addrlen);
+int xsocket(int domain, int type, int protocol) FAST_FUNC;
+void xbind(int sockfd, struct sockaddr *my_addr, socklen_t addrlen) FAST_FUNC;
+void xlisten(int s, int backlog) FAST_FUNC;
+void xconnect(int s, const struct sockaddr *s_addr, socklen_t addrlen) FAST_FUNC;
 ssize_t xsendto(int s, const void *buf, size_t len, const struct sockaddr *to,
-				socklen_t tolen);
+				socklen_t tolen) FAST_FUNC;
 /* SO_REUSEADDR allows a server to rebind to an address that is already
  * "in use" by old connections to e.g. previous server instance which is
  * killed or crashed. Without it bind will fail until all such connections
  * time out. Linux does not allow multiple live binds on same ip:port
  * regardless of SO_REUSEADDR (unlike some other flavors of Unix).
  * Turn it on before you call bind(). */
-void setsockopt_reuseaddr(int fd); /* On Linux this never fails. */
-int setsockopt_broadcast(int fd);
+void setsockopt_reuseaddr(int fd) FAST_FUNC; /* On Linux this never fails. */
+int setsockopt_broadcast(int fd) FAST_FUNC;
 /* NB: returns port in host byte order */
-unsigned bb_lookup_port(const char *port, const char *protocol, unsigned default_port);
+unsigned bb_lookup_port(const char *port, const char *protocol, unsigned default_port) FAST_FUNC;
 typedef struct len_and_sockaddr {
 	socklen_t len;
 	union {
@@ -468,87 +468,87 @@
  * and if kernel doesn't support it, IPv4.
  */
 #if ENABLE_FEATURE_IPV6
-int xsocket_type(len_and_sockaddr **lsap, int af, int sock_type);
+int xsocket_type(len_and_sockaddr **lsap, int af, int sock_type) FAST_FUNC;
 #else
-int xsocket_type(len_and_sockaddr **lsap, int sock_type);
+int xsocket_type(len_and_sockaddr **lsap, int sock_type) FAST_FUNC;
 #define xsocket_type(lsap, af, sock_type) xsocket_type((lsap), (sock_type))
 #endif
-int xsocket_stream(len_and_sockaddr **lsap);
+int xsocket_stream(len_and_sockaddr **lsap) FAST_FUNC;
 /* Create server socket bound to bindaddr:port. bindaddr can be NULL,
  * numeric IP ("N.N.N.N") or numeric IPv6 address,
  * and can have ":PORT" suffix (for IPv6 use "[X:X:...:X]:PORT").
  * Only if there is no suffix, port argument is used */
 /* NB: these set SO_REUSEADDR before bind */
-int create_and_bind_stream_or_die(const char *bindaddr, int port);
-int create_and_bind_dgram_or_die(const char *bindaddr, int port);
+int create_and_bind_stream_or_die(const char *bindaddr, int port) FAST_FUNC;
+int create_and_bind_dgram_or_die(const char *bindaddr, int port) FAST_FUNC;
 /* Create client TCP socket connected to peer:port. Peer cannot be NULL.
  * Peer can be numeric IP ("N.N.N.N"), numeric IPv6 address or hostname,
  * and can have ":PORT" suffix (for IPv6 use "[X:X:...:X]:PORT").
  * If there is no suffix, port argument is used */
-int create_and_connect_stream_or_die(const char *peer, int port);
+int create_and_connect_stream_or_die(const char *peer, int port) FAST_FUNC;
 /* Connect to peer identified by lsa */
-int xconnect_stream(const len_and_sockaddr *lsa);
+int xconnect_stream(const len_and_sockaddr *lsa) FAST_FUNC;
 /* Return malloc'ed len_and_sockaddr with socket address of host:port
  * Currently will return IPv4 or IPv6 sockaddrs only
  * (depending on host), but in theory nothing prevents e.g.
  * UNIX socket address being returned, IPX sockaddr etc...
  * On error does bb_error_msg and returns NULL */
-len_and_sockaddr* host2sockaddr(const char *host, int port);
+len_and_sockaddr* host2sockaddr(const char *host, int port) FAST_FUNC;
 /* Version which dies on error */
-len_and_sockaddr* xhost2sockaddr(const char *host, int port);
-len_and_sockaddr* xdotted2sockaddr(const char *host, int port);
+len_and_sockaddr* xhost2sockaddr(const char *host, int port) FAST_FUNC;
+len_and_sockaddr* xdotted2sockaddr(const char *host, int port) FAST_FUNC;
 /* Same, useful if you want to force family (e.g. IPv6) */
 #if !ENABLE_FEATURE_IPV6
 #define host_and_af2sockaddr(host, port, af) host2sockaddr((host), (port))
 #define xhost_and_af2sockaddr(host, port, af) xhost2sockaddr((host), (port))
 #else
-len_and_sockaddr* host_and_af2sockaddr(const char *host, int port, sa_family_t af);
-len_and_sockaddr* xhost_and_af2sockaddr(const char *host, int port, sa_family_t af);
+len_and_sockaddr* host_and_af2sockaddr(const char *host, int port, sa_family_t af) FAST_FUNC;
+len_and_sockaddr* xhost_and_af2sockaddr(const char *host, int port, sa_family_t af) FAST_FUNC;
 #endif
 /* Assign sin[6]_port member if the socket is an AF_INET[6] one,
  * otherwise no-op. Useful for ftp.
  * NB: does NOT do htons() internally, just direct assignment. */
-void set_nport(len_and_sockaddr *lsa, unsigned port);
+void set_nport(len_and_sockaddr *lsa, unsigned port) FAST_FUNC;
 /* Retrieve sin[6]_port or return -1 for non-INET[6] lsa's */
-int get_nport(const struct sockaddr *sa);
+int get_nport(const struct sockaddr *sa) FAST_FUNC;
 /* Reverse DNS. Returns NULL on failure. */
-char* xmalloc_sockaddr2host(const struct sockaddr *sa);
+char* xmalloc_sockaddr2host(const struct sockaddr *sa) FAST_FUNC;
 /* This one doesn't append :PORTNUM */
-char* xmalloc_sockaddr2host_noport(const struct sockaddr *sa);
+char* xmalloc_sockaddr2host_noport(const struct sockaddr *sa) FAST_FUNC;
 /* This one also doesn't fall back to dotted IP (returns NULL) */
-char* xmalloc_sockaddr2hostonly_noport(const struct sockaddr *sa);
+char* xmalloc_sockaddr2hostonly_noport(const struct sockaddr *sa) FAST_FUNC;
 /* inet_[ap]ton on steroids */
-char* xmalloc_sockaddr2dotted(const struct sockaddr *sa);
-char* xmalloc_sockaddr2dotted_noport(const struct sockaddr *sa);
+char* xmalloc_sockaddr2dotted(const struct sockaddr *sa) FAST_FUNC;
+char* xmalloc_sockaddr2dotted_noport(const struct sockaddr *sa) FAST_FUNC;
 // "old" (ipv4 only) API
 // users: traceroute.c hostname.c - use _list_ of all IPs
-struct hostent *xgethostbyname(const char *name);
+struct hostent *xgethostbyname(const char *name) FAST_FUNC;
 // Also mount.c and inetd.c are using gethostbyname(),
 // + inet_common.c has additional IPv4-only stuff
 
 
-void socket_want_pktinfo(int fd);
+void socket_want_pktinfo(int fd) FAST_FUNC;
 ssize_t send_to_from(int fd, void *buf, size_t len, int flags,
 		const struct sockaddr *to,
 		const struct sockaddr *from,
-		socklen_t tolen);
+		socklen_t tolen) FAST_FUNC;
 ssize_t recv_from_to(int fd, void *buf, size_t len, int flags,
 		struct sockaddr *from,
 		struct sockaddr *to,
-		socklen_t sa_size);
+		socklen_t sa_size) FAST_FUNC;
 
-char *xstrdup(const char *s);
-char *xstrndup(const char *s, int n);
-char *safe_strncpy(char *dst, const char *src, size_t size);
+char *xstrdup(const char *s) FAST_FUNC;
+char *xstrndup(const char *s, int n) FAST_FUNC;
+char *safe_strncpy(char *dst, const char *src, size_t size) FAST_FUNC;
 /* Guaranteed to NOT be a macro (smallest code). Saves nearly 2k on uclibc.
  * But potentially slow, don't use in one-billion-times loops */
-int bb_putchar(int ch);
-char *xasprintf(const char *format, ...) __attribute__ ((format (printf, 1, 2)));
+int bb_putchar(int ch) FAST_FUNC;
+char *xasprintf(const char *format, ...) __attribute__ ((format (printf, 1, 2))) FAST_FUNC;
 /* Prints unprintable chars ch as ^C or M-c to file
  * (M-c is used only if ch is ORed with PRINTABLE_META),
  * else it is printed as-is (except for ch = 0x9b) */
 enum { PRINTABLE_META = 0x100 };
-void fputc_printable(int ch, FILE *file);
+void fputc_printable(int ch, FILE *file) FAST_FUNC;
 // gcc-4.1.1 still isn't good enough at optimizing it
 // (+200 bytes compared to macro)
 //static ALWAYS_INLINE
@@ -563,89 +563,89 @@
 
 /* dmalloc will redefine these to it's own implementation. It is safe
  * to have the prototypes here unconditionally.  */
-extern void *malloc_or_warn(size_t size);
-extern void *xmalloc(size_t size);
-extern void *xzalloc(size_t size);
-extern void *xrealloc(void *old, size_t size);
+extern void *malloc_or_warn(size_t size) FAST_FUNC;
+extern void *xmalloc(size_t size) FAST_FUNC;
+extern void *xzalloc(size_t size) FAST_FUNC;
+extern void *xrealloc(void *old, size_t size) FAST_FUNC;
 
-extern ssize_t safe_read(int fd, void *buf, size_t count);
-extern ssize_t nonblock_safe_read(int fd, void *buf, size_t count);
+extern ssize_t safe_read(int fd, void *buf, size_t count) FAST_FUNC;
+extern ssize_t nonblock_safe_read(int fd, void *buf, size_t count) FAST_FUNC;
 // NB: will return short read on error, not -1,
 // if some data was read before error occurred
-extern ssize_t full_read(int fd, void *buf, size_t count);
-extern void xread(int fd, void *buf, size_t count);
-extern unsigned char xread_char(int fd);
+extern ssize_t full_read(int fd, void *buf, size_t count) FAST_FUNC;
+extern void xread(int fd, void *buf, size_t count) FAST_FUNC;
+extern unsigned char xread_char(int fd) FAST_FUNC;
 // Reads one line a-la fgets (but doesn't save terminating '\n').
 // Uses single full_read() call, works only on seekable streams.
-extern char *reads(int fd, char *buf, size_t count);
+extern char *reads(int fd, char *buf, size_t count) FAST_FUNC;
 // Reads one line a-la fgets (but doesn't save terminating '\n').
 // Reads byte-by-byte. Useful when it is important to not read ahead.
 // Bytes are appended to pfx (which must be malloced, or NULL).
-extern char *xmalloc_reads(int fd, char *pfx, size_t *maxsz_p);
-extern ssize_t read_close(int fd, void *buf, size_t maxsz);
-extern ssize_t open_read_close(const char *filename, void *buf, size_t maxsz);
+extern char *xmalloc_reads(int fd, char *pfx, size_t *maxsz_p) FAST_FUNC;
+extern ssize_t read_close(int fd, void *buf, size_t maxsz) FAST_FUNC;
+extern ssize_t open_read_close(const char *filename, void *buf, size_t maxsz) FAST_FUNC;
 /* Returns NULL if file can't be opened */
-extern void *xmalloc_open_read_close(const char *filename, size_t *maxsz_p);
+extern void *xmalloc_open_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC;
 /* Never returns NULL */
-extern void *xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p);
+extern void *xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC;
 
-extern ssize_t safe_write(int fd, const void *buf, size_t count);
+extern ssize_t safe_write(int fd, const void *buf, size_t count) FAST_FUNC;
 // NB: will return short write on error, not -1,
 // if some data was written before error occurred
-extern ssize_t full_write(int fd, const void *buf, size_t count);
-extern void xwrite(int fd, const void *buf, size_t count);
-extern void xopen_xwrite_close(const char* file, const char *str);
+extern ssize_t full_write(int fd, const void *buf, size_t count) FAST_FUNC;
+extern void xwrite(int fd, const void *buf, size_t count) FAST_FUNC;
+extern void xopen_xwrite_close(const char* file, const char *str) FAST_FUNC;
 
 /* Reads and prints to stdout till eof, then closes FILE. Exits on error: */
-extern void xprint_and_close_file(FILE *file);
+extern void xprint_and_close_file(FILE *file) FAST_FUNC;
 /* Reads up to (and including) TERMINATING_STRING: */
-extern char *xmalloc_fgets_str(FILE *file, const char *terminating_string);
+extern char *xmalloc_fgets_str(FILE *file, const char *terminating_string) FAST_FUNC;
 /* Chops off TERMINATING_STRING from the end: */
-extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string);
+extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FAST_FUNC;
 /* Reads up to (and including) "\n" or NUL byte: */
-extern char *xmalloc_fgets(FILE *file);
+extern char *xmalloc_fgets(FILE *file) FAST_FUNC;
 /* Chops off '\n' from the end, unlike fgets: */
-extern char *xmalloc_fgetline(FILE *file);
-extern char *bb_get_chunk_from_file(FILE *file, int *end);
-extern void die_if_ferror(FILE *file, const char *msg);
-extern void die_if_ferror_stdout(void);
-extern void xfflush_stdout(void);
-extern void fflush_stdout_and_exit(int retval) ATTRIBUTE_NORETURN;
-extern int fclose_if_not_stdin(FILE *file);
-extern FILE *xfopen(const char *filename, const char *mode);
+extern char *xmalloc_fgetline(FILE *file) FAST_FUNC;
+extern char *bb_get_chunk_from_file(FILE *file, int *end) FAST_FUNC;
+extern void die_if_ferror(FILE *file, const char *msg) FAST_FUNC;
+extern void die_if_ferror_stdout(void) FAST_FUNC;
+extern void xfflush_stdout(void) FAST_FUNC;
+extern void fflush_stdout_and_exit(int retval) ATTRIBUTE_NORETURN FAST_FUNC;
+extern int fclose_if_not_stdin(FILE *file) FAST_FUNC;
+extern FILE *xfopen(const char *filename, const char *mode) FAST_FUNC;
 /* Prints warning to stderr and returns NULL on failure: */
-extern FILE *fopen_or_warn(const char *filename, const char *mode);
+extern FILE *fopen_or_warn(const char *filename, const char *mode) FAST_FUNC;
 /* "Opens" stdin if filename is special, else just opens file: */
-extern FILE *xfopen_stdin(const char *filename);
-extern FILE *fopen_or_warn_stdin(const char *filename);
+extern FILE *xfopen_stdin(const char *filename) FAST_FUNC;
+extern FILE *fopen_or_warn_stdin(const char *filename) FAST_FUNC;
 
-int bb_pstrcmp(const void *a, const void *b);
-void qsort_string_vector(char **sv, unsigned count);
+int bb_pstrcmp(const void *a, const void *b) /* not FAST_FUNC! */;
+void qsort_string_vector(char **sv, unsigned count) FAST_FUNC;
 
 /* Wrapper which restarts poll on EINTR or ENOMEM.
  * On other errors complains [perror("poll")] and returns.
  * Warning! May take (much) longer than timeout_ms to return!
  * If this is a problem, use bare poll and open-code EINTR/ENOMEM handling */
-int safe_poll(struct pollfd *ufds, nfds_t nfds, int timeout_ms);
+int safe_poll(struct pollfd *ufds, nfds_t nfds, int timeout_ms) FAST_FUNC;
 
-char *safe_gethostname(void);
+char *safe_gethostname(void) FAST_FUNC;
 
 /* Convert each alpha char in str to lower-case */
-char* str_tolower(char *str);
+char* str_tolower(char *str) FAST_FUNC;
 
-char *utoa(unsigned n);
-char *itoa(int n);
+char *utoa(unsigned n) FAST_FUNC;
+char *itoa(int n) FAST_FUNC;
 /* Returns a pointer past the formatted number, does NOT null-terminate */
-char *utoa_to_buf(unsigned n, char *buf, unsigned buflen);
-char *itoa_to_buf(int n, char *buf, unsigned buflen);
+char *utoa_to_buf(unsigned n, char *buf, unsigned buflen) FAST_FUNC;
+char *itoa_to_buf(int n, char *buf, unsigned buflen) FAST_FUNC;
 /* Intelligent formatters of bignums */
-void smart_ulltoa4(unsigned long long ul, char buf[5], const char *scale);
-void smart_ulltoa5(unsigned long long ul, char buf[5], const char *scale);
+void smart_ulltoa4(unsigned long long ul, char buf[5], const char *scale) FAST_FUNC;
+void smart_ulltoa5(unsigned long long ul, char buf[5], const char *scale) FAST_FUNC;
 //TODO: provide pointer to buf (avoid statics)?
 const char *make_human_readable_str(unsigned long long size,
-		unsigned long block_size, unsigned long display_unit);
+		unsigned long block_size, unsigned long display_unit) FAST_FUNC;
 /* Put a string of hex bytes ("1b2e66fe"...), return advanced pointer */
-char *bin2hex(char *buf, const char *cp, int count);
+char *bin2hex(char *buf, const char *cp, int count) FAST_FUNC;
 
 /* Last element is marked by mult == 0 */
 struct suffix_mult {
@@ -658,56 +658,56 @@
  * in many places people want *non-negative* values, but store them
  * in signed int. Therefore we need this one:
  * dies if input is not in [0, INT_MAX] range. Also will reject '-0' etc */
-int xatoi_u(const char *numstr);
+int xatoi_u(const char *numstr) FAST_FUNC;
 /* Useful for reading port numbers */
-uint16_t xatou16(const char *numstr);
+uint16_t xatou16(const char *numstr) FAST_FUNC;
 
 
 /* These parse entries in /etc/passwd and /etc/group.  This is desirable
  * for BusyBox since we want to avoid using the glibc NSS stuff, which
  * increases target size and is often not needed on embedded systems.  */
-long xuname2uid(const char *name);
-long xgroup2gid(const char *name);
+long xuname2uid(const char *name) FAST_FUNC;
+long xgroup2gid(const char *name) FAST_FUNC;
 /* wrapper: allows string to contain numeric uid or gid */
-unsigned long get_ug_id(const char *s, long (*xname2id)(const char *));
+unsigned long get_ug_id(const char *s, long FAST_FUNC (*xname2id)(const char *)) FAST_FUNC;
 /* from chpst. Does not die, returns 0 on failure */
 struct bb_uidgid_t {
 	uid_t uid;
 	gid_t gid;
 };
 /* always sets uid and gid */
-int get_uidgid(struct bb_uidgid_t*, const char*, int numeric_ok);
+int get_uidgid(struct bb_uidgid_t*, const char*, int numeric_ok) FAST_FUNC;
 /* chown-like handling of "user[:[group]" */
-void parse_chown_usergroup_or_die(struct bb_uidgid_t *u, char *user_group);
+void parse_chown_usergroup_or_die(struct bb_uidgid_t *u, char *user_group) FAST_FUNC;
 /* bb_getpwuid, bb_getgrgid:
  * bb_getXXXid(buf, bufsz, id) - copy user/group name or id
  *              as a string to buf, return user/group name or NULL
  * bb_getXXXid(NULL, 0, id) - return user/group name or NULL
  * bb_getXXXid(NULL, -1, id) - return user/group name or exit
 */
-char *bb_getpwuid(char *name, int bufsize, long uid);
-char *bb_getgrgid(char *group, int bufsize, long gid);
+char *bb_getpwuid(char *name, int bufsize, long uid) FAST_FUNC;
+char *bb_getgrgid(char *group, int bufsize, long gid) FAST_FUNC;
 /* versions which cache results (useful for ps, ls etc) */
-const char* get_cached_username(uid_t uid);
-const char* get_cached_groupname(gid_t gid);
-void clear_username_cache(void);
+const char* get_cached_username(uid_t uid) FAST_FUNC;
+const char* get_cached_groupname(gid_t gid) FAST_FUNC;
+void clear_username_cache(void) FAST_FUNC;
 /* internally usernames are saved in fixed-sized char[] buffers */
 enum { USERNAME_MAX_SIZE = 16 - sizeof(int) };
 #if ENABLE_FEATURE_CHECK_NAMES
-void die_if_bad_username(const char* name);
+void die_if_bad_username(const char* name) FAST_FUNC;
 #else
 #define die_if_bad_username(name) ((void)(name))
 #endif
 
-int execable_file(const char *name);
-char *find_execable(const char *filename, char **PATHp);
-int exists_execable(const char *filename);
+int execable_file(const char *name) FAST_FUNC;
+char *find_execable(const char *filename, char **PATHp) FAST_FUNC;
+int exists_execable(const char *filename) FAST_FUNC;
 
 /* BB_EXECxx always execs (it's not doing NOFORK/NOEXEC stuff),
  * but it may exec busybox and call applet instead of searching PATH.
  */
 #if ENABLE_FEATURE_PREFER_APPLETS
-int bb_execvp(const char *file, char *const argv[]);
+int bb_execvp(const char *file, char *const argv[]) FAST_FUNC;
 #define BB_EXECVP(prog,cmd) bb_execvp(prog,cmd)
 #define BB_EXECLP(prog,cmd,...) \
 	execlp((find_applet_by_name(prog) >= 0) ? CONFIG_BUSYBOX_EXEC_PATH : prog, \
@@ -718,10 +718,10 @@
 #endif
 
 /* NOMMU friendy fork+exec */
-pid_t spawn(char **argv);
-pid_t xspawn(char **argv);
+pid_t spawn(char **argv) FAST_FUNC;
+pid_t xspawn(char **argv) FAST_FUNC;
 
-int safe_waitpid(int pid, int *wstat, int options);
+int safe_waitpid(int pid, int *wstat, int options) FAST_FUNC;
 /* Unlike waitpid, waits ONLY for one process.
  * It's safe to pass negative 'pids' from failed [v]fork -
  * wait4pid will return -1 (and will not clobber [v]fork's errno).
@@ -729,14 +729,14 @@
  *      if (rc < 0) bb_perror_msg("%s", argv[0]);
  *      if (rc > 0) bb_error_msg("exit code: %d", rc);
  */
-int wait4pid(int pid);
-int wait_any_nohang(int *wstat);
+int wait4pid(int pid) FAST_FUNC;
+int wait_any_nohang(int *wstat) FAST_FUNC;
 #define wait_crashed(w) ((w) & 127)
 #define wait_exitcode(w) ((w) >> 8)
 #define wait_stopsig(w) ((w) >> 8)
 #define wait_stopped(w) (((w) & 127) == 127)
 /* wait4pid(spawn(argv)) + NOFORK/NOEXEC (if configured) */
-int spawn_and_wait(char **argv);
+int spawn_and_wait(char **argv) FAST_FUNC;
 struct nofork_save_area {
 	jmp_buf die_jmp;
 	const char *applet_name;
@@ -745,11 +745,11 @@
 	int die_sleep;
 	smallint saved;
 };
-void save_nofork_data(struct nofork_save_area *save);
-void restore_nofork_data(struct nofork_save_area *save);
+void save_nofork_data(struct nofork_save_area *save) FAST_FUNC;
+void restore_nofork_data(struct nofork_save_area *save) FAST_FUNC;
 /* Does NOT check that applet is NOFORK, just blindly runs it */
-int run_nofork_applet(int applet_no, char **argv);
-int run_nofork_applet_prime(struct nofork_save_area *old, int applet_no, char **argv);
+int run_nofork_applet(int applet_no, char **argv) FAST_FUNC;
+int run_nofork_applet_prime(struct nofork_save_area *old, int applet_no, char **argv) FAST_FUNC;
 
 /* Helpers for daemonization.
  *
@@ -781,26 +781,26 @@
 	DAEMON_ONLY_SANITIZE = 8, /* internal use */
 };
 #if BB_MMU
-  void forkexit_or_rexec(void);
+  void forkexit_or_rexec(void) FAST_FUNC;
   enum { re_execed = 0 };
 # define forkexit_or_rexec(argv)            forkexit_or_rexec()
 # define bb_daemonize_or_rexec(flags, argv) bb_daemonize_or_rexec(flags)
 # define bb_daemonize(flags)                bb_daemonize_or_rexec(flags, bogus)
 #else
-  void re_exec(char **argv) ATTRIBUTE_NORETURN;
-  void forkexit_or_rexec(char **argv);
+  void re_exec(char **argv) ATTRIBUTE_NORETURN FAST_FUNC;
+  void forkexit_or_rexec(char **argv) FAST_FUNC;
   extern bool re_execed;
-  int  BUG_fork_is_unavailable_on_nommu(void);
-  int  BUG_daemon_is_unavailable_on_nommu(void);
-  void BUG_bb_daemonize_is_unavailable_on_nommu(void);
+  int  BUG_fork_is_unavailable_on_nommu(void) FAST_FUNC;
+  int  BUG_daemon_is_unavailable_on_nommu(void) FAST_FUNC;
+  void BUG_bb_daemonize_is_unavailable_on_nommu(void) FAST_FUNC;
 # define fork()          BUG_fork_is_unavailable_on_nommu()
 # define daemon(a,b)     BUG_daemon_is_unavailable_on_nommu()
 # define bb_daemonize(a) BUG_bb_daemonize_is_unavailable_on_nommu()
 #endif
-void bb_daemonize_or_rexec(int flags, char **argv);
-void bb_sanitize_stdio(void);
+void bb_daemonize_or_rexec(int flags, char **argv) FAST_FUNC;
+void bb_sanitize_stdio(void) FAST_FUNC;
 /* Clear dangerous stuff, set PATH. Return 1 if was run by different user. */
-int sanitize_env_if_suid(void);
+int sanitize_env_if_suid(void) FAST_FUNC;
 
 
 extern const char *const bb_argv_dash[]; /* "-", NULL */
@@ -812,19 +812,19 @@
 extern const char *applet_long_options;
 #endif
 extern uint32_t option_mask32;
-extern uint32_t getopt32(char **argv, const char *applet_opts, ...);
+extern uint32_t getopt32(char **argv, const char *applet_opts, ...) FAST_FUNC;
 
 
 typedef struct llist_t {
 	char *data;
 	struct llist_t *link;
 } llist_t;
-void llist_add_to(llist_t **old_head, void *data);
-void llist_add_to_end(llist_t **list_head, void *data);
-void *llist_pop(llist_t **elm);
-void llist_unlink(llist_t **head, llist_t *elm);
-void llist_free(llist_t *elm, void (*freeit)(void *data));
-llist_t *llist_rev(llist_t *list);
+void llist_add_to(llist_t **old_head, void *data) FAST_FUNC;
+void llist_add_to_end(llist_t **list_head, void *data) FAST_FUNC;
+void *llist_pop(llist_t **elm) FAST_FUNC;
+void llist_unlink(llist_t **head, llist_t *elm) FAST_FUNC;
+void llist_free(llist_t *elm, void (*freeit)(void *data)) FAST_FUNC;
+llist_t *llist_rev(llist_t *list) FAST_FUNC;
 /* BTW, surprisingly, changing API to
  *   llist_t *llist_add_to(llist_t *old_head, void *data)
  * etc does not result in smaller code... */
@@ -834,7 +834,7 @@
 #if ENABLE_FEATURE_PIDFILE || defined(WANT_PIDFILE)
 /* True only if we created pidfile which is *file*, not /dev/null etc */
 extern smallint wrote_pidfile;
-void write_pidfile(const char *path);
+void write_pidfile(const char *path) FAST_FUNC;
 #define remove_pidfile(path) do { if (wrote_pidfile) unlink(path); } while (0)
 #else
 enum { wrote_pidfile = 0 };
@@ -853,20 +853,20 @@
 extern int die_sleep;
 extern int xfunc_error_retval;
 extern jmp_buf die_jmp;
-extern void xfunc_die(void) ATTRIBUTE_NORETURN;
-extern void bb_show_usage(void) ATTRIBUTE_NORETURN;
-extern void bb_error_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
-extern void bb_error_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2)));
-extern void bb_perror_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
-extern void bb_simple_perror_msg(const char *s);
-extern void bb_perror_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2)));
-extern void bb_simple_perror_msg_and_die(const char *s) __attribute__ ((noreturn));
-extern void bb_herror_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
-extern void bb_herror_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2)));
-extern void bb_perror_nomsg_and_die(void) ATTRIBUTE_NORETURN;
-extern void bb_perror_nomsg(void);
-extern void bb_info_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2)));
-extern void bb_verror_msg(const char *s, va_list p, const char *strerr);
+extern void xfunc_die(void) ATTRIBUTE_NORETURN FAST_FUNC;
+extern void bb_show_usage(void) ATTRIBUTE_NORETURN FAST_FUNC;
+extern void bb_error_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2))) FAST_FUNC;
+extern void bb_error_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2))) FAST_FUNC;
+extern void bb_perror_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2))) FAST_FUNC;
+extern void bb_simple_perror_msg(const char *s) FAST_FUNC;
+extern void bb_perror_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2))) FAST_FUNC;
+extern void bb_simple_perror_msg_and_die(const char *s) __attribute__ ((noreturn)) FAST_FUNC;
+extern void bb_herror_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2))) FAST_FUNC;
+extern void bb_herror_msg_and_die(const char *s, ...) __attribute__ ((noreturn, format (printf, 1, 2))) FAST_FUNC;
+extern void bb_perror_nomsg_and_die(void) ATTRIBUTE_NORETURN FAST_FUNC;
+extern void bb_perror_nomsg(void) FAST_FUNC;
+extern void bb_info_msg(const char *s, ...) __attribute__ ((format (printf, 1, 2))) FAST_FUNC;
+extern void bb_verror_msg(const char *s, va_list p, const char *strerr) FAST_FUNC;
 
 /* We need to export XXX_main from libbusybox
  * only if we build "individual" binaries
@@ -893,15 +893,15 @@
 int bbunpack(char **argv,
 	char* (*make_new_name)(char *filename),
 	USE_DESKTOP(long long) int (*unpacker)(void)
-);
+) FAST_FUNC;
 #if ENABLE_ROUTE
-void bb_displayroutes(int noresolve, int netstatfmt);
+void bb_displayroutes(int noresolve, int netstatfmt) FAST_FUNC;
 #endif
 
 
 /* Networking */
-int create_icmp_socket(void);
-int create_icmp6_socket(void);
+int create_icmp_socket(void) FAST_FUNC;
+int create_icmp6_socket(void) FAST_FUNC;
 /* interface.c */
 /* This structure defines protocol families and their handlers. */
 struct aftype {
@@ -909,15 +909,14 @@
 	const char *title;
 	int af;
 	int alen;
-	char *(*print) (unsigned char *);
-	const char *(*sprint) (struct sockaddr *, int numeric);
-	int (*input) (/*int type,*/ const char *bufp, struct sockaddr *);
-	void (*herror) (char *text);
-	int (*rprint) (int options);
-	int (*rinput) (int typ, int ext, char **argv);
-
+	char*       FAST_FUNC (*print)(unsigned char *);
+	const char* FAST_FUNC (*sprint)(struct sockaddr *, int numeric);
+	int         FAST_FUNC (*input)(/*int type,*/ const char *bufp, struct sockaddr *);
+	void        FAST_FUNC (*herror)(char *text);
+	int         FAST_FUNC (*rprint)(int options);
+	int         FAST_FUNC (*rinput)(int typ, int ext, char **argv);
 	/* may modify src */
-	int (*getmask) (char *src, struct sockaddr * mask, char *name);
+	int         FAST_FUNC (*getmask)(char *src, struct sockaddr *mask, char *name);
 };
 /* This structure defines hardware protocols and their handlers. */
 struct hwtype {
@@ -925,95 +924,95 @@
 	const char *title;
 	int type;
 	int alen;
-	char *(*print) (unsigned char *);
-	int (*input) (const char *, struct sockaddr *);
-	int (*activate) (int fd);
+	char* FAST_FUNC (*print)(unsigned char *);
+	int   FAST_FUNC (*input)(const char *, struct sockaddr *);
+	int   FAST_FUNC (*activate)(int fd);
 	int suppress_null_addr;
 };
 extern smallint interface_opt_a;
-int display_interfaces(char *ifname);
+int display_interfaces(char *ifname) FAST_FUNC;
 #if ENABLE_FEATURE_HWIB
-int in_ib(const char *bufp, struct sockaddr *sap);
+int in_ib(const char *bufp, struct sockaddr *sap) FAST_FUNC;
 #else
 #define in_ib(a, b) 1 /* fail */
 #endif
-const struct aftype *get_aftype(const char *name);
-const struct hwtype *get_hwtype(const char *name);
-const struct hwtype *get_hwntype(int type);
+const struct aftype *get_aftype(const char *name) FAST_FUNC;
+const struct hwtype *get_hwtype(const char *name) FAST_FUNC;
+const struct hwtype *get_hwntype(int type) FAST_FUNC;
 
 
 #ifndef BUILD_INDIVIDUAL
-extern int find_applet_by_name(const char *name);
+extern int find_applet_by_name(const char *name) FAST_FUNC;
 /* Returns only if applet is not found. */
-extern void run_applet_and_exit(const char *name, char **argv);
-extern void run_applet_no_and_exit(int a, char **argv) ATTRIBUTE_NORETURN;
+extern void run_applet_and_exit(const char *name, char **argv) FAST_FUNC;
+extern void run_applet_no_and_exit(int a, char **argv) ATTRIBUTE_NORETURN FAST_FUNC;
 #endif
 
 #ifdef HAVE_MNTENT_H
-extern int match_fstype(const struct mntent *mt, const char *fstypes);
-extern struct mntent *find_mount_point(const char *name, const char *table);
+extern int match_fstype(const struct mntent *mt, const char *fstypes) FAST_FUNC;
+extern struct mntent *find_mount_point(const char *name, const char *table) FAST_FUNC;
 #endif
-extern void erase_mtab(const char * name);
-extern unsigned int tty_baud_to_value(speed_t speed);
-extern speed_t tty_value_to_baud(unsigned int value);
-extern void bb_warn_ignoring_args(int n);
+extern void erase_mtab(const char * name) FAST_FUNC;
+extern unsigned int tty_baud_to_value(speed_t speed) FAST_FUNC;
+extern speed_t tty_value_to_baud(unsigned int value) FAST_FUNC;
+extern void bb_warn_ignoring_args(int n) FAST_FUNC;
 
-extern int get_linux_version_code(void);
+extern int get_linux_version_code(void) FAST_FUNC;
 
-extern char *query_loop(const char *device);
-extern int del_loop(const char *device);
+extern char *query_loop(const char *device) FAST_FUNC;
+extern int del_loop(const char *device) FAST_FUNC;
 /* If *devname is not NULL, use that name, otherwise try to find free one,
  * malloc and return it in *devname.
  * return value: 1: read-only loopdev was setup, 0: rw, < 0: error */
-extern int set_loop(char **devname, const char *file, unsigned long long offset);
+extern int set_loop(char **devname, const char *file, unsigned long long offset) FAST_FUNC;
 
 
 //TODO: pass buf pointer or return allocated buf (avoid statics)?
-char *bb_askpass(int timeout, const char * prompt);
-int bb_ask_confirmation(void);
+char *bb_askpass(int timeout, const char * prompt) FAST_FUNC;
+int bb_ask_confirmation(void) FAST_FUNC;
 
-extern int bb_parse_mode(const char* s, mode_t* theMode);
+extern int bb_parse_mode(const char* s, mode_t* theMode) FAST_FUNC;
 
 /* Concatenate path and filename to new allocated buffer.
  * Add "/" only as needed (no duplicate "//" are produced).
  * If path is NULL, it is assumed to be "/".
  * filename should not be NULL. */
-char *concat_path_file(const char *path, const char *filename);
-char *concat_subpath_file(const char *path, const char *filename);
-const char *bb_basename(const char *name);
+char *concat_path_file(const char *path, const char *filename) FAST_FUNC;
+char *concat_subpath_file(const char *path, const char *filename) FAST_FUNC;
+const char *bb_basename(const char *name) FAST_FUNC;
 /* NB: can violate const-ness (similarly to strchr) */
-char *last_char_is(const char *s, int c);
+char *last_char_is(const char *s, int c) FAST_FUNC;
 
 
-USE_DESKTOP(long long) int uncompress(int fd_in, int fd_out);
-int inflate(int in, int out);
+USE_DESKTOP(long long) int uncompress(int fd_in, int fd_out) FAST_FUNC;
+int inflate(int in, int out) FAST_FUNC;
 
 
-int bb_make_directory(char *path, long mode, int flags);
+int bb_make_directory(char *path, long mode, int flags) FAST_FUNC;
 
-int get_signum(const char *name);
-const char *get_signame(int number);
-void print_signames(void);
+int get_signum(const char *name) FAST_FUNC;
+const char *get_signame(int number) FAST_FUNC;
+void print_signames(void) FAST_FUNC;
 
-char *bb_simplify_path(const char *path);
+char *bb_simplify_path(const char *path) FAST_FUNC;
 
 #define FAIL_DELAY 3
-extern void bb_do_delay(int seconds);
-extern void change_identity(const struct passwd *pw);
-extern void run_shell(const char *shell, int loginshell, const char *command, const char **additional_args) ATTRIBUTE_NORETURN;
-extern void run_shell(const char *shell, int loginshell, const char *command, const char **additional_args);
+extern void bb_do_delay(int seconds) FAST_FUNC;
+extern void change_identity(const struct passwd *pw) FAST_FUNC;
+extern void run_shell(const char *shell, int loginshell, const char *command, const char **additional_args) ATTRIBUTE_NORETURN FAST_FUNC;
+extern void run_shell(const char *shell, int loginshell, const char *command, const char **additional_args) FAST_FUNC;
 #if ENABLE_SELINUX
-extern void renew_current_security_context(void);
-extern void set_current_security_context(security_context_t sid);
+extern void renew_current_security_context(void) FAST_FUNC;
+extern void set_current_security_context(security_context_t sid) FAST_FUNC;
 extern context_t set_security_context_component(security_context_t cur_context,
-						char *user, char *role, char *type, char *range);
-extern void setfscreatecon_or_die(security_context_t scontext);
-extern void selinux_preserve_fcontext(int fdesc);
+						char *user, char *role, char *type, char *range) FAST_FUNC;
+extern void setfscreatecon_or_die(security_context_t scontext) FAST_FUNC;
+extern void selinux_preserve_fcontext(int fdesc) FAST_FUNC;
 #else
 #define selinux_preserve_fcontext(fdesc) ((void)0)
 #endif
-extern void selinux_or_die(void);
-extern int restricted_shell(const char *shell);
+extern void selinux_or_die(void) FAST_FUNC;
+extern int restricted_shell(const char *shell) FAST_FUNC;
 
 /* setup_environment:
  * if clear_env = 1: cd(pw->pw_dir), clear environment, then set
@@ -1029,14 +1028,14 @@
  *   SHELL=shell
  * else does nothing
  */
-extern void setup_environment(const char *shell, int clear_env, int change_env, const struct passwd *pw);
-extern int correct_password(const struct passwd *pw);
+extern void setup_environment(const char *shell, int clear_env, int change_env, const struct passwd *pw) FAST_FUNC;
+extern int correct_password(const struct passwd *pw) FAST_FUNC;
 /* Returns a malloced string */
 #if !ENABLE_USE_BB_CRYPT
 #define pw_encrypt(clear, salt, cleanup) pw_encrypt(clear, salt)
 #endif
-extern char *pw_encrypt(const char *clear, const char *salt, int cleanup);
-extern int obscure(const char *old, const char *newval, const struct passwd *pwdp);
+extern char *pw_encrypt(const char *clear, const char *salt, int cleanup) FAST_FUNC;
+extern int obscure(const char *old, const char *newval, const struct passwd *pwdp) FAST_FUNC;
 /* rnd is additional random input. New one is returned.
  * Useful if you call crypt_make_salt many times in a row:
  * rnd = crypt_make_salt(buf1, 4, 0);
@@ -1044,44 +1043,44 @@
  * rnd = crypt_make_salt(buf3, 4, rnd);
  * (otherwise we risk having same salt generated)
  */
-extern int crypt_make_salt(char *p, int cnt, int rnd);
+extern int crypt_make_salt(char *p, int cnt, int rnd) FAST_FUNC;
 /* Returns number of lines changed, or -1 on error */
 extern int update_passwd(const char *filename, const char *username,
-			const char *new_pw);
+			const char *new_pw) FAST_FUNC;
 
-int index_in_str_array(const char *const string_array[], const char *key);
-int index_in_strings(const char *strings, const char *key);
-int index_in_substr_array(const char *const string_array[], const char *key);
-int index_in_substrings(const char *strings, const char *key);
-const char *nth_string(const char *strings, int n);
+int index_in_str_array(const char *const string_array[], const char *key) FAST_FUNC;
+int index_in_strings(const char *strings, const char *key) FAST_FUNC;
+int index_in_substr_array(const char *const string_array[], const char *key) FAST_FUNC;
+int index_in_substrings(const char *strings, const char *key) FAST_FUNC;
+const char *nth_string(const char *strings, int n) FAST_FUNC;
 
-extern void print_login_issue(const char *issue_file, const char *tty);
-extern void print_login_prompt(void);
+extern void print_login_issue(const char *issue_file, const char *tty) FAST_FUNC;
+extern void print_login_prompt(void) FAST_FUNC;
 
 /* NB: typically you want to pass fd 0, not 1. Think 'applet | grep something' */
-int get_terminal_width_height(int fd, unsigned *width, unsigned *height);
+int get_terminal_width_height(int fd, unsigned *width, unsigned *height) FAST_FUNC;
 
 /* NB: "unsigned request" is crucial! "int request" will break some arches! */
-int ioctl_or_perror(int fd, unsigned request, void *argp, const char *fmt,...) __attribute__ ((format (printf, 4, 5)));
-int ioctl_or_perror_and_die(int fd, unsigned request, void *argp, const char *fmt,...) __attribute__ ((format (printf, 4, 5)));
+int ioctl_or_perror(int fd, unsigned request, void *argp, const char *fmt,...) __attribute__ ((format (printf, 4, 5))) FAST_FUNC;
+int ioctl_or_perror_and_die(int fd, unsigned request, void *argp, const char *fmt,...) __attribute__ ((format (printf, 4, 5))) FAST_FUNC;
 #if ENABLE_IOCTL_HEX2STR_ERROR
-int bb_ioctl_or_warn(int fd, unsigned request, void *argp, const char *ioctl_name);
-int bb_xioctl(int fd, unsigned request, void *argp, const char *ioctl_name);
+int bb_ioctl_or_warn(int fd, unsigned request, void *argp, const char *ioctl_name) FAST_FUNC;
+int bb_xioctl(int fd, unsigned request, void *argp, const char *ioctl_name) FAST_FUNC;
 #define ioctl_or_warn(fd,request,argp) bb_ioctl_or_warn(fd,request,argp,#request)
 #define xioctl(fd,request,argp)        bb_xioctl(fd,request,argp,#request)
 #else
-int bb_ioctl_or_warn(int fd, unsigned request, void *argp);
-int bb_xioctl(int fd, unsigned request, void *argp);
+int bb_ioctl_or_warn(int fd, unsigned request, void *argp) FAST_FUNC;
+int bb_xioctl(int fd, unsigned request, void *argp) FAST_FUNC;
 #define ioctl_or_warn(fd,request,argp) bb_ioctl_or_warn(fd,request,argp)
 #define xioctl(fd,request,argp)        bb_xioctl(fd,request,argp)
 #endif
 
-char *is_in_ino_dev_hashtable(const struct stat *statbuf);
-void add_to_ino_dev_hashtable(const struct stat *statbuf, const char *name);
-void reset_ino_dev_hashtable(void);
+char *is_in_ino_dev_hashtable(const struct stat *statbuf) FAST_FUNC;
+void add_to_ino_dev_hashtable(const struct stat *statbuf, const char *name) FAST_FUNC;
+void reset_ino_dev_hashtable(void) FAST_FUNC;
 #ifdef __GLIBC__
 /* At least glibc has horrendously large inline for this, so wrap it */
-unsigned long long bb_makedev(unsigned int major, unsigned int minor);
+unsigned long long bb_makedev(unsigned int major, unsigned int minor) FAST_FUNC;
 #undef makedev
 #define makedev(a,b) bb_makedev(a,b)
 #endif
@@ -1113,15 +1112,15 @@
 	WITH_PATH_LOOKUP = 0x20,
 	FOR_SHELL = DO_HISTORY | SAVE_HISTORY | TAB_COMPLETION | USERNAME_COMPLETION,
 };
-line_input_t *new_line_input_t(int flags);
+line_input_t *new_line_input_t(int flags) FAST_FUNC;
 /* Returns:
  * -1 on read errors or EOF, or on bare Ctrl-D,
  * 0  on ctrl-C (the line entered is still returned in 'command'),
  * >0 length of input string, including terminating '\n'
  */
-int read_line_input(const char* prompt, char* command, int maxsize, line_input_t *state);
+int read_line_input(const char* prompt, char* command, int maxsize, line_input_t *state) FAST_FUNC;
 #else
-int read_line_input(const char* prompt, char* command, int maxsize);
+int read_line_input(const char* prompt, char* command, int maxsize) FAST_FUNC;
 #define read_line_input(prompt, command, maxsize, state) \
 	read_line_input(prompt, command, maxsize)
 #endif
@@ -1197,28 +1196,28 @@
 			| PSSCAN_STIME | PSSCAN_UTIME | PSSCAN_START_TIME
 			| PSSCAN_TTY,
 };
-//procps_status_t* alloc_procps_scan(void);
-void free_procps_scan(procps_status_t* sp);
-procps_status_t* procps_scan(procps_status_t* sp, int flags);
+//procps_status_t* alloc_procps_scan(void) FAST_FUNC;
+void free_procps_scan(procps_status_t* sp) FAST_FUNC;
+procps_status_t* procps_scan(procps_status_t* sp, int flags) FAST_FUNC;
 /* Format cmdline (up to col chars) into char buf[col+1] */
 /* Puts [comm] if cmdline is empty (-> process is a kernel thread) */
-void read_cmdline(char *buf, int col, unsigned pid, const char *comm);
-pid_t *find_pid_by_name(const char* procName);
-pid_t *pidlist_reverse(pid_t *pidList);
+void read_cmdline(char *buf, int col, unsigned pid, const char *comm) FAST_FUNC;
+pid_t *find_pid_by_name(const char* procName) FAST_FUNC;
+pid_t *pidlist_reverse(pid_t *pidList) FAST_FUNC;
 
 
 extern const char bb_uuenc_tbl_base64[];
 extern const char bb_uuenc_tbl_std[];
-void bb_uuencode(char *store, const void *s, int length, const char *tbl);
+void bb_uuencode(char *store, const void *s, int length, const char *tbl) FAST_FUNC;
 
 typedef struct sha1_ctx_t {
 	uint32_t count[2];
 	uint32_t hash[5];
 	uint32_t wbuf[16];
 } sha1_ctx_t;
-void sha1_begin(sha1_ctx_t *ctx);
-void sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx);
-void *sha1_end(void *resbuf, sha1_ctx_t *ctx);
+void sha1_begin(sha1_ctx_t *ctx) FAST_FUNC;
+void sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx) FAST_FUNC;
+void *sha1_end(void *resbuf, sha1_ctx_t *ctx) FAST_FUNC;
 
 typedef struct md5_ctx_t {
 	uint32_t A;
@@ -1229,11 +1228,19 @@
 	uint32_t buflen;
 	char buffer[128];
 } md5_ctx_t;
-void md5_begin(md5_ctx_t *ctx);
-void md5_hash(const void *data, size_t length, md5_ctx_t *ctx);
-void *md5_end(void *resbuf, md5_ctx_t *ctx);
+void md5_begin(md5_ctx_t *ctx) FAST_FUNC;
+void md5_hash(const void *data, size_t length, md5_ctx_t *ctx) FAST_FUNC;
+void *md5_end(void *resbuf, md5_ctx_t *ctx) FAST_FUNC;
 
-uint32_t *crc32_filltable(uint32_t *tbl256, int endian);
+uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC;
+
+typedef struct masks_labels_t {
+	const char *labels;
+	const int masks[];
+} masks_labels_t;
+int print_flags_separated(const int *masks, const char *labels,
+		int flags, const char *separator) FAST_FUNC;
+int print_flags(const masks_labels_t *ml, int flags) FAST_FUNC;
 
 
 extern const char *applet_name;
@@ -1305,15 +1312,6 @@
 /* "sh" */
 #define DEFAULT_SHELL_SHORT_NAME     (bb_default_login_shell+6)
 
-typedef struct masks_labels_t {
-	const char *labels;
-	const int masks[];
-} masks_labels_t;
-
-int print_flags_separated(const int *masks, const char *labels,
-		int flags, const char *separator);
-extern int print_flags(const masks_labels_t *ml, int flags);
-
 #if ENABLE_FEATURE_DEVFS
 # define CURRENT_VC "/dev/vc/0"
 # define VC_1 "/dev/vc/1"
diff --git a/include/platform.h b/include/platform.h
index cdc1151..8d6ed9a 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -97,6 +97,13 @@
 # endif
 #endif
 
+#if __GNUC_PREREQ(3,0) && (defined(i386) || defined(__i386__))
+/* x86-32 only (x86_64 ABIs already pass args in registers); __i386__ also covers -ansi/-std=c99, where plain i386 is not predefined */
+# define FAST_FUNC __attribute__((regparm(3)))
+#else
+# define FAST_FUNC
+#endif
+
 /* ---- Endian Detection ------------------------------------ */
 
 #if (defined __digital__ && defined __unix__)
diff --git a/include/rtc_.h b/include/rtc_.h
index b8e4de8..2e99058 100644
--- a/include/rtc_.h
+++ b/include/rtc_.h
@@ -13,9 +13,9 @@
 # pragma GCC visibility push(hidden)
 #endif
 
-extern int rtc_adjtime_is_utc(void);
-extern int rtc_xopen(const char **default_rtc, int flags);
-extern time_t rtc_read_time(int fd, int utc);
+extern int rtc_adjtime_is_utc(void) FAST_FUNC;
+extern int rtc_xopen(const char **default_rtc, int flags) FAST_FUNC;
+extern time_t rtc_read_time(int fd, int utc) FAST_FUNC;
 
 /*
  * Everything below this point has been copied from linux/rtc.h
diff --git a/include/unarchive.h b/include/unarchive.h
index a6b0477..721f879 100644
--- a/include/unarchive.h
+++ b/include/unarchive.h
@@ -31,7 +31,7 @@
 
 typedef struct archive_handle_t {
 	/* define if the header and data component should be processed */
-	char (*filter)(struct archive_handle_t *);
+	char FAST_FUNC (*filter)(struct archive_handle_t *);
 	llist_t *accept;
 	/* List of files that have been rejected */
 	llist_t *reject;
@@ -42,13 +42,13 @@
 	file_header_t *file_header;
 
 	/* process the header component, e.g. tar -t */
-	void (*action_header)(const file_header_t *);
+	void FAST_FUNC (*action_header)(const file_header_t *);
 
 	/* process the data component, e.g. extract to filesystem */
-	void (*action_data)(struct archive_handle_t *);
+	void FAST_FUNC (*action_data)(struct archive_handle_t *);
 
 	/* How to process any sub archive, e.g. get_header_tar_gz */
-	char (*action_data_subarchive)(struct archive_handle_t *);
+	char FAST_FUNC (*action_data_subarchive)(struct archive_handle_t *);
 
 	/* Contains the handle to a sub archive */
 	struct archive_handle_t *sub_archive;
@@ -60,7 +60,7 @@
 	off_t offset;
 
 	/* Function that skips data: read_by_char or read_by_skip */
-	void (*seek)(const struct archive_handle_t *archive_handle, const unsigned amount);
+	void FAST_FUNC (*seek)(const struct archive_handle_t *archive_handle, const unsigned amount);
 
 	/* Temporary storage */
 	char *buffer;
@@ -71,62 +71,62 @@
 } archive_handle_t;
 
 
-extern archive_handle_t *init_handle(void);
+extern archive_handle_t *init_handle(void) FAST_FUNC;
 
-extern char filter_accept_all(archive_handle_t *archive_handle);
-extern char filter_accept_list(archive_handle_t *archive_handle);
-extern char filter_accept_list_reassign(archive_handle_t *archive_handle);
-extern char filter_accept_reject_list(archive_handle_t *archive_handle);
+extern char filter_accept_all(archive_handle_t *archive_handle) FAST_FUNC;
+extern char filter_accept_list(archive_handle_t *archive_handle) FAST_FUNC;
+extern char filter_accept_list_reassign(archive_handle_t *archive_handle) FAST_FUNC;
+extern char filter_accept_reject_list(archive_handle_t *archive_handle) FAST_FUNC;
 
-extern void unpack_ar_archive(archive_handle_t *ar_archive);
+extern void unpack_ar_archive(archive_handle_t *ar_archive) FAST_FUNC;
 
-extern void data_skip(archive_handle_t *archive_handle);
-extern void data_extract_all(archive_handle_t *archive_handle);
-extern void data_extract_to_stdout(archive_handle_t *archive_handle);
-extern void data_extract_to_buffer(archive_handle_t *archive_handle);
+extern void data_skip(archive_handle_t *archive_handle) FAST_FUNC;
+extern void data_extract_all(archive_handle_t *archive_handle) FAST_FUNC;
+extern void data_extract_to_stdout(archive_handle_t *archive_handle) FAST_FUNC;
+extern void data_extract_to_buffer(archive_handle_t *archive_handle) FAST_FUNC;
 
-extern void header_skip(const file_header_t *file_header);
-extern void header_list(const file_header_t *file_header);
-extern void header_verbose_list(const file_header_t *file_header);
+extern void header_skip(const file_header_t *file_header) FAST_FUNC;
+extern void header_list(const file_header_t *file_header) FAST_FUNC;
+extern void header_verbose_list(const file_header_t *file_header) FAST_FUNC;
 
-extern char get_header_ar(archive_handle_t *archive_handle);
-extern char get_header_cpio(archive_handle_t *archive_handle);
-extern char get_header_tar(archive_handle_t *archive_handle);
-extern char get_header_tar_bz2(archive_handle_t *archive_handle);
-extern char get_header_tar_lzma(archive_handle_t *archive_handle);
-extern char get_header_tar_gz(archive_handle_t *archive_handle);
+extern char get_header_ar(archive_handle_t *archive_handle) FAST_FUNC;
+extern char get_header_cpio(archive_handle_t *archive_handle) FAST_FUNC;
+extern char get_header_tar(archive_handle_t *archive_handle) FAST_FUNC;
+extern char get_header_tar_bz2(archive_handle_t *archive_handle) FAST_FUNC;
+extern char get_header_tar_lzma(archive_handle_t *archive_handle) FAST_FUNC;
+extern char get_header_tar_gz(archive_handle_t *archive_handle) FAST_FUNC;
 
-extern void seek_by_jump(const archive_handle_t *archive_handle, unsigned amount);
-extern void seek_by_read(const archive_handle_t *archive_handle, unsigned amount);
+extern void seek_by_jump(const archive_handle_t *archive_handle, unsigned amount) FAST_FUNC;
+extern void seek_by_read(const archive_handle_t *archive_handle, unsigned amount) FAST_FUNC;
 
-extern ssize_t archive_xread_all_eof(archive_handle_t *archive_handle, unsigned char *buf, size_t count);
+extern ssize_t archive_xread_all_eof(archive_handle_t *archive_handle, unsigned char *buf, size_t count) FAST_FUNC;
 
-extern void data_align(archive_handle_t *archive_handle, unsigned boundary);
-extern const llist_t *find_list_entry(const llist_t *list, const char *filename);
-extern const llist_t *find_list_entry2(const llist_t *list, const char *filename);
+extern void data_align(archive_handle_t *archive_handle, unsigned boundary) FAST_FUNC;
+extern const llist_t *find_list_entry(const llist_t *list, const char *filename) FAST_FUNC;
+extern const llist_t *find_list_entry2(const llist_t *list, const char *filename) FAST_FUNC;
 
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, int len);
-int read_bunzip(bunzip_data *bd, char *outbuf, int len);
-void dealloc_bunzip(bunzip_data *bd);
+int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, int len) FAST_FUNC;
+int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
+void dealloc_bunzip(bunzip_data *bd) FAST_FUNC;
 
 typedef struct inflate_unzip_result {
 	off_t bytes_out;
 	uint32_t crc;
 } inflate_unzip_result;
 
-extern USE_DESKTOP(long long) int unpack_bz2_stream(int src_fd, int dst_fd);
-extern USE_DESKTOP(long long) int inflate_unzip(inflate_unzip_result *res, off_t compr_size, int src_fd, int dst_fd);
-extern USE_DESKTOP(long long) int unpack_gz_stream(int src_fd, int dst_fd);
-extern USE_DESKTOP(long long) int unpack_lzma_stream(int src_fd, int dst_fd);
+extern USE_DESKTOP(long long) int unpack_bz2_stream(int src_fd, int dst_fd) FAST_FUNC;
+extern USE_DESKTOP(long long) int inflate_unzip(inflate_unzip_result *res, off_t compr_size, int src_fd, int dst_fd) FAST_FUNC;
+extern USE_DESKTOP(long long) int unpack_gz_stream(int src_fd, int dst_fd) FAST_FUNC;
+extern USE_DESKTOP(long long) int unpack_lzma_stream(int src_fd, int dst_fd) FAST_FUNC;
 
 #if BB_MMU
 extern int open_transformer(int src_fd,
-	USE_DESKTOP(long long) int (*transformer)(int src_fd, int dst_fd));
+	USE_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd)) FAST_FUNC;
 #define open_transformer(src_fd, transformer, transform_prog) open_transformer(src_fd, transformer)
 #else
-extern int open_transformer(int src_fd, const char *transform_prog);
+extern int open_transformer(int src_fd, const char *transform_prog) FAST_FUNC;
 #define open_transformer(src_fd, transformer, transform_prog) open_transformer(src_fd, transform_prog)
 #endif
 
diff --git a/include/xatonum.h b/include/xatonum.h
index 6cf1299..944ee77 100644
--- a/include/xatonum.h
+++ b/include/xatonum.h
@@ -14,20 +14,20 @@
 /* Provides extern declarations of functions */
 #define DECLARE_STR_CONV(type, T, UT) \
 \
-unsigned type xstrto##UT##_range_sfx(const char *str, int b, unsigned type l, unsigned type u, const struct suffix_mult *sfx); \
-unsigned type xstrto##UT##_range(const char *str, int b, unsigned type l, unsigned type u); \
-unsigned type xstrto##UT##_sfx(const char *str, int b, const struct suffix_mult *sfx); \
-unsigned type xstrto##UT(const char *str, int b); \
-unsigned type xato##UT##_range_sfx(const char *str, unsigned type l, unsigned type u, const struct suffix_mult *sfx); \
-unsigned type xato##UT##_range(const char *str, unsigned type l, unsigned type u); \
-unsigned type xato##UT##_sfx(const char *str, const struct suffix_mult *sfx); \
-unsigned type xato##UT(const char *str); \
-type xstrto##T##_range_sfx(const char *str, int b, type l, type u, const struct suffix_mult *sfx); \
-type xstrto##T##_range(const char *str, int b, type l, type u); \
-type xato##T##_range_sfx(const char *str, type l, type u, const struct suffix_mult *sfx); \
-type xato##T##_range(const char *str, type l, type u); \
-type xato##T##_sfx(const char *str, const struct suffix_mult *sfx); \
-type xato##T(const char *str); \
+unsigned type xstrto##UT##_range_sfx(const char *str, int b, unsigned type l, unsigned type u, const struct suffix_mult *sfx) FAST_FUNC; \
+unsigned type xstrto##UT##_range(const char *str, int b, unsigned type l, unsigned type u) FAST_FUNC; \
+unsigned type xstrto##UT##_sfx(const char *str, int b, const struct suffix_mult *sfx) FAST_FUNC; \
+unsigned type xstrto##UT(const char *str, int b) FAST_FUNC; \
+unsigned type xato##UT##_range_sfx(const char *str, unsigned type l, unsigned type u, const struct suffix_mult *sfx) FAST_FUNC; \
+unsigned type xato##UT##_range(const char *str, unsigned type l, unsigned type u) FAST_FUNC; \
+unsigned type xato##UT##_sfx(const char *str, const struct suffix_mult *sfx) FAST_FUNC; \
+unsigned type xato##UT(const char *str) FAST_FUNC; \
+type xstrto##T##_range_sfx(const char *str, int b, type l, type u, const struct suffix_mult *sfx) FAST_FUNC; \
+type xstrto##T##_range(const char *str, int b, type l, type u) FAST_FUNC; \
+type xato##T##_range_sfx(const char *str, type l, type u, const struct suffix_mult *sfx) FAST_FUNC; \
+type xato##T##_range(const char *str, type l, type u) FAST_FUNC; \
+type xato##T##_sfx(const char *str, const struct suffix_mult *sfx) FAST_FUNC; \
+type xato##T(const char *str) FAST_FUNC; \
 
 /* Unsigned long long functions always exist */
 DECLARE_STR_CONV(long long, ll, ull)
@@ -122,8 +122,8 @@
  *    return value is all-ones in this case.
  */
 
-unsigned long long bb_strtoull(const char *arg, char **endp, int base);
-long long bb_strtoll(const char *arg, char **endp, int base);
+unsigned long long bb_strtoull(const char *arg, char **endp, int base) FAST_FUNC;
+long long bb_strtoll(const char *arg, char **endp, int base) FAST_FUNC;
 
 #if ULONG_MAX == ULLONG_MAX
 static ALWAYS_INLINE
@@ -133,8 +133,8 @@
 long bb_strtol(const char *arg, char **endp, int base)
 { return bb_strtoll(arg, endp, base); }
 #else
-unsigned long bb_strtoul(const char *arg, char **endp, int base);
-long bb_strtol(const char *arg, char **endp, int base);
+unsigned long bb_strtoul(const char *arg, char **endp, int base) FAST_FUNC;
+long bb_strtol(const char *arg, char **endp, int base) FAST_FUNC;
 #endif
 
 #if UINT_MAX == ULLONG_MAX
@@ -152,8 +152,8 @@
 int bb_strtoi(const char *arg, char **endp, int base)
 { return bb_strtol(arg, endp, base); }
 #else
-unsigned bb_strtou(const char *arg, char **endp, int base);
-int bb_strtoi(const char *arg, char **endp, int base);
+unsigned bb_strtou(const char *arg, char **endp, int base) FAST_FUNC;
+int bb_strtoi(const char *arg, char **endp, int base) FAST_FUNC;
 #endif
 
 int BUG_bb_strtou32_unimplemented(void);
diff --git a/include/xregex.h b/include/xregex.h
index d4bf732..90cf124 100644
--- a/include/xregex.h
+++ b/include/xregex.h
@@ -17,8 +17,8 @@
 # pragma GCC visibility push(hidden)
 #endif
 
-char* regcomp_or_errmsg(regex_t *preg, const char *regex, int cflags);
-void xregcomp(regex_t *preg, const char *regex, int cflags);
+char* regcomp_or_errmsg(regex_t *preg, const char *regex, int cflags) FAST_FUNC;
+void xregcomp(regex_t *preg, const char *regex, int cflags) FAST_FUNC;
 
 #if __GNUC_PREREQ(4,1)
 # pragma GCC visibility pop
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 06a83e5..0017a4b 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -101,7 +101,7 @@
 	full_write(STDERR_FILENO, str, strlen(str));
 }
 
-void bb_show_usage(void)
+void FAST_FUNC bb_show_usage(void)
 {
 	if (ENABLE_SHOW_USAGE) {
 #ifdef SINGLE_APPLET_STR
@@ -153,7 +153,7 @@
 	return strcmp(name, APPLET_NAME(i));
 }
 #endif
-int find_applet_by_name(const char *name)
+int FAST_FUNC find_applet_by_name(const char *name)
 {
 #if NUM_APPLETS > 8
 	/* Do a binary search to find the applet entry given the name. */
@@ -704,7 +704,7 @@
 	xfunc_die();
 }
 
-void run_applet_no_and_exit(int applet_no, char **argv)
+void FAST_FUNC run_applet_no_and_exit(int applet_no, char **argv)
 {
 	int argc = 1;
 
@@ -722,7 +722,7 @@
 	exit(applet_main[applet_no](argc, argv));
 }
 
-void run_applet_and_exit(const char *name, char **argv)
+void FAST_FUNC run_applet_and_exit(const char *name, char **argv)
 {
 	int applet = find_applet_by_name(name);
 	if (applet >= 0)
diff --git a/libbb/ask_confirmation.c b/libbb/ask_confirmation.c
index 646ec4b..d08bc51 100644
--- a/libbb/ask_confirmation.c
+++ b/libbb/ask_confirmation.c
@@ -13,7 +13,7 @@
 
 #include "libbb.h"
 
-int bb_ask_confirmation(void)
+int FAST_FUNC bb_ask_confirmation(void)
 {
 	int retval = 0;
 	int first = 1;
diff --git a/libbb/bb_askpass.c b/libbb/bb_askpass.c
index 3ad0e97..ca18ade 100644
--- a/libbb/bb_askpass.c
+++ b/libbb/bb_askpass.c
@@ -17,7 +17,7 @@
 {
 }
 
-char *bb_askpass(int timeout, const char *prompt)
+char* FAST_FUNC bb_askpass(int timeout, const char *prompt)
 {
 	/* Was static char[BIGNUM] */
 	enum { sizeof_passwd = 128 };
diff --git a/libbb/bb_basename.c b/libbb/bb_basename.c
index e6832f8..bab4166 100644
--- a/libbb/bb_basename.c
+++ b/libbb/bb_basename.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-const char *bb_basename(const char *name)
+const char* FAST_FUNC bb_basename(const char *name)
 {
 	const char *cp = strrchr(name, '/');
 	if (cp)
diff --git a/libbb/bb_do_delay.c b/libbb/bb_do_delay.c
index aa26ade..3d52cc5 100644
--- a/libbb/bb_do_delay.c
+++ b/libbb/bb_do_delay.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_do_delay(int seconds)
+void FAST_FUNC bb_do_delay(int seconds)
 {
 	time_t start, now;
 
diff --git a/libbb/bb_pwd.c b/libbb/bb_pwd.c
index 2bdb662..b03dc83 100644
--- a/libbb/bb_pwd.c
+++ b/libbb/bb_pwd.c
@@ -45,7 +45,7 @@
  * bb_getXXXid(NULL, -1, id) - return user/group name or exit
  */
 /* gets a username given a uid */
-char* bb_getpwuid(char *name, int bufsize, long uid)
+char* FAST_FUNC bb_getpwuid(char *name, int bufsize, long uid)
 {
 	struct passwd *myuser = getpwuid(uid);
 
@@ -54,7 +54,7 @@
 			uid, 'u');
 }
 /* gets a groupname given a gid */
-char* bb_getgrgid(char *group, int bufsize, long gid)
+char* FAST_FUNC bb_getgrgid(char *group, int bufsize, long gid)
 {
 	struct group *mygroup = getgrgid(gid);
 
@@ -64,7 +64,7 @@
 }
 
 /* returns a gid given a group name */
-long xgroup2gid(const char *name)
+long FAST_FUNC xgroup2gid(const char *name)
 {
 	struct group *mygroup;
 
@@ -76,7 +76,7 @@
 }
 
 /* returns a uid given a username */
-long xuname2uid(const char *name)
+long FAST_FUNC xuname2uid(const char *name)
 {
 	struct passwd *myuser;
 
@@ -87,8 +87,8 @@
 	return myuser->pw_uid;
 }
 
-unsigned long get_ug_id(const char *s,
-		long (*xname2id)(const char *))
+unsigned long FAST_FUNC get_ug_id(const char *s,
+		long FAST_FUNC (*xname2id)(const char *))
 {
 	unsigned long r;
 
diff --git a/libbb/bb_qsort.c b/libbb/bb_qsort.c
index e8673ab..9773afa 100644
--- a/libbb/bb_qsort.c
+++ b/libbb/bb_qsort.c
@@ -9,12 +9,12 @@
 
 #include "libbb.h"
 
-int bb_pstrcmp(const void *a, const void *b)
+int /* not FAST_FUNC! It is passed to qsort() as the comparator, so it keeps the default calling convention */ bb_pstrcmp(const void *a, const void *b)
 {
 	return strcmp(*(char**)a, *(char**)b);
 }
 
-void qsort_string_vector(char **sv, unsigned count)
+void FAST_FUNC qsort_string_vector(char **sv, unsigned count)
 {
 	qsort(sv, count, sizeof(char*), bb_pstrcmp);
 }
diff --git a/libbb/bb_strtonum.c b/libbb/bb_strtonum.c
index 50ef0ba..525c830 100644
--- a/libbb/bb_strtonum.c
+++ b/libbb/bb_strtonum.c
@@ -48,7 +48,7 @@
 }
 
 
-unsigned long long bb_strtoull(const char *arg, char **endp, int base)
+unsigned long long FAST_FUNC bb_strtoull(const char *arg, char **endp, int base)
 {
 	unsigned long long v;
 	char *endptr;
@@ -63,7 +63,7 @@
 	return handle_errors(v, endp, endptr);
 }
 
-long long bb_strtoll(const char *arg, char **endp, int base)
+long long FAST_FUNC bb_strtoll(const char *arg, char **endp, int base)
 {
 	unsigned long long v;
 	char *endptr;
@@ -75,7 +75,7 @@
 }
 
 #if ULONG_MAX != ULLONG_MAX
-unsigned long bb_strtoul(const char *arg, char **endp, int base)
+unsigned long FAST_FUNC bb_strtoul(const char *arg, char **endp, int base)
 {
 	unsigned long v;
 	char *endptr;
@@ -86,7 +86,7 @@
 	return handle_errors(v, endp, endptr);
 }
 
-long bb_strtol(const char *arg, char **endp, int base)
+long FAST_FUNC bb_strtol(const char *arg, char **endp, int base)
 {
 	long v;
 	char *endptr;
@@ -99,7 +99,7 @@
 #endif
 
 #if UINT_MAX != ULONG_MAX
-unsigned bb_strtou(const char *arg, char **endp, int base)
+unsigned FAST_FUNC bb_strtou(const char *arg, char **endp, int base)
 {
 	unsigned long v;
 	char *endptr;
@@ -111,7 +111,7 @@
 	return handle_errors(v, endp, endptr);
 }
 
-int bb_strtoi(const char *arg, char **endp, int base)
+int FAST_FUNC bb_strtoi(const char *arg, char **endp, int base)
 {
 	long v;
 	char *endptr;
@@ -131,7 +131,7 @@
 
 #include <math.h>  /* just for HUGE_VAL */
 #define NOT_DIGIT(a) (((unsigned char)(a-'0')) > 9)
-double bb_strtod(const char *arg, char **endp)
+double FAST_FUNC bb_strtod(const char *arg, char **endp)
 {
 	double v;
 	char *endptr;
diff --git a/libbb/change_identity.c b/libbb/change_identity.c
index da840bf..619db09 100644
--- a/libbb/change_identity.c
+++ b/libbb/change_identity.c
@@ -31,7 +31,7 @@
 #include "libbb.h"
 
 /* Become the user and group(s) specified by PW.  */
-void change_identity(const struct passwd *pw)
+void FAST_FUNC change_identity(const struct passwd *pw)
 {
 	if (initgroups(pw->pw_name, pw->pw_gid) == -1)
 		bb_perror_msg_and_die("can't set groups");
diff --git a/libbb/chomp.c b/libbb/chomp.c
index 8ffaff5..ed4bf6b 100644
--- a/libbb/chomp.c
+++ b/libbb/chomp.c
@@ -10,7 +10,7 @@
 
 #include "libbb.h"
 
-void chomp(char *s)
+void FAST_FUNC chomp(char *s)
 {
 	char *lc = last_char_is(s, '\n');
 
diff --git a/libbb/compare_string_array.c b/libbb/compare_string_array.c
index ec7f94e..43c59e8 100644
--- a/libbb/compare_string_array.c
+++ b/libbb/compare_string_array.c
@@ -7,7 +7,7 @@
 
 /* returns the array index of the string */
 /* (index of first match is returned, or -1) */
-int index_in_str_array(const char *const string_array[], const char *key)
+int FAST_FUNC index_in_str_array(const char *const string_array[], const char *key)
 {
 	int i;
 
@@ -19,7 +19,7 @@
 	return -1;
 }
 
-int index_in_strings(const char *strings, const char *key)
+int FAST_FUNC index_in_strings(const char *strings, const char *key)
 {
 	int idx = 0;
 
@@ -36,7 +36,7 @@
 /* returns the array index of the string, even if it matches only a beginning */
 /* (index of first match is returned, or -1) */
 #ifdef UNUSED
-int index_in_substr_array(const char *const string_array[], const char *key)
+int FAST_FUNC index_in_substr_array(const char *const string_array[], const char *key)
 {
 	int i;
 	int len = strlen(key);
@@ -51,7 +51,7 @@
 }
 #endif
 
-int index_in_substrings(const char *strings, const char *key)
+int FAST_FUNC index_in_substrings(const char *strings, const char *key)
 {
 	int len = strlen(key);
 
@@ -68,7 +68,7 @@
 	return -1;
 }
 
-const char *nth_string(const char *strings, int n)
+const char* FAST_FUNC nth_string(const char *strings, int n)
 {
 	while (n) {
 		n--;
diff --git a/libbb/concat_path_file.c b/libbb/concat_path_file.c
index dd6909f..fb53354 100644
--- a/libbb/concat_path_file.c
+++ b/libbb/concat_path_file.c
@@ -16,7 +16,7 @@
 
 #include "libbb.h"
 
-char *concat_path_file(const char *path, const char *filename)
+char* FAST_FUNC concat_path_file(const char *path, const char *filename)
 {
 	char *lc;
 
diff --git a/libbb/concat_subpath_file.c b/libbb/concat_subpath_file.c
index 1c00588..313fa63 100644
--- a/libbb/concat_subpath_file.c
+++ b/libbb/concat_subpath_file.c
@@ -15,7 +15,7 @@
 
 #include "libbb.h"
 
-char *concat_subpath_file(const char *path, const char *f)
+char* FAST_FUNC concat_subpath_file(const char *path, const char *f)
 {
 	if (f && DOT_OR_DOTDOT(f))
 		return NULL;
diff --git a/libbb/copy_file.c b/libbb/copy_file.c
index 3b83e12..d804ecc 100644
--- a/libbb/copy_file.c
+++ b/libbb/copy_file.c
@@ -71,7 +71,7 @@
  *  0 copy is made or user answered "no" in interactive mode
  *    (failures to preserve mode/owner/times are not reported in exit code)
  */
-int copy_file(const char *source, const char *dest, int flags)
+int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 {
 	/* This is a recursive function, try to minimize stack usage */
 	/* NB: each struct stat is ~100 bytes */
diff --git a/libbb/copyfd.c b/libbb/copyfd.c
index 08bc6f8..c5f8b5b 100644
--- a/libbb/copyfd.c
+++ b/libbb/copyfd.c
@@ -85,7 +85,7 @@
 
 
 #if 0
-void complain_copyfd_and_die(off_t sz)
+void FAST_FUNC complain_copyfd_and_die(off_t sz)
 {
 	if (sz != -1)
 		bb_error_msg_and_die("short read");
@@ -94,7 +94,7 @@
 }
 #endif
 
-off_t bb_copyfd_size(int fd1, int fd2, off_t size)
+off_t FAST_FUNC bb_copyfd_size(int fd1, int fd2, off_t size)
 {
 	if (size) {
 		return bb_full_fd_action(fd1, fd2, size);
@@ -102,7 +102,7 @@
 	return 0;
 }
 
-void bb_copyfd_exact_size(int fd1, int fd2, off_t size)
+void FAST_FUNC bb_copyfd_exact_size(int fd1, int fd2, off_t size)
 {
 	off_t sz = bb_copyfd_size(fd1, fd2, size);
 	if (sz == size)
@@ -113,7 +113,7 @@
 	xfunc_die();
 }
 
-off_t bb_copyfd_eof(int fd1, int fd2)
+off_t FAST_FUNC bb_copyfd_eof(int fd1, int fd2)
 {
 	return bb_full_fd_action(fd1, fd2, 0);
 }
diff --git a/libbb/correct_password.c b/libbb/correct_password.c
index f47642f..255b048 100644
--- a/libbb/correct_password.c
+++ b/libbb/correct_password.c
@@ -36,7 +36,7 @@
  *
  * NULL pw means "just fake it for login with bad username" */
 
-int correct_password(const struct passwd *pw)
+int FAST_FUNC correct_password(const struct passwd *pw)
 {
 	char *unencrypted, *encrypted;
 	const char *correct;
diff --git a/libbb/crc32.c b/libbb/crc32.c
index acbc458..42079b9 100644
--- a/libbb/crc32.c
+++ b/libbb/crc32.c
@@ -16,7 +16,7 @@
 
 #include "libbb.h"
 
-uint32_t *crc32_filltable(uint32_t *crc_table, int endian)
+uint32_t* FAST_FUNC crc32_filltable(uint32_t *crc_table, int endian)
 {
 	uint32_t polynomial = endian ? 0x04c11db7 : 0xedb88320;
 	uint32_t c;
diff --git a/libbb/create_icmp6_socket.c b/libbb/create_icmp6_socket.c
index a22ac5d..2065517 100644
--- a/libbb/create_icmp6_socket.c
+++ b/libbb/create_icmp6_socket.c
@@ -9,7 +9,7 @@
 #include "libbb.h"
 
 #if ENABLE_FEATURE_IPV6
-int create_icmp6_socket(void)
+int FAST_FUNC create_icmp6_socket(void)
 {
 	int sock;
 #if 0
diff --git a/libbb/create_icmp_socket.c b/libbb/create_icmp_socket.c
index 64beba8..1fa016a 100644
--- a/libbb/create_icmp_socket.c
+++ b/libbb/create_icmp_socket.c
@@ -8,7 +8,7 @@
 
 #include "libbb.h"
 
-int create_icmp_socket(void)
+int FAST_FUNC create_icmp_socket(void)
 {
 	int sock;
 #if 0
diff --git a/libbb/crypt_make_salt.c b/libbb/crypt_make_salt.c
index ebdf024..393eba5 100644
--- a/libbb/crypt_make_salt.c
+++ b/libbb/crypt_make_salt.c
@@ -24,7 +24,7 @@
 	return ('a' - 38 + i);
 }
 
-int crypt_make_salt(char *p, int cnt, int x)
+int FAST_FUNC crypt_make_salt(char *p, int cnt, int x)
 {
 	x += getpid() + time(NULL);
 	do {
diff --git a/libbb/device_open.c b/libbb/device_open.c
index 6907e98..cf8bcf6 100644
--- a/libbb/device_open.c
+++ b/libbb/device_open.c
@@ -10,7 +10,7 @@
 #include "libbb.h"
 
 /* try to open up the specified device */
-int device_open(const char *device, int mode)
+int FAST_FUNC device_open(const char *device, int mode)
 {
 	int m, f, fd;
 
diff --git a/libbb/die_if_bad_username.c b/libbb/die_if_bad_username.c
index 337ac60..602aadc 100644
--- a/libbb/die_if_bad_username.c
+++ b/libbb/die_if_bad_username.c
@@ -16,7 +16,7 @@
  * at the end of the username.
  */
 
-void die_if_bad_username(const char *name)
+void FAST_FUNC die_if_bad_username(const char *name)
 {
 	goto skip; /* 1st char being dash isn't valid */
 	do {
diff --git a/libbb/dump.c b/libbb/dump.c
index 4d6472e..c455952 100644
--- a/libbb/dump.c
+++ b/libbb/dump.c
@@ -33,7 +33,7 @@
 
 static const char lcc[] ALIGN1 = "diouxX";
 
-int bb_dump_size(FS * fs)
+int FAST_FUNC bb_dump_size(FS *fs)
 {
 	FU *fu;
 	int bcnt, cur_size;
@@ -652,7 +652,7 @@
 	}
 }
 
-int bb_dump_dump(char **argv)
+int FAST_FUNC bb_dump_dump(char **argv)
 {
 	FS *tfs;
 
@@ -674,7 +674,7 @@
 	return exitval;
 }
 
-void bb_dump_add(const char *fmt)
+void FAST_FUNC bb_dump_add(const char *fmt)
 {
 	const char *p;
 	char *p1;
diff --git a/libbb/error_msg.c b/libbb/error_msg.c
index 5f53f03..802fd57 100644
--- a/libbb/error_msg.c
+++ b/libbb/error_msg.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_error_msg(const char *s, ...)
+void FAST_FUNC bb_error_msg(const char *s, ...)
 {
 	va_list p;
 
diff --git a/libbb/error_msg_and_die.c b/libbb/error_msg_and_die.c
index addd818..243433b 100644
--- a/libbb/error_msg_and_die.c
+++ b/libbb/error_msg_and_die.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_error_msg_and_die(const char *s, ...)
+void FAST_FUNC bb_error_msg_and_die(const char *s, ...)
 {
 	va_list p;
 
diff --git a/libbb/execable.c b/libbb/execable.c
index 5c2b450..5c7ac16 100644
--- a/libbb/execable.c
+++ b/libbb/execable.c
@@ -13,7 +13,7 @@
  * return 1 if found;
  * return 0 otherwise;
  */
-int execable_file(const char *name)
+int FAST_FUNC execable_file(const char *name)
 {
 	struct stat s;
 	return (!access(name, X_OK) && !stat(name, &s) && S_ISREG(s.st_mode));
@@ -28,7 +28,7 @@
  * return NULL otherwise; (PATHp is undefined)
  * in all cases (*PATHp) contents will be trashed (s/:/NUL/).
  */
-char *find_execable(const char *filename, char **PATHp)
+char* FAST_FUNC find_execable(const char *filename, char **PATHp)
 {
 	char *p, *n;
 
@@ -54,7 +54,7 @@
  * return 1 if found;
  * return 0 otherwise;
  */
-int exists_execable(const char *filename)
+int FAST_FUNC exists_execable(const char *filename)
 {
 	char *path = xstrdup(getenv("PATH"));
 	char *tmp = path;
@@ -70,7 +70,7 @@
 #if ENABLE_FEATURE_PREFER_APPLETS
 /* just like the real execvp, but try to launch an applet named 'file' first
  */
-int bb_execvp(const char *file, char *const argv[])
+int FAST_FUNC bb_execvp(const char *file, char *const argv[])
 {
 	return execvp(find_applet_by_name(file) >= 0 ? bb_busybox_exec_path : file,
 					argv);
diff --git a/libbb/fclose_nonstdin.c b/libbb/fclose_nonstdin.c
index 768ee94..6f3f373 100644
--- a/libbb/fclose_nonstdin.c
+++ b/libbb/fclose_nonstdin.c
@@ -14,7 +14,7 @@
 
 #include "libbb.h"
 
-int fclose_if_not_stdin(FILE *f)
+int FAST_FUNC fclose_if_not_stdin(FILE *f)
 {
 	/* Some more paranoid applets want ferror() check too */
 	int r = ferror(f); /* NB: does NOT set errno! */
diff --git a/libbb/fflush_stdout_and_exit.c b/libbb/fflush_stdout_and_exit.c
index 9f05500..742fb9f 100644
--- a/libbb/fflush_stdout_and_exit.c
+++ b/libbb/fflush_stdout_and_exit.c
@@ -13,7 +13,7 @@
 
 #include "libbb.h"
 
-void fflush_stdout_and_exit(int retval)
+void FAST_FUNC fflush_stdout_and_exit(int retval)
 {
 	if (fflush(stdout))
 		bb_perror_msg_and_die(bb_msg_standard_output);
diff --git a/libbb/fgets_str.c b/libbb/fgets_str.c
index d6fada1..8026a15 100644
--- a/libbb/fgets_str.c
+++ b/libbb/fgets_str.c
@@ -55,12 +55,12 @@
  * including terminating string.
  * Non-terminated string can be returned if EOF is reached.
  * Return NULL if EOF is reached immediately.  */
-char *xmalloc_fgets_str(FILE *file, const char *terminating_string)
+char* FAST_FUNC xmalloc_fgets_str(FILE *file, const char *terminating_string)
 {
 	return xmalloc_fgets_internal(file, terminating_string, 0);
 }
 
-char *xmalloc_fgetline_str(FILE *file, const char *terminating_string)
+char* FAST_FUNC xmalloc_fgetline_str(FILE *file, const char *terminating_string)
 {
 	return xmalloc_fgets_internal(file, terminating_string, 1);
 }
diff --git a/libbb/find_mount_point.c b/libbb/find_mount_point.c
index cb00b98..4cd6b16 100644
--- a/libbb/find_mount_point.c
+++ b/libbb/find_mount_point.c
@@ -17,7 +17,7 @@
  * Given any other file (or directory), find the mount table entry for its
  * filesystem.
  */
-struct mntent *find_mount_point(const char *name, const char *table)
+struct mntent* FAST_FUNC find_mount_point(const char *name, const char *table)
 {
 	struct stat s;
 	dev_t mountDevice;
diff --git a/libbb/find_pid_by_name.c b/libbb/find_pid_by_name.c
index 8dcdb13..ae2f116 100644
--- a/libbb/find_pid_by_name.c
+++ b/libbb/find_pid_by_name.c
@@ -48,7 +48,7 @@
  *  Returns a list of all matching PIDs
  *  It is the caller's duty to free the returned pidlist.
  */
-pid_t* find_pid_by_name(const char* procName)
+pid_t* FAST_FUNC find_pid_by_name(const char* procName)
 {
 	pid_t* pidList;
 	int i = 0;
@@ -74,7 +74,7 @@
 	return pidList;
 }
 
-pid_t *pidlist_reverse(pid_t *pidList)
+pid_t* FAST_FUNC pidlist_reverse(pid_t *pidList)
 {
 	int i = 0;
 	while (pidList[i])
diff --git a/libbb/find_root_device.c b/libbb/find_root_device.c
index 9779f7e..ca46bf5 100644
--- a/libbb/find_root_device.c
+++ b/libbb/find_root_device.c
@@ -62,7 +62,7 @@
 	return retpath;
 }
 
-char *find_block_device(const char *path)
+char* FAST_FUNC find_block_device(const char *path)
 {
 	struct arena a;
 
diff --git a/libbb/full_write.c b/libbb/full_write.c
index 7503c8b..f353b7d 100644
--- a/libbb/full_write.c
+++ b/libbb/full_write.c
@@ -14,7 +14,7 @@
  * This does multiple writes as necessary.
  * Returns the amount written, or -1 on an error.
  */
-ssize_t full_write(int fd, const void *buf, size_t len)
+ssize_t FAST_FUNC full_write(int fd, const void *buf, size_t len)
 {
 	ssize_t cc;
 	ssize_t total;
diff --git a/libbb/get_console.c b/libbb/get_console.c
index 36fe204..d042afa 100644
--- a/libbb/get_console.c
+++ b/libbb/get_console.c
@@ -38,7 +38,7 @@
  * if someone else used X (which does a chown on /dev/console).
  */
 
-int get_console_fd(void)
+int FAST_FUNC get_console_fd(void)
 {
 	static const char *const console_names[] = {
 		DEV_CONSOLE, CURRENT_VC, CURRENT_TTY
@@ -75,7 +75,7 @@
 	VT_WAITACTIVE = 0x5607  /* wait for vt active */
 };
 
-void console_make_active(int fd, const int vt_num)
+void FAST_FUNC console_make_active(int fd, const int vt_num)
 {
 	xioctl(fd, VT_ACTIVATE, (void *)(ptrdiff_t)vt_num);
 	xioctl(fd, VT_WAITACTIVE, (void *)(ptrdiff_t)vt_num);
diff --git a/libbb/get_last_path_component.c b/libbb/get_last_path_component.c
index 0f60215..7c99116 100644
--- a/libbb/get_last_path_component.c
+++ b/libbb/get_last_path_component.c
@@ -14,7 +14,7 @@
  * "abc/def"  -> "def"
  * "abc/def/" -> ""
  */
-char *bb_get_last_path_component_nostrip(const char *path)
+char* FAST_FUNC bb_get_last_path_component_nostrip(const char *path)
 {
 	char *slash = strrchr(path, '/');
 
@@ -30,7 +30,7 @@
  * "abc/def"  -> "def"
  * "abc/def/" -> "def" !!
  */
-char *bb_get_last_path_component_strip(char *path)
+char* FAST_FUNC bb_get_last_path_component_strip(char *path)
 {
 	char *slash = last_char_is(path, '/');
 
diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
index b88872d..66ea5a1 100644
--- a/libbb/get_line_from_file.c
+++ b/libbb/get_line_from_file.c
@@ -16,7 +16,7 @@
  * must be free'ed by the caller.  If end is NULL '\n' isn't considered
  * end of line.  If end isn't NULL, length of the chunk read is stored in it.
  * Return NULL if EOF/error */
-char *bb_get_chunk_from_file(FILE *file, int *end)
+char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end)
 {
 	int ch;
 	int idx = 0;
@@ -49,7 +49,7 @@
 }
 
 /* Get line, including trailing \n if any */
-char *xmalloc_fgets(FILE *file)
+char* FAST_FUNC xmalloc_fgets(FILE *file)
 {
 	int i;
 
@@ -57,7 +57,7 @@
 }
 
 /* Get line.  Remove trailing \n */
-char *xmalloc_fgetline(FILE *file)
+char* FAST_FUNC xmalloc_fgetline(FILE *file)
 {
 	int i;
 	char *c = bb_get_chunk_from_file(file, &i);
diff --git a/libbb/getopt32.c b/libbb/getopt32.c
index 86c3348..9dba44d 100644
--- a/libbb/getopt32.c
+++ b/libbb/getopt32.c
@@ -316,7 +316,7 @@
 
 uint32_t option_mask32;
 
-uint32_t
+uint32_t FAST_FUNC
 getopt32(char **argv, const char *applet_opts, ...)
 {
 	int argc;
diff --git a/libbb/getpty.c b/libbb/getpty.c
index d43fb82..bc143c2 100644
--- a/libbb/getpty.c
+++ b/libbb/getpty.c
@@ -10,7 +10,7 @@
 
 #define DEBUG 0
 
-int xgetpty(char *line)
+int FAST_FUNC xgetpty(char *line)
 {
 	int p;
 #if ENABLE_FEATURE_DEVPTS
diff --git a/libbb/herror_msg.c b/libbb/herror_msg.c
index 264690b..7e4f640 100644
--- a/libbb/herror_msg.c
+++ b/libbb/herror_msg.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_herror_msg(const char *s, ...)
+void FAST_FUNC bb_herror_msg(const char *s, ...)
 {
 	va_list p;
 
diff --git a/libbb/herror_msg_and_die.c b/libbb/herror_msg_and_die.c
index 894c80f..230fe64 100644
--- a/libbb/herror_msg_and_die.c
+++ b/libbb/herror_msg_and_die.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_herror_msg_and_die(const char *s, ...)
+void FAST_FUNC bb_herror_msg_and_die(const char *s, ...)
 {
 	va_list p;
 
diff --git a/libbb/human_readable.c b/libbb/human_readable.c
index d60ef61..dad26ed 100644
--- a/libbb/human_readable.c
+++ b/libbb/human_readable.c
@@ -28,7 +28,7 @@
 
 #include "libbb.h"
 
-const char *make_human_readable_str(unsigned long long size,
+const char* FAST_FUNC make_human_readable_str(unsigned long long size,
 	unsigned long block_size, unsigned long display_unit)
 {
 	/* The code will adjust for additional (appended) units */
diff --git a/libbb/inet_common.c b/libbb/inet_common.c
index 9c4f496..3a20b4a 100644
--- a/libbb/inet_common.c
+++ b/libbb/inet_common.c
@@ -11,7 +11,7 @@
 #include "libbb.h"
 #include "inet_common.h"
 
-int INET_resolve(const char *name, struct sockaddr_in *s_in, int hostfirst)
+int FAST_FUNC INET_resolve(const char *name, struct sockaddr_in *s_in, int hostfirst)
 {
 	struct hostent *hp;
 #if ENABLE_FEATURE_ETC_NETWORKS
@@ -81,7 +81,7 @@
  *          & 0x4000: host instead of net,
  *          & 0x0fff: don't resolve
  */
-char *INET_rresolve(struct sockaddr_in *s_in, int numeric, uint32_t netmask)
+char* FAST_FUNC INET_rresolve(struct sockaddr_in *s_in, int numeric, uint32_t netmask)
 {
 	/* addr-to-name cache */
 	struct addr {
@@ -165,7 +165,7 @@
 
 #if ENABLE_FEATURE_IPV6
 
-int INET6_resolve(const char *name, struct sockaddr_in6 *sin6)
+int FAST_FUNC INET6_resolve(const char *name, struct sockaddr_in6 *sin6)
 {
 	struct addrinfo req, *ai;
 	int s;
@@ -189,7 +189,7 @@
 #endif
 
 
-char *INET6_rresolve(struct sockaddr_in6 *sin6, int numeric)
+char* FAST_FUNC INET6_rresolve(struct sockaddr_in6 *sin6, int numeric)
 {
 	char name[128];
 	int s;
diff --git a/libbb/info_msg.c b/libbb/info_msg.c
index 3231bc8..ffef05e 100644
--- a/libbb/info_msg.c
+++ b/libbb/info_msg.c
@@ -10,7 +10,7 @@
 #include "libbb.h"
 #include <syslog.h>
 
-void bb_info_msg(const char *s, ...)
+void FAST_FUNC bb_info_msg(const char *s, ...)
 {
 	va_list p;
 	/* va_copy is used because it is not portable
diff --git a/libbb/inode_hash.c b/libbb/inode_hash.c
index 9cca74b..4469671 100644
--- a/libbb/inode_hash.c
+++ b/libbb/inode_hash.c
@@ -27,7 +27,7 @@
  * Return name if statbuf->st_ino && statbuf->st_dev are recorded in
  * ino_dev_hashtable, else return NULL
  */
-char *is_in_ino_dev_hashtable(const struct stat *statbuf)
+char* FAST_FUNC is_in_ino_dev_hashtable(const struct stat *statbuf)
 {
 	ino_dev_hashtable_bucket_t *bucket;
 
@@ -47,7 +47,7 @@
 }
 
 /* Add statbuf to statbuf hash table */
-void add_to_ino_dev_hashtable(const struct stat *statbuf, const char *name)
+void FAST_FUNC add_to_ino_dev_hashtable(const struct stat *statbuf, const char *name)
 {
 	int i;
 	ino_dev_hashtable_bucket_t *bucket;
@@ -69,7 +69,7 @@
 
 #if ENABLE_FEATURE_CLEAN_UP
 /* Clear statbuf hash table */
-void reset_ino_dev_hashtable(void)
+void FAST_FUNC reset_ino_dev_hashtable(void)
 {
 	int i;
 	ino_dev_hashtable_bucket_t *bucket;
diff --git a/libbb/isdirectory.c b/libbb/isdirectory.c
index 1d2477f..28ed3ec 100644
--- a/libbb/isdirectory.c
+++ b/libbb/isdirectory.c
@@ -15,7 +15,7 @@
  * Return TRUE if fileName is a directory.
  * Nonexistent files return FALSE.
  */
-int is_directory(const char *fileName, const int followLinks, struct stat *statBuf)
+int FAST_FUNC is_directory(const char *fileName, const int followLinks, struct stat *statBuf)
 {
 	int status;
 	struct stat astatBuf;
diff --git a/libbb/kernel_version.c b/libbb/kernel_version.c
index 50b82ae..8b9c4ec 100644
--- a/libbb/kernel_version.c
+++ b/libbb/kernel_version.c
@@ -16,7 +16,7 @@
  *
  *     if (get_linux_version_code() > KERNEL_VERSION(2,2,11)) { <stuff> }
  */
-int get_linux_version_code(void)
+int FAST_FUNC get_linux_version_code(void)
 {
 	struct utsname name;
 	char *s;
diff --git a/libbb/last_char_is.c b/libbb/last_char_is.c
index aaa85dd..b059256 100644
--- a/libbb/last_char_is.c
+++ b/libbb/last_char_is.c
@@ -12,7 +12,7 @@
 /* Find out if the last character of a string matches the one given.
  * Don't underrun the buffer if the string length is 0.
  */
-char* last_char_is(const char *s, int c)
+char* FAST_FUNC last_char_is(const char *s, int c)
 {
 	if (s && *s) {
 		size_t sz = strlen(s) - 1;
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index fb595c0..42f372f 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1352,7 +1352,7 @@
  * 0  on ctrl-C (the line entered is still returned in 'command'),
  * >0 length of input string, including terminating '\n'
  */
-int read_line_input(const char *prompt, char *command, int maxsize, line_input_t *st)
+int FAST_FUNC read_line_input(const char *prompt, char *command, int maxsize, line_input_t *st)
 {
 #if ENABLE_FEATURE_TAB_COMPLETION
 	smallint lastWasTab = FALSE;
@@ -1845,7 +1845,7 @@
 	return command_len;
 }
 
-line_input_t *new_line_input_t(int flags)
+line_input_t* FAST_FUNC new_line_input_t(int flags)
 {
 	line_input_t *n = xzalloc(sizeof(*n));
 	n->flags = flags;
@@ -1855,7 +1855,7 @@
 #else
 
 #undef read_line_input
-int read_line_input(const char* prompt, char* command, int maxsize)
+int FAST_FUNC read_line_input(const char* prompt, char* command, int maxsize)
 {
 	fputs(prompt, stdout);
 	fflush(stdout);
diff --git a/libbb/llist.c b/libbb/llist.c
index 4b3971b..094c652 100644
--- a/libbb/llist.c
+++ b/libbb/llist.c
@@ -13,7 +13,7 @@
 #include "libbb.h"
 
 /* Add data to the start of the linked list.  */
-void llist_add_to(llist_t **old_head, void *data)
+void FAST_FUNC llist_add_to(llist_t **old_head, void *data)
 {
 	llist_t *new_head = xmalloc(sizeof(llist_t));
 
@@ -23,7 +23,7 @@
 }
 
 /* Add data to the end of the linked list.  */
-void llist_add_to_end(llist_t **list_head, void *data)
+void FAST_FUNC llist_add_to_end(llist_t **list_head, void *data)
 {
 	llist_t *new_item = xmalloc(sizeof(llist_t));
 
@@ -42,7 +42,7 @@
 }
 
 /* Remove first element from the list and return it */
-void *llist_pop(llist_t **head)
+void* FAST_FUNC llist_pop(llist_t **head)
 {
 	void *data, *next;
 
@@ -58,7 +58,7 @@
 }
 
 /* Unlink arbitrary given element from the list */
-void llist_unlink(llist_t **head, llist_t *elm)
+void FAST_FUNC llist_unlink(llist_t **head, llist_t *elm)
 {
 	llist_t *crt;
 
@@ -80,7 +80,7 @@
 
 /* Recursively free all elements in the linked list.  If freeit != NULL
  * call it on each datum in the list */
-void llist_free(llist_t *elm, void (*freeit) (void *data))
+void FAST_FUNC llist_free(llist_t *elm, void (*freeit) (void *data))
 {
 	while (elm) {
 		void *data = llist_pop(&elm);
@@ -92,7 +92,7 @@
 
 #ifdef UNUSED
 /* Reverse list order. */
-llist_t *llist_rev(llist_t *list)
+llist_t* FAST_FUNC llist_rev(llist_t *list)
 {
 	llist_t *rev = NULL;
 
diff --git a/libbb/login.c b/libbb/login.c
index a5be2c8..346ff13 100644
--- a/libbb/login.c
+++ b/libbb/login.c
@@ -18,7 +18,7 @@
 static const char fmtstr_d[] ALIGN1 = "%A, %d %B %Y";
 static const char fmtstr_t[] ALIGN1 = "%H:%M:%S";
 
-void print_login_issue(const char *issue_file, const char *tty)
+void FAST_FUNC print_login_issue(const char *issue_file, const char *tty)
 {
 	FILE *fd;
 	int c;
@@ -86,7 +86,7 @@
 	fflush(stdout);
 }
 
-void print_login_prompt(void)
+void FAST_FUNC print_login_prompt(void)
 {
 	char *hostname = safe_gethostname();
 
@@ -112,7 +112,7 @@
 	"LD_NOWARN" "\0"
 	"LD_KEEPDIR" "\0";
 
-int sanitize_env_if_suid(void)
+int FAST_FUNC sanitize_env_if_suid(void)
 {
 	const char *p;
 
diff --git a/libbb/loop.c b/libbb/loop.c
index 6934b7a..7d2b420 100644
--- a/libbb/loop.c
+++ b/libbb/loop.c
@@ -44,7 +44,7 @@
 } bb_loop_info;
 #endif
 
-char *query_loop(const char *device)
+char* FAST_FUNC query_loop(const char *device)
 {
 	int fd;
 	bb_loop_info loopinfo;
@@ -61,7 +61,7 @@
 }
 
 
-int del_loop(const char *device)
+int FAST_FUNC del_loop(const char *device)
 {
 	int fd, rc;
 
@@ -79,7 +79,7 @@
    search will re-use an existing loop device already bound to that
    file/offset if it finds one.
  */
-int set_loop(char **device, const char *file, unsigned long long offset)
+int FAST_FUNC set_loop(char **device, const char *file, unsigned long long offset)
 {
 	char dev[LOOP_NAMESIZE];
 	char *try;
diff --git a/libbb/make_directory.c b/libbb/make_directory.c
index 8841c95..5c71aff 100644
--- a/libbb/make_directory.c
+++ b/libbb/make_directory.c
@@ -26,7 +26,7 @@
 
 /* This function is used from NOFORK applets. It must not allocate anything */
 
-int bb_make_directory (char *path, long mode, int flags)
+int FAST_FUNC bb_make_directory(char *path, long mode, int flags)
 {
 	mode_t mask;
 	const char *fail_msg;
diff --git a/libbb/match_fstype.c b/libbb/match_fstype.c
index bd4dbb0..99e2767 100644
--- a/libbb/match_fstype.c
+++ b/libbb/match_fstype.c
@@ -12,7 +12,7 @@
 
 #include "libbb.h"
 
-int match_fstype(const struct mntent *mt, const char *fstype)
+int FAST_FUNC match_fstype(const struct mntent *mt, const char *fstype)
 {
 	int no = 0;
 	int len;
diff --git a/libbb/md5.c b/libbb/md5.c
index 8d4b9fe..4ab06eb 100644
--- a/libbb/md5.c
+++ b/libbb/md5.c
@@ -24,7 +24,7 @@
 /* Initialize structure containing state of computation.
  * (RFC 1321, 3.3: Step 3)
  */
-void md5_begin(md5_ctx_t *ctx)
+void FAST_FUNC md5_begin(md5_ctx_t *ctx)
 {
 	ctx->A = 0x67452301;
 	ctx->B = 0xefcdab89;
@@ -371,7 +371,7 @@
  * This function's internal buffer remembers previous data until it has 64
  * bytes worth to pass on.  Call md5_end() to flush this buffer. */
 
-void md5_hash(const void *buffer, size_t len, md5_ctx_t *ctx)
+void FAST_FUNC md5_hash(const void *buffer, size_t len, md5_ctx_t *ctx)
 {
 	char *buf=(char *)buffer;
 
@@ -410,7 +410,7 @@
  * IMPORTANT: On some systems it is required that RESBUF is correctly
  * aligned for a 32 bits value.
  */
-void *md5_end(void *resbuf, md5_ctx_t *ctx)
+void* FAST_FUNC md5_end(void *resbuf, md5_ctx_t *ctx)
 {
 	char *buf = ctx->buffer;
 	int i;
diff --git a/libbb/mode_string.c b/libbb/mode_string.c
index d17cc4a..b9975f4 100644
--- a/libbb/mode_string.c
+++ b/libbb/mode_string.c
@@ -51,7 +51,7 @@
 /*                                  0123456789abcdef */
 static const char mode_chars[7] ALIGN1 = "rwxSTst";
 
-const char *bb_mode_string(mode_t mode)
+const char* FAST_FUNC bb_mode_string(mode_t mode)
 {
 	static char buf[12];
 	char *p = buf;
@@ -91,7 +91,7 @@
 /*                                  0123456789abcdef */
 static const char mode_chars[7] = "rwxSTst";
 
-const char *bb_mode_string(mode_t mode)
+const char* FAST_FUNC bb_mode_string(mode_t mode)
 {
 	static char buf[12];
 	char *p = buf;
diff --git a/libbb/mtab.c b/libbb/mtab.c
index 18386ef..57654a6 100644
--- a/libbb/mtab.c
+++ b/libbb/mtab.c
@@ -11,7 +11,7 @@
 #include "libbb.h"
 
 #if ENABLE_FEATURE_MTAB_SUPPORT
-void erase_mtab(const char *name)
+void FAST_FUNC erase_mtab(const char *name)
 {
 	struct mntent *entries = NULL;
 	int i, count = 0;
diff --git a/libbb/obscure.c b/libbb/obscure.c
index 1841b27..19b8752 100644
--- a/libbb/obscure.c
+++ b/libbb/obscure.c
@@ -157,7 +157,7 @@
 	return NULL;
 }
 
-int obscure(const char *old, const char *newval, const struct passwd *pw)
+int FAST_FUNC obscure(const char *old, const char *newval, const struct passwd *pw)
 {
 	const char *msg;
 
diff --git a/libbb/parse_mode.c b/libbb/parse_mode.c
index fd54900..40105dd 100644
--- a/libbb/parse_mode.c
+++ b/libbb/parse_mode.c
@@ -15,7 +15,7 @@
 
 #define FILEMODEBITS (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
 
-int bb_parse_mode(const char *s, mode_t *current_mode)
+int FAST_FUNC bb_parse_mode(const char *s, mode_t *current_mode)
 {
 	static const mode_t who_mask[] = {
 		S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO, /* a */
diff --git a/libbb/perror_msg.c b/libbb/perror_msg.c
index af9ff59..6c8e1b5 100644
--- a/libbb/perror_msg.c
+++ b/libbb/perror_msg.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_perror_msg(const char *s, ...)
+void FAST_FUNC bb_perror_msg(const char *s, ...)
 {
 	va_list p;
 
@@ -19,7 +19,7 @@
 	va_end(p);
 }
 
-void bb_simple_perror_msg(const char *s)
+void FAST_FUNC bb_simple_perror_msg(const char *s)
 {
 	bb_perror_msg("%s", s);
 }
diff --git a/libbb/perror_msg_and_die.c b/libbb/perror_msg_and_die.c
index 7b50073..15615fa 100644
--- a/libbb/perror_msg_and_die.c
+++ b/libbb/perror_msg_and_die.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_perror_msg_and_die(const char *s, ...)
+void FAST_FUNC bb_perror_msg_and_die(const char *s, ...)
 {
 	va_list p;
 
@@ -20,7 +20,7 @@
 	xfunc_die();
 }
 
-void bb_simple_perror_msg_and_die(const char *s)
+void FAST_FUNC bb_simple_perror_msg_and_die(const char *s)
 {
 	bb_perror_msg_and_die("%s", s);
 }
diff --git a/libbb/perror_nomsg.c b/libbb/perror_nomsg.c
index 62ce888..a157caa 100644
--- a/libbb/perror_nomsg.c
+++ b/libbb/perror_nomsg.c
@@ -11,11 +11,12 @@
  * modified definition without "attribute (format)"
  * instead of including libbb.h */
 //#include "libbb.h"
-extern void bb_perror_msg(const char *s, ...);
+#include "platform.h"
+extern void bb_perror_msg(const char *s, ...) FAST_FUNC;
 
 /* suppress gcc "no previous prototype" warning */
-void bb_perror_nomsg(void);
-void bb_perror_nomsg(void)
+void FAST_FUNC bb_perror_nomsg(void);
+void FAST_FUNC bb_perror_nomsg(void)
 {
 	bb_perror_msg(0);
 }
diff --git a/libbb/perror_nomsg_and_die.c b/libbb/perror_nomsg_and_die.c
index dab3df6..d56e05d 100644
--- a/libbb/perror_nomsg_and_die.c
+++ b/libbb/perror_nomsg_and_die.c
@@ -11,11 +11,12 @@
  * modified definition without "attribute (format)"
  * instead of including libbb.h */
 //#include "libbb.h"
-extern void bb_perror_msg_and_die(const char *s, ...);
+#include "platform.h"
+extern void bb_perror_msg_and_die(const char *s, ...) FAST_FUNC;
 
 /* suppress gcc "no previous prototype" warning */
-void bb_perror_nomsg_and_die(void);
-void bb_perror_nomsg_and_die(void)
+void FAST_FUNC bb_perror_nomsg_and_die(void);
+void FAST_FUNC bb_perror_nomsg_and_die(void)
 {
 	bb_perror_msg_and_die(0);
 }
diff --git a/libbb/pidfile.c b/libbb/pidfile.c
index cafa789..7b8fee2 100644
--- a/libbb/pidfile.c
+++ b/libbb/pidfile.c
@@ -13,7 +13,7 @@
 
 smallint wrote_pidfile;
 
-void write_pidfile(const char *path)
+void FAST_FUNC write_pidfile(const char *path)
 {
 	int pid_fd;
 	char *end;
diff --git a/libbb/print_flags.c b/libbb/print_flags.c
index a1dcc01..a8c4f9c 100644
--- a/libbb/print_flags.c
+++ b/libbb/print_flags.c
@@ -9,7 +9,7 @@
 #include <libbb.h>
 
 /* returns a set with the flags not printed */
-int print_flags_separated(const int *masks, const char *labels, int flags, const char *separator)
+int FAST_FUNC print_flags_separated(const int *masks, const char *labels, int flags, const char *separator)
 {
 	const char *need_separator = NULL;
 	while (*labels) {
@@ -19,14 +19,14 @@
 				labels);
 			need_separator = separator;
 			flags &= ~ *masks;
 		}
 		masks++;
 		labels += strlen(labels) + 1;
 	}
 	return flags;
 }
 
-int print_flags(const masks_labels_t *ml, int flags)
+int FAST_FUNC print_flags(const masks_labels_t *ml, int flags)
 {
 	return print_flags_separated(ml->masks, ml->labels, flags, NULL);
 }
diff --git a/libbb/printable.c b/libbb/printable.c
index 676758a..ae93359 100644
--- a/libbb/printable.c
+++ b/libbb/printable.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void fputc_printable(int ch, FILE *file)
+void FAST_FUNC fputc_printable(int ch, FILE *file)
 {
 	if ((ch & (0x80 + PRINTABLE_META)) == (0x80 + PRINTABLE_META)) {
 		fputs("M-", file);
diff --git a/libbb/process_escape_sequence.c b/libbb/process_escape_sequence.c
index 1cadbd3..4d03bd6 100644
--- a/libbb/process_escape_sequence.c
+++ b/libbb/process_escape_sequence.c
@@ -16,7 +16,7 @@
 #undef _tolower
 #define _tolower(X) ((X)|((char) 0x20))
 
-char bb_process_escape_sequence(const char **ptr)
+char FAST_FUNC bb_process_escape_sequence(const char **ptr)
 {
 	static const char charmap[] ALIGN1 = {
 		'a',  'b',  'f',  'n',  'r',  't',  'v',  '\\', 0,
diff --git a/libbb/procps.c b/libbb/procps.c
index 8946917..7d49d83 100644
--- a/libbb/procps.c
+++ b/libbb/procps.c
@@ -30,7 +30,7 @@
 	cp->cache = NULL;
 	cp->size = 0;
 }
-void clear_username_cache(void)
+void FAST_FUNC clear_username_cache(void)
 {
 	clear_cache(&username);
 	clear_cache(&groupname);
@@ -52,7 +52,7 @@
 }
 #endif
 
-typedef char* ug_func(char *name, int bufsize, long uid);
+typedef char* FAST_FUNC ug_func(char *name, int bufsize, long uid);
 static char* get_cached(cache_t *cp, unsigned id, ug_func* fp)
 {
 	int i;
@@ -66,11 +66,11 @@
 	fp(cp->cache[i].name, sizeof(cp->cache[i].name), id);
 	return cp->cache[i].name;
 }
-const char* get_cached_username(uid_t uid)
+const char* FAST_FUNC get_cached_username(uid_t uid)
 {
 	return get_cached(&username, uid, bb_getpwuid);
 }
-const char* get_cached_groupname(gid_t gid)
+const char* FAST_FUNC get_cached_groupname(gid_t gid)
 {
 	return get_cached(&groupname, gid, bb_getgrgid);
 }
@@ -78,7 +78,7 @@
 
 #define PROCPS_BUFSIZE 1024
 
-static int read_to_buf(const char *filename, void *buf)
+static int FAST_FUNC read_to_buf(const char *filename, void *buf)
 {
 	int fd;
 	/* open_read_close() would do two reads, checking for EOF.
@@ -93,7 +93,7 @@
 	return ret;
 }
 
-static procps_status_t *alloc_procps_scan(void)
+static procps_status_t* FAST_FUNC alloc_procps_scan(void)
 {
 	unsigned n = getpagesize();
 	procps_status_t* sp = xzalloc(sizeof(procps_status_t));
@@ -107,7 +107,7 @@
 	return sp;
 }
 
-void free_procps_scan(procps_status_t* sp)
+void FAST_FUNC free_procps_scan(procps_status_t* sp)
 {
 	closedir(sp->dir);
 	free(sp->argv0);
@@ -163,7 +163,7 @@
 #endif
 
 void BUG_comm_size(void);
-procps_status_t *procps_scan(procps_status_t* sp, int flags)
+procps_status_t* FAST_FUNC procps_scan(procps_status_t* sp, int flags)
 {
 	struct dirent *entry;
 	char buf[PROCPS_BUFSIZE];
@@ -402,7 +402,7 @@
 	return sp;
 }
 
-void read_cmdline(char *buf, int col, unsigned pid, const char *comm)
+void FAST_FUNC read_cmdline(char *buf, int col, unsigned pid, const char *comm)
 {
 	ssize_t sz;
 	char filename[sizeof("/proc//cmdline") + sizeof(int)*3];
diff --git a/libbb/pw_encrypt.c b/libbb/pw_encrypt.c
index 7363186..469e71f 100644
--- a/libbb/pw_encrypt.c
+++ b/libbb/pw_encrypt.c
@@ -50,7 +50,7 @@
 	des_ctx = NULL;
 }
 
-char *pw_encrypt(const char *clear, const char *salt, int cleanup)
+char* FAST_FUNC pw_encrypt(const char *clear, const char *salt, int cleanup)
 {
 	char *encrypted;
 
@@ -70,7 +70,7 @@
 
 #else /* if !ENABLE_USE_BB_CRYPT */
 
-char *pw_encrypt(const char *clear, const char *salt, int cleanup)
+char* FAST_FUNC pw_encrypt(const char *clear, const char *salt, int cleanup)
 {
 #if 0 /* was CONFIG_FEATURE_SHA1_PASSWORDS, but there is no such thing??? */
 	if (strncmp(salt, "$2$", 3) == 0) {
diff --git a/libbb/read.c b/libbb/read.c
index fa9874d..7b80412 100644
--- a/libbb/read.c
+++ b/libbb/read.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-ssize_t safe_read(int fd, void *buf, size_t count)
+ssize_t FAST_FUNC safe_read(int fd, void *buf, size_t count)
 {
 	ssize_t n;
 
@@ -56,7 +56,7 @@
  * which detects EAGAIN and uses poll() to wait on the fd.
  * Thankfully, poll() doesn't care about O_NONBLOCK flag.
  */
-ssize_t nonblock_safe_read(int fd, void *buf, size_t count)
+ssize_t FAST_FUNC nonblock_safe_read(int fd, void *buf, size_t count)
 {
 	struct pollfd pfd[1];
 	ssize_t n;
@@ -78,7 +78,7 @@
  * Returns the amount read, or -1 on an error.
  * A short read is returned on an end of file.
  */
-ssize_t full_read(int fd, void *buf, size_t len)
+ssize_t FAST_FUNC full_read(int fd, void *buf, size_t len)
 {
 	ssize_t cc;
 	ssize_t total;
@@ -107,7 +107,7 @@
 }
 
 // Die with an error message if we can't read the entire buffer.
-void xread(int fd, void *buf, size_t count)
+void FAST_FUNC xread(int fd, void *buf, size_t count)
 {
 	if (count) {
 		ssize_t size = full_read(fd, buf, count);
@@ -117,7 +117,7 @@
 }
 
 // Die with an error message if we can't read one character.
-unsigned char xread_char(int fd)
+unsigned char FAST_FUNC xread_char(int fd)
 {
 	char tmp;
 	xread(fd, &tmp, 1);
@@ -125,7 +125,7 @@
 }
 
 // Read one line a-la fgets. Works only on seekable streams
-char *reads(int fd, char *buffer, size_t size)
+char* FAST_FUNC reads(int fd, char *buffer, size_t size)
 {
 	char *p;
 
@@ -152,7 +152,7 @@
 // Reads one line a-la fgets (but doesn't save terminating '\n').
 // Reads byte-by-byte. Useful when it is important to not read ahead.
 // Bytes are appended to pfx (which must be malloced, or NULL).
-char *xmalloc_reads(int fd, char *buf, size_t *maxsz_p)
+char* FAST_FUNC xmalloc_reads(int fd, char *buf, size_t *maxsz_p)
 {
 	char *p;
 	size_t sz = buf ? strlen(buf) : 0;
@@ -185,7 +185,7 @@
 	return xrealloc(buf, p - buf);
 }
 
-ssize_t read_close(int fd, void *buf, size_t size)
+ssize_t FAST_FUNC read_close(int fd, void *buf, size_t size)
 {
 	/*int e;*/
 	size = full_read(fd, buf, size);
@@ -195,7 +195,7 @@
 	return size;
 }
 
-ssize_t open_read_close(const char *filename, void *buf, size_t size)
+ssize_t FAST_FUNC open_read_close(const char *filename, void *buf, size_t size)
 {
 	int fd = open(filename, O_RDONLY);
 	if (fd < 0)
@@ -205,7 +205,7 @@
 
 // Read (potentially big) files in one go. File size is estimated
 // by stat.
-void *xmalloc_open_read_close(const char *filename, size_t *sizep)
+void* FAST_FUNC xmalloc_open_read_close(const char *filename, size_t *sizep)
 {
 	char *buf;
 	size_t size;
@@ -247,7 +247,7 @@
 
 // Read (potentially big) files in one go. File size is estimated by
 // lseek to end.
-void *xmalloc_open_read_close(const char *filename, size_t *sizep)
+void* FAST_FUNC xmalloc_open_read_close(const char *filename, size_t *sizep)
 {
 	char *buf;
 	size_t size;
@@ -284,7 +284,7 @@
 }
 #endif
 
-void *xmalloc_xopen_read_close(const char *filename, size_t *sizep)
+void* FAST_FUNC xmalloc_xopen_read_close(const char *filename, size_t *sizep)
 {
 	void *buf = xmalloc_open_read_close(filename, sizep);
 	if (!buf)
diff --git a/libbb/recursive_action.c b/libbb/recursive_action.c
index fe9ba2e..9b6951f 100644
--- a/libbb/recursive_action.c
+++ b/libbb/recursive_action.c
@@ -22,7 +22,7 @@
  * is so stinking huge.
  */
 
-static int true_action(const char *fileName ATTRIBUTE_UNUSED,
+static int FAST_FUNC true_action(const char *fileName ATTRIBUTE_UNUSED,
 		struct stat *statbuf ATTRIBUTE_UNUSED,
 		void* userData ATTRIBUTE_UNUSED,
 		int depth ATTRIBUTE_UNUSED)
@@ -53,10 +53,10 @@
  * 1: stat(statbuf). Calls dirAction and optionally recurse on link to dir.
  */
 
-int recursive_action(const char *fileName,
+int FAST_FUNC recursive_action(const char *fileName,
 		unsigned flags,
-		int (*fileAction)(const char *fileName, struct stat *statbuf, void* userData, int depth),
-		int (*dirAction)(const char *fileName, struct stat *statbuf, void* userData, int depth),
+		int FAST_FUNC (*fileAction)(const char *fileName, struct stat *statbuf, void* userData, int depth),
+		int FAST_FUNC (*dirAction)(const char *fileName, struct stat *statbuf, void* userData, int depth),
 		void* userData,
 		unsigned depth)
 {
diff --git a/libbb/remove_file.c b/libbb/remove_file.c
index 21878dc..8b14f07 100644
--- a/libbb/remove_file.c
+++ b/libbb/remove_file.c
@@ -11,7 +11,7 @@
 
 /* Used from NOFORK applets. Must not allocate anything */
 
-int remove_file(const char *path, int flags)
+int FAST_FUNC remove_file(const char *path, int flags)
 {
 	struct stat path_stat;
 
diff --git a/libbb/restricted_shell.c b/libbb/restricted_shell.c
index dc4cfb4..2a5073f 100644
--- a/libbb/restricted_shell.c
+++ b/libbb/restricted_shell.c
@@ -32,7 +32,7 @@
 
 /* Return 1 if SHELL is a restricted shell (one not returned by
    getusershell), else 0, meaning it is a standard shell.  */
-int restricted_shell(const char *shell)
+int FAST_FUNC restricted_shell(const char *shell)
 {
 	char *line;
 
diff --git a/libbb/rtc.c b/libbb/rtc.c
index 78f10c6..1fdeee8 100644
--- a/libbb/rtc.c
+++ b/libbb/rtc.c
@@ -11,7 +11,7 @@
 # define ADJTIME_PATH "/etc/adjtime"
 #endif
 
-int rtc_adjtime_is_utc(void)
+int FAST_FUNC rtc_adjtime_is_utc(void)
 {
 	int utc = 0;
 	FILE *f = fopen(ADJTIME_PATH, "r");
@@ -40,7 +40,7 @@
 	return utc;
 }
 
-int rtc_xopen(const char **default_rtc, int flags)
+int FAST_FUNC rtc_xopen(const char **default_rtc, int flags)
 {
 	int rtc;
 
@@ -59,7 +59,7 @@
 	return xopen(*default_rtc, flags);
 }
 
-time_t rtc_read_time(int fd, int utc)
+time_t FAST_FUNC rtc_read_time(int fd, int utc)
 {
 	struct tm tm;
 	char *oldtz = 0;
diff --git a/libbb/run_shell.c b/libbb/run_shell.c
index 239887d..2ccb3a1 100644
--- a/libbb/run_shell.c
+++ b/libbb/run_shell.c
@@ -36,14 +36,12 @@
 #if ENABLE_SELINUX
 static security_context_t current_sid;
 
-void
-renew_current_security_context(void)
+void FAST_FUNC renew_current_security_context(void)
 {
 	freecon(current_sid);  /* Release old context  */
 	getcon(&current_sid);  /* update */
 }
-void
-set_current_security_context(security_context_t sid)
+void FAST_FUNC set_current_security_context(security_context_t sid)
 {
 	freecon(current_sid);  /* Release old context  */
 	current_sid = sid;
@@ -56,7 +54,7 @@
    If ADDITIONAL_ARGS is nonzero, pass it to the shell as more
    arguments.  */
 
-void run_shell(const char *shell, int loginshell, const char *command, const char **additional_args)
+void FAST_FUNC run_shell(const char *shell, int loginshell, const char *command, const char **additional_args)
 {
 	const char **args;
 	int argno = 1;
diff --git a/libbb/safe_gethostname.c b/libbb/safe_gethostname.c
index 3b24edb..1f8b2a8 100644
--- a/libbb/safe_gethostname.c
+++ b/libbb/safe_gethostname.c
@@ -33,7 +33,7 @@
  * This is an illegal first character for a hostname.
  * The returned malloced string must be freed by the caller.
  */
-char *safe_gethostname(void)
+char* FAST_FUNC safe_gethostname(void)
 {
 	struct utsname uts;
 
diff --git a/libbb/safe_poll.c b/libbb/safe_poll.c
index d2b773c..58c7bda 100644
--- a/libbb/safe_poll.c
+++ b/libbb/safe_poll.c
@@ -12,7 +12,7 @@
 /* Wrapper which restarts poll on EINTR or ENOMEM.
  * On other errors does perror("poll") and returns.
  * Warning! May take longer than timeout_ms to return! */
-int safe_poll(struct pollfd *ufds, nfds_t nfds, int timeout)
+int FAST_FUNC safe_poll(struct pollfd *ufds, nfds_t nfds, int timeout)
 {
 	while (1) {
 		int n = poll(ufds, nfds, timeout);
diff --git a/libbb/safe_strncpy.c b/libbb/safe_strncpy.c
index cc42583..649fa10 100644
--- a/libbb/safe_strncpy.c
+++ b/libbb/safe_strncpy.c
@@ -10,7 +10,7 @@
 #include "libbb.h"
 
 /* Like strncpy but make sure the resulting string is always 0 terminated. */
-char *safe_strncpy(char *dst, const char *src, size_t size)
+char* FAST_FUNC safe_strncpy(char *dst, const char *src, size_t size)
 {
 	if (!size) return dst;
 	dst[--size] = '\0';
diff --git a/libbb/safe_write.c b/libbb/safe_write.c
index 5bbb82e..e3561f3 100644
--- a/libbb/safe_write.c
+++ b/libbb/safe_write.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-ssize_t safe_write(int fd, const void *buf, size_t count)
+ssize_t FAST_FUNC safe_write(int fd, const void *buf, size_t count)
 {
 	ssize_t n;
 
diff --git a/libbb/selinux_common.c b/libbb/selinux_common.c
index 7478cc7..5fdbe9d 100644
--- a/libbb/selinux_common.c
+++ b/libbb/selinux_common.c
@@ -7,7 +7,7 @@
 #include "libbb.h"
 #include <selinux/context.h>
 
-context_t set_security_context_component(security_context_t cur_context,
+context_t FAST_FUNC set_security_context_component(security_context_t cur_context,
 					 char *user, char *role, char *type, char *range)
 {
 	context_t con = context_new(cur_context);
@@ -29,7 +29,7 @@
 	return NULL;
 }
 
-void setfscreatecon_or_die(security_context_t scontext)
+void FAST_FUNC setfscreatecon_or_die(security_context_t scontext)
 {
 	if (setfscreatecon(scontext) < 0) {
 		/* Can be NULL. All known printf implementations
@@ -39,7 +39,7 @@
 	}
 }
 
-void selinux_preserve_fcontext(int fdesc)
+void FAST_FUNC selinux_preserve_fcontext(int fdesc)
 {
 	security_context_t context;
 
diff --git a/libbb/setup_environment.c b/libbb/setup_environment.c
index 6e3575c..04e333f 100644
--- a/libbb/setup_environment.c
+++ b/libbb/setup_environment.c
@@ -30,7 +30,7 @@
 
 #include "libbb.h"
 
-void setup_environment(const char *shell, int clear_env, int change_env, const struct passwd *pw)
+void FAST_FUNC setup_environment(const char *shell, int clear_env, int change_env, const struct passwd *pw)
 {
 	if (clear_env) {
 		const char *term;
diff --git a/libbb/sha1.c b/libbb/sha1.c
index 552dcad..cc7edd8 100644
--- a/libbb/sha1.c
+++ b/libbb/sha1.c
@@ -83,7 +83,7 @@
 	ctx->hash[4] += e;
 }
 
-void sha1_begin(sha1_ctx_t *ctx)
+void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
 {
 	ctx->count[0] = ctx->count[1] = 0;
 	ctx->hash[0] = 0x67452301;
@@ -95,7 +95,7 @@
 
 /* SHA1 hash data in an array of bytes into hash buffer and call the        */
 /* hash_compile function as required.                                       */
-void sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx)
+void FAST_FUNC sha1_hash(const void *data, size_t length, sha1_ctx_t *ctx)
 {
 	uint32_t pos = (uint32_t) (ctx->count[0] & SHA1_MASK);
 	uint32_t freeb = SHA1_BLOCK_SIZE - pos;
@@ -116,7 +116,7 @@
 	memcpy(((unsigned char *) ctx->wbuf) + pos, sp, length);
 }
 
-void *sha1_end(void *resbuf, sha1_ctx_t *ctx)
+void* FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
 {
 	/* SHA1 Final padding and digest calculation  */
 #if BB_BIG_ENDIAN
diff --git a/libbb/signals.c b/libbb/signals.c
index 1342c89..9591146 100644
--- a/libbb/signals.c
+++ b/libbb/signals.c
@@ -12,19 +12,19 @@
 #include "libbb.h"
 
 /* Saves 2 bytes on x86! Oh my... */
-int sigaction_set(int signum, const struct sigaction *act)
+int FAST_FUNC sigaction_set(int signum, const struct sigaction *act)
 {
 	return sigaction(signum, act, NULL);
 }
 
-int sigprocmask_allsigs(int how)
+int FAST_FUNC sigprocmask_allsigs(int how)
 {
 	sigset_t set;
 	sigfillset(&set);
 	return sigprocmask(how, &set, NULL);
 }
 
-void bb_signals(int sigs, void (*f)(int))
+void FAST_FUNC bb_signals(int sigs, void (*f)(int))
 {
 	int sig_no = 0;
 	int bit = 1;
@@ -39,7 +39,7 @@
 	}
 }
 
-void bb_signals_recursive(int sigs, void (*f)(int))
+void FAST_FUNC bb_signals_recursive(int sigs, void (*f)(int))
 {
 	int sig_no = 0;
 	int bit = 1;
@@ -60,7 +60,7 @@
 	}
 }
 
-void sig_block(int sig)
+void FAST_FUNC sig_block(int sig)
 {
 	sigset_t ss;
 	sigemptyset(&ss);
@@ -68,7 +68,7 @@
 	sigprocmask(SIG_BLOCK, &ss, NULL);
 }
 
-void sig_unblock(int sig)
+void FAST_FUNC sig_unblock(int sig)
 {
 	sigset_t ss;
 	sigemptyset(&ss);
@@ -76,7 +76,7 @@
 	sigprocmask(SIG_UNBLOCK, &ss, NULL);
 }
 
-void wait_for_any_sig(void)
+void FAST_FUNC wait_for_any_sig(void)
 {
 	sigset_t ss;
 	sigemptyset(&ss);
@@ -84,7 +84,7 @@
 }
 
 /* Assuming the sig is fatal */
-void kill_myself_with_sig(int sig)
+void FAST_FUNC kill_myself_with_sig(int sig)
 {
 	signal(sig, SIG_DFL);
 	sig_unblock(sig);
@@ -92,7 +92,7 @@
 	_exit(EXIT_FAILURE); /* Should not reach it */
 }
 
-void signal_SA_RESTART_empty_mask(int sig, void (*handler)(int))
+void FAST_FUNC signal_SA_RESTART_empty_mask(int sig, void (*handler)(int))
 {
 	struct sigaction sa;
 	memset(&sa, 0, sizeof(sa));
@@ -102,7 +102,7 @@
 	sigaction_set(sig, &sa);
 }
 
-void signal_no_SA_RESTART_empty_mask(int sig, void (*handler)(int))
+void FAST_FUNC signal_no_SA_RESTART_empty_mask(int sig, void (*handler)(int))
 {
 	struct sigaction sa;
 	memset(&sa, 0, sizeof(sa));
diff --git a/libbb/simplify_path.c b/libbb/simplify_path.c
index 29e371d..367f1f0 100644
--- a/libbb/simplify_path.c
+++ b/libbb/simplify_path.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-char *bb_simplify_path(const char *path)
+char* FAST_FUNC bb_simplify_path(const char *path)
 {
 	char *s, *start, *p;
 
diff --git a/libbb/skip_whitespace.c b/libbb/skip_whitespace.c
index 87b5f23..e85f385 100644
--- a/libbb/skip_whitespace.c
+++ b/libbb/skip_whitespace.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-char *skip_whitespace(const char *s)
+char* FAST_FUNC skip_whitespace(const char *s)
 {
 	/* NB: isspace('\0') returns 0 */
 	while (isspace(*s)) ++s;
@@ -17,7 +17,7 @@
 	return (char *) s;
 }
 
-char *skip_non_whitespace(const char *s)
+char* FAST_FUNC skip_non_whitespace(const char *s)
 {
 	while (*s && !isspace(*s)) ++s;
 
diff --git a/libbb/speed_table.c b/libbb/speed_table.c
index 94a2962..646f914 100644
--- a/libbb/speed_table.c
+++ b/libbb/speed_table.c
@@ -56,7 +56,7 @@
 
 enum { NUM_SPEEDS = ARRAY_SIZE(speeds) };
 
-unsigned int tty_baud_to_value(speed_t speed)
+unsigned FAST_FUNC tty_baud_to_value(speed_t speed)
 {
 	int i = 0;
 
@@ -72,7 +72,7 @@
 	return 0;
 }
 
-speed_t tty_value_to_baud(unsigned int value)
+speed_t FAST_FUNC tty_value_to_baud(unsigned int value)
 {
 	int i = 0;
 
diff --git a/libbb/str_tolower.c b/libbb/str_tolower.c
index 037f717..f402e8e 100644
--- a/libbb/str_tolower.c
+++ b/libbb/str_tolower.c
@@ -4,7 +4,8 @@
  * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
  */
 #include "libbb.h"
-char* str_tolower(char *str)
+
+char* FAST_FUNC str_tolower(char *str)
 {
 	char *c;
 	for (c = str; *c; ++c)
diff --git a/libbb/strrstr.c b/libbb/strrstr.c
index f61dd51..d5cd44b 100644
--- a/libbb/strrstr.c
+++ b/libbb/strrstr.c
@@ -19,7 +19,7 @@
  * The strrstr() function finds the last occurrence of the substring needle
  * in the string haystack. The terminating nul characters are not compared.
  */
-char* strrstr(const char *haystack, const char *needle)
+char* FAST_FUNC strrstr(const char *haystack, const char *needle)
 {
 	char *r = NULL;
 
diff --git a/libbb/time.c b/libbb/time.c
index 07c0094..7d3ac91 100644
--- a/libbb/time.c
+++ b/libbb/time.c
@@ -20,14 +20,14 @@
 
 /* libc has incredibly messy way of doing this,
  * typically requiring -lrt. We just skip all this mess */
-unsigned long long monotonic_us(void)
+unsigned long long FAST_FUNC monotonic_us(void)
 {
 	struct timespec ts;
 	if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
 		bb_error_msg_and_die("clock_gettime(MONOTONIC) failed");
 	return ts.tv_sec * 1000000ULL + ts.tv_nsec/1000;
 }
-unsigned monotonic_sec(void)
+unsigned FAST_FUNC monotonic_sec(void)
 {
 	struct timespec ts;
 	if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
@@ -35,14 +35,14 @@
 	return ts.tv_sec;
 }
 #else
-unsigned long long monotonic_us(void)
+unsigned long long FAST_FUNC monotonic_us(void)
 {
 	struct timeval tv;
 	gettimeofday(&tv, NULL);
 	return tv.tv_sec * 1000000ULL + tv.tv_usec;
 }
 
-unsigned monotonic_sec(void)
+unsigned FAST_FUNC monotonic_sec(void)
 {
 	return time(NULL);
 }
diff --git a/libbb/trim.c b/libbb/trim.c
index 94ccaf7..ea20ff3 100644
--- a/libbb/trim.c
+++ b/libbb/trim.c
@@ -10,7 +10,7 @@
 
 #include "libbb.h"
 
-void trim(char *s)
+void FAST_FUNC trim(char *s)
 {
 	size_t len = strlen(s);
 	size_t lws;
diff --git a/libbb/u_signal_names.c b/libbb/u_signal_names.c
index 1dcbf5f..915eea5 100644
--- a/libbb/u_signal_names.c
+++ b/libbb/u_signal_names.c
@@ -121,7 +121,7 @@
 
 // Convert signal name to number.
 
-int get_signum(const char *name)
+int FAST_FUNC get_signum(const char *name)
 {
 	unsigned i;
 
@@ -155,7 +155,7 @@
 
 // Convert signal number to name
 
-const char *get_signame(int number)
+const char* FAST_FUNC get_signame(int number)
 {
 	if ((unsigned)number < ARRAY_SIZE(signals)) {
 		if (signals[number][0]) /* if it's not an empty str */
@@ -168,7 +168,7 @@
 
 // Print the whole signal list
 
-void print_signames(void)
+void FAST_FUNC print_signames(void)
 {
 	unsigned signo;
 
diff --git a/libbb/udp_io.c b/libbb/udp_io.c
index c99e516..b31f284 100644
--- a/libbb/udp_io.c
+++ b/libbb/udp_io.c
@@ -13,7 +13,7 @@
  * This asks kernel to let us know dst addr/port of incoming packets
  * We don't check for errors here. Not supported == won't be used
  */
-void
+void FAST_FUNC
 socket_want_pktinfo(int fd)
 {
 #ifdef IP_PKTINFO
@@ -25,7 +25,7 @@
 }
 
 
-ssize_t
+ssize_t FAST_FUNC
 send_to_from(int fd, void *buf, size_t len, int flags,
 		const struct sockaddr *to,
 		const struct sockaddr *from,
@@ -100,7 +100,7 @@
  * _Only_ IP/IPv6 address part of 'to' is _maybe_ modified.
  * Typical usage is to preinit 'to' with "default" value
  * before calling recv_from_to(). */
-ssize_t
+ssize_t FAST_FUNC
 recv_from_to(int fd, void *buf, size_t len, int flags,
 		struct sockaddr *from, struct sockaddr *to,
 		socklen_t sa_size)
diff --git a/libbb/update_passwd.c b/libbb/update_passwd.c
index d10e863..88bc28c 100644
--- a/libbb/update_passwd.c
+++ b/libbb/update_passwd.c
@@ -36,7 +36,7 @@
 #define check_selinux_update_passwd(username) ((void)0)
 #endif
 
-int update_passwd(const char *filename, const char *username,
+int FAST_FUNC update_passwd(const char *filename, const char *username,
 			const char *new_pw)
 {
 	struct stat sb;
diff --git a/libbb/uuencode.c b/libbb/uuencode.c
index 0aedf33..67d98d5 100644
--- a/libbb/uuencode.c
+++ b/libbb/uuencode.c
@@ -39,7 +39,7 @@
  * buffer of at least 1+BASE64_LENGTH(length) bytes.
  * where BASE64_LENGTH(len) = (4 * ((LENGTH + 2) / 3))
  */
-void bb_uuencode(char *p, const void *src, int length, const char *tbl)
+void FAST_FUNC bb_uuencode(char *p, const void *src, int length, const char *tbl)
 {
 	const unsigned char *s = src;
 
diff --git a/libbb/vdprintf.c b/libbb/vdprintf.c
index 726d563..09fffbc 100644
--- a/libbb/vdprintf.c
+++ b/libbb/vdprintf.c
@@ -10,7 +10,7 @@
 #include "libbb.h"
 
 #if defined(__GLIBC__) && __GLIBC__ < 2
-int vdprintf(int d, const char *format, va_list ap)
+int FAST_FUNC vdprintf(int d, const char *format, va_list ap)
 {
 	char buf[BUF_SIZE];
 	int len;
diff --git a/libbb/verror_msg.c b/libbb/verror_msg.c
index 5c6df48..58846d5 100644
--- a/libbb/verror_msg.c
+++ b/libbb/verror_msg.c
@@ -13,7 +13,7 @@
 smallint logmode = LOGMODE_STDIO;
 const char *msg_eol = "\n";
 
-void bb_verror_msg(const char *s, va_list p, const char* strerr)
+void FAST_FUNC bb_verror_msg(const char *s, va_list p, const char* strerr)
 {
 	char *msg;
 	int applet_len, strerr_len, msgeol_len, used;
@@ -70,7 +70,7 @@
 /* Code size is approximately the same, but currently it's the only user
  * of writev in entire bbox. __libc_writev in uclibc is ~50 bytes. */
 
-void bb_verror_msg(const char *s, va_list p, const char* strerr)
+void FAST_FUNC bb_verror_msg(const char *s, va_list p, const char* strerr)
 {
 	int strerr_len, msgeol_len;
 	struct iovec iov[3];
diff --git a/libbb/vfork_daemon_rexec.c b/libbb/vfork_daemon_rexec.c
index 9624efb..37d4c27 100644
--- a/libbb/vfork_daemon_rexec.c
+++ b/libbb/vfork_daemon_rexec.c
@@ -20,7 +20,7 @@
 
 /* This does a fork/exec in one call, using vfork().  Returns PID of new child,
  * -1 for failure.  Runs argv[0], searching path if that has no / in it. */
-pid_t spawn(char **argv)
+pid_t FAST_FUNC spawn(char **argv)
 {
 	/* Compiler should not optimize stores here */
 	volatile int failed;
@@ -58,7 +58,7 @@
 }
 
 /* Die with an error message if we can't spawn a child process. */
-pid_t xspawn(char **argv)
+pid_t FAST_FUNC xspawn(char **argv)
 {
 	pid_t pid = spawn(argv);
 	if (pid < 0)
@@ -66,7 +66,7 @@
 	return pid;
 }
 
-int safe_waitpid(int pid, int *wstat, int options)
+int FAST_FUNC safe_waitpid(int pid, int *wstat, int options)
 {
 	int r;
 
@@ -76,13 +76,13 @@
 	return r;
 }
 
-int wait_any_nohang(int *wstat)
+int FAST_FUNC wait_any_nohang(int *wstat)
 {
 	return safe_waitpid(-1, wstat, WNOHANG);
 }
 
 // Wait for the specified child PID to exit, returning child's error return.
-int wait4pid(int pid)
+int FAST_FUNC wait4pid(int pid)
 {
 	int status;
 
@@ -101,7 +101,7 @@
 }
 
 #if ENABLE_FEATURE_PREFER_APPLETS
-void save_nofork_data(struct nofork_save_area *save)
+void FAST_FUNC save_nofork_data(struct nofork_save_area *save)
 {
 	memcpy(&save->die_jmp, &die_jmp, sizeof(die_jmp));
 	save->applet_name = applet_name;
@@ -111,7 +111,7 @@
 	save->saved = 1;
 }
 
-void restore_nofork_data(struct nofork_save_area *save)
+void FAST_FUNC restore_nofork_data(struct nofork_save_area *save)
 {
 	memcpy(&die_jmp, &save->die_jmp, sizeof(die_jmp));
 	applet_name = save->applet_name;
@@ -120,7 +120,7 @@
 	die_sleep = save->die_sleep;
 }
 
-int run_nofork_applet_prime(struct nofork_save_area *old, int applet_no, char **argv)
+int FAST_FUNC run_nofork_applet_prime(struct nofork_save_area *old, int applet_no, char **argv)
 {
 	int rc, argc;
 
@@ -166,7 +166,7 @@
 	return rc & 0xff; /* don't confuse people with "exitcodes" >255 */
 }
 
-int run_nofork_applet(int applet_no, char **argv)
+int FAST_FUNC run_nofork_applet(int applet_no, char **argv)
 {
 	struct nofork_save_area old;
 
@@ -176,7 +176,7 @@
 }
 #endif /* FEATURE_PREFER_APPLETS */
 
-int spawn_and_wait(char **argv)
+int FAST_FUNC spawn_and_wait(char **argv)
 {
 	int rc;
 #if ENABLE_FEATURE_PREFER_APPLETS
@@ -210,7 +210,7 @@
 }
 
 #if !BB_MMU
-void re_exec(char **argv)
+void FAST_FUNC re_exec(char **argv)
 {
 	/* high-order bit of first char in argv[0] is a hidden
 	 * "we have (already) re-execed, don't do it again" flag */
@@ -219,7 +219,7 @@
 	bb_perror_msg_and_die("exec %s", bb_busybox_exec_path);
 }
 
-void forkexit_or_rexec(char **argv)
+void FAST_FUNC forkexit_or_rexec(char **argv)
 {
 	pid_t pid;
 	/* Maybe we are already re-execed and come here again? */
@@ -237,7 +237,7 @@
 #else
 /* Dance around (void)...*/
 #undef forkexit_or_rexec
-void forkexit_or_rexec(void)
+void FAST_FUNC forkexit_or_rexec(void)
 {
 	pid_t pid;
 	pid = fork();
@@ -252,7 +252,7 @@
 
 /* Due to a #define in libbb.h on MMU systems we actually have 1 argument -
  * char **argv "vanishes" */
-void bb_daemonize_or_rexec(int flags, char **argv)
+void FAST_FUNC bb_daemonize_or_rexec(int flags, char **argv)
 {
 	int fd;
 
@@ -286,7 +286,7 @@
 	}
 }
 
-void bb_sanitize_stdio(void)
+void FAST_FUNC bb_sanitize_stdio(void)
 {
 	bb_daemonize_or_rexec(DAEMON_ONLY_SANITIZE, NULL);
 }
diff --git a/libbb/warn_ignoring_args.c b/libbb/warn_ignoring_args.c
index be78a44..65dea32 100644
--- a/libbb/warn_ignoring_args.c
+++ b/libbb/warn_ignoring_args.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-void bb_warn_ignoring_args(int n)
+void FAST_FUNC bb_warn_ignoring_args(int n)
 {
 	if (n) {
 		bb_error_msg("ignoring all arguments");
diff --git a/libbb/wfopen.c b/libbb/wfopen.c
index 9248874..ac365c2 100644
--- a/libbb/wfopen.c
+++ b/libbb/wfopen.c
@@ -9,7 +9,7 @@
 
 #include "libbb.h"
 
-FILE *fopen_or_warn(const char *path, const char *mode)
+FILE* FAST_FUNC fopen_or_warn(const char *path, const char *mode)
 {
 	FILE *fp = fopen(path, mode);
 	if (!fp) {
diff --git a/libbb/wfopen_input.c b/libbb/wfopen_input.c
index a7c1c32..46ff7a6 100644
--- a/libbb/wfopen_input.c
+++ b/libbb/wfopen_input.c
@@ -14,7 +14,7 @@
 
 #include "libbb.h"
 
-FILE *fopen_or_warn_stdin(const char *filename)
+FILE* FAST_FUNC fopen_or_warn_stdin(const char *filename)
 {
 	FILE *fp = stdin;
 
@@ -26,7 +26,7 @@
 	return fp;
 }
 
-FILE *xfopen_stdin(const char *filename)
+FILE* FAST_FUNC xfopen_stdin(const char *filename)
 {
 	FILE *fp = fopen_or_warn_stdin(filename);
 	if (fp)
@@ -34,7 +34,7 @@
 	xfunc_die();	/* We already output an error message. */
 }
 
-int open_or_warn_stdin(const char *filename)
+int FAST_FUNC open_or_warn_stdin(const char *filename)
 {
 	int fd = STDIN_FILENO;
 
diff --git a/libbb/write.c b/libbb/write.c
index b628b49..e8a9eff 100644
--- a/libbb/write.c
+++ b/libbb/write.c
@@ -11,7 +11,7 @@
 
 /* Open file and write string str to it, close file.
  * Die on any open or write-error.  */
-void xopen_xwrite_close(const char* file, const char* str)
+void FAST_FUNC xopen_xwrite_close(const char* file, const char* str)
 {
 	int fd = xopen(file, O_WRONLY);
 
diff --git a/libbb/xatonum.c b/libbb/xatonum.c
index a410ae9..3cdf634 100644
--- a/libbb/xatonum.c
+++ b/libbb/xatonum.c
@@ -59,12 +59,12 @@
 
 /* A few special cases */
 
-int xatoi_u(const char *numstr)
+int FAST_FUNC xatoi_u(const char *numstr)
 {
 	return xatou_range(numstr, 0, INT_MAX);
 }
 
-uint16_t xatou16(const char *numstr)
+uint16_t FAST_FUNC xatou16(const char *numstr)
 {
 	return xatou_range(numstr, 0, 0xffff);
 }
diff --git a/libbb/xatonum_template.c b/libbb/xatonum_template.c
index 9f9dc11..2360ae8 100644
--- a/libbb/xatonum_template.c
+++ b/libbb/xatonum_template.c
@@ -12,7 +12,7 @@
 #define XSTR_STRTOU strtoul
 */
 
-unsigned type xstrtou(_range_sfx)(const char *numstr, int base,
+unsigned type FAST_FUNC xstrtou(_range_sfx)(const char *numstr, int base,
 		unsigned type lower,
 		unsigned type upper,
 		const struct suffix_mult *suffixes)
@@ -71,25 +71,25 @@
 	bb_error_msg_and_die("invalid number '%s'", numstr);
 }
 
-unsigned type xstrtou(_range)(const char *numstr, int base,
+unsigned type FAST_FUNC xstrtou(_range)(const char *numstr, int base,
 		unsigned type lower,
 		unsigned type upper)
 {
 	return xstrtou(_range_sfx)(numstr, base, lower, upper, NULL);
 }
 
-unsigned type xstrtou(_sfx)(const char *numstr, int base,
+unsigned type FAST_FUNC xstrtou(_sfx)(const char *numstr, int base,
 		const struct suffix_mult *suffixes)
 {
 	return xstrtou(_range_sfx)(numstr, base, 0, XSTR_UTYPE_MAX, suffixes);
 }
 
-unsigned type xstrtou()(const char *numstr, int base)
+unsigned type FAST_FUNC xstrtou()(const char *numstr, int base)
 {
 	return xstrtou(_range_sfx)(numstr, base, 0, XSTR_UTYPE_MAX, NULL);
 }
 
-unsigned type xatou(_range_sfx)(const char *numstr,
+unsigned type FAST_FUNC xatou(_range_sfx)(const char *numstr,
 		unsigned type lower,
 		unsigned type upper,
 		const struct suffix_mult *suffixes)
@@ -97,27 +97,27 @@
 	return xstrtou(_range_sfx)(numstr, 10, lower, upper, suffixes);
 }
 
-unsigned type xatou(_range)(const char *numstr,
+unsigned type FAST_FUNC xatou(_range)(const char *numstr,
 		unsigned type lower,
 		unsigned type upper)
 {
 	return xstrtou(_range_sfx)(numstr, 10, lower, upper, NULL);
 }
 
-unsigned type xatou(_sfx)(const char *numstr,
+unsigned type FAST_FUNC xatou(_sfx)(const char *numstr,
 		const struct suffix_mult *suffixes)
 {
 	return xstrtou(_range_sfx)(numstr, 10, 0, XSTR_UTYPE_MAX, suffixes);
 }
 
-unsigned type xatou()(const char *numstr)
+unsigned type FAST_FUNC xatou()(const char *numstr)
 {
 	return xatou(_sfx)(numstr, NULL);
 }
 
 /* Signed ones */
 
-type xstrto(_range_sfx)(const char *numstr, int base,
+type FAST_FUNC xstrto(_range_sfx)(const char *numstr, int base,
 		type lower,
 		type upper,
 		const struct suffix_mult *suffixes)
@@ -148,12 +148,12 @@
 	return r;
 }
 
-type xstrto(_range)(const char *numstr, int base, type lower, type upper)
+type FAST_FUNC xstrto(_range)(const char *numstr, int base, type lower, type upper)
 {
 	return xstrto(_range_sfx)(numstr, base, lower, upper, NULL);
 }
 
-type xato(_range_sfx)(const char *numstr,
+type FAST_FUNC xato(_range_sfx)(const char *numstr,
 		type lower,
 		type upper,
 		const struct suffix_mult *suffixes)
@@ -161,17 +161,17 @@
 	return xstrto(_range_sfx)(numstr, 10, lower, upper, suffixes);
 }
 
-type xato(_range)(const char *numstr, type lower, type upper)
+type FAST_FUNC xato(_range)(const char *numstr, type lower, type upper)
 {
 	return xstrto(_range_sfx)(numstr, 10, lower, upper, NULL);
 }
 
-type xato(_sfx)(const char *numstr, const struct suffix_mult *suffixes)
+type FAST_FUNC xato(_sfx)(const char *numstr, const struct suffix_mult *suffixes)
 {
 	return xstrto(_range_sfx)(numstr, 10, XSTR_TYPE_MIN, XSTR_TYPE_MAX, suffixes);
 }
 
-type xato()(const char *numstr)
+type FAST_FUNC xato()(const char *numstr)
 {
 	return xstrto(_range_sfx)(numstr, 10, XSTR_TYPE_MIN, XSTR_TYPE_MAX, NULL);
 }
diff --git a/libbb/xconnect.c b/libbb/xconnect.c
index 950aee8..d48c503 100644
--- a/libbb/xconnect.c
+++ b/libbb/xconnect.c
@@ -9,16 +9,16 @@
 #include <netinet/in.h>
 #include "libbb.h"
 
-void setsockopt_reuseaddr(int fd)
+void FAST_FUNC setsockopt_reuseaddr(int fd)
 {
 	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &const_int_1, sizeof(const_int_1));
 }
-int setsockopt_broadcast(int fd)
+int FAST_FUNC setsockopt_broadcast(int fd)
 {
 	return setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &const_int_1, sizeof(const_int_1));
 }
 
-void xconnect(int s, const struct sockaddr *s_addr, socklen_t addrlen)
+void FAST_FUNC xconnect(int s, const struct sockaddr *s_addr, socklen_t addrlen)
 {
 	if (connect(s, s_addr, addrlen) < 0) {
 		if (ENABLE_FEATURE_CLEAN_UP)
@@ -35,7 +35,7 @@
  * If "port" is a number use it as the port.
  * If "port" is a name it is looked up in /etc/services, if it isnt found return
  * default_port */
-unsigned bb_lookup_port(const char *port, const char *protocol, unsigned default_port)
+unsigned FAST_FUNC bb_lookup_port(const char *port, const char *protocol, unsigned default_port)
 {
 	unsigned port_nr = default_port;
 	if (port) {
@@ -60,7 +60,7 @@
 /* "Old" networking API - only IPv4 */
 
 /*
-void bb_lookup_host(struct sockaddr_in *s_in, const char *host)
+void FAST_FUNC bb_lookup_host(struct sockaddr_in *s_in, const char *host)
 {
 	struct hostent *he;
 
@@ -71,7 +71,7 @@
 }
 
 
-int xconnect_tcp_v4(struct sockaddr_in *s_addr)
+int FAST_FUNC xconnect_tcp_v4(struct sockaddr_in *s_addr)
 {
 	int s = xsocket(AF_INET, SOCK_STREAM, 0);
 	xconnect(s, (struct sockaddr*) s_addr, sizeof(*s_addr));
@@ -82,7 +82,7 @@
 /* "New" networking API */
 
 
-int get_nport(const struct sockaddr *sa)
+int FAST_FUNC get_nport(const struct sockaddr *sa)
 {
 #if ENABLE_FEATURE_IPV6
 	if (sa->sa_family == AF_INET6) {
@@ -96,7 +96,7 @@
 	return -1;
 }
 
-void set_nport(len_and_sockaddr *lsa, unsigned port)
+void FAST_FUNC set_nport(len_and_sockaddr *lsa, unsigned port)
 {
 #if ENABLE_FEATURE_IPV6
 	if (lsa->u.sa.sa_family == AF_INET6) {
@@ -205,34 +205,34 @@
 #endif
 
 #if ENABLE_FEATURE_IPV6
-len_and_sockaddr* host_and_af2sockaddr(const char *host, int port, sa_family_t af)
+len_and_sockaddr* FAST_FUNC host_and_af2sockaddr(const char *host, int port, sa_family_t af)
 {
 	return str2sockaddr(host, port, af, 0);
 }
 
-len_and_sockaddr* xhost_and_af2sockaddr(const char *host, int port, sa_family_t af)
+len_and_sockaddr* FAST_FUNC xhost_and_af2sockaddr(const char *host, int port, sa_family_t af)
 {
 	return str2sockaddr(host, port, af, DIE_ON_ERROR);
 }
 #endif
 
-len_and_sockaddr* host2sockaddr(const char *host, int port)
+len_and_sockaddr* FAST_FUNC host2sockaddr(const char *host, int port)
 {
 	return str2sockaddr(host, port, AF_UNSPEC, 0);
 }
 
-len_and_sockaddr* xhost2sockaddr(const char *host, int port)
+len_and_sockaddr* FAST_FUNC xhost2sockaddr(const char *host, int port)
 {
 	return str2sockaddr(host, port, AF_UNSPEC, DIE_ON_ERROR);
 }
 
-len_and_sockaddr* xdotted2sockaddr(const char *host, int port)
+len_and_sockaddr* FAST_FUNC xdotted2sockaddr(const char *host, int port)
 {
 	return str2sockaddr(host, port, AF_UNSPEC, AI_NUMERICHOST | DIE_ON_ERROR);
 }
 
 #undef xsocket_type
-int xsocket_type(len_and_sockaddr **lsap, USE_FEATURE_IPV6(int family,) int sock_type)
+int FAST_FUNC xsocket_type(len_and_sockaddr **lsap, USE_FEATURE_IPV6(int family,) int sock_type)
 {
 	SKIP_FEATURE_IPV6(enum { family = AF_INET };)
 	len_and_sockaddr *lsa;
@@ -264,7 +264,7 @@
 	return fd;
 }
 
-int xsocket_stream(len_and_sockaddr **lsap)
+int FAST_FUNC xsocket_stream(len_and_sockaddr **lsap)
 {
 	return xsocket_type(lsap, USE_FEATURE_IPV6(AF_UNSPEC,) SOCK_STREAM);
 }
@@ -288,18 +288,18 @@
 	return fd;
 }
 
-int create_and_bind_stream_or_die(const char *bindaddr, int port)
+int FAST_FUNC create_and_bind_stream_or_die(const char *bindaddr, int port)
 {
 	return create_and_bind_or_die(bindaddr, port, SOCK_STREAM);
 }
 
-int create_and_bind_dgram_or_die(const char *bindaddr, int port)
+int FAST_FUNC create_and_bind_dgram_or_die(const char *bindaddr, int port)
 {
 	return create_and_bind_or_die(bindaddr, port, SOCK_DGRAM);
 }
 
 
-int create_and_connect_stream_or_die(const char *peer, int port)
+int FAST_FUNC create_and_connect_stream_or_die(const char *peer, int port)
 {
 	int fd;
 	len_and_sockaddr *lsa;
@@ -312,7 +312,7 @@
 	return fd;
 }
 
-int xconnect_stream(const len_and_sockaddr *lsa)
+int FAST_FUNC xconnect_stream(const len_and_sockaddr *lsa)
 {
 	int fd = xsocket(lsa->u.sa.sa_family, SOCK_STREAM, 0);
 	xconnect(fd, &lsa->u.sa, lsa->len);
@@ -322,7 +322,7 @@
 /* We hijack this constant to mean something else */
 /* It doesn't hurt because we will add this bit anyway */
 #define IGNORE_PORT NI_NUMERICSERV
-static char* sockaddr2str(const struct sockaddr *sa, int flags)
+static char* FAST_FUNC sockaddr2str(const struct sockaddr *sa, int flags)
 {
 	char host[128];
 	char serv[16];
@@ -361,26 +361,26 @@
 	/*return xstrdup(host);*/
 }
 
-char* xmalloc_sockaddr2host(const struct sockaddr *sa)
+char* FAST_FUNC xmalloc_sockaddr2host(const struct sockaddr *sa)
 {
 	return sockaddr2str(sa, 0);
 }
 
-char* xmalloc_sockaddr2host_noport(const struct sockaddr *sa)
+char* FAST_FUNC xmalloc_sockaddr2host_noport(const struct sockaddr *sa)
 {
 	return sockaddr2str(sa, IGNORE_PORT);
 }
 
-char* xmalloc_sockaddr2hostonly_noport(const struct sockaddr *sa)
+char* FAST_FUNC xmalloc_sockaddr2hostonly_noport(const struct sockaddr *sa)
 {
 	return sockaddr2str(sa, NI_NAMEREQD | IGNORE_PORT);
 }
-char* xmalloc_sockaddr2dotted(const struct sockaddr *sa)
+char* FAST_FUNC xmalloc_sockaddr2dotted(const struct sockaddr *sa)
 {
 	return sockaddr2str(sa, NI_NUMERICHOST);
 }
 
-char* xmalloc_sockaddr2dotted_noport(const struct sockaddr *sa)
+char* FAST_FUNC xmalloc_sockaddr2dotted_noport(const struct sockaddr *sa)
 {
 	return sockaddr2str(sa, NI_NUMERICHOST | IGNORE_PORT);
 }
diff --git a/libbb/xfunc_die.c b/libbb/xfunc_die.c
index 357494d..ba9fe93 100644
--- a/libbb/xfunc_die.c
+++ b/libbb/xfunc_die.c
@@ -17,7 +17,7 @@
 jmp_buf die_jmp;
 #endif
 
-void xfunc_die(void)
+void FAST_FUNC xfunc_die(void)
 {
 	if (die_sleep) {
 		if ((ENABLE_FEATURE_PREFER_APPLETS || ENABLE_HUSH)
diff --git a/libbb/xfuncs.c b/libbb/xfuncs.c
index fe3c647..8ef305b 100644
--- a/libbb/xfuncs.c
+++ b/libbb/xfuncs.c
@@ -25,17 +25,17 @@
 #include "libbb.h"
 
 /* Turn on nonblocking I/O on a fd */
-int ndelay_on(int fd)
+int FAST_FUNC ndelay_on(int fd)
 {
 	return fcntl(fd, F_SETFL, fcntl(fd,F_GETFL) | O_NONBLOCK);
 }
 
-int ndelay_off(int fd)
+int FAST_FUNC ndelay_off(int fd)
 {
 	return fcntl(fd, F_SETFL, fcntl(fd,F_GETFL) & ~O_NONBLOCK);
 }
 
-int close_on_exec_on(int fd)
+int FAST_FUNC close_on_exec_on(int fd)
 {
 	return fcntl(fd, F_SETFD, FD_CLOEXEC);
 }
@@ -43,7 +43,7 @@
 /* Convert unsigned long long value into compact 4-char
  * representation. Examples: "1234", "1.2k", " 27M", "123T"
  * String is not terminated (buf[4] is untouched) */
-void smart_ulltoa4(unsigned long long ul, char buf[5], const char *scale)
+void FAST_FUNC smart_ulltoa4(unsigned long long ul, char buf[5], const char *scale)
 {
 	const char *fmt;
 	char c;
@@ -91,7 +91,7 @@
 
 /* Convert unsigned long long value into compact 5-char representation.
  * String is not terminated (buf[5] is untouched) */
-void smart_ulltoa5(unsigned long long ul, char buf[6], const char *scale)
+void FAST_FUNC smart_ulltoa5(unsigned long long ul, char buf[6], const char *scale)
 {
 	const char *fmt;
 	char c;
@@ -149,7 +149,7 @@
 // A truncated result contains the first few digits of the result ala strncpy.
 // Returns a pointer past last generated digit, does _not_ store NUL.
 void BUG_sizeof_unsigned_not_4(void);
-char *utoa_to_buf(unsigned n, char *buf, unsigned buflen)
+char* FAST_FUNC utoa_to_buf(unsigned n, char *buf, unsigned buflen)
 {
 	unsigned i, out, res;
 	if (sizeof(unsigned) != 4)
@@ -170,7 +170,7 @@
 }
 
 /* Convert signed integer to ascii, like utoa_to_buf() */
-char *itoa_to_buf(int n, char *buf, unsigned buflen)
+char* FAST_FUNC itoa_to_buf(int n, char *buf, unsigned buflen)
 {
 	if (buflen && n < 0) {
 		n = -n;
@@ -190,7 +190,7 @@
 static char local_buf[sizeof(int) * 3];
 
 // Convert unsigned integer to ascii using a static buffer (returned).
-char *utoa(unsigned n)
+char* FAST_FUNC utoa(unsigned n)
 {
 	*(utoa_to_buf(n, local_buf, sizeof(local_buf))) = '\0';
 
@@ -198,7 +198,7 @@
 }
 
 /* Convert signed integer to ascii using a static buffer (returned). */
-char *itoa(int n)
+char* FAST_FUNC itoa(int n)
 {
 	*(itoa_to_buf(n, local_buf, sizeof(local_buf))) = '\0';
 
@@ -206,7 +206,7 @@
 }
 
 /* Emit a string of hex representation of bytes */
-char *bin2hex(char *p, const char *cp, int count)
+char* FAST_FUNC bin2hex(char *p, const char *cp, int count)
 {
 	while (count) {
 		unsigned char c = *cp++;
@@ -220,7 +220,7 @@
 
 /* Return how long the file at fd is, if there's any way to determine it. */
 #ifdef UNUSED
-off_t fdlength(int fd)
+off_t FAST_FUNC fdlength(int fd)
 {
 	off_t bottom = 0, top = 0, pos;
 	long size;
@@ -262,7 +262,7 @@
 
 /* It is perfectly ok to pass in a NULL for either width or for
  * height, in which case that value will not be set.  */
-int get_terminal_width_height(int fd, unsigned *width, unsigned *height)
+int FAST_FUNC get_terminal_width_height(int fd, unsigned *width, unsigned *height)
 {
 	struct winsize win = { 0, 0, 0, 0 };
 	int ret = ioctl(fd, TIOCGWINSZ, &win);
diff --git a/libbb/xfuncs_printf.c b/libbb/xfuncs_printf.c
index 105939b..108e140 100644
--- a/libbb/xfuncs_printf.c
+++ b/libbb/xfuncs_printf.c
@@ -33,7 +33,7 @@
  * included after these prototypes in libbb.h, all is well.
  */
 // Warn if we can't allocate size bytes of memory.
-void *malloc_or_warn(size_t size)
+void* FAST_FUNC malloc_or_warn(size_t size)
 {
 	void *ptr = malloc(size);
 	if (ptr == NULL && size != 0)
@@ -42,7 +42,7 @@
 }
 
 // Die if we can't allocate size bytes of memory.
-void *xmalloc(size_t size)
+void* FAST_FUNC xmalloc(size_t size)
 {
 	void *ptr = malloc(size);
 	if (ptr == NULL && size != 0)
@@ -53,7 +53,7 @@
 // Die if we can't resize previously allocated memory.  (This returns a pointer
 // to the new memory, which may or may not be the same as the old memory.
 // It'll copy the contents to a new chunk and free the old one if necessary.)
-void *xrealloc(void *ptr, size_t size)
+void* FAST_FUNC xrealloc(void *ptr, size_t size)
 {
 	ptr = realloc(ptr, size);
 	if (ptr == NULL && size != 0)
@@ -63,7 +63,7 @@
 #endif /* DMALLOC */
 
 // Die if we can't allocate and zero size bytes of memory.
-void *xzalloc(size_t size)
+void* FAST_FUNC xzalloc(size_t size)
 {
 	void *ptr = xmalloc(size);
 	memset(ptr, 0, size);
@@ -71,7 +71,7 @@
 }
 
 // Die if we can't copy a string to freshly allocated memory.
-char * xstrdup(const char *s)
+char* FAST_FUNC xstrdup(const char *s)
 {
 	char *t;
 
@@ -88,7 +88,7 @@
 
 // Die if we can't allocate n+1 bytes (space for the null terminator) and copy
 // the (possibly truncated to length n) string into it.
-char *xstrndup(const char *s, int n)
+char* FAST_FUNC xstrndup(const char *s, int n)
 {
 	int m;
 	char *t;
@@ -112,9 +112,9 @@
 	return memcpy(t, s, n);
 }
 
-// Die if we can't open a file and return a FILE * to it.
+// Die if we can't open a file and return a FILE* to it.
 // Notice we haven't got xfread(), This is for use with fscanf() and friends.
-FILE *xfopen(const char *path, const char *mode)
+FILE* FAST_FUNC xfopen(const char *path, const char *mode)
 {
 	FILE *fp = fopen(path, mode);
 	if (fp == NULL)
@@ -123,7 +123,7 @@
 }
 
 // Die if we can't open a file and return a fd.
-int xopen3(const char *pathname, int flags, int mode)
+int FAST_FUNC xopen3(const char *pathname, int flags, int mode)
 {
 	int ret;
 
@@ -135,13 +135,13 @@
 }
 
 // Die if we can't open an existing file and return a fd.
-int xopen(const char *pathname, int flags)
+int FAST_FUNC xopen(const char *pathname, int flags)
 {
 	return xopen3(pathname, flags, 0666);
 }
 
 // Warn if we can't open a file and return a fd.
-int open3_or_warn(const char *pathname, int flags, int mode)
+int FAST_FUNC open3_or_warn(const char *pathname, int flags, int mode)
 {
 	int ret;
 
@@ -153,24 +153,24 @@
 }
 
 // Warn if we can't open a file and return a fd.
-int open_or_warn(const char *pathname, int flags)
+int FAST_FUNC open_or_warn(const char *pathname, int flags)
 {
 	return open3_or_warn(pathname, flags, 0666);
 }
 
-void xunlink(const char *pathname)
+void FAST_FUNC xunlink(const char *pathname)
 {
 	if (unlink(pathname))
 		bb_perror_msg_and_die("can't remove file '%s'", pathname);
 }
 
-void xrename(const char *oldpath, const char *newpath)
+void FAST_FUNC xrename(const char *oldpath, const char *newpath)
 {
 	if (rename(oldpath, newpath))
 		bb_perror_msg_and_die("can't move '%s' to '%s'", oldpath, newpath);
 }
 
-int rename_or_warn(const char *oldpath, const char *newpath)
+int FAST_FUNC rename_or_warn(const char *oldpath, const char *newpath)
 {
 	int n = rename(oldpath, newpath);
 	if (n)
@@ -178,20 +178,20 @@
 	return n;
 }
 
-void xpipe(int filedes[2])
+void FAST_FUNC xpipe(int filedes[2])
 {
 	if (pipe(filedes))
 		bb_perror_msg_and_die("can't create pipe");
 }
 
-void xdup2(int from, int to)
+void FAST_FUNC xdup2(int from, int to)
 {
 	if (dup2(from, to) != to)
 		bb_perror_msg_and_die("can't duplicate file descriptor");
 }
 
 // "Renumber" opened fd
-void xmove_fd(int from, int to)
+void FAST_FUNC xmove_fd(int from, int to)
 {
 	if (from == to)
 		return;
@@ -200,7 +200,7 @@
 }
 
 // Die with an error message if we can't write the entire buffer.
-void xwrite(int fd, const void *buf, size_t count)
+void FAST_FUNC xwrite(int fd, const void *buf, size_t count)
 {
 	if (count) {
 		ssize_t size = full_write(fd, buf, count);
@@ -210,7 +210,7 @@
 }
 
 // Die with an error message if we can't lseek to the right spot.
-off_t xlseek(int fd, off_t offset, int whence)
+off_t FAST_FUNC xlseek(int fd, off_t offset, int whence)
 {
 	off_t off = lseek(fd, offset, whence);
 	if (off == (off_t)-1) {
@@ -221,8 +221,8 @@
 	return off;
 }
 
-// Die with supplied filename if this FILE * has ferror set.
-void die_if_ferror(FILE *fp, const char *fn)
+// Die with supplied filename if this FILE* has ferror set.
+void FAST_FUNC die_if_ferror(FILE *fp, const char *fn)
 {
 	if (ferror(fp)) {
 		/* ferror doesn't set useful errno */
@@ -231,13 +231,13 @@
 }
 
 // Die with an error message if stdout has ferror set.
-void die_if_ferror_stdout(void)
+void FAST_FUNC die_if_ferror_stdout(void)
 {
 	die_if_ferror(stdout, bb_msg_standard_output);
 }
 
 // Die with an error message if we have trouble flushing stdout.
-void xfflush_stdout(void)
+void FAST_FUNC xfflush_stdout(void)
 {
 	if (fflush(stdout)) {
 		bb_perror_msg_and_die(bb_msg_standard_output);
@@ -245,7 +245,7 @@
 }
 
 
-int bb_putchar(int ch)
+int FAST_FUNC bb_putchar(int ch)
 {
 	/* time.c needs putc(ch, stdout), not putchar(ch).
 	 * it does "stdout = stderr;", but then glibc's putchar()
@@ -253,9 +253,9 @@
 	return putc(ch, stdout);
 }
 
-/* Die with an error message if we can't copy an entire FILE * to stdout,
+/* Die with an error message if we can't copy an entire FILE* to stdout,
  * then close that file. */
-void xprint_and_close_file(FILE *file)
+void FAST_FUNC xprint_and_close_file(FILE *file)
 {
 	fflush(stdout);
 	// copyfd outputs error messages for us.
@@ -267,7 +267,7 @@
 
 // Die with an error message if we can't malloc() enough space and do an
 // sprintf() into that space.
-char *xasprintf(const char *format, ...)
+char* FAST_FUNC xasprintf(const char *format, ...)
 {
 	va_list p;
 	int r;
@@ -295,7 +295,7 @@
 }
 
 #if 0 /* If we will ever meet a libc which hasn't [f]dprintf... */
-int fdprintf(int fd, const char *format, ...)
+int FAST_FUNC fdprintf(int fd, const char *format, ...)
 {
 	va_list p;
 	int r;
@@ -327,7 +327,7 @@
 }
 #endif
 
-void xsetenv(const char *key, const char *value)
+void FAST_FUNC xsetenv(const char *key, const char *value)
 {
 	if (setenv(key, value, 1))
 		bb_error_msg_and_die(bb_msg_memory_exhausted);
@@ -336,32 +336,32 @@
 // Die with an error message if we can't set gid.  (Because resource limits may
 // limit this user to a given number of processes, and if that fills up the
 // setgid() will fail and we'll _still_be_root_, which is bad.)
-void xsetgid(gid_t gid)
+void FAST_FUNC xsetgid(gid_t gid)
 {
 	if (setgid(gid)) bb_perror_msg_and_die("setgid");
 }
 
 // Die with an error message if we can't set uid.  (See xsetgid() for why.)
-void xsetuid(uid_t uid)
+void FAST_FUNC xsetuid(uid_t uid)
 {
 	if (setuid(uid)) bb_perror_msg_and_die("setuid");
 }
 
 // Die if we can't chdir to a new path.
-void xchdir(const char *path)
+void FAST_FUNC xchdir(const char *path)
 {
 	if (chdir(path))
 		bb_perror_msg_and_die("chdir(%s)", path);
 }
 
-void xchroot(const char *path)
+void FAST_FUNC xchroot(const char *path)
 {
 	if (chroot(path))
 		bb_perror_msg_and_die("can't change root directory to %s", path);
 }
 
 // Print a warning message if opendir() fails, but don't die.
-DIR *warn_opendir(const char *path)
+DIR* FAST_FUNC warn_opendir(const char *path)
 {
 	DIR *dp;
 
@@ -372,7 +372,7 @@
 }
 
 // Die with an error message if opendir() fails.
-DIR *xopendir(const char *path)
+DIR* FAST_FUNC xopendir(const char *path)
 {
 	DIR *dp;
 
@@ -383,7 +383,7 @@
 }
 
 // Die with an error message if we can't open a new socket.
-int xsocket(int domain, int type, int protocol)
+int FAST_FUNC xsocket(int domain, int type, int protocol)
 {
 	int r = socket(domain, type, protocol);
 
@@ -404,20 +404,20 @@
 }
 
 // Die with an error message if we can't bind a socket to an address.
-void xbind(int sockfd, struct sockaddr *my_addr, socklen_t addrlen)
+void FAST_FUNC xbind(int sockfd, struct sockaddr *my_addr, socklen_t addrlen)
 {
 	if (bind(sockfd, my_addr, addrlen)) bb_perror_msg_and_die("bind");
 }
 
 // Die with an error message if we can't listen for connections on a socket.
-void xlisten(int s, int backlog)
+void FAST_FUNC xlisten(int s, int backlog)
 {
 	if (listen(s, backlog)) bb_perror_msg_and_die("listen");
 }
 
 /* Die with an error message if sendto failed.
  * Return bytes sent otherwise  */
-ssize_t xsendto(int s, const  void *buf, size_t len, const struct sockaddr *to,
+ssize_t FAST_FUNC xsendto(int s, const  void *buf, size_t len, const struct sockaddr *to,
 				socklen_t tolen)
 {
 	ssize_t ret = sendto(s, buf, len, 0, to, tolen);
@@ -430,14 +430,14 @@
 }
 
 // xstat() - a stat() which dies on failure with meaningful error message
-void xstat(const char *name, struct stat *stat_buf)
+void FAST_FUNC xstat(const char *name, struct stat *stat_buf)
 {
 	if (stat(name, stat_buf))
 		bb_perror_msg_and_die("can't stat '%s'", name);
 }
 
 // selinux_or_die() - die if SELinux is disabled.
-void selinux_or_die(void)
+void FAST_FUNC selinux_or_die(void)
 {
 #if ENABLE_SELINUX
 	int rc = is_selinux_enabled();
@@ -451,7 +451,7 @@
 #endif
 }
 
-int ioctl_or_perror_and_die(int fd, unsigned request, void *argp, const char *fmt,...)
+int FAST_FUNC ioctl_or_perror_and_die(int fd, unsigned request, void *argp, const char *fmt,...)
 {
 	int ret;
 	va_list p;
@@ -467,7 +467,7 @@
 	return ret;
 }
 
-int ioctl_or_perror(int fd, unsigned request, void *argp, const char *fmt,...)
+int FAST_FUNC ioctl_or_perror(int fd, unsigned request, void *argp, const char *fmt,...)
 {
 	va_list p;
 	int ret = ioctl(fd, request, argp);
@@ -481,7 +481,7 @@
 }
 
 #if ENABLE_IOCTL_HEX2STR_ERROR
-int bb_ioctl_or_warn(int fd, unsigned request, void *argp, const char *ioctl_name)
+int FAST_FUNC bb_ioctl_or_warn(int fd, unsigned request, void *argp, const char *ioctl_name)
 {
 	int ret;
 
@@ -490,7 +490,7 @@
 		bb_simple_perror_msg(ioctl_name);
 	return ret;
 }
-int bb_xioctl(int fd, unsigned request, void *argp, const char *ioctl_name)
+int FAST_FUNC bb_xioctl(int fd, unsigned request, void *argp, const char *ioctl_name)
 {
 	int ret;
 
@@ -500,7 +500,7 @@
 	return ret;
 }
 #else
-int bb_ioctl_or_warn(int fd, unsigned request, void *argp)
+int FAST_FUNC bb_ioctl_or_warn(int fd, unsigned request, void *argp)
 {
 	int ret;
 
@@ -509,7 +509,7 @@
 		bb_perror_msg("ioctl %#x failed", request);
 	return ret;
 }
-int bb_xioctl(int fd, unsigned request, void *argp)
+int FAST_FUNC bb_xioctl(int fd, unsigned request, void *argp)
 {
 	int ret;
 
diff --git a/libbb/xgetcwd.c b/libbb/xgetcwd.c
index c194e23..eefe1d6 100644
--- a/libbb/xgetcwd.c
+++ b/libbb/xgetcwd.c
@@ -14,7 +14,7 @@
    If argument is not NULL (previous usage allocate memory), call free()
 */
 
-char *
+char* FAST_FUNC
 xrealloc_getcwd_or_warn(char *cwd)
 {
 #define PATH_INCR 64
diff --git a/libbb/xgethostbyname.c b/libbb/xgethostbyname.c
index 3bb522d..f1839f7 100644
--- a/libbb/xgethostbyname.c
+++ b/libbb/xgethostbyname.c
@@ -10,7 +10,7 @@
 //#include <netdb.h>
 #include "libbb.h"
 
-struct hostent *xgethostbyname(const char *name)
+struct hostent* FAST_FUNC xgethostbyname(const char *name)
 {
 	struct hostent *retval = gethostbyname(name);
 	if (!retval)
diff --git a/libbb/xreadlink.c b/libbb/xreadlink.c
index 0b961b6..2cfc575 100644
--- a/libbb/xreadlink.c
+++ b/libbb/xreadlink.c
@@ -10,7 +10,7 @@
  * NOTE: This function returns a malloced char* that you will have to free
  * yourself.
  */
-char *xmalloc_readlink(const char *path)
+char* FAST_FUNC xmalloc_readlink(const char *path)
 {
 	enum { GROWBY = 80 }; /* how large we will grow strings by */
 
@@ -42,7 +42,7 @@
  * those at the tail.
  * A malloced char* is returned, which must be freed by the caller.
  */
-char *xmalloc_follow_symlinks(const char *path)
+char* FAST_FUNC xmalloc_follow_symlinks(const char *path)
 {
 	char *buf;
 	char *lpc;
@@ -84,7 +84,7 @@
 	}
 }
 
-char *xmalloc_readlink_or_warn(const char *path)
+char* FAST_FUNC xmalloc_readlink_or_warn(const char *path)
 {
 	char *buf = xmalloc_readlink(path);
 	if (!buf) {
@@ -96,7 +96,7 @@
 
 /* UNUSED */
 #if 0
-char *xmalloc_realpath(const char *path)
+char* FAST_FUNC xmalloc_realpath(const char *path)
 {
 #if defined(__GLIBC__) && !defined(__UCLIBC__)
 	/* glibc provides a non-standard extension */
diff --git a/libbb/xregcomp.c b/libbb/xregcomp.c
index 157132c..abfa35f 100644
--- a/libbb/xregcomp.c
+++ b/libbb/xregcomp.c
@@ -11,7 +11,7 @@
 #include "libbb.h"
 #include "xregex.h"
 
-char* regcomp_or_errmsg(regex_t *preg, const char *regex, int cflags)
+char* FAST_FUNC regcomp_or_errmsg(regex_t *preg, const char *regex, int cflags)
 {
 	int ret = regcomp(preg, regex, cflags);
 	if (ret) {
@@ -23,7 +23,7 @@
 	return NULL;
 }
 
-void xregcomp(regex_t *preg, const char *regex, int cflags)
+void FAST_FUNC xregcomp(regex_t *preg, const char *regex, int cflags)
 {
 	char *errmsg = regcomp_or_errmsg(preg, regex, cflags);
 	if (errmsg) {
diff --git a/libpwdgrp/uidgid_get.c b/libpwdgrp/uidgid_get.c
index b0085c4..88f4e25 100644
--- a/libpwdgrp/uidgid_get.c
+++ b/libpwdgrp/uidgid_get.c
@@ -28,7 +28,7 @@
 #include "libbb.h"
 
 /* Always sets uid and gid */
-int get_uidgid(struct bb_uidgid_t *u, const char *ug, int numeric_ok)
+int FAST_FUNC get_uidgid(struct bb_uidgid_t *u, const char *ug, int numeric_ok)
 {
 	struct passwd *pwd;
 	struct group *gr;
@@ -84,7 +84,7 @@
  * "user:group" sets uid and gid
  * ('unset' uid or gid is actually set to -1)
  */
-void parse_chown_usergroup_or_die(struct bb_uidgid_t *u, char *user_group)
+void FAST_FUNC parse_chown_usergroup_or_die(struct bb_uidgid_t *u, char *user_group)
 {
 	char *group;
 
diff --git a/loginutils/addgroup.c b/loginutils/addgroup.c
index c9495b2..863ccdf 100644
--- a/loginutils/addgroup.c
+++ b/loginutils/addgroup.c
@@ -72,7 +72,7 @@
 #if ENABLE_FEATURE_ADDUSER_TO_GROUP
 static void add_user_to_group(char **args,
 		const char *path,
-		FILE *(*fopen_func)(const char *fileName, const char *mode))
+		FILE* FAST_FUNC (*fopen_func)(const char *fileName, const char *mode))
 {
 	char *line;
 	int len = strlen(args[1]);
diff --git a/loginutils/deluser.c b/loginutils/deluser.c
index c67ad72..5625371 100644
--- a/loginutils/deluser.c
+++ b/loginutils/deluser.c
@@ -19,7 +19,7 @@
 
 static void del_line_matching(char **args,
 		const char *filename,
-		FILE *(*fopen_func)(const char *fileName, const char *mode))
+		FILE* FAST_FUNC (*fopen_func)(const char *fileName, const char *mode))
 {
 	FILE *passwd;
 	smallint error = NAME_NOT_FOUND;
diff --git a/modutils/depmod.c b/modutils/depmod.c
index fdeb7dc..4fba537 100644
--- a/modutils/depmod.c
+++ b/modutils/depmod.c
@@ -47,7 +47,7 @@
 	} while (1);
 	return ptr;
 }
-static int fileAction(const char *fname, struct stat *sb,
+static int FAST_FUNC fileAction(const char *fname, struct stat *sb,
 					void ATTRIBUTE_UNUSED *data, int ATTRIBUTE_UNUSED depth)
 {
 	size_t len = sb->st_size;
diff --git a/modutils/insmod.c b/modutils/insmod.c
index 4ac1a1e..8217629 100644
--- a/modutils/insmod.c
+++ b/modutils/insmod.c
@@ -791,7 +791,7 @@
 /*======================================================================*/
 
 
-static int check_module_name_match(const char *filename,
+static int FAST_FUNC check_module_name_match(const char *filename,
 		struct stat *statbuf ATTRIBUTE_UNUSED,
 		void *userdata, int depth ATTRIBUTE_UNUSED)
 {
@@ -3306,7 +3306,7 @@
 
 /*======================================================================*/
 
-static struct obj_file *obj_load(FILE * fp, int loadprogbits ATTRIBUTE_UNUSED)
+static struct obj_file *obj_load(FILE *fp, int loadprogbits ATTRIBUTE_UNUSED)
 {
 	struct obj_file *f;
 	ElfW(Shdr) * section_headers;
@@ -3512,7 +3512,7 @@
  * kernel for the module
  */
 
-static int obj_load_progbits(FILE * fp, struct obj_file* f, char* imagebase)
+static int obj_load_progbits(FILE *fp, struct obj_file *f, char *imagebase)
 {
 	ElfW(Addr) base = f->baseaddr;
 	struct obj_section* sec;
diff --git a/networking/dnsd.c b/networking/dnsd.c
index 97ba2dc..bd0fad4 100644
--- a/networking/dnsd.c
+++ b/networking/dnsd.c
@@ -109,7 +109,7 @@
  * Presently the dot is copied into name without
  * converting to a length/string substring for that label.
  */
-static int getfileentry(FILE * fp, struct dns_entry *s)
+static int getfileentry(FILE *fp, struct dns_entry *s)
 {
 	unsigned int a,b,c,d;
 	char *line, *r, *name;
diff --git a/networking/interface.c b/networking/interface.c
index efae03b..afc7d0b 100644
--- a/networking/interface.c
+++ b/networking/interface.c
@@ -89,7 +89,7 @@
 #endif
 
 /* Display an Internet socket address. */
-static const char *INET_sprint(struct sockaddr *sap, int numeric)
+static const char* FAST_FUNC INET_sprint(struct sockaddr *sap, int numeric)
 {
 	static char *buff;
 
@@ -143,7 +143,7 @@
 }
 #endif
 
-static int INET_input(/*int type,*/ const char *bufp, struct sockaddr *sap)
+static int FAST_FUNC INET_input(/*int type,*/ const char *bufp, struct sockaddr *sap)
 {
 	return INET_resolve(bufp, (struct sockaddr_in *) sap, 0);
 /*
@@ -159,19 +159,19 @@
 }
 
 static const struct aftype inet_aftype = {
-	.name =		"inet",
-	.title =	"DARPA Internet",
-	.af =		AF_INET,
-	.alen =		4,
-	.sprint =	INET_sprint,
-	.input =	INET_input,
+	.name   = "inet",
+	.title  = "DARPA Internet",
+	.af     = AF_INET,
+	.alen   = 4,
+	.sprint = INET_sprint,
+	.input  = INET_input,
 };
 
 #ifdef HAVE_AFINET6
 
 /* Display an Internet socket address. */
 /* dirty! struct sockaddr usually doesn't suffer for inet6 addresses, fst. */
-static const char *INET6_sprint(struct sockaddr *sap, int numeric)
+static const char* FAST_FUNC INET6_sprint(struct sockaddr *sap, int numeric)
 {
 	static char *buff;
 
@@ -198,7 +198,7 @@
 }
 #endif
 
-static int INET6_input(/*int type,*/ const char *bufp, struct sockaddr *sap)
+static int FAST_FUNC INET6_input(/*int type,*/ const char *bufp, struct sockaddr *sap)
 {
 	return INET6_resolve(bufp, (struct sockaddr_in6 *) sap);
 /*
@@ -212,18 +212,18 @@
 }
 
 static const struct aftype inet6_aftype = {
-	.name =		"inet6",
-	.title =	"IPv6",
-	.af =		AF_INET6,
-	.alen =		sizeof(struct in6_addr),
-	.sprint =	INET6_sprint,
-	.input =	INET6_input,
+	.name   = "inet6",
+	.title  = "IPv6",
+	.af     = AF_INET6,
+	.alen   = sizeof(struct in6_addr),
+	.sprint = INET6_sprint,
+	.input  = INET6_input,
 };
 
 #endif /* HAVE_AFINET6 */
 
 /* Display an UNSPEC address. */
-static char *UNSPEC_print(unsigned char *ptr)
+static char* FAST_FUNC UNSPEC_print(unsigned char *ptr)
 {
 	static char *buff;
 
@@ -244,7 +244,7 @@
 }
 
 /* Display an UNSPEC socket address. */
-static const char *UNSPEC_sprint(struct sockaddr *sap, int numeric ATTRIBUTE_UNUSED)
+static const char* FAST_FUNC UNSPEC_sprint(struct sockaddr *sap, int numeric ATTRIBUTE_UNUSED)
 {
 	if (sap->sa_family == 0xFFFF || sap->sa_family == 0)
 		return "[NONE SET]";
@@ -270,7 +270,7 @@
 };
 
 /* Check our protocol family table for this family. */
-const struct aftype *get_aftype(const char *name)
+const struct aftype* FAST_FUNC get_aftype(const char *name)
 {
 	const struct aftype *const *afp;
 
@@ -720,7 +720,7 @@
 #endif
 
 /* Display an Ethernet address in readable format. */
-static char *pr_ether(unsigned char *ptr)
+static char* FAST_FUNC ether_print(unsigned char *ptr)
 {
 	static char *buff;
 
@@ -732,15 +732,15 @@
 	return buff;
 }
 
-static int in_ether(const char *bufp, struct sockaddr *sap);
+static int FAST_FUNC ether_input(const char *bufp, struct sockaddr *sap);
 
 static const struct hwtype ether_hwtype = {
-	.name =		"ether",
-	.title =	"Ethernet",
-	.type =		ARPHRD_ETHER,
-	.alen =		ETH_ALEN,
-	.print =	pr_ether,
-	.input =	in_ether
+	.name  = "ether",
+	.title = "Ethernet",
+	.type  = ARPHRD_ETHER,
+	.alen  = ETH_ALEN,
+	.print = ether_print,
+	.input = ether_input
 };
 
 static unsigned hexchar2int(char c)
@@ -754,7 +754,7 @@
 }
 
 /* Input an Ethernet address and convert to binary. */
-static int in_ether(const char *bufp, struct sockaddr *sap)
+static int FAST_FUNC ether_input(const char *bufp, struct sockaddr *sap)
 {
 	unsigned char *ptr;
 	char c;
@@ -813,12 +813,12 @@
 #endif
 #if ENABLE_FEATURE_HWIB
 static const struct hwtype ib_hwtype = {
-	.name =			"infiniband",
-	.title =		"InfiniBand",
-	.type =			ARPHRD_INFINIBAND,
-	.alen =			INFINIBAND_ALEN,
-	.print =		UNSPEC_print,
-	.input =		in_ib,
+	.name  = "infiniband",
+	.title = "InfiniBand",
+	.type  = ARPHRD_INFINIBAND,
+	.alen  = INFINIBAND_ALEN,
+	.print = UNSPEC_print,
+	.input = in_ib,
 };
 #endif
 
@@ -852,7 +852,7 @@
 #endif
 
 /* Check our hardware type table for this type. */
-const struct hwtype *get_hwtype(const char *name)
+const struct hwtype* FAST_FUNC get_hwtype(const char *name)
 {
 	const struct hwtype *const *hwp;
 
@@ -866,7 +866,7 @@
 }
 
 /* Check our hardware type table for this type. */
-const struct hwtype *get_hwntype(int type)
+const struct hwtype* FAST_FUNC get_hwntype(int type)
 {
 	const struct hwtype *const *hwp;
 
@@ -1214,7 +1214,7 @@
 
 #if ENABLE_FEATURE_HWIB
 /* Input an Infiniband address and convert to binary. */
-int in_ib(const char *bufp, struct sockaddr *sap)
+int FAST_FUNC in_ib(const char *bufp, struct sockaddr *sap)
 {
 	unsigned char *ptr;
 	char c;
@@ -1272,7 +1272,7 @@
 #endif
 
 
-int display_interfaces(char *ifname)
+int FAST_FUNC display_interfaces(char *ifname)
 {
 	int status;
 
diff --git a/networking/route.c b/networking/route.c
index 857424c..0146fa0 100644
--- a/networking/route.c
+++ b/networking/route.c
@@ -476,7 +476,7 @@
 }
 
 /* also used in netstat */
-void bb_displayroutes(int noresolve, int netstatfmt)
+void FAST_FUNC bb_displayroutes(int noresolve, int netstatfmt)
 {
 	char devname[64], flags[16], *sdest, *sgw;
 	unsigned long d, g, m;
diff --git a/selinux/chcon.c b/selinux/chcon.c
index 288e93a..505ae4f 100644
--- a/selinux/chcon.c
+++ b/selinux/chcon.c
@@ -28,7 +28,7 @@
 static char *range = NULL;
 static char *specified_context = NULL;
 
-static int change_filedir_context(
+static int FAST_FUNC change_filedir_context(
 		const char *fname,
 		struct stat *stbuf ATTRIBUTE_UNUSED,
 		void *userData ATTRIBUTE_UNUSED,
diff --git a/selinux/setfiles.c b/selinux/setfiles.c
index 82e9e53..991c13c 100644
--- a/selinux/setfiles.c
+++ b/selinux/setfiles.c
@@ -391,7 +391,7 @@
  * This function is called by recursive_action on each file during
  * the directory traversal.
  */
-static int apply_spec(
+static int FAST_FUNC apply_spec(
 		const char *file,
 		struct stat *sb,
 		void *userData ATTRIBUTE_UNUSED,
diff --git a/shell/hush.c b/shell/hush.c
index 032482c..72186f9 100644
--- a/shell/hush.c
+++ b/shell/hush.c
@@ -498,7 +498,7 @@
 	/* Was using fancy stuff:
 	 * (interactive_fd ? bb_error_msg : bb_error_msg_and_die)(...params...)
 	 * but it SEGVs. ?! Oh well... explicit temp ptr works around that */
-	void (*fp)(const char *s, ...);
+	void FAST_FUNC (*fp)(const char *s, ...);
 
 	fp = (interactive_fd ? bb_error_msg : bb_error_msg_and_die);
 	fp(msg ? "%s: %s" : "syntax error", "syntax error", msg);
diff --git a/shell/lash_unused.c b/shell/lash_unused.c
index d57f584..59199de 100644
--- a/shell/lash_unused.c
+++ b/shell/lash_unused.c
@@ -116,11 +116,11 @@
 /* function prototypes for shell stuff */
 static void checkjobs(struct jobset *job_list);
 static void remove_job(struct jobset *j_list, struct job *job);
-static int get_command_bufsiz(FILE * source, char *command);
+static int get_command_bufsiz(FILE *source, char *command);
 static int parse_command(char **command_ptr, struct job *job, int *inbg);
 static int run_command(struct job *newjob, int inbg, int outpipe[2]);
 static int pseudo_exec(struct child_prog *cmd) ATTRIBUTE_NORETURN;
-static int busy_loop(FILE * input);
+static int busy_loop(FILE *input);
 
 
 /* Table of built-in functions (these are non-forking builtins, meaning they
@@ -643,7 +643,7 @@
 static line_input_t *line_input_state;
 #endif
 
-static int get_command_bufsiz(FILE * source, char *command)
+static int get_command_bufsiz(FILE *source, char *command)
 {
 	const char *prompt_str;
 
@@ -1326,7 +1326,7 @@
 	return 0;
 }
 
-static int busy_loop(FILE * input)
+static int busy_loop(FILE *input)
 {
 	char *command;
 	char *next_command = NULL;
diff --git a/util-linux/mdev.c b/util-linux/mdev.c
index e1edd20..bacccc0 100644
--- a/util-linux/mdev.c
+++ b/util-linux/mdev.c
@@ -299,7 +299,7 @@
 }
 
 /* File callback for /sys/ traversal */
-static int fileAction(const char *fileName,
+static int FAST_FUNC fileAction(const char *fileName,
 		struct stat *statbuf ATTRIBUTE_UNUSED,
 		void *userData,
 		int depth ATTRIBUTE_UNUSED)
@@ -319,7 +319,7 @@
 }
 
 /* Directory callback for /sys/ traversal */
-static int dirAction(const char *fileName ATTRIBUTE_UNUSED,
+static int FAST_FUNC dirAction(const char *fileName ATTRIBUTE_UNUSED,
 		struct stat *statbuf ATTRIBUTE_UNUSED,
 		void *userData ATTRIBUTE_UNUSED,
 		int depth)