dd: speed measurement. optional.

function                                             old     new   delta
dd_output_status                                      68     289    +221
dd_main                                             1463    1482     +19
write_and_stats                                       64      75     +11
packed_usage                                       26529   26526      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 251/-3)            Total: 248 bytes
Signed-off-by: Gabor Heja <kakaopor@kakaopor.hu>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/coreutils/dd.c b/coreutils/dd.c
index c5c9476..768a129 100644
--- a/coreutils/dd.c
+++ b/coreutils/dd.c
@@ -34,6 +34,10 @@
 
 struct globals {
 	off_t out_full, out_part, in_full, in_part;
+#if ENABLE_FEATURE_DD_THIRD_STATUS_LINE
+	unsigned long long total_bytes;
+	unsigned long long begin_time_us;
+#endif
 };
 #define G (*(struct globals*)&bb_common_bufsiz1)
 #define INIT_G() do { \
@@ -44,11 +48,50 @@
 
 static void dd_output_status(int UNUSED_PARAM cur_signal)
 {
+#if ENABLE_FEATURE_DD_THIRD_STATUS_LINE
+	unsigned long long total;
+	unsigned long long diff_scaled;
+	unsigned long long diff_us = monotonic_us(); /* before fprintf */
+#endif
+
 	/* Deliberately using %u, not %d */
 	fprintf(stderr, "%"OFF_FMT"u+%"OFF_FMT"u records in\n"
 			"%"OFF_FMT"u+%"OFF_FMT"u records out\n",
 			G.in_full, G.in_part,
 			G.out_full, G.out_part);
+
+#if ENABLE_FEATURE_DD_THIRD_STATUS_LINE
+	fprintf(stderr, "%llu bytes (%sB) copied, ",
+			G.total_bytes,
+			/* show fractional digit, use suffixes */
+			make_human_readable_str(G.total_bytes, 1, 0)
+	);
+	/* Corner cases:
+	 * ./busybox dd </dev/null >/dev/null
+	 * ./busybox dd bs=1M count=2000 </dev/zero >/dev/null
+	 * (echo DONE) | ./busybox dd >/dev/null
+	 * (sleep 1; echo DONE) | ./busybox dd >/dev/null
+	 */
+	diff_us -= G.begin_time_us;
+	/* We need to calculate "(total * 1M) / usec" without overflow.
+	 * this would work too, but is bigger than integer code below.
+	 * total = G.total_bytes * (double)(1024 * 1024) / (diff_us ? diff_us : 1);
+	 */
+	diff_scaled = diff_us;
+	total = G.total_bytes;
+	while (total > MAXINT(unsigned long long) / (1024 * 1024)) {
+		total >>= 1;
+		diff_scaled >>= 1;
+	}
+	total *= (1024 * 1024);
+	if (diff_scaled > 1)
+		total /= diff_scaled;
+	fprintf(stderr, "%f seconds, %sB/s\n",
+			diff_us / 1000000.0,
+			/* show fractional digit, use suffixes */
+			make_human_readable_str(total, 1, 0)
+	);
+#endif
 }
 
 static ssize_t full_write_or_warn(const void *buf, size_t len,
@@ -70,13 +113,16 @@
 		G.out_full++;
 	else if (n) /* > 0 */
 		G.out_part++;
+#if ENABLE_FEATURE_DD_THIRD_STATUS_LINE
+	G.total_bytes += n;
+#endif
 	return 0;
 }
 
 #if ENABLE_LFS
-#define XATOU_SFX xatoull_sfx
+# define XATOU_SFX xatoull_sfx
 #else
-#define XATOU_SFX xatoul_sfx
+# define XATOU_SFX xatoul_sfx
 #endif
 
 int dd_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -157,10 +203,6 @@
 	INIT_G();
 	//fflush(NULL); - is this needed because of NOEXEC?
 
-#if ENABLE_FEATURE_DD_SIGNAL_HANDLING
-	signal_SA_RESTART_empty_mask(SIGUSR1, dd_output_status);
-#endif
-
 	for (n = 1; argv[n]; n++) {
 		int what;
 		char *val;
@@ -246,6 +288,14 @@
 		flags |= FLAG_TWOBUFS;
 		obuf = xmalloc(obs);
 	}
+
+#if ENABLE_FEATURE_DD_SIGNAL_HANDLING
+	signal_SA_RESTART_empty_mask(SIGUSR1, dd_output_status);
+#endif
+#if ENABLE_FEATURE_DD_THIRD_STATUS_LINE
+	G.begin_time_us = monotonic_us();
+#endif
+
 	if (infile != NULL)
 		xmove_fd(xopen(infile, O_RDONLY), ifd);
 	else {