lineedit: partially fix wide and combining chars editing

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/include/unicode.h b/include/unicode.h
index 4e29272..747026a 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -35,6 +35,16 @@
 #  define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
 # endif
 
+# if LAST_SUPPORTED_WCHAR < 0x300 /* presumably: no combining chars exist below U+0300 */
+#  undef ENABLE_UNICODE_COMBINING_WCHARS
+#  define ENABLE_UNICODE_COMBINING_WCHARS 0
+# endif
+
+# if LAST_SUPPORTED_WCHAR < 0x1100 /* presumably: no wide chars exist below U+1100 */
+#  undef ENABLE_UNICODE_WIDE_WCHARS
+#  define ENABLE_UNICODE_WIDE_WCHARS 0
+# endif
+
 # if LAST_SUPPORTED_WCHAR < 0x590
 #  undef  ENABLE_UNICODE_BIDI_SUPPORT
 #  define ENABLE_UNICODE_BIDI_SUPPORT 0
@@ -92,6 +102,7 @@
 int iswspace(wint_t wc) FAST_FUNC;
 int iswalnum(wint_t wc) FAST_FUNC;
 int iswpunct(wint_t wc) FAST_FUNC;
+int wcwidth(unsigned ucs) FAST_FUNC;
 #  if ENABLE_UNICODE_BIDI_SUPPORT
 #   undef unicode_bidi_isrtl
 int unicode_bidi_isrtl(wint_t wc) FAST_FUNC;
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 7fffe7b..9f2d657 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -42,14 +42,10 @@
 #include "libbb.h"
 #include "unicode.h"
 
-/* FIXME: obsolete CONFIG item? */
-#define ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT 0
-
 #ifdef TEST
 # define ENABLE_FEATURE_EDITING 0
 # define ENABLE_FEATURE_TAB_COMPLETION 0
 # define ENABLE_FEATURE_USERNAME_COMPLETION 0
-# define ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT 0
 #endif
 
 
@@ -97,10 +93,10 @@
 
 
 # if ENABLE_UNICODE_PRESERVE_BROKEN
-#  define unicode_mark_inv_wchar(wc)   ((wc) | 0x20000000)
-#  define unicode_is_inv_wchar(wc)     ((wc) & 0x20000000)
+#  define unicode_mark_raw_byte(wc)   ((wc) | 0x20000000) /* set the "raw byte" tag bit on wc */
+#  define unicode_is_raw_byte(wc)     ((wc) & 0x20000000) /* nonzero if wc carries that tag bit */
 # else
-#  define unicode_is_inv_wchar(wc)     0
+#  define unicode_is_raw_byte(wc)     0
 # endif
 
 
@@ -240,7 +236,7 @@
 		wchar_t wc;
 		int n = srcpos;
 		while ((wc = command_ps[srcpos]) != 0
-		    && !unicode_is_inv_wchar(wc)
+		    && !unicode_is_raw_byte(wc)
 		) {
 			srcpos++;
 		}
@@ -269,15 +265,45 @@
 	mbstate_t mbst = { 0 };
 	ssize_t len;
 
-	if (unicode_is_inv_wchar(c))
-		c = CONFIG_SUBST_WCHAR;
 	len = wcrtomb(buf, c, &mbst);
 	if (len > 0) {
 		buf[len] = '\0';
 		fputs(buf, stdout);
 	}
 }
-#else
+# if ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS
+static wchar_t adjust_width_and_validate_wc(unsigned *width_adj, wchar_t wc)
+# else
+static wchar_t adjust_width_and_validate_wc(wchar_t wc) /* one-arg variant: the macro below adds exactly 1 to the width */
+#  define adjust_width_and_validate_wc(width_adj, wc) \
+	((*(width_adj))++, adjust_width_and_validate_wc(wc))
+# endif
+{ /* Returns wc, or CONFIG_SUBST_WCHAR if wc is rejected below; the width is added to *width_adj */
+	int w = 1; /* screen width of wc; stays 1 when unicode_status != UNICODE_ON */
+
+	if (unicode_status == UNICODE_ON) {
+		if (unicode_is_raw_byte(wc) /* wc carries the unicode_mark_raw_byte() tag bit */
+		 || (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
+		) {
+			goto subst;
+		}
+		w = wcwidth(wc);
+		if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* negative width is rejected in every config */
+		 || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0) /* zero width is kept only with combining support */
+		 || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1) /* width > 1 is kept only with wide-char support */
+		) {
+ subst:
+			w = 1; /* the substitute char is counted as one column */
+			wc = CONFIG_SUBST_WCHAR;
+		}
+	}
+
+# if ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS
+	*width_adj += w;
+#endif
+	return wc;
+}
+#else /* !UNICODE */
 static size_t load_string(const char *src, int maxsize)
 {
 	safe_strncpy(command_ps, src, maxsize);
@@ -290,6 +316,8 @@
 }
 # endif
 # define BB_PUTCHAR(c) bb_putchar(c)
+/* Prototype only: lets the "unicode_status == UNICODE_ON" call sites compile in this !UNICODE branch. Should never be called: */
+int adjust_width_and_validate_wc(unsigned *width_adj, int wc);
 #endif
 
 
@@ -300,6 +328,8 @@
 static void put_cur_glyph_and_inc_cursor(void)
 {
 	CHAR_T c = command_ps[cursor];
+	unsigned width = 0;
+	int ofs_to_right;
 
 	if (c == BB_NUL) {
 		/* erase character after end of input string */
@@ -307,28 +337,23 @@
 	} else {
 		/* advance cursor only if we aren't at the end yet */
 		cursor++;
-		cmdedit_x++;
+		if (unicode_status == UNICODE_ON) {
+			IF_UNICODE_WIDE_WCHARS(width = cmdedit_x;)
+			c = adjust_width_and_validate_wc(&cmdedit_x, c);
+			IF_UNICODE_WIDE_WCHARS(width = cmdedit_x - width;)
+		} else {
+			cmdedit_x++;
+		}
 	}
 
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
-	/* Display non-printable characters in reverse */
-	if (!BB_isprint(c)) {
-		if (c >= 128)
-			c -= 128;
-		if (c < ' ')
-			c += '@';
-		if (c == 127)
-			c = '?';
-		printf("\033[7m%c\033[0m", c);
-	} else
-#endif
-	{
+	ofs_to_right = cmdedit_x - cmdedit_termw;
+	if (!ENABLE_UNICODE_WIDE_WCHARS || ofs_to_right <= 0) {
+		/* c fits on this line */
 		BB_PUTCHAR(c);
 	}
-	if (cmdedit_x >= cmdedit_termw) {
-		/* terminal is scrolled down */
-		cmdedit_y++;
-		cmdedit_x = 0;
+
+	if (ofs_to_right >= 0) {
+		/* we go to the next line */
 #if HACK_FOR_WRONG_WIDTH
 		/* This works better if our idea of term width is wrong
 		 * and it is actually wider (often happens on serial lines).
@@ -351,6 +376,14 @@
 		BB_PUTCHAR(c);
 		bb_putchar('\b');
 #endif
+		cmdedit_y++;
+		if (!ENABLE_UNICODE_WIDE_WCHARS || ofs_to_right == 0) {
+			width = 0;
+		} else { /* ofs_to_right > 0 */
+			/* wide char c didn't fit on prev line */
+			BB_PUTCHAR(c);
+		}
+		cmdedit_x = width;
 	}
 }
 
@@ -389,10 +422,22 @@
 
 	if (num > cursor)
 		num = cursor;
-	if (!num)
+	if (num == 0)
 		return;
 	cursor -= num;
 
+	if ((ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS)
+	 && unicode_status == UNICODE_ON
+	) {
+		/* correct NUM to be equal to _screen_ width */
+		int n = num;
+		num = 0;
+		while (--n >= 0)
+			adjust_width_and_validate_wc(&num, command_ps[cursor + n]);
+		if (num == 0)
+			return;
+	}
+
 	if (cmdedit_x >= num) {
 		cmdedit_x -= num;
 		if (num <= 4) {
@@ -412,6 +457,8 @@
 	}
 
 	/* Need to go one or more lines up */
+//FIXME: this is wrong if the previous line ends with one "unfilled" screen position,
+//left there because a wide unicode char did not fit into it.
 	num -= cmdedit_x;
 	{
 		unsigned w = cmdedit_termw; /* volatile var */
@@ -765,21 +812,13 @@
 	}
 
 	/* mask \+symbol and convert '\t' to ' ' */
-	for (i = j = 0; matchBuf[i]; i++, j++)
+	for (i = j = 0; matchBuf[i]; i++, j++) {
 		if (matchBuf[i] == '\\') {
 			collapse_pos(j, j + 1);
 			int_buf[j] |= QUOT;
 			i++;
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
-			if (matchBuf[i] == '\t')  /* algorithm equivalent */
-				int_buf[j] = ' ' | QUOT;
-#endif
 		}
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
-		else if (matchBuf[i] == '\t')
-			int_buf[j] = ' ';
-#endif
-
+	}
 	/* mask "symbols" or 'symbols' */
 	c2 = 0;
 	for (i = 0; int_buf[i]; i++) {
@@ -1774,7 +1813,7 @@
 # if !ENABLE_UNICODE_PRESERVE_BROKEN
 				ic = CONFIG_SUBST_WCHAR;
 # else
-				ic = unicode_mark_inv_wchar(unicode_buf[0]);
+				ic = unicode_mark_raw_byte(unicode_buf[0]);
 # endif
 			} else {
 				/* Valid unicode char, return its code */
@@ -2384,9 +2423,6 @@
 		"% ";
 #endif
 
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
-	setlocale(LC_ALL, "");
-#endif
 	while (1) {
 		int l;
 		l = read_line_input(prompt, buff);
diff --git a/libbb/unicode.c b/libbb/unicode.c
index d1c6167..eb0ea61 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -418,7 +418,7 @@
  * This implementation assumes that wchar_t characters are encoded
  * in ISO 10646.
  */
-static int wcwidth(unsigned ucs)
+int FAST_FUNC wcwidth(unsigned ucs)
 {
 # if LAST_SUPPORTED_WCHAR >= 0x300
 	/* sorted list of non-overlapping intervals of non-spacing characters */