lineedit: partially fix editing of wide and combining chars
Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/include/unicode.h b/include/unicode.h
index 4e29272..747026a 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -35,6 +35,16 @@
# define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
# endif
+# if LAST_SUPPORTED_WCHAR < 0x300
+# undef ENABLE_UNICODE_COMBINING_WCHARS
+# define ENABLE_UNICODE_COMBINING_WCHARS 0
+# endif
+
+# if LAST_SUPPORTED_WCHAR < 0x1100
+# undef ENABLE_UNICODE_WIDE_WCHARS
+# define ENABLE_UNICODE_WIDE_WCHARS 0
+# endif
+
# if LAST_SUPPORTED_WCHAR < 0x590
# undef ENABLE_UNICODE_BIDI_SUPPORT
# define ENABLE_UNICODE_BIDI_SUPPORT 0
@@ -92,6 +102,7 @@
int iswspace(wint_t wc) FAST_FUNC;
int iswalnum(wint_t wc) FAST_FUNC;
int iswpunct(wint_t wc) FAST_FUNC;
+int wcwidth(unsigned ucs) FAST_FUNC;
# if ENABLE_UNICODE_BIDI_SUPPORT
# undef unicode_bidi_isrtl
int unicode_bidi_isrtl(wint_t wc) FAST_FUNC;
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 7fffe7b..9f2d657 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -42,14 +42,10 @@
#include "libbb.h"
#include "unicode.h"
-/* FIXME: obsolete CONFIG item? */
-#define ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT 0
-
#ifdef TEST
# define ENABLE_FEATURE_EDITING 0
# define ENABLE_FEATURE_TAB_COMPLETION 0
# define ENABLE_FEATURE_USERNAME_COMPLETION 0
-# define ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT 0
#endif
@@ -97,10 +93,10 @@
# if ENABLE_UNICODE_PRESERVE_BROKEN
-# define unicode_mark_inv_wchar(wc) ((wc) | 0x20000000)
-# define unicode_is_inv_wchar(wc) ((wc) & 0x20000000)
+# define unicode_mark_raw_byte(wc) ((wc) | 0x20000000)
+# define unicode_is_raw_byte(wc) ((wc) & 0x20000000)
# else
-# define unicode_is_inv_wchar(wc) 0
+# define unicode_is_raw_byte(wc) 0
# endif
@@ -240,7 +236,7 @@
wchar_t wc;
int n = srcpos;
while ((wc = command_ps[srcpos]) != 0
- && !unicode_is_inv_wchar(wc)
+ && !unicode_is_raw_byte(wc)
) {
srcpos++;
}
@@ -269,15 +265,45 @@
mbstate_t mbst = { 0 };
ssize_t len;
- if (unicode_is_inv_wchar(c))
- c = CONFIG_SUBST_WCHAR;
len = wcrtomb(buf, c, &mbst);
if (len > 0) {
buf[len] = '\0';
fputs(buf, stdout);
}
}
-#else
+# if ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS /* a glyph's screen width may differ from 1 */
+static wchar_t adjust_width_and_validate_wc(unsigned *width_adj, wchar_t wc) /* adds wc's screen width to *width_adj; returns wc or its substitute */
+# else
+static wchar_t adjust_width_and_validate_wc(wchar_t wc) /* here every accepted glyph is exactly one column wide */
+# define adjust_width_and_validate_wc(width_adj, wc) \
+ ((*(width_adj))++, adjust_width_and_validate_wc(wc))
+# endif
+{
+ int w = 1; /* width reported when unicode_status is not UNICODE_ON */
+
+ if (unicode_status == UNICODE_ON) {
+ if (unicode_is_raw_byte(wc)
+ || (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR) /* range check is skipped when the config value is 0 */
+ ) {
+ goto subst; /* marked raw byte, or code point above the supported limit */
+ }
+ w = wcwidth(wc);
+ if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* negative width is always rejected */
+ || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0) /* zero width is rejected too when combining chars are disabled */
+ || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1) /* width above 1 is rejected when wide chars are disabled */
+ ) {
+ subst:
+ w = 1; /* the substitute char is counted as one column */
+ wc = CONFIG_SUBST_WCHAR;
+ }
+ }
+
+# if ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS
+ *width_adj += w; /* in the other configuration the wrapper macro does the increment instead */
+#endif
+ return wc; /* either the original char or CONFIG_SUBST_WCHAR */
+}
+#else /* !UNICODE */
static size_t load_string(const char *src, int maxsize)
{
safe_strncpy(command_ps, src, maxsize);
@@ -290,6 +316,8 @@
}
# endif
# define BB_PUTCHAR(c) bb_putchar(c)
+/* Should never be called: */
+int adjust_width_and_validate_wc(unsigned *width_adj, int wc);
#endif
@@ -300,6 +328,8 @@
static void put_cur_glyph_and_inc_cursor(void)
{
CHAR_T c = command_ps[cursor];
+ unsigned width = 0;
+ int ofs_to_right;
if (c == BB_NUL) {
/* erase character after end of input string */
@@ -307,28 +337,23 @@
} else {
/* advance cursor only if we aren't at the end yet */
cursor++;
- cmdedit_x++;
+ if (unicode_status == UNICODE_ON) {
+ IF_UNICODE_WIDE_WCHARS(width = cmdedit_x;)
+ c = adjust_width_and_validate_wc(&cmdedit_x, c);
+ IF_UNICODE_WIDE_WCHARS(width = cmdedit_x - width;)
+ } else {
+ cmdedit_x++;
+ }
}
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
- /* Display non-printable characters in reverse */
- if (!BB_isprint(c)) {
- if (c >= 128)
- c -= 128;
- if (c < ' ')
- c += '@';
- if (c == 127)
- c = '?';
- printf("\033[7m%c\033[0m", c);
- } else
-#endif
- {
+ ofs_to_right = cmdedit_x - cmdedit_termw;
+ if (!ENABLE_UNICODE_WIDE_WCHARS || ofs_to_right <= 0) {
+ /* c fits on this line */
BB_PUTCHAR(c);
}
- if (cmdedit_x >= cmdedit_termw) {
- /* terminal is scrolled down */
- cmdedit_y++;
- cmdedit_x = 0;
+
+ if (ofs_to_right >= 0) {
+ /* we go to the next line */
#if HACK_FOR_WRONG_WIDTH
/* This works better if our idea of term width is wrong
* and it is actually wider (often happens on serial lines).
@@ -351,6 +376,14 @@
BB_PUTCHAR(c);
bb_putchar('\b');
#endif
+ cmdedit_y++;
+ if (!ENABLE_UNICODE_WIDE_WCHARS || ofs_to_right == 0) {
+ width = 0;
+ } else { /* ofs_to_right > 0 */
+ /* wide char c didn't fit on prev line */
+ BB_PUTCHAR(c);
+ }
+ cmdedit_x = width;
}
}
@@ -389,10 +422,22 @@
if (num > cursor)
num = cursor;
- if (!num)
+ if (num == 0)
return;
cursor -= num;
+ if ((ENABLE_UNICODE_COMBINING_WCHARS || ENABLE_UNICODE_WIDE_WCHARS)
+ && unicode_status == UNICODE_ON
+ ) {
+ /* correct NUM to be equal to _screen_ width */
+ int n = num;
+ num = 0;
+ while (--n >= 0)
+ adjust_width_and_validate_wc(&num, command_ps[cursor + n]);
+ if (num == 0)
+ return;
+ }
+
if (cmdedit_x >= num) {
cmdedit_x -= num;
if (num <= 4) {
@@ -412,6 +457,8 @@
}
/* Need to go one or more lines up */
+//FIXME: this does not work correctly if the previous line has one "unfilled" screen position
+//left behind because a wide Unicode char did not fit into that single position.
num -= cmdedit_x;
{
unsigned w = cmdedit_termw; /* volatile var */
@@ -765,21 +812,13 @@
}
/* mask \+symbol and convert '\t' to ' ' */
- for (i = j = 0; matchBuf[i]; i++, j++)
+ for (i = j = 0; matchBuf[i]; i++, j++) {
if (matchBuf[i] == '\\') {
collapse_pos(j, j + 1);
int_buf[j] |= QUOT;
i++;
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
- if (matchBuf[i] == '\t') /* algorithm equivalent */
- int_buf[j] = ' ' | QUOT;
-#endif
}
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
- else if (matchBuf[i] == '\t')
- int_buf[j] = ' ';
-#endif
-
+ }
/* mask "symbols" or 'symbols' */
c2 = 0;
for (i = 0; int_buf[i]; i++) {
@@ -1774,7 +1813,7 @@
# if !ENABLE_UNICODE_PRESERVE_BROKEN
ic = CONFIG_SUBST_WCHAR;
# else
- ic = unicode_mark_inv_wchar(unicode_buf[0]);
+ ic = unicode_mark_raw_byte(unicode_buf[0]);
# endif
} else {
/* Valid unicode char, return its code */
@@ -2384,9 +2423,6 @@
"% ";
#endif
-#if ENABLE_FEATURE_NONPRINTABLE_INVERSE_PUT
- setlocale(LC_ALL, "");
-#endif
while (1) {
int l;
l = read_line_input(prompt, buff);
diff --git a/libbb/unicode.c b/libbb/unicode.c
index d1c6167..eb0ea61 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -418,7 +418,7 @@
* This implementation assumes that wchar_t characters are encoded
* in ISO 10646.
*/
-static int wcwidth(unsigned ucs)
+int FAST_FUNC wcwidth(unsigned ucs)
{
# if LAST_SUPPORTED_WCHAR >= 0x300
/* sorted list of non-overlapping intervals of non-spacing characters */