commit cd09482231de2f3c585e95e8fc8518531c856cab
parent c5955d14cedf015fb2f5e6f25f1c042a075195b8
Author: Benno Schulenberg <bensberg@telfort.nl>
Date: Mon, 10 Jun 2019 19:27:42 +0200
tweaks: elide a function that is an amalgam of three others
In addition, the function was used just once and had a weird return value.
And with it gone, some more code can be excluded from a non-UTF8 build.
Make use of the fact that any single-byte character always occupies
just one column, and call the costly mbtowc() and wcwidth() only for
characters that actually are multibyte.
Diffstat:
3 files changed, 37 insertions(+), 51 deletions(-)
diff --git a/src/chars.c b/src/chars.c
@@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata)
return control_rep(*c);
}
-/* Assess how many bytes the given (multibyte) character occupies. Return -1
- * if the byte sequence is invalid, and return the number of bytes minus 8
- * when it encodes an invalid codepoint. Also, in the second parameter,
- * return the number of columns that the character occupies. */
-int length_of_char(const char *c, int *width)
-{
-#ifdef ENABLE_UTF8
- if (use_utf8 && (signed char)*c < 0) {
- wchar_t wc;
- int charlen = mbtowc(&wc, c, MAXCHARLEN);
-
- /* If the sequence is invalid... */
- if (charlen < 0)
- return -1;
-
- /* If the codepoint is invalid... */
- if (!is_valid_unicode(wc))
- return charlen - 8;
- else {
- *width = wcwidth(wc);
- /* If the codepoint is unassigned, assume a width of one. */
- if (*width < 0)
- *width = 1;
- return charlen;
- }
- } else
-#endif
- return 1;
-}
-
/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{
diff --git a/src/proto.h b/src/proto.h
@@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c);
bool is_cntrl_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);
char control_mbrep(const char *c, bool isdata);
-int length_of_char(const char *c, int *width);
int mbwidth(const char *c);
char *make_mbchar(long chr, int *chr_mb_len);
int char_length(const char *pointer);
diff --git a/src/winio.c b/src/winio.c
@@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span,
}
while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) {
- int charlength, charwidth = 1;
+ int charlength, charwidth;
if (*buf == ' ') {
/* Show a space as a visible character, or as a space. */
@@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span,
continue;
}
- charlength = length_of_char(buf, &charwidth);
+ charlength = mblen(buf, MAXCHARLEN);
/* If buf contains a control character, represent it. */
if (is_cntrl_mbchar(buf)) {
@@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span,
continue;
}
- /* If buf contains a valid non-control character, simply copy it. */
- if (charlength > 0) {
- for (; charlength > 0; charlength--)
- converted[index++] = *(buf++);
+ /* A one-byte character is necessarily one column wide. */
+ if (charlength == 1) {
+ converted[index++] = *(buf++);
+ column++;
+ continue;
+ }
- column += charwidth;
-#ifdef USING_OLD_NCURSES
- if (charwidth > 1)
- seen_wide = TRUE;
-#endif
+#ifdef ENABLE_UTF8
+ /* For a multibyte character, check whether it is valid,
+ * and determine whether it occupies one or two columns. */
+ wchar_t wc;
+ int length = mbtowc(&wc, buf, MAXCHARLEN);
+
+ if (charlength != length)
+ die("Different character lengths");
+
+ /* When invalid, represent it with the Replacement Character. */
+ if (charlength < 0 || !is_valid_unicode(wc)) {
+ converted[index++] = '\xEF';
+ converted[index++] = '\xBF';
+ converted[index++] = '\xBD';
+ column++;
+ buf += (charlength > 0 ? charlength : 1);
continue;
}
- /* Represent an invalid starter byte with the Replacement Character. */
- converted[index++] = '\xEF';
- converted[index++] = '\xBF';
- converted[index++] = '\xBD';
- column++;
- buf++;
+ /* For any valid character, just copy its bytes. */
+ for (; charlength > 0; charlength--)
+ converted[index++] = *(buf++);
+
+ charwidth = wcwidth(wc);
+
+ /* If the codepoint is unassigned, assume a width of one. */
+ column += (charwidth < 0 ? 1 : charwidth);
- /* For invalid codepoints, skip extra bytes. */
- if (charlength < -1)
- buf += charlength + 7;
+#ifdef USING_OLD_NCURSES
+ if (charwidth > 1)
+ seen_wide = TRUE;
+#endif
+#endif /* ENABLE_UTF8 */
}
/* If there is more text than can be shown, make room for the ">". */