nano

nano with my custom patches
git clone git://bsandro.tech/nano
Log | Files | Refs | README | LICENSE

commit cd09482231de2f3c585e95e8fc8518531c856cab
parent c5955d14cedf015fb2f5e6f25f1c042a075195b8
Author: Benno Schulenberg <bensberg@telfort.nl>
Date:   Mon, 10 Jun 2019 19:27:42 +0200

tweaks: elide a function that is an amalgam of three others

The function was used just once and had a weird return value; with it
gone, some more code can also be excluded from a non-UTF8 build.

Make use of the fact that any single-byte character always occupies
just one column, and call the costly mbtowc() and wcwidth() only for
characters that actually are multibyte.

Diffstat:
M src/chars.c | 30 ------------------------------
M src/proto.h |  1 -
M src/winio.c | 57 +++++++++++++++++++++++++++++++++++++--------------------
3 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/src/chars.c b/src/chars.c @@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata) return control_rep(*c); } -/* Assess how many bytes the given (multibyte) character occupies. Return -1 - * if the byte sequence is invalid, and return the number of bytes minus 8 - * when it encodes an invalid codepoint. Also, in the second parameter, - * return the number of columns that the character occupies. */ -int length_of_char(const char *c, int *width) -{ -#ifdef ENABLE_UTF8 - if (use_utf8 && (signed char)*c < 0) { - wchar_t wc; - int charlen = mbtowc(&wc, c, MAXCHARLEN); - - /* If the sequence is invalid... */ - if (charlen < 0) - return -1; - - /* If the codepoint is invalid... */ - if (!is_valid_unicode(wc)) - return charlen - 8; - else { - *width = wcwidth(wc); - /* If the codepoint is unassigned, assume a width of one. */ - if (*width < 0) - *width = 1; - return charlen; - } - } else -#endif - return 1; -} - /* This function is equivalent to wcwidth() for multibyte characters. */ int mbwidth(const char *c) { diff --git a/src/proto.h b/src/proto.h @@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c); bool is_cntrl_mbchar(const char *c); bool is_word_mbchar(const char *c, bool allow_punct); char control_mbrep(const char *c, bool isdata); -int length_of_char(const char *c, int *width); int mbwidth(const char *c); char *make_mbchar(long chr, int *chr_mb_len); int char_length(const char *pointer); diff --git a/src/winio.c b/src/winio.c @@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span, } while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) { - int charlength, charwidth = 1; + int charlength, charwidth; if (*buf == ' ') { /* Show a space as a visible character, or as a space. 
*/ @@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span, continue; } - charlength = length_of_char(buf, &charwidth); + charlength = mblen(buf, MAXCHARLEN); /* If buf contains a control character, represent it. */ if (is_cntrl_mbchar(buf)) { @@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span, continue; } - /* If buf contains a valid non-control character, simply copy it. */ - if (charlength > 0) { - for (; charlength > 0; charlength--) - converted[index++] = *(buf++); + /* A one-byte character is necessarily one column wide. */ + if (charlength == 1) { + converted[index++] = *(buf++); + column++; + continue; + } - column += charwidth; -#ifdef USING_OLD_NCURSES - if (charwidth > 1) - seen_wide = TRUE; -#endif +#ifdef ENABLE_UTF8 + /* For a multibyte character, check whether it is valid, + * and determine whether it occupies one or two columns. */ + wchar_t wc; + int length = mbtowc(&wc, buf, MAXCHARLEN); + + if (charlength != length) + die("Different character lengths"); + + /* When invalid, represent it with the Replacement Character. */ + if (charlength < 0 || !is_valid_unicode(wc)) { + converted[index++] = '\xEF'; + converted[index++] = '\xBF'; + converted[index++] = '\xBD'; + column++; + buf += (charlength > 0 ? charlength : 1); continue; } - /* Represent an invalid starter byte with the Replacement Character. */ - converted[index++] = '\xEF'; - converted[index++] = '\xBF'; - converted[index++] = '\xBD'; - column++; - buf++; + /* For any valid character, just copy its bytes. */ + for (; charlength > 0; charlength--) + converted[index++] = *(buf++); + + charwidth = wcwidth(wc); + + /* If the codepoint is unassigned, assume a width of one. */ + column += (charwidth < 0 ? 1 : charwidth); - /* For invalid codepoints, skip extra bytes. 
*/ - if (charlength < -1) - buf += charlength + 7; +#ifdef USING_OLD_NCURSES + if (charwidth > 1) + seen_wide = TRUE; +#endif +#endif /* ENABLE_UTF8 */ } /* If there is more text than can be shown, make room for the ">". */