commit 0894587305387fec8ab78199a69e3d7abcc21e2f
parent 960e848cc7943e60b94861d6eb72e53de0c90ece
Author: Benno Schulenberg <bensberg@justemail.net>
Date: Mon, 6 Jun 2016 12:48:26 +0200
screen: elide another intermediate buffer for every visible character
Diffstat:
3 files changed, 51 insertions(+), 39 deletions(-)
diff --git a/src/chars.c b/src/chars.c
@@ -35,8 +35,6 @@
static bool use_utf8 = FALSE;
/* Whether we've enabled UTF-8 support. */
-static const char *const bad_mbchar = "\xEF\xBF\xBD";
-static const int bad_mbchar_len = 3;
/* Enable UTF-8 support. */
void utf8_init(void)
@@ -230,38 +228,32 @@ char control_mbrep(const char *c)
return control_rep(*c);
}
-/* c is a multibyte non-control character. We return that multibyte
- * character. If crep is an invalid multibyte sequence, it will be
- * replaced with Unicode 0xFFFD (Replacement Character). */
-char *mbrep(const char *c, char *crep, int *crep_len)
+/* Assess how many bytes the given (multibyte) character occupies. Return -1
+ * if the byte sequence is invalid, and return the number of bytes minus 8
+ * when the byte sequence encodes an invalid codepoint. */
+int length_of_char(const char *c)
{
- assert(c != NULL && crep != NULL && crep_len != NULL);
+ assert(c != NULL);
#ifdef ENABLE_UTF8
if (use_utf8) {
wchar_t wc;
+ int charlen = mbtowc(&wc, c, MB_CUR_MAX);
- /* Reject invalid Unicode characters. */
- if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
+ /* If the sequence is invalid... */
+ if (charlen < 0) {
mbtowc_reset();
- *crep_len = bad_mbchar_len;
- strncpy(crep, bad_mbchar, *crep_len);
- } else {
- *crep_len = wctomb(crep, wc);
-
- if (*crep_len < 0) {
- wctomb_reset();
- *crep_len = 0;
- }
+ return -1;
}
+
+ /* If the codepoint is invalid... */
+ if (!is_valid_unicode(wc))
+ return charlen - 8;
+ else
+ return charlen;
} else
#endif
- {
- *crep_len = 1;
- *crep = *c;
- }
-
- return crep;
+ return 1;
}
/* This function is equivalent to wcwidth() for multibyte characters. */
diff --git a/src/proto.h b/src/proto.h
@@ -188,7 +188,7 @@ bool is_punct_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);
char control_rep(const signed char c);
char control_mbrep(const char *c);
-char *mbrep(const char *c, char *crep, int *crep_len);
+int length_of_char(const char *c);
int mbwidth(const char *c);
int mb_cur_max(void);
char *make_mbchar(long chr, int *chr_mb_len);
diff --git a/src/winio.c b/src/winio.c
@@ -1780,6 +1780,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
}
while (*buf != '\0') {
+ int charlength;
+
if (*buf == ' ') {
/* Show a space as a visible character, or as a space. */
#ifndef NANO_TINY
@@ -1792,6 +1794,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#endif
converted[index++] = ' ';
start_col++;
+ buf++;
+ continue;
} else if (*buf == '\t') {
/* Show a tab as a visible character, or as a space. */
#ifndef NANO_TINY
@@ -1809,30 +1813,46 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
converted[index++] = ' ';
start_col++;
}
+ buf++;
+ continue;
+ }
+
+ charlength = length_of_char(buf);
+
/* If buf contains a control character, represent it. */
- } else if (is_cntrl_mbchar(buf)) {
+ if (is_cntrl_mbchar(buf)) {
converted[index++] = '^';
converted[index++] = control_mbrep(buf);
start_col += 2;
- /* If buf contains a non-control character, interpret it. If buf
- * contains an invalid multibyte sequence, display it as such. */
- } else {
- char *character = charalloc(mb_cur_max());
- int charlen, i;
- character = mbrep(buf, character, &charlen);
-
- for (i = 0; i < charlen; i++)
- converted[index++] = character[i];
+ buf += charlength;
+ continue;
+ }
- start_col += mbwidth(character);
+ /* If buf contains a valid non-control character, simply copy it. */
+ if (charlength > 0) {
+ int width = mbwidth(buf);
- free(character);
+ for (; charlength > 0; charlength--)
+ converted[index++] = *(buf++);
- if (mbwidth(buf) > 1)
+ start_col += width;
+ if (width > 1)
seen_wide = TRUE;
+
+ continue;
}
- buf += parse_mbchar(buf, NULL, NULL);
+ /* Represent an invalid sequence with the Replacement Character. */
+ converted[index++] = '\xEF';
+ converted[index++] = '\xBF';
+ converted[index++] = '\xBD';
+
+ start_col += 1;
+ buf++;
+
+ /* For invalid codepoints, skip extra bytes. */
+ if (charlength < -1)
+ buf += charlength + 7;
}
/* Null-terminate converted. */