tweaks: elide a function that is an amalgam of three others - nano

commit cd09482231de2f3c585e95e8fc8518531c856cab
parent c5955d14cedf015fb2f5e6f25f1c042a075195b8
Author: Benno Schulenberg <bensberg@telfort.nl>
Date:   Mon, 10 Jun 2019 19:27:42 +0200

tweaks: elide a function that is an amalgam of three others

In addition, the function was used just once, had a weird return value,
and now some more code can be excluded from a non-UTF8 build.

Make use of the fact that any single-byte character always occupies
just one column, and call the costly mbtowc() and wcwidth() only for
characters that actually are multibyte.

Diffstat:
M src/chars.c  | 30 ------------------------------
M src/proto.h  | 1 -
M src/winio.c  | 57 +++++++++++++++++++++++++++++++++++++--------------------

3 files changed, 37 insertions(+), 51 deletions(-)
diff --git a/src/chars.c b/src/chars.c
@@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata)
 		return control_rep(*c);
 }
 
-/* Assess how many bytes the given (multibyte) character occupies.  Return -1
- * if the byte sequence is invalid, and return the number of bytes minus 8
- * when it encodes an invalid codepoint.  Also, in the second parameter,
- * return the number of columns that the character occupies. */
-int length_of_char(const char *c, int *width)
-{
-#ifdef ENABLE_UTF8
-	if (use_utf8 && (signed char)*c < 0) {
-		wchar_t wc;
-		int charlen = mbtowc(&wc, c, MAXCHARLEN);
-
-		/* If the sequence is invalid... */
-		if (charlen < 0)
-			return -1;
-
-		/* If the codepoint is invalid... */
-		if (!is_valid_unicode(wc))
-			return charlen - 8;
-		else {
-			*width = wcwidth(wc);
-			/* If the codepoint is unassigned, assume a width of one. */
-			if (*width < 0)
-				*width = 1;
-			return charlen;
-		}
-	} else
-#endif
-		return 1;
-}
-
 /* This function is equivalent to wcwidth() for multibyte characters. */
 int mbwidth(const char *c)
 {
diff --git a/src/proto.h b/src/proto.h
@@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c);
 bool is_cntrl_mbchar(const char *c);
 bool is_word_mbchar(const char *c, bool allow_punct);
 char control_mbrep(const char *c, bool isdata);
-int length_of_char(const char *c, int *width);
 int mbwidth(const char *c);
 char *make_mbchar(long chr, int *chr_mb_len);
 int char_length(const char *pointer);
diff --git a/src/winio.c b/src/winio.c
@@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span,
 	}
 
 	while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) {
-		int charlength, charwidth = 1;
+		int charlength, charwidth;
 
 		if (*buf == ' ') {
 			/* Show a space as a visible character, or as a space. */
@@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span,
 			continue;
 		}
 
-		charlength = length_of_char(buf, &charwidth);
+		charlength = mblen(buf, MAXCHARLEN);
 
 		/* If buf contains a control character, represent it. */
 		if (is_cntrl_mbchar(buf)) {
@@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span,
 			continue;
 		}
 
-		/* If buf contains a valid non-control character, simply copy it. */
-		if (charlength > 0) {
-			for (; charlength > 0; charlength--)
-				converted[index++] = *(buf++);
+		/* A one-byte character is necessarily one column wide. */
+		if (charlength == 1) {
+			converted[index++] = *(buf++);
+			column++;
+			continue;
+		}
 
-			column += charwidth;
-#ifdef USING_OLD_NCURSES
-			if (charwidth > 1)
-				seen_wide = TRUE;
-#endif
+#ifdef ENABLE_UTF8
+		/* For a multibyte character, check whether it is valid,
+		 * and determine whether it occupies one or two columns. */
+		wchar_t wc;
+		int length = mbtowc(&wc, buf, MAXCHARLEN);
+
+		if (charlength != length)
+			die("Different character lengths");
+
+		/* When invalid, represent it with the Replacement Character. */
+		if (charlength < 0 || !is_valid_unicode(wc)) {
+			converted[index++] = '\xEF';
+			converted[index++] = '\xBF';
+			converted[index++] = '\xBD';
+			column++;
+			buf += (charlength > 0 ? charlength : 1);
 			continue;
 		}
 
-		/* Represent an invalid starter byte with the Replacement Character. */
-		converted[index++] = '\xEF';
-		converted[index++] = '\xBF';
-		converted[index++] = '\xBD';
-		column++;
-		buf++;
+		/* For any valid character, just copy its bytes. */
+		for (; charlength > 0; charlength--)
+			converted[index++] = *(buf++);
+
+		charwidth = wcwidth(wc);
+
+		/* If the codepoint is unassigned, assume a width of one. */
+		column += (charwidth < 0 ? 1 : charwidth);
 
-		/* For invalid codepoints, skip extra bytes. */
-		if (charlength < -1)
-			buf += charlength + 7;
+#ifdef USING_OLD_NCURSES
+		if (charwidth > 1)
+			seen_wide = TRUE;
+#endif
+#endif /* ENABLE_UTF8 */
 	}
 
 	/* If there is more text than can be shown, make room for the ">". */

	nano nano with my custom patches
	git clone git://bsandro.tech/nano
	Log | Files | Refs | README | LICENSE

M	src/chars.c	|	30	------------------------------
M	src/proto.h	|	1	-
M	src/winio.c	|	57	+++++++++++++++++++++++++++++++++++++--------------------