nano

nano with my custom patches
git clone git://bsandro.tech/nano
Log | Files | Refs | README | LICENSE

commit 5129e718d7f2f669bdbf26c81a0ed920940b4a06
parent 8c406bc875e2893564721eeafe1c51b1a25192b0
Author: Benno Schulenberg <bensberg@telfort.nl>
Date:   Wed,  6 Jan 2021 12:41:49 +0100

chars: speed up the handling of invalid UTF-8 starter bytes

The first byte of a multi-byte UTF-8 sequence is always 0xC2 or higher
(valid starter bytes lie in the range 0xC2...0xF4).  Any byte below 0xC2
cannot be a starter byte and can thus immediately be treated as a single
byte, without first asking mblen() or mbtowc() about it.

Diffstat:
M src/chars.c | 14 ++++++--------
M src/winio.c |  2 +-
2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/chars.c b/src/chars.c
@@ -180,7 +180,7 @@ char control_mbrep(const char *c, bool isdata)
 int mbwidth(const char *c)
 {
     /* Ask for the width only when the character isn't plain ASCII. */
-    if ((signed char)*c <= 0) {
+    if ((unsigned char)*c > 0xC1) {
         wchar_t wc;
         int width;

@@ -227,7 +227,7 @@ int char_length(const char *pointer)
 {
 #ifdef ENABLE_UTF8
     /* If possibly a multibyte character, get its length; otherwise, it's 1. */
-    if ((signed char)*pointer < 0) {
+    if ((unsigned char)*pointer > 0xC1) {
         int length = mblen(pointer, MAXCHARLEN);

         return (length < 0 ? 1 : length);
@@ -243,7 +243,7 @@ size_t mbstrlen(const char *pointer)

     while (*pointer != '\0') {
 #ifdef ENABLE_UTF8
-        if ((signed char)*pointer < 0) {
+        if ((unsigned char)*pointer > 0xC1) {
             int length = mblen(pointer, MAXCHARLEN);

             pointer += (length < 0 ? 1 : length);
@@ -265,7 +265,7 @@ int collect_char(const char *string, char *thechar)

 #ifdef ENABLE_UTF8
     /* If this is a UTF-8 starter byte, get the number of bytes of the character. */
-    if ((signed char)*string < 0) {
+    if ((unsigned char)*string > 0xC1) {
         charlen = mblen(string, MAXCHARLEN);

         /* When the multibyte sequence is invalid, only take the first byte. */
@@ -286,7 +286,7 @@ int collect_char(const char *string, char *thechar)
 int advance_over(const char *string, size_t *column)
 {
 #ifdef ENABLE_UTF8
-    if ((signed char)*string < 0) {
+    if ((unsigned char)*string > 0xC1) {
         int charlen = mblen(string, MAXCHARLEN);

         if (charlen > 0) {
@@ -310,10 +310,8 @@ int advance_over(const char *string, size_t *column)
         *column += 2;
     } else if (*string == 0x7F)
         *column += 2;
-#ifndef ENABLE_UTF8
-    else if (0x7F < (unsigned char)*string && (unsigned char)*string < 0xA0)
+    else if (!use_utf8 && 0x7F < (unsigned char)*string && (unsigned char)*string < 0xA0)
         *column += 2;
-#endif
     else
         *column += 1;

diff --git a/src/winio.c b/src/winio.c
@@ -2141,7 +2141,7 @@ void minibar(void)
             sprintf(hexadecimal, openfile->current->next ? "U+000A" : "------");
         else if (*this_position == '\n')
             sprintf(hexadecimal, "U+0000");
-        else if ((unsigned char)*this_position >= 0x80 &&
+        else if ((unsigned char)*this_position > 0xC1 &&
                         mbtowc(&widecode, this_position, MAXCHARLEN) >= 0)
             sprintf(hexadecimal, "U+%04X", widecode);
         else