commit cc2b19c8fd8ba832a339c53ee719f0d5ab09b8c4
parent 430d3bad7af4a90e13b1cc764661045e85475f09
Author: Benno Schulenberg <bensberg@telfort.nl>
Date: Sun, 3 Jun 2018 18:27:15 +0200
chars: speed up the counting of string length for the plain ASCII case
In UTF-8, a byte whose most significant bit is zero is a complete
single-byte character, so for such a byte we can skip the call to mblen().
For files consisting of pure ASCII bytes (between 0x00 and 0x7F), this
change reduces the counting time of mbstrlen() by ninety-six percent.
This partially addresses https://savannah.gnu.org/bugs/?50406.
Diffstat:
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/chars.c b/src/chars.c
@@ -540,9 +540,13 @@ size_t mbstrlen(const char *s)
size_t n = 0;
while (*s != '\0' && maxlen > 0) {
- int length = mblen(s, MAXCHARLEN);
+ if ((signed char)*s < 0) {
+ int length = mblen(s, MAXCHARLEN);
+
+ s += (length < 0 ? 1 : length);
+ } else
+ s++;
- s += (length < 0 ? 1 : length);
maxlen--;
n++;
}