commit 38156d4491cd2134874e6cdc8eed10e4bb584a35
parent 666644efbcd386361d8397cf078e7d032ce21571
Author: David Lawrence Ramsey <pooka109@gmail.com>
Date: Tue, 15 Mar 2005 05:44:03 +0000
make the rest of the justify code support multibyte characters
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
Diffstat:
M | ChangeLog | | | 12 | ++++++------ |
M | src/chars.c | | | 45 | +++++++++++++++++++++++++++++++++++++++++++++ |
M | src/nano.c | | | 98 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
M | src/proto.h | | | 3 | +++ |
4 files changed, 120 insertions(+), 38 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -175,12 +175,12 @@ CVS code -
paragraph-searching utility functions when possible instead of
duplicating code. Also overhaul the justify code to make it
leave the right number of spaces at the ends of the lines of a
- paragraph, to make it (partially) support multibyte
- characters, and to make it simpler. Also, don't remove a
- space after a duplicate character in punct anymore, as it
- doesn't really make us more compatible with Pico. New
- functions do_para_begin_void() and do_para_end_void(); changes
- to justify_format(), do_para_begin(), inpar(), do_para_end(),
+ paragraph, to make it support multibyte characters, and to
+ make it simpler. Also, don't remove a space after a duplicate
+ character in punct anymore, as it doesn't really make us more
+ compatible with Pico. New functions mbstrchr(),
+ do_para_begin_void(), and do_para_end_void(); changes to
+ justify_format(), do_para_begin(), inpar(), do_para_end(),
break_line(), do_para_search() (renamed find_paragraph()), and
do_justify(); removal of breakable(). (DLR)
- Still more steps toward full wide/multibyte character support.
diff --git a/src/chars.c b/src/chars.c
@@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen)
nstrnlen(s, maxlen);
#endif
}
+
+#ifndef DISABLE_JUSTIFY
+/* This function is equivalent to strchr() for multibyte strings: it
+ * returns a pointer to the first occurrence in s of the (possibly
+ * multibyte) character that c points to, or NULL if that character
+ * does not occur in s.  Only the first character at c is examined.
+ *
+ * Note that, unlike strchr(), the multibyte branch never matches the
+ * terminating null character of s: searching for "" returns NULL. */
+char *mbstrchr(const char *s, char *c)
+{
+    assert(s != NULL && c != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+        char *s_mb = charalloc(MB_CUR_MAX);
+            /* Scratch buffer that parse_mbchar() fills with the
+             * current character of s. */
+        bool bad_c_mb = FALSE;
+            /* Whether c could not be decoded as a multibyte
+             * character and was taken as a raw byte instead. */
+        wchar_t ws, wc;
+        int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
+
+        if (c_mb_len <= 0) {
+            /* Reset the conversion state and fall back to comparing
+             * the single byte at c. */
+            mbtowc(NULL, NULL, 0);
+            wc = (unsigned char)*c;
+            bad_c_mb = (c_mb_len < 0);
+        }
+
+        while (*s != '\0') {
+            int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
+            int ws_len = mbtowc(&ws, s_mb, s_mb_len);
+            bool bad_s_mb = FALSE;
+
+            if (ws_len <= 0) {
+                mbtowc(NULL, NULL, 0);
+                ws = (unsigned char)*s;
+                bad_s_mb = (ws_len < 0);
+            }
+
+            /* A raw byte from an undecodable sequence must not match
+             * a properly decoded character that merely happens to
+             * have the same numeric value, so both sides have to be
+             * of the same kind. */
+            if (bad_s_mb == bad_c_mb && ws == wc)
+                break;
+
+            s += s_mb_len;
+        }
+
+        free(s_mb);
+
+        /* Decide on the scan position rather than on ws, which was
+         * never assigned if s was empty.  Reaching the terminator
+         * means that no character matched. */
+        return (*s == '\0') ? NULL : (char *)s;
+    } else
+#endif
+        return strchr(s, *c);
+}
+#endif
diff --git a/src/nano.c b/src/nano.c
@@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip)
new_end = new_paragraph_data + skip;
while (*end != '\0') {
+ int end_len;
+
/* If this character is blank, make sure that it's a space with
* no blanks after it. */
- if (is_blank_char(*end)) {
+ if (is_blank_mbchar(end)) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
*new_end = ' ';
new_end++;
- end++;
+ end += end_len;
- while (*end != '\0' && is_blank_char(*end)) {
- end++;
- shift++;
+ while (*end != '\0' && is_blank_mbchar(end)) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
+ end += end_len;
+ shift += end_len;
#ifndef NANO_SMALL
/* Keep track of the change in the current line. */
if (mark_beginbuf == paragraph &&
mark_beginx >= end - paragraph->data)
- mark_shift++;
+ mark_shift += end_len;
#endif
}
/* If this character is punctuation optionally followed by a
* bracket and then followed by blanks, make sure there are no
* more than two blanks after it, and make sure that the blanks
* are spaces. */
- } else if (strchr(punct, *end) != NULL) {
- *new_end = *end;
- new_end++;
- end++;
+ } else if (mbstrchr(punct, end) != NULL) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
- if (*end != '\0' && strchr(brackets, *end) != NULL) {
+ while (end_len > 0) {
*new_end = *end;
new_end++;
end++;
+ end_len--;
+ }
+
+ if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
+ while (end_len > 0) {
+ *new_end = *end;
+ new_end++;
+ end++;
+ end_len--;
+ }
}
- if (*end != '\0' && is_blank_char(*end)) {
+ if (*end != '\0' && is_blank_mbchar(end)) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
*new_end = ' ';
new_end++;
- end++;
+ end += end_len;
}
- if (*end != '\0' && is_blank_char(*end)) {
+ if (*end != '\0' && is_blank_mbchar(end)) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
*new_end = ' ';
new_end++;
- end++;
+ end += end_len;
}
- while (*end != '\0' && is_blank_char(*end)) {
- end++;
- shift++;
+ while (*end != '\0' && is_blank_mbchar(end)) {
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
+ end += end_len;
+ shift += end_len;
#ifndef NANO_SMALL
- /* Keep track of the change in the current line. */
- if (mark_beginbuf == paragraph &&
- mark_beginx >= end - paragraph->data)
- mark_shift++;
+ /* Keep track of the change in the current line. */
+ if (mark_beginbuf == paragraph &&
+ mark_beginx >= end - paragraph->data)
+ mark_shift += end_len;
#endif
}
/* If this character is neither blank nor punctuation, leave it
* alone. */
} else {
- *new_end = *end;
- new_end++;
- end++;
+ end_len = parse_mbchar(end, NULL, NULL, NULL);
+
+ while (end_len > 0) {
+ *new_end = *end;
+ new_end++;
+ end++;
+ end_len--;
+ }
}
}
@@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
* found with short enough display width. */
ssize_t cur_loc = 0;
/* Current index in line. */
+ int line_len;
assert(line != NULL);
while (*line != '\0' && goal >= 0) {
- int line_len;
size_t pos = 0;
line_len = parse_mbchar(line, NULL, NULL, &pos);
@@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
bool found_blank = FALSE;
while (*line != '\0') {
- int line_len = parse_mbchar(line, NULL, NULL, NULL);
+ line_len = parse_mbchar(line, NULL, NULL, NULL);
if (is_blank_mbchar(line)) {
if (!found_blank)
@@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
}
}
- /* Perhaps the character after blank_loc is a blank. But because
- * of justify_format(), there can be only two adjacent. */
- if (*(line - cur_loc + blank_loc + 1) == ' ' ||
- *(line - cur_loc + blank_loc + 1) == '\0')
- blank_loc++;
+ /* Move to the last blank after blank_loc, if there is one. */
+ line -= cur_loc;
+ line += blank_loc;
+ line_len = parse_mbchar(line, NULL, NULL, NULL);
+ line += line_len;
+
+ while (*line != '\0' && is_blank_mbchar(line)) {
+ line_len = parse_mbchar(line, NULL, NULL, NULL);
+
+ line += line_len;
+ blank_loc += line_len;
+ }
return blank_loc;
}
diff --git a/src/proto.h b/src/proto.h
@@ -207,6 +207,9 @@ size_t mbstrlen(const char *s);
size_t nstrnlen(const char *s, size_t maxlen);
#endif
size_t mbstrnlen(const char *s, size_t maxlen);
+#ifndef DISABLE_JUSTIFY
+char *mbstrchr(const char *s, char *c);
+#endif
/* Public functions in color.c. */
#ifdef ENABLE_COLOR