commit fc693210d5044b0735bfed92433a7cfb979f5521
parent c0b9d19ed792d3a7391febc0f58569320162f4cf
Author: David Lawrence Ramsey <pooka109@gmail.com>
Date: Thu, 23 Dec 2004 17:43:27 +0000
more steps toward UTF-8 support: port all the parts of DB's UTF-8 patch
that I currently understand to current CVS, with modifications of mine
to autodetect UTF-8 support and to display multibyte strings instead of
wide strings
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2193 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
Diffstat:
9 files changed, 543 insertions(+), 116 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -50,6 +50,16 @@ CVS code -
in order for output to work properly. (DLR; buffered
input/output based on ideas from mutt 1.4.2.1; double-Escape
input of Unicode characters suggested by Michael Piefel)
+ - More steps toward wide character/multibyte character support.
+ Movement and cursor display in the edit window should now work
+ properly with files containing multibyte characters, and text
+ display of such files should work properly some of the time.
+ New functions control_rep(), parse_char(), move_left(),
+ move_right(), and display_string_len(); changes to do_left(),
+ do_right(), do_delete(), breakable(), break_line(),
+ do_output(), get_buffer(), unget_input(), actual_x(),
+ strnlenpt(), display_string(), titlebar(), and do_credits().
+ (David Benbennick and DLR)
- cut.c:
do_cut_text()
- If keep_cutbuffer is FALSE, only blow away the text in the
@@ -92,6 +102,10 @@ CVS code -
loop if there are no more paragraphs after the current one and
the paragraph search left us on the magicline, so as to avoid
a segfault. (DLR)
+ main()
+ - Try to automatically detect whether UTF-8 support is needed by
+ setting the NO_UTF8 flag if setlocale() returns a string that
+ doesn't contain "UTF-8". (DLR)
- winio.c:
titlebar()
- Rename some variables for consistency, make space an int
@@ -135,6 +149,8 @@ CVS code -
- Remove specific references to control key shortcuts. (DLR)
- Check for the wide version of ncurses, without which multibyte
strings don't seem to be displayed properly. (DLR)
+ - Check for stddef.h and wchar.h, for those systems that need
+ one of the two for the wcwidth() prototype. (DLR)
- doc/nanorc.sample:
- Add return to the "c-file" regexes. (DLR)
diff --git a/configure.ac b/configure.ac
@@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext])
dnl Checks for header files.
AC_HEADER_STDC
-AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termio.h termios.h unistd.h)
+AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h stddef.h termio.h termios.h unistd.h wchar.h)
AC_CHECK_HEADER(regex.h,
AC_MSG_CHECKING([for broken regexec])
AC_TRY_RUN([
diff --git a/src/move.c b/src/move.c
@@ -252,7 +252,7 @@ void do_left(int allow_update)
{
size_t pww_save = placewewant;
if (current_x > 0)
- current_x--;
+ current_x = move_left(current->data, current_x);
else if (current != fileage) {
do_up();
current_x = strlen(current->data);
@@ -274,7 +274,7 @@ void do_right(int allow_update)
assert(current_x <= strlen(current->data));
if (current->data[current_x] != '\0')
- current_x++;
+ current_x = move_right(current->data, current_x);
else if (current->next != NULL) {
do_down();
current_x = 0;
diff --git a/src/nano.c b/src/nano.c
@@ -1185,18 +1185,25 @@ void do_delete(void)
placewewant = xplustabs();
if (current->data[current_x] != '\0') {
- size_t linelen = strlen(current->data + current_x);
+ int char_len = parse_char(current->data + current_x, NULL,
+ NULL
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+ size_t line_len = strlen(current->data + current_x);
assert(current_x < strlen(current->data));
/* Let's get dangerous. */
- charmove(&current->data[current_x], &current->data[current_x + 1],
- linelen);
+ charmove(&current->data[current_x],
+ &current->data[current_x + char_len],
+ line_len - char_len + 1);
- null_at(&current->data, linelen + current_x - 1);
+ null_at(&current->data, current_x + line_len - char_len);
#ifndef NANO_SMALL
if (current_x < mark_beginx && mark_beginbuf == current)
- mark_beginx--;
+ mark_beginx -= char_len;
#endif
} else if (current != filebot && (current->next != filebot ||
current->data[0] == '\0')) {
@@ -1211,8 +1218,8 @@ void do_delete(void)
if (current->data[current_x] == '\0')
do_refresh = TRUE;
- current->data = charealloc(current->data, current_x +
- strlen(foo->data) + 1);
+ current->data = charealloc(current->data,
+ current_x + strlen(foo->data) + 1);
strcpy(current->data + current_x, foo->data);
#ifndef NANO_SMALL
if (mark_beginbuf == current->next) {
@@ -1227,13 +1234,13 @@ void do_delete(void)
delete_node(foo);
renumber(current);
totlines--;
+ totsize--;
#ifndef DISABLE_WRAPPING
wrap_reset();
#endif
} else
return;
- totsize--;
set_modified();
#ifdef ENABLE_COLOR
@@ -2494,15 +2501,21 @@ filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t
/* Is it possible to break line at or before goal? */
bool breakable(const char *line, ssize_t goal)
{
- for (; *line != '\0' && goal >= 0; line++) {
+ while (*line != '\0' && goal >= 0) {
+ size_t pos = 0;
+
if (isblank(*line))
return TRUE;
- if (is_cntrl_char(*line))
- goal -= 2;
- else
- goal -= 1;
+ line += parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+
+ goal -= pos;
}
+
/* If goal is not negative, the whole line (one word) was short
* enough. */
return goal >= 0;
@@ -2522,32 +2535,49 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
/* Current index in line. */
assert(line != NULL);
- for (; *line != '\0' && goal >= 0; line++, cur_loc++) {
+
+ while (*line != '\0' && goal >= 0) {
+ size_t pos = 0;
+ int line_len;
+
if (*line == ' ')
space_loc = cur_loc;
+
assert(*line != '\t');
- if (is_cntrl_char(*line))
- goal -= 2;
- else
- goal--;
+ line_len = parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+
+ goal -= pos;
+ line += line_len;
+ cur_loc += line_len;
}
+
if (goal >= 0)
/* In fact, the whole line displays shorter than goal. */
return cur_loc;
+
if (space_loc == -1) {
/* No space found short enough. */
- if (force)
- for (; *line != '\0'; line++, cur_loc++)
- if (*line == ' ' && *(line + 1) != ' ' && *(line + 1) != '\0')
+ if (force) {
+ for (; *line != '\0'; line++, cur_loc++) {
+ if (*line == ' ' && *(line + 1) != ' ' &&
+ *(line + 1) != '\0')
return cur_loc;
- return -1;
+ }
+ return -1;
+ }
}
+
/* Perhaps the character after space_loc is a space. But because
* of justify_format(), there can be only two adjacent. */
if (*(line - cur_loc + space_loc + 1) == ' ' ||
*(line - cur_loc + space_loc + 1) == '\0')
space_loc++;
+
return space_loc;
}
@@ -3639,13 +3669,7 @@ void do_output(int *kbinput, size_t kbinput_len)
mark_beginx += key_len;
#endif
- {
- /* FIXME: The movement functions should take multibyte
- * characters into account. */
- int j;
- for (j = 0; j < key_len; j++)
- do_right(FALSE);
- }
+ do_right(FALSE);
#ifndef DISABLE_WRAPPING
/* If we're wrapping text, we need to call edit_refresh(). */
@@ -3759,7 +3783,21 @@ int main(int argc, char **argv)
};
#endif
+#ifdef NANO_WIDE
+ {
+ /* If the locale set doesn't exist, or it exists but doesn't
+ * include the string "UTF-8", we shouldn't use UTF-8
+ * support. */
+ char *locale = setlocale(LC_ALL, "");
+
+ if (locale == NULL || (locale != NULL &&
+ strstr(locale, "UTF-8") == NULL))
+ SET(NO_UTF8);
+ }
+#else
setlocale(LC_ALL, "");
+#endif
+
#ifdef ENABLE_NLS
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
diff --git a/src/nano.h b/src/nano.h
@@ -83,6 +83,7 @@
#define N_(string) gettext_noop(string)
/* Mark a string that will be sent to gettext later. */
+#include <stddef.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "config.h"
diff --git a/src/proto.h b/src/proto.h
@@ -475,7 +475,15 @@ int is_blank_char(int c);
int is_cntrl_char(int c);
bool is_byte_char(int c);
int num_of_digits(int n);
+unsigned char control_rep(unsigned char c);
bool parse_num(const char *str, ssize_t *val);
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+ , bool *bad_char
+#endif
+ );
+size_t move_left(const char *str, size_t pos);
+size_t move_right(const char *str, size_t pos);
void align(char **strp);
void null_at(char **data, size_t index);
void unsunder(char *str, size_t true_len);
@@ -570,7 +578,10 @@ void blank_edit(void);
void blank_statusbar(void);
void check_statusblank(void);
void blank_bottombars(void);
-char *display_string(const char *buf, size_t start_col, size_t len);
+size_t display_string_len(const char *buf, size_t start_col, size_t
+ end_col);
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+ dollars);
void nanoget_repaint(const char *buf, const char *inputbuf, size_t x);
int nanogetstr(bool allow_tabs, const char *buf, const char *def,
#ifndef NANO_SMALL
diff --git a/src/search.c b/src/search.c
@@ -83,7 +83,7 @@ void not_found_msg(const char *str)
assert(str != NULL);
- disp = display_string(str, 0, (COLS / 2) + 1);
+ disp = display_string(str, 0, (COLS / 2) + 1, FALSE);
numchars = strnlen(disp, COLS / 2);
statusbar(_("\"%.*s%s\" not found"), numchars, disp,
@@ -150,7 +150,7 @@ int search_init(bool replacing, bool use_answer)
#endif
if (last_search[0] != '\0') {
- char *disp = display_string(last_search, 0, COLS / 3);
+ char *disp = display_string(last_search, 0, COLS / 3, FALSE);
buf = charalloc(COLS / 3 + 7);
/* We use COLS / 3 here because we need to see more on the
@@ -748,7 +748,8 @@ ssize_t do_replace_loop(const char *needle, const filestruct
size_t xpt = xplustabs();
exp_word = display_string(current->data, xpt,
- strnlenpt(current->data, match_len + current_x) - xpt);
+ strnlenpt(current->data, match_len + current_x) - xpt,
+ FALSE);
curs_set(0);
do_replace_highlight(TRUE, exp_word);
diff --git a/src/utils.c b/src/utils.c
@@ -33,6 +33,10 @@
#include "proto.h"
#include "nano.h"
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
#ifdef HAVE_REGEX_H
#ifdef BROKEN_REGEXEC
#undef regexec
@@ -92,6 +96,19 @@ int num_of_digits(int n)
return i;
}
+/* c is a control character. It displays as ^@, ^?, or ^[ch] where ch
+ * is c + 64. We return that character. */
+unsigned char control_rep(unsigned char c)
+{
+ /* Treat newlines embedded in a line as encoded nulls. */
+ if (c == '\n')
+ return '@';
+ else if (c == NANO_CONTROL_8)
+ return '?';
+ else
+ return c + 64;
+}
+
/* Read a ssize_t from str, and store it in *val (if val is not NULL).
* On error, we return FALSE and don't change *val. Otherwise, we
* return TRUE. */
@@ -113,6 +130,143 @@ bool parse_num(const char *str, ssize_t *val)
return TRUE;
}
+/* Parse a multi-byte character from str. Return the number of bytes
+ * used. If chr isn't NULL, store the wide character in it. If col
+ * isn't NULL, store the new display width in it. If *str is '\t', we
+ * expect col to have the current display width. If bad_char isn't
+ * NULL, set it to TRUE if we have a null byte or a bad multibyte
+ * character. */
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+ , bool *bad_char
+#endif
+ )
+{
+ int wide_str, wide_str_len;
+
+ assert(str != NULL);
+
+#ifdef NANO_WIDE
+ if (bad_char != NULL)
+ *bad_char = FALSE;
+
+ if (!ISSET(NO_UTF8)) {
+ wchar_t tmp;
+
+ /* Get the wide character equivalent of the multibyte
+ * character. */
+ wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX);
+ wide_str = (int)tmp;
+
+ /* If str contains a null byte or an invalid multibyte
+ * character, interpret str's first byte as a single-byte
+ * sequence and set bad_char to TRUE. */
+ if (wide_str_len <= 0) {
+ wide_str_len = 1;
+ wide_str = (unsigned char)*str;
+ if (bad_char != NULL)
+ *bad_char = TRUE;
+ }
+
+ /* Save the wide character in chr. */
+ if (chr != NULL)
+ *chr = wide_str;
+
+ /* Save the column width of the wide character in col. */
+ if (col != NULL) {
+ /* If we have a tab, get its width in columns using the
+ * current value of col. */
+ if (wide_str == '\t')
+ *col += tabsize - *col % tabsize;
+ /* If we have a control character, get its width using one
+ * column for the "^" that will be displayed in front of it,
+ * and the width in columns of its visible equivalent as
+ * returned by control_rep(). */
+ else if (is_cntrl_char(wide_str)) {
+ char *ctrl_wide_str = charalloc(MB_CUR_MAX);
+
+ (*col)++;
+ wide_str = control_rep((unsigned char)wide_str);
+
+ if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1)
+ *col += wcwidth(wide_str);
+
+ free(ctrl_wide_str);
+ /* If we have a normal character, get its width in columns
+ * normally. */
+ } else
+ *col += wcwidth(wide_str);
+ }
+ } else {
+#endif
+ /* Interpret str's first character as a single-byte sequence. */
+ wide_str_len = 1;
+ wide_str = (unsigned char)*str;
+
+ /* Save the single-byte sequence in chr as though it's a wide
+ * character. */
+ if (chr != NULL)
+ *chr = wide_str;
+
+ if (col != NULL) {
+ /* If we have a tab, get its width in columns using the
+ * current value of col. */
+ if (wide_str == '\t')
+ *col += tabsize - *col % tabsize;
+ /* If we have a control character, it's two columns wide:
+ * one column for the "^" that will be displayed in front of
+ * it, and one column for its visible equivalent as returned
+ * by control_rep(). */
+ else if (is_cntrl_char(wide_str))
+ *col += 2;
+ /* If we have a normal character, it's one column wide. */
+ else
+ (*col)++;
+ }
+#ifdef NANO_WIDE
+ }
+#endif
+
+ return wide_str_len;
+}
+
+/* Return the index in str of the beginning of the character before the
+ * one at pos. */
+size_t move_left(const char *str, size_t pos)
+{
+ size_t pos_prev = pos;
+
+ assert(str != NULL && pos <= strlen(str));
+
+ /* There is no library function to move backward one multibyte
+ * character. Here is the naive, O(pos) way to do it. */
+ while (TRUE) {
+ int str_len = parse_char(str + pos - pos_prev, NULL, NULL
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+
+ if (pos_prev <= str_len)
+ break;
+
+ pos_prev -= str_len;
+ }
+
+ return pos - pos_prev;
+}
+
+/* Return the index in str of the beginning of the character after the
+ * one at pos. */
+size_t move_right(const char *str, size_t pos)
+{
+ return pos + parse_char(str + pos, NULL, NULL
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+}
+
/* Fix the memory allocation for a string. */
void align(char **strp)
{
diff --git a/src/winio.c b/src/winio.c
@@ -32,6 +32,10 @@
#include "proto.h"
#include "nano.h"
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
static buffer *key_buffer = NULL;
/* The default keystroke buffer,
* containing all the keystrokes we have
@@ -1625,38 +1629,50 @@ size_t actual_x(const char *str, size_t xplus)
assert(str != NULL);
- for (; length < xplus && *str != '\0'; i++, str++) {
- if (*str == '\t')
- length += tabsize - (length % tabsize);
- else if (is_cntrl_char(*str))
- length += 2;
- else
- length++;
- }
- assert(length == strnlenpt(str - i, i));
- assert(i <= strlen(str - i));
+ while (*str != '\0') {
+ int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
- if (length > xplus)
- i--;
+ if (length > xplus)
+ break;
+
+ i += str_len;
+ str += str_len;
+ }
return i;
}
/* A strlen() with tabs factored in, similar to xplustabs(). How many
- * columns wide are the first size characters of buf? */
-size_t strnlenpt(const char *buf, size_t size)
+ * columns wide are the first size characters of str? */
+size_t strnlenpt(const char *str, size_t size)
{
size_t length = 0;
+ /* The screen display width to str[i]. */
- assert(buf != NULL);
- for (; *buf != '\0' && size != 0; size--, buf++) {
- if (*buf == '\t')
- length += tabsize - (length % tabsize);
- else if (is_cntrl_char(*buf))
- length += 2;
- else
- length++;
+ if (size == 0)
+ return 0;
+
+ assert(str != NULL);
+
+ while (*str != '\0') {
+ int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+
+ str += str_len;
+
+ if (size <= str_len)
+ break;
+
+ size -= str_len;
}
+
return length;
}
@@ -1704,19 +1720,101 @@ void blank_bottombars(void)
}
}
+/* buf is a multibyte string to be displayed. We need to expand tabs
+ * and control characters. How many bytes do we need to display buf
+ * properly, not counting the null terminator? start_col is the column
+ * of *buf (usually 0). We display to (end_col - 1). */
+size_t display_string_len(const char *buf, size_t start_col, size_t
+ end_col)
+{
+ size_t retval = 0;
+
+ assert(buf != NULL);
+
+ /* Throughout the loop, we maintain the fact that *buf displays at
+ * column start_col. */
+ while (start_col <= end_col && *buf != '\0') {
+ int wide_buf;
+ /* The current wide character. */
+ int wide_buf_len;
+ /* How many bytes wide is this character? */
+ size_t old_col = start_col;
+ bool bad_char;
+
+ wide_buf_len = parse_char(buf, &wide_buf, &start_col
+#ifdef NANO_WIDE
+ , &bad_char
+#endif
+ );
+
+#ifdef NANO_WIDE
+ /* If buf contains a null byte or an invalid multibyte
+ * character, interpret its first byte as though it's a wide
+ * character. */
+ if (!ISSET(NO_UTF8) && bad_char) {
+ char *bad_wide_buf = charalloc(MB_CUR_MAX);
+ int bad_wide_buf_len;
+
+ /* If we have a control character, add one byte to account
+ * for the "^" that will be displayed in front of it, and
+ * translate the character to its visible equivalent as
+ * returned by control_rep(). */
+ if (is_cntrl_char(wide_buf)) {
+ retval++;
+ wide_buf = control_rep((unsigned char)wide_buf);
+ }
+
+ /* Translate the wide character to its multibyte
+ * equivalent. */
+ bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+ if (bad_wide_buf_len != -1)
+ retval += bad_wide_buf_len;
+
+ free(bad_wide_buf);
+ } else
+#endif
+ /* If we have a tab, get its width in bytes using the current
+ * value of col. */
+ if (wide_buf == '\t')
+ retval += start_col - old_col;
+ /* If we have a control character, add one byte to account for
+ * the "^" that will be displayed in front of it, and translate
+ * the byte to its visible equivalent as returned by
+ * control_rep(). */
+ else if (is_cntrl_char(wide_buf)) {
+ char ctrl_wide_buf = control_rep((unsigned char)wide_buf);
+
+ retval += parse_char(&ctrl_wide_buf, NULL, NULL
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ ) + 1;
+
+ /* If we have a normal character, add its width in bytes
+ * normally. */
+ } else
+ retval += wide_buf_len;
+ buf += wide_buf_len;
+ }
+
+ return retval;
+}
+
/* Convert buf into a string that can be displayed on screen. The
* caller wants to display buf starting with column start_col, and
* extending for at most len columns. start_col is zero-based. len is
* one-based, so len == 0 means you get "" returned. The returned
- * string is dynamically allocated, and should be freed. */
-char *display_string(const char *buf, size_t start_col, size_t len)
+ * string is dynamically allocated, and should be freed. If dollars is
+ * TRUE, the caller might put "$" at the beginning or end of the line if
+ * it's too long. */
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+ dollars)
{
size_t start_index;
/* Index in buf of first character shown in return value. */
size_t column;
/* Screen column start_index corresponds to. */
- size_t end_index;
- /* Index in buf of last character shown in return value. */
size_t alloc_len;
/* The length of memory allocated for converted. */
char *converted;
@@ -1724,54 +1822,155 @@ char *display_string(const char *buf, size_t start_col, size_t len)
size_t index;
/* Current position in converted. */
+ /* If dollars is TRUE, make room for the "$" at the end of the
+ * line. Also make sure that we don't try to display only part of a
+ * multicolumn character there. */
+ if (dollars && len > 0 && strlenpt(buf) > start_col + len)
+ len--;
+
if (len == 0)
return mallocstrcpy(NULL, "");
start_index = actual_x(buf, start_col);
column = strnlenpt(buf, start_index);
+
assert(column <= start_col);
- end_index = actual_x(buf, start_col + len - 1);
- alloc_len = strnlenpt(buf, end_index + 1) - column;
- if (len > alloc_len + column - start_col)
- len = alloc_len + column - start_col;
+
+ alloc_len = display_string_len(buf + start_index, start_col,
+ column + len) + 2;
converted = charalloc(alloc_len + 1);
- buf += start_index;
index = 0;
- for (; index < alloc_len; buf++) {
- if (*buf == '\t') {
+ if (column > start_col || (dollars && column > 0 &&
+ buf[start_index] != '\t')) {
+ int wide_buf, wide_buf_len;
+
+ /* We don't display all of buf[start_index] since it starts to
+ * the left of the screen. */
+ wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+ , NULL
+#endif
+ );
+
+ if (is_cntrl_char(wide_buf)) {
+ if (column > start_col) {
+ char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+ int ctrl_wide_buf_len, i;
+
+ wide_buf = control_rep((unsigned char)wide_buf);
+ ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+ (wchar_t)wide_buf);
+
+ for (i = 0; i < ctrl_wide_buf_len; i++)
+ converted[index++] = ctrl_wide_buf[i];
+
+ free(ctrl_wide_buf);
+ start_index += wide_buf_len;
+ }
+ } else if (wcwidth(wide_buf) > 1) {
+ /* If dollars is TRUE, make room for the "$" at the
+ * beginning of the line. Also make sure that we don't try
+ * to display only part of a multicolumn character there. */
+ converted[0] = ' ';
+ index = 1;
+ if (dollars && column == start_col) {
+ converted[1] = ' ';
+ index = 2;
+ }
+ start_index += wide_buf_len;
+ }
+ }
+
+ while (index < alloc_len && buf[start_index] != '\0') {
+ int wide_buf, wide_buf_len;
+ bool bad_char;
+
+ wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+ , &bad_char
+#endif
+ );
+
+#ifdef NANO_WIDE
+ if (!ISSET(NO_UTF8) && bad_char) {
+ char *bad_wide_buf = charalloc(MB_CUR_MAX);
+ int bad_wide_buf_len, i;
+
+ if (is_cntrl_char(wide_buf)) {
+ converted[index++] = '^';
+ start_col++;
+ wide_buf = control_rep((unsigned char)wide_buf);
+ }
+
+ bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+ for (i = 0; i < bad_wide_buf_len; i++)
+ converted[index++] = bad_wide_buf[i];
+
+ free(bad_wide_buf);
+
+ start_col += wcwidth((wchar_t)wide_buf);
+ } else
+#endif
+ if (wide_buf == '\t') {
converted[index++] =
#if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
ISSET(WHITESPACE_DISPLAY) ? whitespace[0] :
#endif
' ';
- while ((column + index) % tabsize)
+ start_col++;
+ while ((column + index) % tabsize) {
converted[index++] = ' ';
- } else if (is_cntrl_char(*buf)) {
+ start_col++;
+ }
+ } else if (is_cntrl_char(wide_buf)) {
+ char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+ int ctrl_wide_buf_len, i;
+
converted[index++] = '^';
- if (*buf == '\n')
- /* Treat newlines embedded in a line as encoded nulls;
- * the line in question should be run through unsunder()
- * before reaching here. */
- converted[index++] = '@';
- else if (*buf == NANO_CONTROL_8)
- converted[index++] = '?';
- else
- converted[index++] = *buf + 64;
- } else if (*buf == ' ')
+ start_col++;
+ wide_buf = control_rep((unsigned char)wide_buf);
+
+ ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+ (wchar_t)wide_buf);
+
+ for (i = 0; i < ctrl_wide_buf_len; i++)
+ converted[index++] = ctrl_wide_buf[i];
+
+ free(ctrl_wide_buf);
+
+ start_col += wcwidth((wchar_t)wide_buf);
+ } else if (wide_buf == ' ') {
converted[index++] =
#if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
ISSET(WHITESPACE_DISPLAY) ? whitespace[1] :
#endif
' ';
- else
- converted[index++] = *buf;
+ start_col++;
+ } else {
+ int i;
+
+ for (i = 0; i < wide_buf_len; i++)
+ converted[index++] = buf[start_index + i];
+
+#ifdef NANO_WIDE
+ if (!ISSET(NO_UTF8))
+ start_col += wcwidth((wchar_t)wide_buf);
+ else
+#endif
+ start_col++;
+ }
+
+ start_index += wide_buf_len;
}
- assert(len <= alloc_len + column - start_col);
- charmove(converted, converted + start_col - column, len);
- null_at(&converted, len);
- return charealloc(converted, len + 1);
+ /* Make sure that converted is at most len columns wide. */
+ converted[index] = '\0';
+ index = actual_x(converted, len);
+ null_at(&converted, index);
+
+ return converted;
}
/* Repaint the statusbar when getting a character in nanogetstr(). buf
@@ -1796,10 +1995,12 @@ void nanoget_repaint(const char *buf, const char *inputbuf, size_t x)
waddch(bottomwin, x_real < wid ? ' ' : '$');
if (COLS > 2) {
size_t page_start = x_real - x_real % wid;
- char *expanded = display_string(inputbuf, page_start, wid);
+ char *expanded = display_string(inputbuf, page_start, wid,
+ FALSE);
assert(wid > 0);
assert(strlen(expanded) <= wid);
+
waddstr(bottomwin, expanded);
free(expanded);
wmove(bottomwin, 0, COLS - wid + x_real - page_start);
@@ -2249,21 +2450,19 @@ void titlebar(const char *path)
{
int space;
/* The space we have available for display. */
- size_t verlen = strlen(VERMSG) + 1;
- /* The length of the version message. */
+ size_t verlen = strlenpt(VERMSG) + 1;
+ /* The length of the version message in columns. */
const char *prefix;
/* "File:", "Dir:", or "New Buffer". Goes before filename. */
size_t prefixlen;
- /* strlen(prefix) + 1. */
+ /* The length of the prefix in columns, plus one. */
const char *state;
/* "Modified", "View", or spaces the length of "Modified".
* Tells the state of this buffer. */
size_t statelen = 0;
- /* strlen(state) + 1. */
+ /* The length of the state in columns, plus one. */
char *exppath = NULL;
/* The file name, expanded for display. */
- size_t exppathlen = 0;
- /* strlen(exppath) + 1. */
bool newfie = FALSE;
/* Do we say "New Buffer"? */
bool dots = FALSE;
@@ -2299,10 +2498,10 @@ void titlebar(const char *path)
state = _("View");
else {
if (space > 0)
- statelen = strnlen(_("Modified"), space - 1) + 1;
+ statelen = strnlenpt(_("Modified"), space - 1) + 1;
state = &hblank[COLS - statelen];
}
- statelen = strnlen(state, COLS);
+ statelen = strnlenpt(state, COLS);
/* We need a space before state. */
if ((ISSET(MODIFIED) || ISSET(VIEW_MODE)) && statelen < COLS)
statelen++;
@@ -2322,7 +2521,7 @@ void titlebar(const char *path)
} else
prefix = _("File:");
assert(statelen < space);
- prefixlen = strnlen(prefix, space - statelen);
+ prefixlen = strnlenpt(prefix, space - statelen);
/* If newfie is FALSE, we need a space after prefix. */
if (!newfie && prefixlen + statelen < space)
prefixlen++;
@@ -2337,36 +2536,40 @@ void titlebar(const char *path)
if (!newfie) {
size_t lenpt = strlenpt(path), start_col;
- if (lenpt > space)
- start_col = actual_x(path, lenpt - space);
- else
- start_col = 0;
- exppath = display_string(path, start_col, space);
dots = (lenpt > space);
- exppathlen = strlen(exppath);
+
+ if (dots) {
+ start_col = lenpt - space + 3;
+ space -= 3;
+ } else
+ start_col = 0;
+
+ exppath = display_string(path, start_col, space, FALSE);
}
if (!dots) {
+ size_t exppathlen = newfie ? 0 : strlenpt(exppath);
+ /* The length of the expanded filename. */
+
/* There is room for the whole filename, so we center it. */
waddnstr(topwin, hblank, (space - exppathlen) / 3);
waddnstr(topwin, prefix, prefixlen);
if (!newfie) {
- assert(strlen(prefix) + 1 == prefixlen);
+ assert(strlenpt(prefix) + 1 == prefixlen);
+
waddch(topwin, ' ');
waddstr(topwin, exppath);
}
} else {
/* We will say something like "File: ...ename". */
waddnstr(topwin, prefix, prefixlen);
- if (space == 0 || newfie)
+ if (space <= -3 || newfie)
goto the_end;
waddch(topwin, ' ');
- waddnstr(topwin, "...", space);
- if (space <= 3)
+ waddnstr(topwin, "...", space + 3);
+ if (space <= 0)
goto the_end;
- space -= 3;
- assert(exppathlen == space + 3);
- waddnstr(topwin, exppath + 3, space);
+ waddstr(topwin, exppath);
}
the_end:
@@ -2414,17 +2617,17 @@ void statusbar(const char *msg, ...)
blank_statusbar();
if (COLS >= 4) {
- char *bar;
- char *foo;
+ char *bar, *foo;
size_t start_x = 0, foo_len;
#if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
bool old_whitespace = ISSET(WHITESPACE_DISPLAY);
+
UNSET(WHITESPACE_DISPLAY);
#endif
bar = charalloc(COLS - 3);
vsnprintf(bar, COLS - 3, msg, ap);
va_end(ap);
- foo = display_string(bar, 0, COLS - 4);
+ foo = display_string(bar, 0, COLS - 4, FALSE);
#if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
if (old_whitespace)
SET(WHITESPACE_DISPLAY);
@@ -2923,7 +3126,7 @@ void update_line(const filestruct *fileptr, size_t index)
/* Expand the line, replacing tabs with spaces, and control
* characters with their displayed forms. */
- converted = display_string(fileptr->data, page_start, COLS);
+ converted = display_string(fileptr->data, page_start, COLS, TRUE);
/* Paint the line. */
edit_add(fileptr, converted, line, page_start);
@@ -3569,7 +3772,10 @@ void do_credits(void)
"David Benbennick",
"Ken Tyler",
"Sven Guckes",
- "Florian König",
+#ifdef NANO_WIDE
+ !ISSET(NO_UTF8) ? "Florian K\xC3\xB6nig" :
+#endif
+ "Florian König",
"Pauli Virtanen",
"Daniele Medri",
"Clement Laforet",
@@ -3644,7 +3850,7 @@ void do_credits(void)
what = _(xlcredits[xlpos]);
xlpos++;
}
- start_x = COLS / 2 - strlen(what) / 2 - 1;
+ start_x = COLS / 2 - strlenpt(what) / 2 - 1;
mvwaddstr(edit, editwinrows - 1 - editwinrows % 2, start_x,
what);
}