more steps toward UTF-8 support: port all the parts of DB's UTF-8 patch that I currently understand to current CVS, with modifications of mine to autodetect UTF-8 support and to display multibyte strings instead of wide strings - nano

commit fc693210d5044b0735bfed92433a7cfb979f5521
parent c0b9d19ed792d3a7391febc0f58569320162f4cf
Author: David Lawrence Ramsey <pooka109@gmail.com>
Date:   Thu, 23 Dec 2004 17:43:27 +0000

more steps toward UTF-8 support: port all the parts of DB's UTF-8 patch
that I currently understand to current CVS, with modifications of mine
to autodetect UTF-8 support and to display multibyte strings instead of
wide strings


git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2193 35c25a1d-7b9e-4130-9fde-d3aeb78583b8

Diffstat:
M ChangeLog  | 16 ++++++++++++++++
M configure.ac  | 2 +-
M src/move.c  | 4 ++--
M src/nano.c  | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M src/nano.h  | 1 +
M src/proto.h  | 13 ++++++++++++-
M src/search.c  | 7 ++++---
M src/utils.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/winio.c  | 366 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------

9 files changed, 543 insertions(+), 116 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -50,6 +50,16 @@ CVS code -
 	  in order for output to work properly. (DLR; buffered
 	  input/output based on ideas from mutt 1.4.2.1; double-Escape
 	  input of Unicode characters suggested by Michael Piefel)
+	- More steps toward wide character/multibyte character support.
+	  Movement and cursor display in the edit window should now work
+	  properly with files containing multibyte characters, and text
+	  display of such files should work properly some of the time.
+	  New functions control_rep(), parse_char(), move_left(),
+	  move_right(), and display_string_len(); changes to do_left(),
+	  do_right(), do_delete(), breakable(), break_line(),
+	  do_output(), get_buffer(), unget_input(), actual_x(),
+	  strnlenpt(), display_string(), titlebar(), and do_credits().
+	  (David Benbennick and DLR)
 - cut.c:
   do_cut_text()
 	- If keep_cutbuffer is FALSE, only blow away the text in the
@@ -92,6 +102,10 @@ CVS code -
 	  loop if there are no more paragraphs after the current one and
 	  the paragraph search left us on the magicline, so as to avoid
 	  a segfault. (DLR)
+  main()
+	- Try to automatically detect whether UTF-8 support is needed by
+	  setting the NO_UTF8 flag if setlocale() returns a string that
+	  doesn't contain "UTF-8". (DLR)
 - winio.c:
   titlebar()
 	- Rename some variables for consistency, make space an int
@@ -135,6 +149,8 @@ CVS code -
 	- Remove specific references to control key shortcuts. (DLR)
 	- Check for the wide version of ncurses, without which multibyte
 	  strings don't seem to be displayed properly. (DLR)
+	- Check for stddef.h and wchar.h, for those systems that need
+	  one of the two for the wcwidth() prototype. (DLR)
 - doc/nanorc.sample:
 	- Add return to the "c-file" regexes. (DLR)
 
diff --git a/configure.ac b/configure.ac
@@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext])
 
 dnl Checks for header files.
 AC_HEADER_STDC
-AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termio.h termios.h unistd.h)
+AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h stddef.h termio.h termios.h unistd.h wchar.h)
 AC_CHECK_HEADER(regex.h,
     AC_MSG_CHECKING([for broken regexec])
     AC_TRY_RUN([
diff --git a/src/move.c b/src/move.c
@@ -252,7 +252,7 @@ void do_left(int allow_update)
 {
     size_t pww_save = placewewant;
     if (current_x > 0)
-	current_x--;
+	current_x = move_left(current->data, current_x);
     else if (current != fileage) {
 	do_up();
 	current_x = strlen(current->data);
@@ -274,7 +274,7 @@ void do_right(int allow_update)
     assert(current_x <= strlen(current->data));
 
     if (current->data[current_x] != '\0')
-	current_x++;
+	current_x = move_right(current->data, current_x);
     else if (current->next != NULL) {
 	do_down();
 	current_x = 0;
diff --git a/src/nano.c b/src/nano.c
@@ -1185,18 +1185,25 @@ void do_delete(void)
     placewewant = xplustabs();
 
     if (current->data[current_x] != '\0') {
-	size_t linelen = strlen(current->data + current_x);
+	int char_len = parse_char(current->data + current_x, NULL,
+		NULL
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+	size_t line_len = strlen(current->data + current_x);
 
 	assert(current_x < strlen(current->data));
 
 	/* Let's get dangerous. */
-	charmove(&current->data[current_x], &current->data[current_x + 1],
-		linelen);
+	charmove(&current->data[current_x],
+		&current->data[current_x + char_len],
+		line_len - char_len + 1);
 
-	null_at(&current->data, linelen + current_x - 1);
+	null_at(&current->data, current_x + line_len - char_len);
 #ifndef NANO_SMALL
 	if (current_x < mark_beginx && mark_beginbuf == current)
-	    mark_beginx--;
+	    mark_beginx -= char_len;
 #endif
     } else if (current != filebot && (current->next != filebot ||
 	current->data[0] == '\0')) {
@@ -1211,8 +1218,8 @@ void do_delete(void)
 	if (current->data[current_x] == '\0')
 	    do_refresh = TRUE;
 
-	current->data = charealloc(current->data, current_x +
-		strlen(foo->data) + 1);
+	current->data = charealloc(current->data,
+		current_x + strlen(foo->data) + 1);
 	strcpy(current->data + current_x, foo->data);
 #ifndef NANO_SMALL
 	if (mark_beginbuf == current->next) {
@@ -1227,13 +1234,13 @@ void do_delete(void)
 	delete_node(foo);
 	renumber(current);
 	totlines--;
+	totsize--;
 #ifndef DISABLE_WRAPPING
 	wrap_reset();
 #endif
     } else
 	return;
 
-    totsize--;
     set_modified();
 
 #ifdef ENABLE_COLOR
@@ -2494,15 +2501,21 @@ filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t
 /* Is it possible to break line at or before goal? */
 bool breakable(const char *line, ssize_t goal)
 {
-    for (; *line != '\0' && goal >= 0; line++) {
+    while (*line != '\0' && goal >= 0) {
+	size_t pos = 0;
+
 	if (isblank(*line))
 	    return TRUE;
 
-	if (is_cntrl_char(*line))
-	    goal -= 2;
-	else
-	    goal -= 1;
+	line += parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+
+	goal -= pos;
     }
+
     /* If goal is not negative, the whole line (one word) was short
      * enough. */
     return goal >= 0;
@@ -2522,32 +2535,49 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
 	/* Current index in line. */
 
     assert(line != NULL);
-    for (; *line != '\0' && goal >= 0; line++, cur_loc++) {
+
+    while (*line != '\0' && goal >= 0) {
+	size_t pos = 0;
+	int line_len;
+
 	if (*line == ' ')
 	    space_loc = cur_loc;
+
 	assert(*line != '\t');
 
-	if (is_cntrl_char(*line))
-	    goal -= 2;
-	else
-	    goal--;
+	line_len = parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+
+	goal -= pos;
+	line += line_len;
+	cur_loc += line_len;
     }
+
     if (goal >= 0)
 	/* In fact, the whole line displays shorter than goal. */
 	return cur_loc;
+
     if (space_loc == -1) {
 	/* No space found short enough. */
-	if (force)
-	    for (; *line != '\0'; line++, cur_loc++)
-		if (*line == ' ' && *(line + 1) != ' ' && *(line + 1) != '\0')
+	if (force) {
+	    for (; *line != '\0'; line++, cur_loc++) {
+		if (*line == ' ' && *(line + 1) != ' ' &&
+			*(line + 1) != '\0')
 		    return cur_loc;
-	return -1;
+	    }
+	    return -1;
+	}
     }
+
     /* Perhaps the character after space_loc is a space.  But because
      * of justify_format(), there can be only two adjacent. */
     if (*(line - cur_loc + space_loc + 1) == ' ' ||
 	*(line - cur_loc + space_loc + 1) == '\0')
 	space_loc++;
+
     return space_loc;
 }
 
@@ -3639,13 +3669,7 @@ void do_output(int *kbinput, size_t kbinput_len)
 	    mark_beginx += key_len;
 #endif
 
-	{
-	    /* FIXME: The movement functions should take multibyte
-	     * characters into account. */
-	    int j;
-	    for (j = 0; j < key_len; j++)
-		do_right(FALSE);
-	}
+	do_right(FALSE);
 
 #ifndef DISABLE_WRAPPING
 	/* If we're wrapping text, we need to call edit_refresh(). */
@@ -3759,7 +3783,21 @@ int main(int argc, char **argv)
     };
 #endif
 
+#ifdef NANO_WIDE
+    {
+	/* If the locale set doesn't exist, or it exists but doesn't
+	 * include the string "UTF-8", we shouldn't use UTF-8
+	 * support. */
+	char *locale = setlocale(LC_ALL, "");
+
+	if (locale == NULL || (locale != NULL &&
+		strstr(locale, "UTF-8") == NULL))
+	    SET(NO_UTF8);
+    }
+#else
     setlocale(LC_ALL, "");
+#endif
+
 #ifdef ENABLE_NLS
     bindtextdomain(PACKAGE, LOCALEDIR);
     textdomain(PACKAGE);
diff --git a/src/nano.h b/src/nano.h
@@ -83,6 +83,7 @@
 #define N_(string) gettext_noop(string)
 	/* Mark a string that will be sent to gettext later. */
 
+#include <stddef.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include "config.h"
diff --git a/src/proto.h b/src/proto.h
@@ -475,7 +475,15 @@ int is_blank_char(int c);
 int is_cntrl_char(int c);
 bool is_byte_char(int c);
 int num_of_digits(int n);
+unsigned char control_rep(unsigned char c);
 bool parse_num(const char *str, ssize_t *val);
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+	, bool *bad_char
+#endif
+	);
+size_t move_left(const char *str, size_t pos);
+size_t move_right(const char *str, size_t pos);
 void align(char **strp);
 void null_at(char **data, size_t index);
 void unsunder(char *str, size_t true_len);
@@ -570,7 +578,10 @@ void blank_edit(void);
 void blank_statusbar(void);
 void check_statusblank(void);
 void blank_bottombars(void);
-char *display_string(const char *buf, size_t start_col, size_t len);
+size_t display_string_len(const char *buf, size_t start_col, size_t
+	end_col);
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+	dollars);
 void nanoget_repaint(const char *buf, const char *inputbuf, size_t x);
 int nanogetstr(bool allow_tabs, const char *buf, const char *def,
 #ifndef NANO_SMALL
diff --git a/src/search.c b/src/search.c
@@ -83,7 +83,7 @@ void not_found_msg(const char *str)
  
     assert(str != NULL);
 
-    disp = display_string(str, 0, (COLS / 2) + 1);
+    disp = display_string(str, 0, (COLS / 2) + 1, FALSE);
     numchars = strnlen(disp, COLS / 2);
 
     statusbar(_("\"%.*s%s\" not found"), numchars, disp,
@@ -150,7 +150,7 @@ int search_init(bool replacing, bool use_answer)
 #endif
 
     if (last_search[0] != '\0') {
-	char *disp = display_string(last_search, 0, COLS / 3);
+	char *disp = display_string(last_search, 0, COLS / 3, FALSE);
 
 	buf = charalloc(COLS / 3 + 7);
 	/* We use COLS / 3 here because we need to see more on the
@@ -748,7 +748,8 @@ ssize_t do_replace_loop(const char *needle, const filestruct
 	    size_t xpt = xplustabs();
 
 	    exp_word = display_string(current->data, xpt,
-		strnlenpt(current->data, match_len + current_x) - xpt);
+		strnlenpt(current->data, match_len + current_x) - xpt,
+		FALSE);
 
 	    curs_set(0);
 	    do_replace_highlight(TRUE, exp_word);
diff --git a/src/utils.c b/src/utils.c
@@ -33,6 +33,10 @@
 #include "proto.h"
 #include "nano.h"
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 #ifdef HAVE_REGEX_H
 #ifdef BROKEN_REGEXEC
 #undef regexec
@@ -92,6 +96,19 @@ int num_of_digits(int n)
     return i;
 }
 
+/* c is a control character.  It displays as ^@, ^?, or ^[ch] where ch
+ * is c + 64.  We return that character. */
+unsigned char control_rep(unsigned char c)
+{
+    /* Treat newlines embedded in a line as encoded nulls. */
+    if (c == '\n')
+	return '@';
+    else if (c == NANO_CONTROL_8)
+	return '?';
+    else
+	return c + 64;
+}
+
 /* Read a ssize_t from str, and store it in *val (if val is not NULL).
  * On error, we return FALSE and don't change *val.  Otherwise, we
  * return TRUE. */
@@ -113,6 +130,143 @@ bool parse_num(const char *str, ssize_t *val)
     return TRUE;
 }
 
+/* Parse a multi-byte character from str.  Return the number of bytes
+ * used.  If chr isn't NULL, store the wide character in it.  If col
+ * isn't NULL, store the new display width in it.  If *str is '\t', we
+ * expect col to have the current display width.  If bad_char isn't
+ * NULL, set it to TRUE if we have a null byte or a bad multibyte
+ * character. */
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+	, bool *bad_char
+#endif
+	)
+{
+    int wide_str, wide_str_len;
+
+    assert(str != NULL);
+
+#ifdef NANO_WIDE
+    if (bad_char != NULL)
+	*bad_char = FALSE;
+
+    if (!ISSET(NO_UTF8)) {
+	wchar_t tmp;
+
+	/* Get the wide character equivalent of the multibyte
+	 * character. */
+	wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX);
+	wide_str = (int)tmp;
+
+	/* If str contains a null byte or an invalid multibyte
+	 * character, interpret str's first byte as a single-byte
+	 * sequence and set bad_char to TRUE. */
+	if (wide_str_len <= 0) {
+	    wide_str_len = 1;
+	    wide_str = (unsigned char)*str;
+	    if (bad_char != NULL)
+		*bad_char = TRUE;
+	}
+
+	/* Save the wide character in chr. */
+	if (chr != NULL)
+	    *chr = wide_str;
+
+	/* Save the column width of the wide character in col. */
+	if (col != NULL) {
+	    /* If we have a tab, get its width in columns using the
+	     * current value of col. */
+	    if (wide_str == '\t')
+		*col += tabsize - *col % tabsize;
+	    /* If we have a control character, get its width using one
+	     * column for the "^" that will be displayed in front of it,
+	     * and the width in columns of its visible equivalent as
+	     * returned by control_rep(). */
+	    else if (is_cntrl_char(wide_str)) {
+		char *ctrl_wide_str = charalloc(MB_CUR_MAX);
+
+		(*col)++;
+		wide_str = control_rep((unsigned char)wide_str);
+
+		if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1)
+		    *col += wcwidth(wide_str);
+
+		free(ctrl_wide_str);
+	    /* If we have a normal character, get its width in columns
+	     * normally. */
+	    } else
+		*col += wcwidth(wide_str);
+	}
+    } else {
+#endif
+	/* Interpret str's first character as a single-byte sequence. */
+	wide_str_len = 1;
+	wide_str = (unsigned char)*str;
+
+	/* Save the single-byte sequence in chr as though it's a wide
+	 * character. */
+	if (chr != NULL)
+	    *chr = wide_str;
+
+	if (col != NULL) {
+	    /* If we have a tab, get its width in columns using the
+	     * current value of col. */
+	    if (wide_str == '\t')
+		*col += tabsize - *col % tabsize;
+	    /* If we have a control character, it's two columns wide:
+	     * one column for the "^" that will be displayed in front of
+	     * it, and one column for its visible equivalent as returned
+	     * by control_rep(). */
+	    else if (is_cntrl_char(wide_str))
+		*col += 2;
+	    /* If we have a normal character, it's one column wide. */
+	    else
+		(*col)++;
+	}
+#ifdef NANO_WIDE
+    }
+#endif
+
+    return wide_str_len;
+}
+
+/* Return the index in str of the beginning of the character before the
+ * one at pos. */
+size_t move_left(const char *str, size_t pos)
+{
+    size_t pos_prev = pos;
+
+    assert(str != NULL && pos <= strlen(str));
+
+    /* There is no library function to move backward one multibyte
+     * character.  Here is the naive, O(pos) way to do it. */
+    while (TRUE) {
+	int str_len = parse_char(str + pos - pos_prev, NULL, NULL
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+
+	if (pos_prev <= str_len)
+	    break;
+
+	pos_prev -= str_len;
+    }
+
+    return pos - pos_prev;
+}
+
+/* Return the index in str of the beginning of the character after the
+ * one at pos. */
+size_t move_right(const char *str, size_t pos)
+{
+    return pos + parse_char(str + pos, NULL, NULL
+#ifdef NANO_WIDE
+	, NULL
+#endif
+	);
+}
+
 /* Fix the memory allocation for a string. */
 void align(char **strp)
 {
diff --git a/src/winio.c b/src/winio.c
@@ -32,6 +32,10 @@
 #include "proto.h"
 #include "nano.h"
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 static buffer *key_buffer = NULL;
 				/* The default keystroke buffer,
 				 * containing all the keystrokes we have
@@ -1625,38 +1629,50 @@ size_t actual_x(const char *str, size_t xplus)
 
     assert(str != NULL);
 
-    for (; length < xplus && *str != '\0'; i++, str++) {
-	if (*str == '\t')
-	    length += tabsize - (length % tabsize);
-	else if (is_cntrl_char(*str))
-	    length += 2;
-	else
-	    length++;
-    }
-    assert(length == strnlenpt(str - i, i));
-    assert(i <= strlen(str - i));
+    while (*str != '\0') {
+	int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
 
-    if (length > xplus)
-	i--;
+	if (length > xplus)
+	    break;
+
+	i += str_len;
+	str += str_len;
+    }
 
     return i;
 }
 
 /* A strlen() with tabs factored in, similar to xplustabs().  How many
- * columns wide are the first size characters of buf? */
-size_t strnlenpt(const char *buf, size_t size)
+ * columns wide are the first size characters of str? */
+size_t strnlenpt(const char *str, size_t size)
 {
     size_t length = 0;
+	/* The screen display width to str[i]. */
 
-    assert(buf != NULL);
-    for (; *buf != '\0' && size != 0; size--, buf++) {
-	if (*buf == '\t')
-	    length += tabsize - (length % tabsize);
-	else if (is_cntrl_char(*buf))
-	    length += 2;
-	else
-	    length++;
+    if (size == 0)
+	return 0;
+
+    assert(str != NULL);
+
+    while (*str != '\0') {
+	int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+
+	str += str_len;
+
+	if (size <= str_len)
+	    break;
+
+	size -= str_len;
     }
+
     return length;
 }
 
@@ -1704,19 +1720,101 @@ void blank_bottombars(void)
     }
 }
 
+/* buf is a multibyte string to be displayed.  We need to expand tabs
+ * and control characters.  How many bytes do we need to display buf
+ * properly, not counting the null terminator?  start_col is the column
+ * of *buf (usually 0).  We display to (end_col - 1). */
+size_t display_string_len(const char *buf, size_t start_col, size_t
+	end_col)
+{
+    size_t retval = 0;
+
+    assert(buf != NULL);
+
+    /* Throughout the loop, we maintain the fact that *buf displays at
+     * column start_col. */
+    while (start_col <= end_col && *buf != '\0') {
+	int wide_buf;
+	    /* The current wide character. */
+	int wide_buf_len;
+	    /* How many bytes wide is this character? */
+	size_t old_col = start_col;
+	bool bad_char;
+
+	wide_buf_len = parse_char(buf, &wide_buf, &start_col
+#ifdef NANO_WIDE
+		, &bad_char
+#endif
+		);
+
+#ifdef NANO_WIDE
+	/* If buf contains a null byte or an invalid multibyte
+	 * character, interpret its first byte as though it's a wide
+	 * character. */
+	if (!ISSET(NO_UTF8) && bad_char) {
+	    char *bad_wide_buf = charalloc(MB_CUR_MAX);
+	    int bad_wide_buf_len;
+
+	    /* If we have a control character, add one byte to account
+	     * for the "^" that will be displayed in front of it, and
+	     * translate the character to its visible equivalent as
+	     * returned by control_rep(). */
+	    if (is_cntrl_char(wide_buf)) {
+		retval++;
+		wide_buf = control_rep((unsigned char)wide_buf);
+	    }
+
+	    /* Translate the wide character to its multibyte
+	     * equivalent. */
+	    bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+	    if (bad_wide_buf_len != -1)
+		retval += bad_wide_buf_len;
+
+	    free(bad_wide_buf);
+	} else
+#endif
+	/* If we have a tab, get its width in bytes using the current
+	 * value of col. */
+	if (wide_buf == '\t')
+	    retval += start_col - old_col;
+	/* If we have a control character, add one byte to account for
+	 * the "^" that will be displayed in front of it, and translate
+	 * the byte to its visible equivalent as returned by
+	 * control_rep(). */
+	else if (is_cntrl_char(wide_buf)) {
+	    char ctrl_wide_buf = control_rep((unsigned char)wide_buf);
+
+	    retval += parse_char(&ctrl_wide_buf, NULL, NULL
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		) + 1;
+
+	/* If we have a normal character, add its width in bytes
+	 * normally. */
+	} else
+	    retval += wide_buf_len;
+	buf += wide_buf_len;
+    }
+
+    return retval;
+}
+
 /* Convert buf into a string that can be displayed on screen.  The
  * caller wants to display buf starting with column start_col, and
  * extending for at most len columns.  start_col is zero-based.  len is
  * one-based, so len == 0 means you get "" returned.  The returned
- * string is dynamically allocated, and should be freed. */
-char *display_string(const char *buf, size_t start_col, size_t len)
+ * string is dynamically allocated, and should be freed.  If dollars is
+ * TRUE, the caller might put "$" at the beginning or end of the line if
+ * it's too long. */
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+	dollars)
 {
     size_t start_index;
 	/* Index in buf of first character shown in return value. */
     size_t column;
 	/* Screen column start_index corresponds to. */
-    size_t end_index;
-	/* Index in buf of last character shown in return value. */
     size_t alloc_len;
 	/* The length of memory allocated for converted. */
     char *converted;
@@ -1724,54 +1822,155 @@ char *display_string(const char *buf, size_t start_col, size_t len)
     size_t index;
 	/* Current position in converted. */
 
+    /* If dollars is TRUE, make room for the "$" at the end of the
+     * line.  Also make sure that we don't try to display only part of a
+     * multicolumn character there. */
+    if (dollars && len > 0 && strlenpt(buf) > start_col + len)
+	len--;
+
     if (len == 0)
 	return mallocstrcpy(NULL, "");
 
     start_index = actual_x(buf, start_col);
     column = strnlenpt(buf, start_index);
+
     assert(column <= start_col);
-    end_index = actual_x(buf, start_col + len - 1);
-    alloc_len = strnlenpt(buf, end_index + 1) - column;
-    if (len > alloc_len + column - start_col)
-	len = alloc_len + column - start_col;
+
+    alloc_len = display_string_len(buf + start_index, start_col,
+	column + len) + 2;
     converted = charalloc(alloc_len + 1);
-    buf += start_index;
     index = 0;
 
-    for (; index < alloc_len; buf++) {
-	if (*buf == '\t') {
+    if (column > start_col || (dollars && column > 0 &&
+		buf[start_index] != '\t')) {
+	int wide_buf, wide_buf_len;
+
+	/* We don't display all of buf[start_index] since it starts to
+	 * the left of the screen. */
+	wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+		, NULL
+#endif
+		);
+
+	if (is_cntrl_char(wide_buf)) {
+	    if (column > start_col) {
+		char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+		int ctrl_wide_buf_len, i;
+
+		wide_buf = control_rep((unsigned char)wide_buf);
+		ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+			(wchar_t)wide_buf);
+
+		for (i = 0; i < ctrl_wide_buf_len; i++)
+		    converted[index++] = ctrl_wide_buf[i];
+
+		free(ctrl_wide_buf);
+		start_index += wide_buf_len;
+	    }
+	} else if (wcwidth(wide_buf) > 1) {
+	    /* If dollars is TRUE, make room for the "$" at the
+	     * beginning of the line.  Also make sure that we don't try
+	     * to display only part of a multicolumn character there. */
+	    converted[0] = ' ';
+	    index = 1;
+	    if (dollars && column == start_col) {
+		converted[1] = ' ';
+		index = 2;
+	    }
+	    start_index += wide_buf_len;
+	}
+    }
+
+    while (index < alloc_len && buf[start_index] != '\0') {
+	int wide_buf, wide_buf_len;
+	bool bad_char;
+
+	wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+		, &bad_char
+#endif
+		);
+
+#ifdef NANO_WIDE
+	if (!ISSET(NO_UTF8) && bad_char) {
+	    char *bad_wide_buf = charalloc(MB_CUR_MAX);
+	    int bad_wide_buf_len, i;
+
+	    if (is_cntrl_char(wide_buf)) {
+		converted[index++] = '^';
+		start_col++;
+		wide_buf = control_rep((unsigned char)wide_buf);
+	    }
+
+	    bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+	    for (i = 0; i < bad_wide_buf_len; i++)
+		converted[index++] = bad_wide_buf[i];
+
+	    free(bad_wide_buf);
+
+	    start_col += wcwidth((wchar_t)wide_buf);
+	} else
+#endif
+	if (wide_buf == '\t') {
 	    converted[index++] =
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 		ISSET(WHITESPACE_DISPLAY) ? whitespace[0] :
 #endif
 		' '; 
-	    while ((column + index) % tabsize)
+	    start_col++;
+	    while ((column + index) % tabsize) {
 		converted[index++] = ' ';
-	} else if (is_cntrl_char(*buf)) {
+		start_col++;
+	    }
+	} else if (is_cntrl_char(wide_buf)) {
+	    char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+	    int ctrl_wide_buf_len, i;
+
 	    converted[index++] = '^';
-	    if (*buf == '\n')
-		/* Treat newlines embedded in a line as encoded nulls;
-		 * the line in question should be run through unsunder()
-		 * before reaching here. */
-		converted[index++] = '@';
-	    else if (*buf == NANO_CONTROL_8)
-		converted[index++] = '?';
-	    else
-		converted[index++] = *buf + 64;
-	} else if (*buf == ' ')
+	    start_col++;
+	    wide_buf = control_rep((unsigned char)wide_buf);
+
+	    ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+		(wchar_t)wide_buf);
+
+	    for (i = 0; i < ctrl_wide_buf_len; i++)
+		converted[index++] = ctrl_wide_buf[i];
+
+	    free(ctrl_wide_buf);
+
+	    start_col += wcwidth((wchar_t)wide_buf);
+	} else if (wide_buf == ' ') {
 	    converted[index++] =
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 		ISSET(WHITESPACE_DISPLAY) ? whitespace[1] :
 #endif
 		' ';
-	else
-	    converted[index++] = *buf;
+	    start_col++;
+	} else {
+	    int i;
+
+	    for (i = 0; i < wide_buf_len; i++)
+		converted[index++] = buf[start_index + i];
+
+#ifdef NANO_WIDE
+	    if (!ISSET(NO_UTF8))
+		start_col += wcwidth((wchar_t)wide_buf);
+	    else
+#endif
+		start_col++;
+	}
+
+	start_index += wide_buf_len;
     }
-    assert(len <= alloc_len + column - start_col);
-    charmove(converted, converted + start_col - column, len);
-    null_at(&converted, len);
 
-    return charealloc(converted, len + 1);
+    /* Make sure that converted is at most len columns wide. */
+    converted[index] = '\0';
+    index = actual_x(converted, len);
+    null_at(&converted, index);
+
+    return converted;
 }
 
 /* Repaint the statusbar when getting a character in nanogetstr().  buf
@@ -1796,10 +1995,12 @@ void nanoget_repaint(const char *buf, const char *inputbuf, size_t x)
 	waddch(bottomwin, x_real < wid ? ' ' : '$');
     if (COLS > 2) {
 	size_t page_start = x_real - x_real % wid;
-	char *expanded = display_string(inputbuf, page_start, wid);
+	char *expanded = display_string(inputbuf, page_start, wid,
+		FALSE);
 
 	assert(wid > 0);
 	assert(strlen(expanded) <= wid);
+
 	waddstr(bottomwin, expanded);
 	free(expanded);
 	wmove(bottomwin, 0, COLS - wid + x_real - page_start);
@@ -2249,21 +2450,19 @@ void titlebar(const char *path)
 {
     int space;
 	/* The space we have available for display. */
-    size_t verlen = strlen(VERMSG) + 1;
-	/* The length of the version message. */
+    size_t verlen = strlenpt(VERMSG) + 1;
+	/* The length of the version message in columns. */
     const char *prefix;
 	/* "File:", "Dir:", or "New Buffer".  Goes before filename. */
     size_t prefixlen;
-	/* strlen(prefix) + 1. */
+	/* The length of the prefix in columns, plus one. */
     const char *state;
 	/* "Modified", "View", or spaces the length of "Modified".
 	 * Tells the state of this buffer. */
     size_t statelen = 0;
-	/* strlen(state) + 1. */
+	/* The length of the state in columns, plus one. */
     char *exppath = NULL;
 	/* The file name, expanded for display. */
-    size_t exppathlen = 0;
-	/* strlen(exppath) + 1. */
     bool newfie = FALSE;
 	/* Do we say "New Buffer"? */
     bool dots = FALSE;
@@ -2299,10 +2498,10 @@ void titlebar(const char *path)
 	state = _("View");
     else {
 	if (space > 0)
-	    statelen = strnlen(_("Modified"), space - 1) + 1;
+	    statelen = strnlenpt(_("Modified"), space - 1) + 1;
 	state = &hblank[COLS - statelen];
     }
-    statelen = strnlen(state, COLS);
+    statelen = strnlenpt(state, COLS);
     /* We need a space before state. */
     if ((ISSET(MODIFIED) || ISSET(VIEW_MODE)) && statelen < COLS)
 	statelen++;
@@ -2322,7 +2521,7 @@ void titlebar(const char *path)
     } else
 	prefix = _("File:");
     assert(statelen < space);
-    prefixlen = strnlen(prefix, space - statelen);
+    prefixlen = strnlenpt(prefix, space - statelen);
     /* If newfie is FALSE, we need a space after prefix. */
     if (!newfie && prefixlen + statelen < space)
 	prefixlen++;
@@ -2337,36 +2536,40 @@ void titlebar(const char *path)
     if (!newfie) {
 	size_t lenpt = strlenpt(path), start_col;
 
-	if (lenpt > space)
-	    start_col = actual_x(path, lenpt - space);
-	else
-	    start_col = 0;
-	exppath = display_string(path, start_col, space);
 	dots = (lenpt > space);
-	exppathlen = strlen(exppath);
+
+	if (dots) {
+	    start_col = lenpt - space + 3;
+	    space -= 3;
+	} else
+	    start_col = 0;
+
+	exppath = display_string(path, start_col, space, FALSE);
     }
 
     if (!dots) {
+	size_t exppathlen = newfie ? 0 : strlenpt(exppath);
+	    /* The length of the expanded filename. */
+
 	/* There is room for the whole filename, so we center it. */
 	waddnstr(topwin, hblank, (space - exppathlen) / 3);
 	waddnstr(topwin, prefix, prefixlen);
 	if (!newfie) {
-	    assert(strlen(prefix) + 1 == prefixlen);
+	    assert(strlenpt(prefix) + 1 == prefixlen);
+
 	    waddch(topwin, ' ');
 	    waddstr(topwin, exppath);
 	}
     } else {
 	/* We will say something like "File: ...ename". */
 	waddnstr(topwin, prefix, prefixlen);
-	if (space == 0 || newfie)
+	if (space <= -3 || newfie)
 	    goto the_end;
 	waddch(topwin, ' ');
-	waddnstr(topwin, "...", space);
-	if (space <= 3)
+	waddnstr(topwin, "...", space + 3);
+	if (space <= 0)
 	    goto the_end;
-	space -= 3;
-	assert(exppathlen == space + 3);
-	waddnstr(topwin, exppath + 3, space);
+	waddstr(topwin, exppath);
     }
 
   the_end:
@@ -2414,17 +2617,17 @@ void statusbar(const char *msg, ...)
     blank_statusbar();
 
     if (COLS >= 4) {
-	char *bar;
-	char *foo;
+	char *bar, *foo;
 	size_t start_x = 0, foo_len;
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 	bool old_whitespace = ISSET(WHITESPACE_DISPLAY);
+
 	UNSET(WHITESPACE_DISPLAY);
 #endif
 	bar = charalloc(COLS - 3);
 	vsnprintf(bar, COLS - 3, msg, ap);
 	va_end(ap);
-	foo = display_string(bar, 0, COLS - 4);
+	foo = display_string(bar, 0, COLS - 4, FALSE);
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 	if (old_whitespace)
 	    SET(WHITESPACE_DISPLAY);
@@ -2923,7 +3126,7 @@ void update_line(const filestruct *fileptr, size_t index)
 
     /* Expand the line, replacing tabs with spaces, and control
      * characters with their displayed forms. */
-    converted = display_string(fileptr->data, page_start, COLS);
+    converted = display_string(fileptr->data, page_start, COLS, TRUE);
 
     /* Paint the line. */
     edit_add(fileptr, converted, line, page_start);
@@ -3569,7 +3772,10 @@ void do_credits(void)
 	"David Benbennick",
 	"Ken Tyler",
 	"Sven Guckes",
-	"Florian König",
+#ifdef NANO_WIDE
+	!ISSET(NO_UTF8) ? "Florian K\xC3\xB6nig" :
+#endif
+		"Florian König",
 	"Pauli Virtanen",
 	"Daniele Medri",
 	"Clement Laforet",
@@ -3644,7 +3850,7 @@ void do_credits(void)
 		what = _(xlcredits[xlpos]);
 		xlpos++;
 	    }
-	    start_x = COLS / 2 - strlen(what) / 2 - 1;
+	    start_x = COLS / 2 - strlenpt(what) / 2 - 1;
 	    mvwaddstr(edit, editwinrows - 1 - editwinrows % 2, start_x,
 		what);
 	}

	nano nano with my custom patches
	git clone git://bsandro.tech/nano
	Log \| Files \| Refs \| README \| LICENSE

M	ChangeLog	\|	16	++++++++++++++++
M	configure.ac	\|	2	+-
M	src/move.c	\|	4	++--
M	src/nano.c	\|	96	+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M	src/nano.h	\|	1	+
M	src/proto.h	\|	13	++++++++++++-
M	src/search.c	\|	7	++++---
M	src/utils.c	\|	154	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/winio.c	\|	366	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------