commit 455a918071c659458f1f34b7a219dcbe0a9c2aeb
parent 296152ec67e10c96e29254909876f86b238e4927
Author: Benno Schulenberg <bensberg@justemail.net>
Date: Sat, 18 Apr 2015 20:07:31 +0000
Making sure an invalid starting byte of a multibyte sequence is properly
terminated, to prevent the displaying of ghost characters.
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@5206 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
Diffstat:
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,6 +1,14 @@
2015-04-18 Benno Schulenberg <bensberg@justemail.net>
* src/global.c, src/nano.c, doc/man/nanorc.5, doc/texinfo/nano.texi:
Make the descriptions of the multibuffer feature more accurate.
+ * src/winio.c (display_string): Make sure an invalid starting byte
+ of a multibyte sequence is properly terminated, so that it doesn't
+ pick up lingering bytes of any previous content. This prevents the
+ displaying of ghosts -- characters that aren't really there -- when a
+ file contains both valid and invalid UTF-8 sequences. For an example see:
+ https://lists.gnu.org/archive/html/nano-devel/2015-04/msg00052.html.
+ Also make two comments more accurate: an invalid multibyte sequence
+ will never be categorized as a control character or anything else.
2015-04-18 Mark Oteiza <mvoteiza@udel.edu>
* doc/syntax/{python,ruby,sh,tex}.nanorc: Add a linter definition.
diff --git a/src/winio.c b/src/winio.c
@@ -2004,9 +2004,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
converted[index++] = ' ';
start_col++;
}
- /* If buf contains a control character, interpret it. If buf
- * contains an invalid multibyte control character, display it
- * as such. */
+ /* If buf contains a control character, interpret it. */
} else if (is_cntrl_mbchar(buf_mb)) {
char *ctrl_buf_mb = charalloc(mb_cur_max());
int ctrl_buf_mb_len, i;
@@ -2036,13 +2034,17 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#endif
converted[index++] = ' ';
start_col++;
- /* If buf contains a non-control character, interpret it. If
- * buf contains an invalid multibyte non-control character,
- * display it as such. */
+ /* If buf contains a non-control character, interpret it. If buf
+ * contains an invalid multibyte sequence, display it as such. */
} else {
char *nctrl_buf_mb = charalloc(mb_cur_max());
int nctrl_buf_mb_len, i;
+ /* Make sure an invalid sequence-starter byte is properly
+ * terminated, so that it doesn't pick up lingering bytes
+ * of any previous content. */
+ null_at(&buf_mb, buf_mb_len);
+
nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb,
&nctrl_buf_mb_len);