commit 6f12992cea6f7cb9a6e30f3dfafa50a25a8e17a6
parent d88423eaae789ab64b64ce08a90839a03964d7db
Author: Benno Schulenberg <bensberg@justemail.net>
Date: Thu, 30 Jun 2016 18:02:45 +0200
new feature: add the option --wordchars, to set extra word characters
This allows the user to specify which other characters, besides the
default alphanumeric ones, should be considered as part of a word, so
that word operations like Ctrl+Left and Ctrl+Right will pass them by.
Using this option overrides the option --wordbounds.
This fulfills https://savannah.gnu.org/bugs/?47283.
Diffstat:
10 files changed, 74 insertions(+), 14 deletions(-)
diff --git a/doc/man/nano.1 b/doc/man/nano.1
@@ -148,9 +148,14 @@ keystroke instead of 25. Note that \fB\-c\fP overrides this.
Show the current version number and exit.
.TP
.BR \-W ", " \-\-wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
characters as part of a word.
.TP
+.BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as part of a word. This overrides option
+\fB\-W\fR (\fB\-\-wordbounds\fR).
+.TP
.BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname
Specify the name of the syntax highlighting to use from among the ones
defined in the \fInanorc\fP files.
diff --git a/doc/man/nanorc.5 b/doc/man/nanorc.5
@@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and
spaces. They must be single-column characters.
.TP
.B set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
characters as parts of words.
+.TP
+.B set wordchars \fIstring\fP
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words. This overrides the option
+\fBwordbounds\fR.
.SH SYNTAX HIGHLIGHTING
Coloring the different syntactic elements of a file
diff --git a/doc/nanorc.sample.in b/doc/nanorc.sample.in
@@ -178,10 +178,15 @@
## The default otherwise:
# set whitespace ">."
-## Detect word boundaries more accurately by treating punctuation
+## Detect word boundaries differently by treating punctuation
## characters as parts of words.
# set wordbounds
+## The characters (besides alphanumeric ones) that should be considered
+## as parts of words. This option does not have a default value. When
+## set, it overrides option 'set wordbounds'.
+# set wordchars "<_>."
+
## Paint the interface elements of nano.
## This is an example; by default there are no colors.
diff --git a/doc/texinfo/nano.texi b/doc/texinfo/nano.texi
@@ -231,9 +231,15 @@ Show the current version number and exit.
@item -W
@itemx --wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
characters as parts of words.
+@item -X "@var{characters}"
+@itemx --wordchars="@var{characters}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words. This overrides option
+@option{-W} (@option{--wordbounds}).
+
@item -Y @var{name}
@itemx --syntax=@var{name}
Specify a specific syntax from the nanorc files to use for highlighting.
@@ -831,9 +837,14 @@ spaces. They must be single-column characters. The default pair
for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}.
@item set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
characters as part of a word.
+@item set wordchars "@var{string}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words. This overrides the option
+@code{wordbounds}.
+
@end table
@node Syntax Highlighting
diff --git a/src/chars.c b/src/chars.c
@@ -183,15 +183,33 @@ bool is_punct_mbchar(const char *c)
return ispunct((unsigned char)*c);
}
-/* Return TRUE for a multibyte character found in a word (currently only
- * an alphanumeric or punctuation character, and only the latter if
- * allow_punct is TRUE) and FALSE otherwise. */
+/* Return TRUE when the given multibyte character c is a word-forming
+ * character (that is: alphanumeric, or specified in word_chars, or
+ * punctuation when allow_punct is TRUE), and FALSE otherwise. When
+ * word_chars is set and nonempty, allow_punct is not consulted. */
bool is_word_mbchar(const char *c, bool allow_punct)
{
assert(c != NULL);
- return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) :
- FALSE);
+ if (is_alnum_mbchar(c))
+ return TRUE;
+
+ if (word_chars != NULL && *word_chars != '\0') {
+ bool wordforming;
+ char *symbol = charalloc(MB_CUR_MAX + 1);
+ int symlen = parse_mbchar(c, symbol, NULL);
+
+ symbol[symlen] = '\0';
+ wordforming = (strstr(word_chars, symbol) != NULL);
+
+ /* Free the scratch buffer before returning, so that
+ * each call does not leak the allocation. */
+ free(symbol);
+
+ return wordforming;
+ }
+
+ return (allow_punct && is_punct_mbchar(c));
}
/* Return the visible representation of control character c. */
diff --git a/src/global.c b/src/global.c
@@ -124,6 +124,9 @@ size_t quotelen;
#endif
#endif
+char *word_chars = NULL;
+ /* Nonalphanumeric characters that also form words. */
+
bool nodelay_mode = FALSE;
/* Are we checking for a cancel while doing something? */
@@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void)
delwin(edit);
delwin(bottomwin);
+ free(word_chars);
#ifndef DISABLE_JUSTIFY
free(quotestr);
#ifdef HAVE_REGEX_H
diff --git a/src/nano.c b/src/nano.c
@@ -860,6 +860,8 @@ void usage(void)
#ifndef NANO_TINY
print_opt("-W", "--wordbounds",
N_("Detect word boundaries more accurately"));
+ print_opt("-X", "--wordchars",
+ N_("Which other characters are word parts"));
#endif
#ifndef DISABLE_COLOR
if (!ISSET(RESTRICTED))
@@ -1995,6 +1997,7 @@ int main(int argc, char **argv)
{"smooth", 0, NULL, 'S'},
{"quickblank", 0, NULL, 'U'},
{"wordbounds", 0, NULL, 'W'},
+ {"wordchars", 1, NULL, 'X'},
{"autoindent", 0, NULL, 'i'},
{"cut", 0, NULL, 'k'},
{"unix", 0, NULL, 'u'},
@@ -2040,11 +2043,11 @@ int main(int argc, char **argv)
while ((optchr =
#ifdef HAVE_GETOPT_LONG
getopt_long(argc, argv,
- "ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$",
+ "ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$",
long_options, NULL)
#else
getopt(argc, argv,
- "ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$")
+ "ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$")
#endif
) != -1) {
switch (optchr) {
@@ -2146,6 +2149,9 @@ int main(int argc, char **argv)
case 'W':
SET(WORD_BOUNDS);
break;
+ case 'X':
+ word_chars = mallocstrcpy(word_chars, optarg);
+ break;
#endif
#ifndef DISABLE_COLOR
case 'Y':
@@ -2279,6 +2285,7 @@ int main(int argc, char **argv)
#endif
#ifndef NANO_TINY
char *backup_dir_cpy = backup_dir;
+ char *word_chars_cpy = word_chars;
#endif
#ifndef DISABLE_JUSTIFY
char *quotestr_cpy = quotestr;
@@ -2297,6 +2304,7 @@ int main(int argc, char **argv)
#endif
#ifndef NANO_TINY
backup_dir = NULL;
+ word_chars = NULL;
#endif
#ifndef DISABLE_JUSTIFY
quotestr = NULL;
@@ -2327,6 +2335,10 @@ int main(int argc, char **argv)
free(backup_dir);
backup_dir = backup_dir_cpy;
}
+ if (word_chars_cpy != NULL) {
+ free(word_chars);
+ word_chars = word_chars_cpy;
+ }
#endif
#ifndef DISABLE_JUSTIFY
if (quotestr_cpy != NULL) {
diff --git a/src/proto.h b/src/proto.h
@@ -91,7 +91,10 @@ extern size_t quotelen;
#endif
#endif /* !DISABLE_JUSTIFY */
+extern char *word_chars;
+
extern bool nodelay_mode;
+
extern char *answer;
extern ssize_t tabsize;
diff --git a/src/rcfile.c b/src/rcfile.c
@@ -102,6 +102,7 @@ static const rcoption rcopts[] = {
{"unix", MAKE_IT_UNIX},
{"whitespace", 0},
{"wordbounds", WORD_BOUNDS},
+ {"wordchars", 0},
#endif
#ifndef DISABLE_COLOR
{"titlecolor", 0},
@@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream
if (strcasecmp(rcopts[i].name, "backupdir") == 0)
backup_dir = option;
else
+ if (strcasecmp(rcopts[i].name, "wordchars") == 0)
+ word_chars = option;
+ else
#endif
#ifndef DISABLE_SPELLER
if (strcasecmp(rcopts[i].name, "speller") == 0)
diff --git a/src/utils.c b/src/utils.c
@@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
* word isn't a non-punctuation "word" character, and if we're at
* the end of the line or the character after the word isn't a
* non-punctuation "word" character, we have a whole word. */
- retval = (position == 0 || !is_word_mbchar(before, FALSE)) &&
- (word_end == strlen(buf) || !is_word_mbchar(after, FALSE));
+ retval = (position == 0 || !is_alnum_mbchar(before)) &&
+ (word_end == strlen(buf) || !is_alnum_mbchar(after));
free(before);
free(after);