Allow to change the substitution character

A pointer to the misc structure has to be passed to a certain number of functions for that, but this prepares the ground for future developments of this type. The code to interpret \u (UTF-8) sequences has also been made more robust for better manage incorrect entries.
author: pgen <p.gen.progs@gmail.com> 2020-07-12 00:11:00 +0200
committer: pgen <p.gen.progs@gmail.com> 2020-07-18 18:31:03 +0200
commit: ecb544da53f6c7cba0191f4d7c5764dcd6b864a2 (patch)
tree: 1479a4ba9bc5df3bd0c56b23ae696804aad0e07e
parent: 01f9896fb17ea1cb7228dcd13b655f3812a72919 (diff)
6 files changed, 239 insertions, 173 deletions
diff --git a/smenu.1 b/smenu.1
index de3144d..b328c7b 100644
--- a/smenu.1
+++ b/smenu.1
@@ -108,25 +108,25 @@ characters \fIa\fP \fIb\fP \fIt\fP \fIn\fP \fIv\fP \fIf\fP \fIr\fP and
 
 UTF-8 sequences introduced by \fI\\u\fP are also understood.
 \fI\\u\fP can be followed by 2,4,6 or 8 hexadecimal characters.
-An invalid UTF-8 sequence will be replaced by a dot  (\fI.\fP), see
-also below.
-
-Example: \fI\\uc3a9\fP means latin small letter e with acute.
+Here is an example of using \fI\\u\fP to represent a lowercase latin
+e with acute: \fI\\uc3a9\fP.
 .PP
 Note that with most shells, the \fI\\\fP before the \fIu\fP need to be
 protected or escaped.
 .PP
-Note also that a dot, which is the consequence of an invalid UTF-8
-sequence, will not be distinguished from a dot normally present in the
-input stream.
-.PP
 Quotations (single and double) in the input stream can be used to ignore
 the word separators so that a group of words are taken as a single entity.
 .PP
 Non printable characters in words that are not delimiters are
 converted to their traditional form (\fI\\n\fP for end-of-line,
 \fI\\t\fP for tabulation...) by default.
-A single dot (\fI.\fP) is also used as a placeholder otherwise.
+.PP
+An invalid UTF-8 sequence or other non-printable character will be
+replaced by a dot (\fI.\fP) by default.
+.PP
+There are nevertheless a possibilities to change this substitution
+character with another \fBASCII\fP printable one with the help of the
+command line option \fB-.\fP|\fB-dot\fP|\fB-invalid\fP.
 .PP
 Words containing only spaces, entered directly or resulting from a
 substitution, are also rejected unless they are not selectable.
@@ -134,16 +134,16 @@ This allows special effects like creating blank lines for example.
 These words are also kept in column mode, selectable or not.
 .PP
 \fBWarning\fP, \fBUTF-8\fP encoded codepoints are quietly converted
-into dots (\fI.\fP) when the user locale is not \fBUTF-8\fP aware like
-\fBPOSIX\fP or \fBC\fP by example.
+into the substitution character when the user locale is not \fBUTF-8\fP
+aware like \fBPOSIX\fP or \fBC\fP by example.
 .PP
 smenu has an option to define a set of characters or UTF-8 sequences
-wich should be ignored when reading words from the input.
+which should be ignored when reading words.
 This can be very useful when dealing with inputs where the EOL sequence
 consists in more than one character.
 .PP
 A typical example is DOS or Windows files with lines ending with
-\fICRLF\fI.
+\fICRLF\fP.
 In such a case one might decide to ignore all \fICR\fP characters from
 the input.
 .PP
@@ -670,7 +670,7 @@ by \fB-I\fP|\fB-si\fP|\fB-subst_included\fP or
 
 Displays a message above the window.
 If the current locale is not \fIUTF-8\fP, then all \fIUTF-8\fP characters
-in it will be converted into a dot.
+will be replaced by the substitution character.
 
 \fI\\u\fP sequences can be used in the message.
 
@@ -1017,6 +1017,12 @@ value is used.
 
 Replaces all non-printable characters by a blank.
 If this results in a blank word, it will be potentially deleted.
+.IP "\fB-.\fP|\fB-dot\fP|\fB-invalid\fP"
+(Allowed in all contexts)
+
+Sets the substitution character for non-printable characters.
+When this parameter is not used, the default substitution character is
+a single dot.
 .IP "\fB-M\fP|\fB-middle\fP|\fB-center\fP"
 (Allowed in all contexts)
 
diff --git a/smenu.c b/smenu.c
index 7a057ff..07e262c 100644
--- a/smenu.c
+++ b/smenu.c
@@ -418,7 +418,8 @@ apply_attr(term_t * term, attr_t attr)
 /* ===================================================== */
 int
 ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers,
-       misc_t * misc, const char * section, const char * name, char * value)
+       misc_t * misc, langinfo_t * langinfo, const char * section,
+       const char * name, char * value)
 {
   int error      = 0;
   int has_colors = (term->colors > 7);
@@ -605,6 +606,7 @@ ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers,
     /* [misc] section */
     /* """""""""""""""" */
     if (strcmp(name, "default_search_method") == 0)
+    {
       if (misc->default_search_method == NONE)
       {
         if (strcmp(value, "prefix") == 0)
@@ -614,6 +616,7 @@ ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers,
         else if (strcmp(value, "substring") == 0)
           misc->default_search_method = SUBSTRING;
       }
+    }
   }
 
 out:
@@ -633,10 +636,10 @@ out:
 /* ======================================================================== */
 int
 ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits,
-         timers_t * timers, misc_t * misc,
+         timers_t * timers, misc_t * misc, langinfo_t * langinfo,
          int (*report)(win_t * win, term_t * term, limits_t * limits,
-                       timers_t * timers, misc_t * misc, const char * section,
-                       const char * name, char * value))
+                       timers_t * timers, misc_t * misc, langinfo_t * langinfo,
+                       const char * section, const char * name, char * value))
 {
   char   name[64]     = "";
   char   value[256]   = "";
@@ -685,7 +688,8 @@ ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits,
 
       /* Callback function calling */
       /* """"""""""""""""""""""""" */
-      error = report(win, term, limits, timers, misc, section, name, value);
+      error = report(win, term, limits, timers, misc, langinfo, section, name,
+                     value);
 
       if (error)
         goto out;
@@ -1502,7 +1506,7 @@ void
 parse_selectors(char * str, filters_t * filter, char * unparsed,
                 ll_t ** inc_interval_list, ll_t ** inc_regex_list,
                 ll_t ** exc_interval_list, ll_t ** exc_regex_list,
-                langinfo_t * langinfo)
+                langinfo_t * langinfo, misc_t * misc)
 {
   char         mark; /* Value to set */
   char         c;
@@ -1514,7 +1518,7 @@ parse_selectors(char * str, filters_t * filter, char * unparsed,
   /* Replace the UTF-8 ascii representation in the selector by */
   /* their binary values.                                      */
   /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */
-  utf8_interpret(str, langinfo);
+  utf8_interpret(str, langinfo, misc->invalid_char_substitute);
 
   /* Get the first character to see if this is */
   /* an additive or restrictive operation.     */
@@ -2063,7 +2067,7 @@ fail:
 /* Memory space for d must have been allocated before.          */
 /* ============================================================ */
 void
-strip_ansi_color(char * s, toggle_t * toggle)
+strip_ansi_color(char * s, toggle_t * toggle, misc_t * misc)
 {
   char * p   = s;
   long   len = strlen(s);
@@ -2075,18 +2079,16 @@ strip_ansi_color(char * s, toggle_t * toggle)
     if ((*s == 0x1b) && (*(s + 1) == '['))
     {
       while ((*s != '\0') && (*s++ != 'm'))
-      {
-        /* Do nothing */
-      }
+        ;
     }
-    /* Convert a single \x1b in '.' */
-    /* """""""""""""""""""""""""""" */
+    /* Convert a single \x1b in the invalid substitute character */
+    /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */
     else if (*s == 0x1b)
     {
       if (toggle->blank_nonprintable && len > 1)
         *s++ = ' ';
       else
-        *s++ = '.';
+        *s++ = misc->invalid_char_substitute;
       p++;
     }
     /* No ESC char, we can move on */
@@ -2301,7 +2303,7 @@ buffer_cmp(const void * a, const void * b)
 /* ===================================================================== */
 int
 get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list,
-          langinfo_t * langinfo)
+          langinfo_t * langinfo, misc_t * misc)
 {
   int byte;
   int last;
@@ -2341,7 +2343,7 @@ get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list,
     /* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */
     if (langinfo->utf8 && !utf8_validate(utf8_buffer, last))
     {
-      byte = utf8_buffer[0] = '.';
+      byte = utf8_buffer[0] = misc->invalid_char_substitute;
       utf8_buffer[1]        = '\0';
     }
   } while (ll_find(zapped_glyphs_list, utf8_buffer, buffer_cmp) != NULL);
@@ -2355,7 +2357,8 @@ get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list,
 /* dest must be long enough to contain the expanded string                */
 /* ====================================================================== */
 size_t
-expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle)
+expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle,
+       misc_t * misc)
 {
   char   c;
   int    n;
@@ -2390,7 +2393,7 @@ expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle)
           /* ''''''''''''''' */
         } while (--n && ('\0' != *(src++)));
 
-        *(ptr++) = '.';
+        *(ptr++) = misc->invalid_char_substitute;
         len++;
       }
     }
@@ -2459,7 +2462,7 @@ expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle)
               *(ptr++) = ' ';
             else
             {
-              *(ptr++)   = '.';
+              *(ptr++)   = misc->invalid_char_substitute;
               all_spaces = 0;
             }
           }
@@ -2489,7 +2492,7 @@ char *
 get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
          ll_t * zapped_glyphs_list, char * utf8_buffer, unsigned char * is_last,
          toggle_t * toggle, langinfo_t * langinfo, win_t * win,
-         limits_t * limits)
+         limits_t * limits, misc_t * misc)
 {
   char * temp = NULL;
   int    byte;
@@ -2501,7 +2504,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
 
   /* Skip leading delimiters */
   /* """"""""""""""""""""""" */
-  byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo);
+  byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc);
 
   while (byte == EOF
          || ll_find(word_delims_list, utf8_buffer, buffer_cmp) != NULL)
@@ -2509,7 +2512,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
     if (byte == EOF)
       return NULL;
 
-    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo);
+    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc);
   }
 
   /* Allocate initial word storage space */
@@ -2637,7 +2640,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
     is_special = 0;
 
   next:
-    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo);
+    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc);
   }
 
   /* Nul-terminate the word to make it a string */
@@ -2647,18 +2650,18 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
   /* Replace the UTF-8 ASCII representations in the word just */
   /* read by their binary values.                             */
   /* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */
-  utf8_interpret(temp, langinfo);
+  utf8_interpret(temp, langinfo, misc->invalid_char_substitute);
 
   /* Skip all field delimiters before a record delimiter */
   /* """"""""""""""""""""""""""""""""""""""""""""""""""" */
   if (ll_find(record_delims_list, utf8_buffer, buffer_cmp) == NULL)
   {
-    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo);
+    byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc);
 
     while (byte != EOF
            && ll_find(word_delims_list, utf8_buffer, buffer_cmp) != NULL
            && ll_find(record_delims_list, utf8_buffer, buffer_cmp) == NULL)
-      byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo);
+      byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc);
 
     if (langinfo->utf8 && utf8_get_length(utf8_buffer[0]) > 1)
     {
@@ -2684,7 +2687,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
 
   /* Remove the ANSI color escape sequences from the word */
   /* """""""""""""""""""""""""""""""""""""""""""""""""""" */
-  strip_ansi_color(temp, toggle);
+  strip_ansi_color(temp, toggle, misc);
 
   return temp;
 }
@@ -4791,9 +4794,10 @@ start_pattern_action(char * ctx_name, char * opt_name, char * param,
 {
   char **      pre_selection_index = opt_data[0];
   langinfo_t * langinfo            = opt_data[1];
+  misc_t *     misc                = opt_data[2];
 
   *pre_selection_index = xstrdup(values[0]);
-  utf8_interpret(*pre_selection_index, langinfo);
+  utf8_interpret(*pre_selection_index, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -4803,11 +4807,12 @@ title_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 {
   char **      message  = opt_data[0];
   langinfo_t * langinfo = opt_data[1];
+  misc_t *     misc     = opt_data[2];
 
   *message = xstrdup(values[0]);
   if (!langinfo->utf8)
-    utf8_sanitize(*message);
-  utf8_interpret(*message, langinfo);
+    utf8_sanitize(*message, misc->invalid_char_substitute);
+  utf8_interpret(*message, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -4883,12 +4888,28 @@ toggle_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 }
 
 void
+invalid_char_action(char * ctx_name, char * opt_name, char * param,
+                    int nb_values, char ** values, int nb_opt_data,
+                    void ** opt_data, int nb_ctx_data, void ** ctx_data)
+{
+  misc_t * misc = opt_data[0];
+
+  char ic = *values[0];
+
+  if (isprint(ic))
+    misc->invalid_char_substitute = ic;
+  else
+    misc->invalid_char_substitute = '.';
+}
+
+void
 gutter_action(char * ctx_name, char * opt_name, char * param, int nb_values,
               char ** values, int nb_opt_data, void ** opt_data,
               int nb_ctx_data, void ** ctx_data)
 {
   win_t *      win      = opt_data[0];
   langinfo_t * langinfo = opt_data[1];
+  misc_t *     misc     = opt_data[2];
 
   if (nb_values == 0)
   {
@@ -4917,8 +4938,9 @@ gutter_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 
     gutter = xstrdup(values[0]);
 
-    utf8_interpret(gutter, langinfo); /* Guarantees a well formed *
-                                       * UTF-8 string             */
+    utf8_interpret(gutter, langinfo,
+                   misc->invalid_char_substitute); /* Guarantees a well formed *
+                                                    * UTF-8 string */
 
     win->gutter_nb = utf8_strlen(gutter);
     win->gutter_a  = xmalloc(win->gutter_nb * sizeof(char *));
@@ -5017,6 +5039,7 @@ post_subst_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 {
   ll_t **      list     = opt_data[0];
   langinfo_t * langinfo = opt_data[1];
+  misc_t *     misc     = opt_data[2];
 
   sed_t * sed_node;
   int     i;
@@ -5028,7 +5051,7 @@ post_subst_action(char * ctx_name, char * opt_name, char * param, int nb_values,
   {
     sed_node          = xmalloc(sizeof(sed_t));
     sed_node->pattern = xstrdup(values[i]);
-    utf8_interpret(sed_node->pattern, langinfo);
+    utf8_interpret(sed_node->pattern, langinfo, misc->invalid_char_substitute);
     sed_node->stop = 0;
     ll_append(*list, sed_node);
   }
@@ -5044,13 +5067,15 @@ special_level_action(char * ctx_name, char * opt_name, char * param,
   term_t *     term            = opt_data[2];
   langinfo_t * langinfo        = opt_data[3];
   attr_t *     init_attr       = opt_data[4];
+  misc_t *     misc            = opt_data[5];
 
   attr_t attr = *init_attr;
   char   opt  = param[strlen(param) - 1]; /* last character of param */
   int    i;
 
   special_pattern[opt - '1'] = xstrdup(values[0]);
-  utf8_interpret(special_pattern[opt - '1'], langinfo);
+  utf8_interpret(special_pattern[opt - '1'], langinfo,
+                 misc->invalid_char_substitute);
 
   /* Parse optional additional arguments */
   /* """"""""""""""""""""""""""""""""""" */
@@ -5230,6 +5255,7 @@ timeout_action(char * ctx_name, char * opt_name, char * param, int nb_values,
                int nb_ctx_data, void ** ctx_data)
 {
   langinfo_t * langinfo = opt_data[0];
+  misc_t *     misc     = opt_data[1];
 
   if (strcmp(opt_name, "hidden_timeout") == 0)
     quiet_timeout = 1;
@@ -5244,7 +5270,7 @@ timeout_action(char * ctx_name, char * opt_name, char * param, int nb_values,
     {
       timeout.mode = WORD;
       timeout_word = xstrdup(values[1]);
-      utf8_interpret(timeout_word, langinfo);
+      utf8_interpret(timeout_word, langinfo, misc->invalid_char_substitute);
     }
     else
     {
@@ -5272,9 +5298,10 @@ force_first_column_action(char * ctx_name, char * opt_name, char * param,
 {
   char **      first_word_pattern = opt_data[0];
   langinfo_t * langinfo           = opt_data[1];
+  misc_t *     misc               = opt_data[2];
 
   *first_word_pattern = xstrdup(values[0]);
-  utf8_interpret(*first_word_pattern, langinfo);
+  utf8_interpret(*first_word_pattern, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -5284,9 +5311,10 @@ force_last_column_action(char * ctx_name, char * opt_name, char * param,
 {
   char **      last_word_pattern = opt_data[0];
   langinfo_t * langinfo          = opt_data[1];
+  misc_t *     misc              = opt_data[2];
 
   *last_word_pattern = xstrdup(values[0]);
-  utf8_interpret(*last_word_pattern, langinfo);
+  utf8_interpret(*last_word_pattern, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -5296,9 +5324,10 @@ zapped_glyphs_action(char * ctx_name, char * opt_name, char * param,
 {
   char **      glyph    = opt_data[0];
   langinfo_t * langinfo = opt_data[1];
+  misc_t *     misc     = opt_data[2];
 
   *glyph = xstrdup(values[0]);
-  utf8_interpret(*glyph, langinfo);
+  utf8_interpret(*glyph, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -5308,9 +5337,10 @@ separators_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 {
   char **      sep      = opt_data[0];
   langinfo_t * langinfo = opt_data[1];
+  misc_t *     misc     = opt_data[2];
 
   *sep = xstrdup(values[0]);
-  utf8_interpret(*sep, langinfo);
+  utf8_interpret(*sep, langinfo, misc->invalid_char_substitute);
 }
 
 void
@@ -5321,13 +5351,14 @@ tag_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values,
   toggle_t *   toggle   = opt_data[0];
   win_t *      win      = opt_data[1];
   langinfo_t * langinfo = opt_data[2];
+  misc_t *     misc     = opt_data[3];
 
   toggle->taggable = 1;
 
   if (nb_values == 1)
   {
     win->sel_sep = xstrdup(values[0]);
-    utf8_interpret(win->sel_sep, langinfo);
+    utf8_interpret(win->sel_sep, langinfo, misc->invalid_char_substitute);
   }
 }
 
@@ -5339,6 +5370,7 @@ pin_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values,
   toggle_t *   toggle   = opt_data[0];
   win_t *      win      = opt_data[1];
   langinfo_t * langinfo = opt_data[2];
+  misc_t *     misc     = opt_data[3];
 
   toggle->taggable = 1;
   toggle->pinable  = 1;
@@ -5346,7 +5378,7 @@ pin_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values,
   if (nb_values == 1)
   {
     win->sel_sep = xstrdup(values[0]);
-    utf8_interpret(win->sel_sep, langinfo);
+    utf8_interpret(win->sel_sep, langinfo, misc->invalid_char_substitute);
   }
 }
 
@@ -5417,6 +5449,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values,
 {
   langinfo_t * langinfo      = opt_data[0];
   long *       daccess_index = opt_data[1];
+  misc_t *     misc          = opt_data[2];
 
   int       pos;
   wchar_t * w;
@@ -5435,7 +5468,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values,
         free(daccess.left);
 
         daccess.left = xstrdup(value + 2);
-        utf8_interpret(daccess.left, langinfo);
+        utf8_interpret(daccess.left, langinfo, misc->invalid_char_substitute);
 
         if (utf8_strlen(daccess.left) != 1)
         {
@@ -5460,7 +5493,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values,
         free(daccess.right);
 
         daccess.right = xstrdup(value + 2);
-        utf8_interpret(daccess.right, langinfo);
+        utf8_interpret(daccess.right, langinfo, misc->invalid_char_substitute);
 
         if (utf8_strlen(daccess.right) != 1)
         {
@@ -5601,7 +5634,8 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values,
         free(daccess.num_sep);
 
         daccess.num_sep = xstrdup(value + 2);
-        utf8_interpret(daccess.num_sep, langinfo);
+        utf8_interpret(daccess.num_sep, langinfo,
+                       misc->invalid_char_substitute);
 
         if (utf8_strlen(daccess.num_sep) != 1)
         {
@@ -6193,6 +6227,10 @@ main(int argc, char * argv[])
   /* """"""""""""""""""""""""""""""""""""""""""""""" */
   get_terminal_size(&term.nlines, &term.ncolumns);
 
+  /* Default substitution character on invalid input. */
+  /* """""""""""""""""""""""""""""""""""""""""""""""" */
+  misc.invalid_char_substitute = '.';
+
   /* Command line options setting */
   /* """""""""""""""""""""""""""" */
   ctxopt_init(argv[0], "stop_if_non_option=No "
@@ -6211,6 +6249,7 @@ main(int argc, char * argv[])
                    "[zapped_glyphs #bytes] "
                    "[lines [#height]] "
                    "[blank_nonprintable] "
+                   "[*invalid_character #invalid_char_subst] "
                    "[center_mode] "
                    "[clean] "
                    "[keep_spaces] "
@@ -6333,6 +6372,7 @@ main(int argc, char * argv[])
   ctxopt_add_opt_settings(parameters, "field_da_number",
                           "-F -en -embedded_number");
   ctxopt_add_opt_settings(parameters, "da_options", "-D -data -options");
+  ctxopt_add_opt_settings(parameters, "invalid_character", "-. -dot -invalid");
   ctxopt_add_opt_settings(parameters, "blank_nonprintable", "-b -blank");
   ctxopt_add_opt_settings(parameters, "center_mode", "-M -middle -center");
   ctxopt_add_opt_settings(parameters, "clean",
@@ -6389,6 +6429,8 @@ main(int argc, char * argv[])
   /* """""""""""""" */
 
   ctxopt_add_opt_settings(actions, "auto_tag", toggle_action, &toggle, NULL);
+  ctxopt_add_opt_settings(actions, "invalid_character", invalid_char_action,
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "blank_nonprintable", toggle_action, &toggle,
                           NULL);
   ctxopt_add_opt_settings(actions, "center_mode", center_mode_action, &win,
@@ -6407,7 +6449,7 @@ main(int argc, char * argv[])
   ctxopt_add_opt_settings(actions, "exclude_re", exclude_re_action,
                           &pattern_def_include, &exclude_pattern, NULL);
   ctxopt_add_opt_settings(actions, "gutter", gutter_action, &win, &langinfo,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "help", help_action, NULL);
   ctxopt_add_opt_settings(actions, "long_help", long_help_action, NULL);
   ctxopt_add_opt_settings(actions, "usage", usage_action, NULL);
@@ -6418,56 +6460,57 @@ main(int argc, char * argv[])
   ctxopt_add_opt_settings(actions, "no_scoll_bar", toggle_action, &toggle,
                           NULL);
   ctxopt_add_opt_settings(actions, "start_pattern", start_pattern_action,
-                          &pre_selection_index, &langinfo, NULL);
+                          &pre_selection_index, &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "title", title_action, &message, &langinfo,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "validate_in_search_mode", toggle_action,
                           &toggle, NULL);
   ctxopt_add_opt_settings(actions, "version", version_action, NULL);
   ctxopt_add_opt_settings(actions, "visual_bell", toggle_action, &toggle, NULL);
   ctxopt_add_opt_settings(actions, "wide_mode", wide_mode_action, &win, NULL);
   ctxopt_add_opt_settings(actions, "post_subst_all", post_subst_action,
-                          &sed_list, &langinfo, NULL);
+                          &sed_list, &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "post_subst_included", post_subst_action,
-                          &include_sed_list, &langinfo, NULL);
+                          &include_sed_list, &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "post_subst_excluded", post_subst_action,
-                          &exclude_sed_list, &langinfo, NULL);
+                          &exclude_sed_list, &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "special_level_1", special_level_action,
                           special_pattern, &win, &term, &langinfo, &init_attr,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "special_level_2", special_level_action,
                           special_pattern, &win, &term, &langinfo, &init_attr,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "special_level_3", special_level_action,
                           special_pattern, &win, &term, &langinfo, &init_attr,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "special_level_4", special_level_action,
                           special_pattern, &win, &term, &langinfo, &init_attr,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "special_level_5", special_level_action,
                           special_pattern, &win, &term, &langinfo, &init_attr,
-                          NULL);
+                          &misc, NULL);
   ctxopt_add_opt_settings(actions, "attributes", attributes_action, &win, &term,
                           &init_attr, NULL);
-  ctxopt_add_opt_settings(actions, "timeout", timeout_action, &langinfo, NULL);
+  ctxopt_add_opt_settings(actions, "timeout", timeout_action, &langinfo, &misc,
+                          NULL);
   ctxopt_add_opt_settings(actions, "hidden_timeout", timeout_action, &langinfo,
                           NULL);
   ctxopt_add_opt_settings(actions, "force_first_column",
                           force_first_column_action, &first_word_pattern,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "force_last_column",
                           force_last_column_action, &last_word_pattern,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "word_separators", separators_action, &iws,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "line_separators", separators_action, &ils,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "zapped_glyphs", zapped_glyphs_action, &zg,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "tag_mode", tag_mode_action, &toggle, &win,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "pin_mode", pin_mode_action, &toggle, &win,
-                          &langinfo, NULL);
+                          &langinfo, &misc, NULL);
   ctxopt_add_opt_settings(actions, "search_method", search_method_action, &misc,
                           NULL);
   ctxopt_add_opt_settings(actions, "auto_da_number", auto_da_action,
@@ -6477,7 +6520,7 @@ main(int argc, char * argv[])
   ctxopt_add_opt_settings(actions, "field_da_number", field_da_number_action,
                           NULL);
   ctxopt_add_opt_settings(actions, "da_options", da_options_action, &langinfo,
-                          &daccess_index, NULL);
+                          &daccess_index, &misc, NULL);
 
   /* ctxopt constraints */
   /* """""""""""""""""" */
@@ -6533,10 +6576,10 @@ main(int argc, char * argv[])
   /* the inclusion and exclusion patterns.                             */
   /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
   if (include_pattern != NULL)
-    utf8_interpret(include_pattern, &langinfo);
+    utf8_interpret(include_pattern, &langinfo, misc.invalid_char_substitute);
 
   if (exclude_pattern != NULL)
-    utf8_interpret(exclude_pattern, &langinfo);
+    utf8_interpret(exclude_pattern, &langinfo, misc.invalid_char_substitute);
 
   /* If we did not impose the number of columns, use the whole */
   /* terminal width                                            */
@@ -6615,7 +6658,8 @@ main(int argc, char * argv[])
 
   if (custom_ini_file != NULL)
   {
-    if (ini_load(custom_ini_file, &win, &term, &limits, &timers, &misc, ini_cb))
+    if (ini_load(custom_ini_file, &win, &term, &limits, &timers, &misc,
+                 &langinfo, ini_cb))
       exit(EXIT_FAILURE);
   }
   else
@@ -6627,10 +6671,12 @@ main(int argc, char * argv[])
 
     /* Set the attributes from the configuration file if possible */
     /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
-    if (ini_load(home_ini_file, &win, &term, &limits, &timers, &misc, ini_cb))
+    if (ini_load(home_ini_file, &win, &term, &limits, &timers, &misc, &langinfo,
+                 ini_cb))
       exit(EXIT_FAILURE);
 
-    if (ini_load(local_ini_file, &win, &term, &limits, &timers, &misc, ini_cb))
+    if (ini_load(local_ini_file, &win, &term, &limits, &timers, &misc,
+                 &langinfo, ini_cb))
       exit(EXIT_FAILURE);
 
     free(home_ini_file);
@@ -7284,7 +7330,8 @@ main(int argc, char * argv[])
 
       parse_selectors(rows_selector, &filter_type, unparsed,
                       &inc_row_interval_list, &inc_row_regex_list,
-                      &exc_row_interval_list, &exc_row_regex_list, &langinfo);
+                      &exc_row_interval_list, &exc_row_regex_list, &langinfo,
+                      &misc);
 
       if (*unparsed != '\0')
       {
@@ -7323,7 +7370,8 @@ main(int argc, char * argv[])
 
       parse_selectors(cols_selector, &filter_type, unparsed,
                       &inc_col_interval_list, &inc_col_regex_list,
-                      &exc_col_interval_list, &exc_col_regex_list, &langinfo);
+                      &exc_col_interval_list, &exc_col_regex_list, &langinfo,
+                      &misc);
 
       if (*unparsed != '\0')
       {
@@ -7435,7 +7483,7 @@ main(int argc, char * argv[])
   /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
   while ((word = get_word(input_file, word_delims_list, record_delims_list,
                           zapped_glyphs_list, utf8_buffer, &is_last, &toggle,
-                          &langinfo, &win, &limits))
+                          &langinfo, &win, &limits, &misc))
          != NULL)
   {
     int selectable;
@@ -8261,7 +8309,7 @@ main(int argc, char * argv[])
     word_len = strlen(word->str);
 
     expanded_word = xmalloc(5 * word_len + 1);
-    len           = expand(word->str, expanded_word, &langinfo, &toggle);
+    len           = expand(word->str, expanded_word, &langinfo, &toggle, &misc);
 
     /* Update it if needed */
     /* ''''''''''''''''''' */
diff --git a/smenu.h b/smenu.h
index 768d04e..ebb4360 100644
--- a/smenu.h
+++ b/smenu.h
@@ -187,6 +187,7 @@ struct timers_s
 struct misc_s
 {
   search_mode_t default_search_method;
+  char          invalid_char_substitute;
 };
 
 /* Terminal setting variables */
@@ -440,7 +441,7 @@ void
 setup_term(int const fd);
 
 void
-strip_ansi_color(char * s, toggle_t * toggle);
+strip_ansi_color(char * s, toggle_t * toggle, misc_t * misc);
 
 int
 tst_cb(void * elem);
@@ -450,14 +451,15 @@ tst_cb_cli(void * elem);
 
 int
 ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits,
-         timers_t * timers, misc_t * misc,
+         timers_t * timers, misc_t * misc, langinfo_t * langinfo,
          int (*report)(win_t * win, term_t * term, limits_t * limits,
-                       timers_t * timers, misc_t * misc, const char * section,
-                       const char * name, char * value));
+                       timers_t * timers, misc_t * misc, langinfo_t * langinfo,
+                       const char * section, const char * name, char * value));
 
 int
 ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers,
-       misc_t * misc, const char * section, const char * name, char * value);
+       misc_t * misc, langinfo_t * langinfo, const char * section,
+       const char * name, char * value);
 
 char *
 make_ini_path(char * name, char * base);
@@ -516,11 +518,12 @@ disp_word(long pos, search_mode_t search_mode, search_data_t * search_data,
           term_t * term, win_t * win, char * tmp_word);
 
 size_t
-expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle);
+expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle,
+       misc_t * misc);
 
 int
 get_bytes(FILE * input, char * utf8_buffer, ll_t * ignored_glyphs_list,
-          langinfo_t * langinfo);
+          langinfo_t * langinfo, misc_t * misc);
 
 int
 get_scancode(unsigned char * s, size_t max);
@@ -529,7 +532,7 @@ char *
 get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list,
          ll_t * ignored_glyphs_list, char * utf8_buffer,
          unsigned char * is_last, toggle_t * toggle, langinfo_t * langinfo,
-         win_t * win, limits_t * limits);
+         win_t * win, limits_t * limits, misc_t * misc);
 
 void
 left_margin_putp(char * s, term_t * term, win_t * win);
@@ -551,7 +554,7 @@ void
 parse_selectors(char * str, filters_t * filter, char * unparsed,
                 ll_t ** inc_interval_list, ll_t ** inc_regex_list,
                 ll_t ** exc_interval_list, ll_t ** exc_regex_list,
-                langinfo_t * langinfo);
+                langinfo_t * langinfo, misc_t * misc);
 
 int
 replace(char * orig, sed_t * sed);
diff --git a/usage.c b/usage.c
index 231c757..3786a02 100644
--- a/usage.c
+++ b/usage.c
@@ -40,6 +40,9 @@ common_help(void)
   printf("  sets the number of lines in the selection window.\n");
   printf("-b|-blank\n");
   printf("  displays non printable characters as space.\n");
+  printf("-.|-dot|-invalid\n");
+  printf("  defines the substitution character for a non-printable "
+         "character.\n");
   printf("-M|-middle|-center\n");
   printf("  centers the display if possible.\n");
   printf("-d|-restore|-delete|-clean|-delete_window|-clean_window\n");
diff --git a/utf8.c b/utf8.c
index bf007b2..504f9c9 100644
--- a/utf8.c
+++ b/utf8.c
@@ -23,21 +23,23 @@
 /* All hexadecimal sequences of \uxx, \uxxxx, \uxxxxxx and \uxxxxxxxx will */
 /* be replace by the corresponding UTF-8 character.                        */
 /* ======================================================================= */
-void
-utf8_interpret(char * s, langinfo_t * langinfo)
+int
+utf8_interpret(char * s, langinfo_t * langinfo, char substitute)
 {
-  char * utf8_str;          /* \uxx...                                        */
-  size_t utf8_to_eos_len;   /* bytes in s starting from the first             *
-                             * occurrence of \u                               */
-  size_t init_len;          /* initial lengths of the string to interpret     */
-  size_t utf8_ascii_len;    /* 2,4,6 or 8 bytes                               */
-  size_t len_to_remove = 0; /* number of bytes to remove after the conversion */
-  char   tmp[9];            /* temporary string                               */
-
-  /* Guard against the case where s is NULL */
-  /* """""""""""""""""""""""""""""""""""""" */
+  char * utf8_str;          /* \uxx...                                     */
+  size_t utf8_to_eos_len;   /* bytes in s starting from the first          *
+                             * occurrence of \u.                           */
+  size_t init_len;          /* initial lengths of the string to interpret  */
+  size_t utf8_ascii_len;    /* 2,4,6 or 8 bytes.                           */
+  size_t len_to_remove = 0; /* number of bytes to remove after the         *
+                             | conversion.                                 */
+  char tmp[9];              /* temporary string.                           */
+  int  rc = 1;              /* return code, 0: error, 1: fine.             */
+
+  /* Guard against the case where s is NULL. */
+  /* """"""""""""""""""""""""""""""""""""""" */
   if (s == NULL)
-    return;
+    return 0;
 
   init_len = strlen(s);
 
@@ -45,12 +47,13 @@ utf8_interpret(char * s, langinfo_t * langinfo)
   {
     utf8_to_eos_len = strlen(utf8_str);
     if (utf8_to_eos_len < 4) /* string too short to contain *
-                              * a valid UTF-8 char          */
author	pgen <p.gen.progs@gmail.com>	2020-07-12 00:11:00 +0200
committer	pgen <p.gen.progs@gmail.com>	2020-07-18 18:31:03 +0200
commit	ecb544da53f6c7cba0191f4d7c5764dcd6b864a2 (patch)
tree	1479a4ba9bc5df3bd0c56b23ae696804aad0e07e
parent	01f9896fb17ea1cb7228dcd13b655f3812a72919 (diff)