diff options
author | pgen <p.gen.progs@gmail.com> | 2020-07-12 00:11:00 +0200 |
---|---|---|
committer | pgen <p.gen.progs@gmail.com> | 2020-07-18 18:31:03 +0200 |
commit | ecb544da53f6c7cba0191f4d7c5764dcd6b864a2 (patch) | |
tree | 1479a4ba9bc5df3bd0c56b23ae696804aad0e07e | |
parent | 01f9896fb17ea1cb7228dcd13b655f3812a72919 (diff) |
Allow changing the substitution character
A pointer to the misc structure has to be passed to a certain number of
functions for that, but this prepares the ground for future developments
of this type.
The code to interpret \u (UTF-8) sequences has also been made more robust
to better handle incorrect entries.
-rw-r--r-- | smenu.1 | 34 | ||||
-rw-r--r-- | smenu.c | 194 | ||||
-rw-r--r-- | smenu.h | 21 | ||||
-rw-r--r-- | usage.c | 3 | ||||
-rw-r--r-- | utf8.c | 154 | ||||
-rw-r--r-- | utf8.h | 6 |
6 files changed, 239 insertions, 173 deletions
@@ -108,25 +108,25 @@ characters \fIa\fP \fIb\fP \fIt\fP \fIn\fP \fIv\fP \fIf\fP \fIr\fP and UTF-8 sequences introduced by \fI\\u\fP are also understood. \fI\\u\fP can be followed by 2,4,6 or 8 hexadecimal characters. -An invalid UTF-8 sequence will be replaced by a dot (\fI.\fP), see -also below. - -Example: \fI\\uc3a9\fP means latin small letter e with acute. +Here is an example of using \fI\\u\fP to represent a lowercase latin +e with acute: \fI\\uc3a9\fP. .PP Note that with most shells, the \fI\\\fP before the \fIu\fP need to be protected or escaped. .PP -Note also that a dot, which is the consequence of an invalid UTF-8 -sequence, will not be distinguished from a dot normally present in the -input stream. -.PP Quotations (single and double) in the input stream can be used to ignore the word separators so that a group of words are taken as a single entity. .PP Non printable characters in words that are not delimiters are converted to their traditional form (\fI\\n\fP for end-of-line, \fI\\t\fP for tabulation...) by default. -A single dot (\fI.\fP) is also used as a placeholder otherwise. +.PP +An invalid UTF-8 sequence or other non-printable character will be +replaced by a dot (\fI.\fP) by default. +.PP +There are nevertheless a possibilities to change this substitution +character with another \fBASCII\fP printable one with the help of the +command line option \fB-.\fP|\fB-dot\fP|\fB-invalid\fP. .PP Words containing only spaces, entered directly or resulting from a substitution, are also rejected unless they are not selectable. @@ -134,16 +134,16 @@ This allows special effects like creating blank lines for example. These words are also kept in column mode, selectable or not. .PP \fBWarning\fP, \fBUTF-8\fP encoded codepoints are quietly converted -into dots (\fI.\fP) when the user locale is not \fBUTF-8\fP aware like -\fBPOSIX\fP or \fBC\fP by example. 
+into the substitution character when the user locale is not \fBUTF-8\fP +aware like \fBPOSIX\fP or \fBC\fP by example. .PP smenu has an option to define a set of characters or UTF-8 sequences -wich should be ignored when reading words from the input. +which should be ignored when reading words. This can be very useful when dealing with inputs where the EOL sequence consists in more than one character. .PP A typical example is DOS or Windows files with lines ending with -\fICRLF\fI. +\fICRLF\fP. In such a case one might decide to ignore all \fICR\fP characters from the input. .PP @@ -670,7 +670,7 @@ by \fB-I\fP|\fB-si\fP|\fB-subst_included\fP or Displays a message above the window. If the current locale is not \fIUTF-8\fP, then all \fIUTF-8\fP characters -in it will be converted into a dot. +will be replaced by the substitution character. \fI\\u\fP sequences can be used in the message. @@ -1017,6 +1017,12 @@ value is used. Replaces all non-printable characters by a blank. If this results in a blank word, it will be potentially deleted. +.IP "\fB-.\fP|\fB-dot\fP|\fB-invalid\fP" +(Allowed in all contexts) + +Sets the substitution character for non-printable characters. +When this parameter is not used, the default substitution character is +a single dot. 
.IP "\fB-M\fP|\fB-middle\fP|\fB-center\fP" (Allowed in all contexts) @@ -418,7 +418,8 @@ apply_attr(term_t * term, attr_t attr) /* ===================================================== */ int ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers, - misc_t * misc, const char * section, const char * name, char * value) + misc_t * misc, langinfo_t * langinfo, const char * section, + const char * name, char * value) { int error = 0; int has_colors = (term->colors > 7); @@ -605,6 +606,7 @@ ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers, /* [misc] section */ /* """""""""""""""" */ if (strcmp(name, "default_search_method") == 0) + { if (misc->default_search_method == NONE) { if (strcmp(value, "prefix") == 0) @@ -614,6 +616,7 @@ ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers, else if (strcmp(value, "substring") == 0) misc->default_search_method = SUBSTRING; } + } } out: @@ -633,10 +636,10 @@ out: /* ======================================================================== */ int ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits, - timers_t * timers, misc_t * misc, + timers_t * timers, misc_t * misc, langinfo_t * langinfo, int (*report)(win_t * win, term_t * term, limits_t * limits, - timers_t * timers, misc_t * misc, const char * section, - const char * name, char * value)) + timers_t * timers, misc_t * misc, langinfo_t * langinfo, + const char * section, const char * name, char * value)) { char name[64] = ""; char value[256] = ""; @@ -685,7 +688,8 @@ ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits, /* Callback function calling */ /* """"""""""""""""""""""""" */ - error = report(win, term, limits, timers, misc, section, name, value); + error = report(win, term, limits, timers, misc, langinfo, section, name, + value); if (error) goto out; @@ -1502,7 +1506,7 @@ void parse_selectors(char * str, filters_t * filter, char * unparsed, ll_t ** 
inc_interval_list, ll_t ** inc_regex_list, ll_t ** exc_interval_list, ll_t ** exc_regex_list, - langinfo_t * langinfo) + langinfo_t * langinfo, misc_t * misc) { char mark; /* Value to set */ char c; @@ -1514,7 +1518,7 @@ parse_selectors(char * str, filters_t * filter, char * unparsed, /* Replace the UTF-8 ascii representation in the selector by */ /* their binary values. */ /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */ - utf8_interpret(str, langinfo); + utf8_interpret(str, langinfo, misc->invalid_char_substitute); /* Get the first character to see if this is */ /* an additive or restrictive operation. */ @@ -2063,7 +2067,7 @@ fail: /* Memory space for d must have been allocated before. */ /* ============================================================ */ void -strip_ansi_color(char * s, toggle_t * toggle) +strip_ansi_color(char * s, toggle_t * toggle, misc_t * misc) { char * p = s; long len = strlen(s); @@ -2075,18 +2079,16 @@ strip_ansi_color(char * s, toggle_t * toggle) if ((*s == 0x1b) && (*(s + 1) == '[')) { while ((*s != '\0') && (*s++ != 'm')) - { - /* Do nothing */ - } + ; } - /* Convert a single \x1b in '.' 
*/ - /* """""""""""""""""""""""""""" */ + /* Convert a single \x1b in the invalid substitute character */ + /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""" */ else if (*s == 0x1b) { if (toggle->blank_nonprintable && len > 1) *s++ = ' '; else - *s++ = '.'; + *s++ = misc->invalid_char_substitute; p++; } /* No ESC char, we can move on */ @@ -2301,7 +2303,7 @@ buffer_cmp(const void * a, const void * b) /* ===================================================================== */ int get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list, - langinfo_t * langinfo) + langinfo_t * langinfo, misc_t * misc) { int byte; int last; @@ -2341,7 +2343,7 @@ get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list, /* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */ if (langinfo->utf8 && !utf8_validate(utf8_buffer, last)) { - byte = utf8_buffer[0] = '.'; + byte = utf8_buffer[0] = misc->invalid_char_substitute; utf8_buffer[1] = '\0'; } } while (ll_find(zapped_glyphs_list, utf8_buffer, buffer_cmp) != NULL); @@ -2355,7 +2357,8 @@ get_bytes(FILE * input, char * utf8_buffer, ll_t * zapped_glyphs_list, /* dest must be long enough to contain the expanded string */ /* ====================================================================== */ size_t -expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle) +expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle, + misc_t * misc) { char c; int n; @@ -2390,7 +2393,7 @@ expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle) /* ''''''''''''''' */ } while (--n && ('\0' != *(src++))); - *(ptr++) = '.'; + *(ptr++) = misc->invalid_char_substitute; len++; } } @@ -2459,7 +2462,7 @@ expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle) *(ptr++) = ' '; else { - *(ptr++) = '.'; + *(ptr++) = misc->invalid_char_substitute; all_spaces = 0; } } @@ -2489,7 +2492,7 @@ char * get_word(FILE * input, ll_t * word_delims_list, ll_t * 
record_delims_list, ll_t * zapped_glyphs_list, char * utf8_buffer, unsigned char * is_last, toggle_t * toggle, langinfo_t * langinfo, win_t * win, - limits_t * limits) + limits_t * limits, misc_t * misc) { char * temp = NULL; int byte; @@ -2501,7 +2504,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, /* Skip leading delimiters */ /* """"""""""""""""""""""" */ - byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo); + byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc); while (byte == EOF || ll_find(word_delims_list, utf8_buffer, buffer_cmp) != NULL) @@ -2509,7 +2512,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, if (byte == EOF) return NULL; - byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo); + byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc); } /* Allocate initial word storage space */ @@ -2637,7 +2640,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, is_special = 0; next: - byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo); + byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc); } /* Nul-terminate the word to make it a string */ @@ -2647,18 +2650,18 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, /* Replace the UTF-8 ASCII representations in the word just */ /* read by their binary values. 
*/ /* """""""""""""""""""""""""""""""""""""""""""""""""""""""" */ - utf8_interpret(temp, langinfo); + utf8_interpret(temp, langinfo, misc->invalid_char_substitute); /* Skip all field delimiters before a record delimiter */ /* """"""""""""""""""""""""""""""""""""""""""""""""""" */ if (ll_find(record_delims_list, utf8_buffer, buffer_cmp) == NULL) { - byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo); + byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc); while (byte != EOF && ll_find(word_delims_list, utf8_buffer, buffer_cmp) != NULL && ll_find(record_delims_list, utf8_buffer, buffer_cmp) == NULL) - byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo); + byte = get_bytes(input, utf8_buffer, zapped_glyphs_list, langinfo, misc); if (langinfo->utf8 && utf8_get_length(utf8_buffer[0]) > 1) { @@ -2684,7 +2687,7 @@ get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, /* Remove the ANSI color escape sequences from the word */ /* """""""""""""""""""""""""""""""""""""""""""""""""""" */ - strip_ansi_color(temp, toggle); + strip_ansi_color(temp, toggle, misc); return temp; } @@ -4791,9 +4794,10 @@ start_pattern_action(char * ctx_name, char * opt_name, char * param, { char ** pre_selection_index = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *pre_selection_index = xstrdup(values[0]); - utf8_interpret(*pre_selection_index, langinfo); + utf8_interpret(*pre_selection_index, langinfo, misc->invalid_char_substitute); } void @@ -4803,11 +4807,12 @@ title_action(char * ctx_name, char * opt_name, char * param, int nb_values, { char ** message = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *message = xstrdup(values[0]); if (!langinfo->utf8) - utf8_sanitize(*message); - utf8_interpret(*message, langinfo); + utf8_sanitize(*message, misc->invalid_char_substitute); + utf8_interpret(*message, langinfo, misc->invalid_char_substitute); } void @@ 
-4883,12 +4888,28 @@ toggle_action(char * ctx_name, char * opt_name, char * param, int nb_values, } void +invalid_char_action(char * ctx_name, char * opt_name, char * param, + int nb_values, char ** values, int nb_opt_data, + void ** opt_data, int nb_ctx_data, void ** ctx_data) +{ + misc_t * misc = opt_data[0]; + + char ic = *values[0]; + + if (isprint(ic)) + misc->invalid_char_substitute = ic; + else + misc->invalid_char_substitute = '.'; +} + +void gutter_action(char * ctx_name, char * opt_name, char * param, int nb_values, char ** values, int nb_opt_data, void ** opt_data, int nb_ctx_data, void ** ctx_data) { win_t * win = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; if (nb_values == 0) { @@ -4917,8 +4938,9 @@ gutter_action(char * ctx_name, char * opt_name, char * param, int nb_values, gutter = xstrdup(values[0]); - utf8_interpret(gutter, langinfo); /* Guarantees a well formed * - * UTF-8 string */ + utf8_interpret(gutter, langinfo, + misc->invalid_char_substitute); /* Guarantees a well formed * + * UTF-8 string */ win->gutter_nb = utf8_strlen(gutter); win->gutter_a = xmalloc(win->gutter_nb * sizeof(char *)); @@ -5017,6 +5039,7 @@ post_subst_action(char * ctx_name, char * opt_name, char * param, int nb_values, { ll_t ** list = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; sed_t * sed_node; int i; @@ -5028,7 +5051,7 @@ post_subst_action(char * ctx_name, char * opt_name, char * param, int nb_values, { sed_node = xmalloc(sizeof(sed_t)); sed_node->pattern = xstrdup(values[i]); - utf8_interpret(sed_node->pattern, langinfo); + utf8_interpret(sed_node->pattern, langinfo, misc->invalid_char_substitute); sed_node->stop = 0; ll_append(*list, sed_node); } @@ -5044,13 +5067,15 @@ special_level_action(char * ctx_name, char * opt_name, char * param, term_t * term = opt_data[2]; langinfo_t * langinfo = opt_data[3]; attr_t * init_attr = opt_data[4]; + misc_t * misc = opt_data[5]; attr_t attr = *init_attr; 
char opt = param[strlen(param) - 1]; /* last character of param */ int i; special_pattern[opt - '1'] = xstrdup(values[0]); - utf8_interpret(special_pattern[opt - '1'], langinfo); + utf8_interpret(special_pattern[opt - '1'], langinfo, + misc->invalid_char_substitute); /* Parse optional additional arguments */ /* """"""""""""""""""""""""""""""""""" */ @@ -5230,6 +5255,7 @@ timeout_action(char * ctx_name, char * opt_name, char * param, int nb_values, int nb_ctx_data, void ** ctx_data) { langinfo_t * langinfo = opt_data[0]; + misc_t * misc = opt_data[1]; if (strcmp(opt_name, "hidden_timeout") == 0) quiet_timeout = 1; @@ -5244,7 +5270,7 @@ timeout_action(char * ctx_name, char * opt_name, char * param, int nb_values, { timeout.mode = WORD; timeout_word = xstrdup(values[1]); - utf8_interpret(timeout_word, langinfo); + utf8_interpret(timeout_word, langinfo, misc->invalid_char_substitute); } else { @@ -5272,9 +5298,10 @@ force_first_column_action(char * ctx_name, char * opt_name, char * param, { char ** first_word_pattern = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *first_word_pattern = xstrdup(values[0]); - utf8_interpret(*first_word_pattern, langinfo); + utf8_interpret(*first_word_pattern, langinfo, misc->invalid_char_substitute); } void @@ -5284,9 +5311,10 @@ force_last_column_action(char * ctx_name, char * opt_name, char * param, { char ** last_word_pattern = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *last_word_pattern = xstrdup(values[0]); - utf8_interpret(*last_word_pattern, langinfo); + utf8_interpret(*last_word_pattern, langinfo, misc->invalid_char_substitute); } void @@ -5296,9 +5324,10 @@ zapped_glyphs_action(char * ctx_name, char * opt_name, char * param, { char ** glyph = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *glyph = xstrdup(values[0]); - utf8_interpret(*glyph, langinfo); + utf8_interpret(*glyph, langinfo, misc->invalid_char_substitute); } 
void @@ -5308,9 +5337,10 @@ separators_action(char * ctx_name, char * opt_name, char * param, int nb_values, { char ** sep = opt_data[0]; langinfo_t * langinfo = opt_data[1]; + misc_t * misc = opt_data[2]; *sep = xstrdup(values[0]); - utf8_interpret(*sep, langinfo); + utf8_interpret(*sep, langinfo, misc->invalid_char_substitute); } void @@ -5321,13 +5351,14 @@ tag_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values, toggle_t * toggle = opt_data[0]; win_t * win = opt_data[1]; langinfo_t * langinfo = opt_data[2]; + misc_t * misc = opt_data[3]; toggle->taggable = 1; if (nb_values == 1) { win->sel_sep = xstrdup(values[0]); - utf8_interpret(win->sel_sep, langinfo); + utf8_interpret(win->sel_sep, langinfo, misc->invalid_char_substitute); } } @@ -5339,6 +5370,7 @@ pin_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values, toggle_t * toggle = opt_data[0]; win_t * win = opt_data[1]; langinfo_t * langinfo = opt_data[2]; + misc_t * misc = opt_data[3]; toggle->taggable = 1; toggle->pinable = 1; @@ -5346,7 +5378,7 @@ pin_mode_action(char * ctx_name, char * opt_name, char * param, int nb_values, if (nb_values == 1) { win->sel_sep = xstrdup(values[0]); - utf8_interpret(win->sel_sep, langinfo); + utf8_interpret(win->sel_sep, langinfo, misc->invalid_char_substitute); } } @@ -5417,6 +5449,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values, { langinfo_t * langinfo = opt_data[0]; long * daccess_index = opt_data[1]; + misc_t * misc = opt_data[2]; int pos; wchar_t * w; @@ -5435,7 +5468,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values, free(daccess.left); daccess.left = xstrdup(value + 2); - utf8_interpret(daccess.left, langinfo); + utf8_interpret(daccess.left, langinfo, misc->invalid_char_substitute); if (utf8_strlen(daccess.left) != 1) { @@ -5460,7 +5493,7 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values, free(daccess.right); daccess.right = 
xstrdup(value + 2); - utf8_interpret(daccess.right, langinfo); + utf8_interpret(daccess.right, langinfo, misc->invalid_char_substitute); if (utf8_strlen(daccess.right) != 1) { @@ -5601,7 +5634,8 @@ da_options_action(char * ctx_name, char * opt_name, char * param, int nb_values, free(daccess.num_sep); daccess.num_sep = xstrdup(value + 2); - utf8_interpret(daccess.num_sep, langinfo); + utf8_interpret(daccess.num_sep, langinfo, + misc->invalid_char_substitute); if (utf8_strlen(daccess.num_sep) != 1) { @@ -6193,6 +6227,10 @@ main(int argc, char * argv[]) /* """"""""""""""""""""""""""""""""""""""""""""""" */ get_terminal_size(&term.nlines, &term.ncolumns); + /* Default substitution character on invalid input. */ + /* """""""""""""""""""""""""""""""""""""""""""""""" */ + misc.invalid_char_substitute = '.'; + /* Command line options setting */ /* """""""""""""""""""""""""""" */ ctxopt_init(argv[0], "stop_if_non_option=No " @@ -6211,6 +6249,7 @@ main(int argc, char * argv[]) "[zapped_glyphs #bytes] " "[lines [#height]] " "[blank_nonprintable] " + "[*invalid_character #invalid_char_subst] " "[center_mode] " "[clean] " "[keep_spaces] " @@ -6333,6 +6372,7 @@ main(int argc, char * argv[]) ctxopt_add_opt_settings(parameters, "field_da_number", "-F -en -embedded_number"); ctxopt_add_opt_settings(parameters, "da_options", "-D -data -options"); + ctxopt_add_opt_settings(parameters, "invalid_character", "-. 
-dot -invalid"); ctxopt_add_opt_settings(parameters, "blank_nonprintable", "-b -blank"); ctxopt_add_opt_settings(parameters, "center_mode", "-M -middle -center"); ctxopt_add_opt_settings(parameters, "clean", @@ -6389,6 +6429,8 @@ main(int argc, char * argv[]) /* """""""""""""" */ ctxopt_add_opt_settings(actions, "auto_tag", toggle_action, &toggle, NULL); + ctxopt_add_opt_settings(actions, "invalid_character", invalid_char_action, + &misc, NULL); ctxopt_add_opt_settings(actions, "blank_nonprintable", toggle_action, &toggle, NULL); ctxopt_add_opt_settings(actions, "center_mode", center_mode_action, &win, @@ -6407,7 +6449,7 @@ main(int argc, char * argv[]) ctxopt_add_opt_settings(actions, "exclude_re", exclude_re_action, &pattern_def_include, &exclude_pattern, NULL); ctxopt_add_opt_settings(actions, "gutter", gutter_action, &win, &langinfo, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "help", help_action, NULL); ctxopt_add_opt_settings(actions, "long_help", long_help_action, NULL); ctxopt_add_opt_settings(actions, "usage", usage_action, NULL); @@ -6418,56 +6460,57 @@ main(int argc, char * argv[]) ctxopt_add_opt_settings(actions, "no_scoll_bar", toggle_action, &toggle, NULL); ctxopt_add_opt_settings(actions, "start_pattern", start_pattern_action, - &pre_selection_index, &langinfo, NULL); + &pre_selection_index, &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "title", title_action, &message, &langinfo, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "validate_in_search_mode", toggle_action, &toggle, NULL); ctxopt_add_opt_settings(actions, "version", version_action, NULL); ctxopt_add_opt_settings(actions, "visual_bell", toggle_action, &toggle, NULL); ctxopt_add_opt_settings(actions, "wide_mode", wide_mode_action, &win, NULL); ctxopt_add_opt_settings(actions, "post_subst_all", post_subst_action, - &sed_list, &langinfo, NULL); + &sed_list, &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "post_subst_included", post_subst_action, - 
&include_sed_list, &langinfo, NULL); + &include_sed_list, &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "post_subst_excluded", post_subst_action, - &exclude_sed_list, &langinfo, NULL); + &exclude_sed_list, &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "special_level_1", special_level_action, special_pattern, &win, &term, &langinfo, &init_attr, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "special_level_2", special_level_action, special_pattern, &win, &term, &langinfo, &init_attr, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "special_level_3", special_level_action, special_pattern, &win, &term, &langinfo, &init_attr, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "special_level_4", special_level_action, special_pattern, &win, &term, &langinfo, &init_attr, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "special_level_5", special_level_action, special_pattern, &win, &term, &langinfo, &init_attr, - NULL); + &misc, NULL); ctxopt_add_opt_settings(actions, "attributes", attributes_action, &win, &term, &init_attr, NULL); - ctxopt_add_opt_settings(actions, "timeout", timeout_action, &langinfo, NULL); + ctxopt_add_opt_settings(actions, "timeout", timeout_action, &langinfo, &misc, + NULL); ctxopt_add_opt_settings(actions, "hidden_timeout", timeout_action, &langinfo, NULL); ctxopt_add_opt_settings(actions, "force_first_column", force_first_column_action, &first_word_pattern, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "force_last_column", force_last_column_action, &last_word_pattern, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "word_separators", separators_action, &iws, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "line_separators", separators_action, &ils, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "zapped_glyphs", zapped_glyphs_action, &zg, - &langinfo, NULL); + 
&langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "tag_mode", tag_mode_action, &toggle, &win, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "pin_mode", pin_mode_action, &toggle, &win, - &langinfo, NULL); + &langinfo, &misc, NULL); ctxopt_add_opt_settings(actions, "search_method", search_method_action, &misc, NULL); ctxopt_add_opt_settings(actions, "auto_da_number", auto_da_action, @@ -6477,7 +6520,7 @@ main(int argc, char * argv[]) ctxopt_add_opt_settings(actions, "field_da_number", field_da_number_action, NULL); ctxopt_add_opt_settings(actions, "da_options", da_options_action, &langinfo, - &daccess_index, NULL); + &daccess_index, &misc, NULL); /* ctxopt constraints */ /* """""""""""""""""" */ @@ -6533,10 +6576,10 @@ main(int argc, char * argv[]) /* the inclusion and exclusion patterns. */ /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */ if (include_pattern != NULL) - utf8_interpret(include_pattern, &langinfo); + utf8_interpret(include_pattern, &langinfo, misc.invalid_char_substitute); if (exclude_pattern != NULL) - utf8_interpret(exclude_pattern, &langinfo); + utf8_interpret(exclude_pattern, &langinfo, misc.invalid_char_substitute); /* If we did not impose the number of columns, use the whole */ /* terminal width */ @@ -6615,7 +6658,8 @@ main(int argc, char * argv[]) if (custom_ini_file != NULL) { - if (ini_load(custom_ini_file, &win, &term, &limits, &timers, &misc, ini_cb)) + if (ini_load(custom_ini_file, &win, &term, &limits, &timers, &misc, + &langinfo, ini_cb)) exit(EXIT_FAILURE); } else @@ -6627,10 +6671,12 @@ main(int argc, char * argv[]) /* Set the attributes from the configuration file if possible */ /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""" */ - if (ini_load(home_ini_file, &win, &term, &limits, &timers, &misc, ini_cb)) + if (ini_load(home_ini_file, &win, &term, &limits, &timers, &misc, &langinfo, + ini_cb)) exit(EXIT_FAILURE); - if (ini_load(local_ini_file, &win, &term, 
&limits, &timers, &misc, ini_cb)) + if (ini_load(local_ini_file, &win, &term, &limits, &timers, &misc, + &langinfo, ini_cb)) exit(EXIT_FAILURE); free(home_ini_file); @@ -7284,7 +7330,8 @@ main(int argc, char * argv[]) parse_selectors(rows_selector, &filter_type, unparsed, &inc_row_interval_list, &inc_row_regex_list, - &exc_row_interval_list, &exc_row_regex_list, &langinfo); + &exc_row_interval_list, &exc_row_regex_list, &langinfo, + &misc); if (*unparsed != '\0') { @@ -7323,7 +7370,8 @@ main(int argc, char * argv[]) parse_selectors(cols_selector, &filter_type, unparsed, &inc_col_interval_list, &inc_col_regex_list, - &exc_col_interval_list, &exc_col_regex_list, &langinfo); + &exc_col_interval_list, &exc_col_regex_list, &langinfo, + &misc); if (*unparsed != '\0') { @@ -7435,7 +7483,7 @@ main(int argc, char * argv[]) /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */ while ((word = get_word(input_file, word_delims_list, record_delims_list, zapped_glyphs_list, utf8_buffer, &is_last, &toggle, - &langinfo, &win, &limits)) + &langinfo, &win, &limits, &misc)) != NULL) { int selectable; @@ -8261,7 +8309,7 @@ main(int argc, char * argv[]) word_len = strlen(word->str); expanded_word = xmalloc(5 * word_len + 1); - len = expand(word->str, expanded_word, &langinfo, &toggle); + len = expand(word->str, expanded_word, &langinfo, &toggle, &misc); /* Update it if needed */ /* ''''''''''''''''''' */ @@ -187,6 +187,7 @@ struct timers_s struct misc_s { search_mode_t default_search_method; + char invalid_char_substitute; }; /* Terminal setting variables */ @@ -440,7 +441,7 @@ void setup_term(int const fd); void -strip_ansi_color(char * s, toggle_t * toggle); +strip_ansi_color(char * s, toggle_t * toggle, misc_t * misc); int tst_cb(void * elem); @@ -450,14 +451,15 @@ tst_cb_cli(void * elem); int ini_load(const char * filename, win_t * win, term_t * term, limits_t * limits, - timers_t * timers, misc_t * misc, + timers_t * timers, misc_t * misc, langinfo_t * langinfo, int 
(*report)(win_t * win, term_t * term, limits_t * limits, - timers_t * timers, misc_t * misc, const char * section, - const char * name, char * value)); + timers_t * timers, misc_t * misc, langinfo_t * langinfo, + const char * section, const char * name, char * value)); int ini_cb(win_t * win, term_t * term, limits_t * limits, timers_t * timers, - misc_t * misc, const char * section, const char * name, char * value); + misc_t * misc, langinfo_t * langinfo, const char * section, + const char * name, char * value); char * make_ini_path(char * name, char * base); @@ -516,11 +518,12 @@ disp_word(long pos, search_mode_t search_mode, search_data_t * search_data, term_t * term, win_t * win, char * tmp_word); size_t -expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle); +expand(char * src, char * dest, langinfo_t * langinfo, toggle_t * toggle, + misc_t * misc); int get_bytes(FILE * input, char * utf8_buffer, ll_t * ignored_glyphs_list, - langinfo_t * langinfo); + langinfo_t * langinfo, misc_t * misc); int get_scancode(unsigned char * s, size_t max); @@ -529,7 +532,7 @@ char * get_word(FILE * input, ll_t * word_delims_list, ll_t * record_delims_list, ll_t * ignored_glyphs_list, char * utf8_buffer, unsigned char * is_last, toggle_t * toggle, langinfo_t * langinfo, - win_t * win, limits_t * limits); + win_t * win, limits_t * limits, misc_t * misc); void left_margin_putp(char * s, term_t * term, win_t * win); @@ -551,7 +554,7 @@ void parse_selectors(char * str, filters_t * filter, char * unparsed, ll_t ** inc_interval_list, ll_t ** inc_regex_list, ll_t ** exc_interval_list, ll_t ** exc_regex_list, - langinfo_t * langinfo); + langinfo_t * langinfo, misc_t * misc); int replace(char * orig, sed_t * sed); @@ -40,6 +40,9 @@ common_help(void) printf(" sets the number of lines in the selection window.\n"); printf("-b|-blank\n"); printf(" displays non printable characters as space.\n"); + printf("-.|-dot|-invalid\n"); + printf(" defines the substitution character 
for a non-printable " + "character.\n"); printf("-M|-middle|-center\n"); printf(" centers the display if possible.\n"); printf("-d|-restore|-delete|-clean|-delete_window|-clean_window\n"); @@ -23,21 +23,23 @@ /* All hexadecimal sequences of \uxx, \uxxxx, \uxxxxxx and \uxxxxxxxx will */ /* be replace by the corresponding UTF-8 character. */ /* ======================================================================= */ -void -utf8_interpret(char * s, langinfo_t * langinfo) +int +utf8_interpret(char * s, langinfo_t * langinfo, char substitute) { - char * utf8_str; /* \uxx... */ - size_t utf8_to_eos_len; /* bytes in s starting from the first * - * occurrence of \u */ - size_t init_len; /* initial lengths of the string to interpret */ - size_t utf8_ascii_len; /* 2,4,6 or 8 bytes */ - size_t len_to_remove = 0; /* number of bytes to remove after the conversion */ - char tmp[9]; /* temporary string */ - - /* Guard against the case where s is NULL */ - /* """""""""""""""""""""""""""""""""""""" */ + char * utf8_str; /* \uxx... */ + size_t utf8_to_eos_len; /* bytes in s starting from the first * + * occurrence of \u. */ + size_t init_len; /* initial lengths of the string to interpret */ + size_t utf8_ascii_len; /* 2,4,6 or 8 bytes. */ + size_t len_to_remove = 0; /* number of bytes to remove after the * + | conversion. */ + char tmp[9]; /* temporary string. */ + int rc = 1; /* return code, 0: error, 1: fine. */ + + /* Guard against the case where s is NULL. */ + /* """"""""""""""""""""""""""""""""""""""" */ if (s == NULL) - return; + return 0; init_len = strlen(s); @@ -45,12 +47,13 @@ utf8_interpret(char * s, langinfo_t * langinfo) { utf8_to_eos_len = strlen(utf8_str); if (utf8_to_eos_len < 4) /* string too short to contain * - * a valid UTF-8 char */ |