diff options
author | Tom Hinton <tom.hinton@cse.org.uk> | 2015-10-01 12:16:41 +0100 |
---|---|---|
committer | Tom Hinton <tom.hinton@cse.org.uk> | 2015-10-01 12:16:41 +0100 |
commit | af6a4b83ebdb83f24b6913b372e207bcf245ea0c (patch) | |
tree | f55afd8e2ccadb7829900d9d14eebc78e48a3b53 /source/helper.c | |
parent | 574bf2da828b4e3ab0ce3ce7fccd58879db60430 (diff) |
Make normal filtering of plain ASCII lines faster
This patch adds a field, lines_not_ascii, to the MenuState structure. Its
nth entry is nonzero if and only if the nth member of MenuState.lines
contains a non-ASCII codepoint.
All comparison functions (menu_match_cb type) take an additional
argument, not_ascii, that tells them whether the string they are matching
contains non-ASCII characters. They can use this to decide whether to
collate and case-fold the input (for non-ASCII strings) or to call
strstr/strcasestr on it directly (for ASCII strings).
The speed-up is not yet implemented for flex (fuzzy) matching, due to my
laziness: fuzzy_token_match accepts the new not_ascii argument but ignores
it. However, it should be a simple enough matter to add.
For my large input of 400,000 lines, this reduces typical filtering time
from about 2 seconds to about 10 microseconds.
Diffstat (limited to 'source/helper.c')
-rw-r--r-- | source/helper.c | 36 |
1 files changed, 26 insertions, 10 deletions
diff --git a/source/helper.c b/source/helper.c index fd3ee577..3173055f 100644 --- a/source/helper.c +++ b/source/helper.c @@ -310,11 +310,13 @@ int find_arg_char ( const char * const key, char *val ) * Shared 'token_match' function. * Matches tokenized. */ -static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive ) +static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii, int case_sensitive ) { int match = 1; char *compk = token_collate_key ( input, case_sensitive ); // Do a tokenized match. + // TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes. + // TODO this does not use the non-ascii speed-up either. if ( tokens ) { for ( int j = 0; match && tokens[j]; j++ ) { char *t = compk; @@ -331,28 +333,33 @@ static int fuzzy_token_match ( char **tokens, const char *input, int case_sensit g_free ( compk ); return match; } -static int normal_token_match ( char **tokens, const char *input, int case_sensitive ) +static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive ) { int match = 1; - char *compk = token_collate_key ( input, case_sensitive ); + char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input; // Do a tokenized match. + if ( tokens ) { - for ( int j = 0; match && tokens[j]; j++ ) { - match = ( strstr ( compk, tokens[j] ) != NULL ); - } + char *(*comparison)(const char *, const char *); + comparison = (case_sensitive || not_ascii) ? 
strstr : strcasestr; + for ( int j = 0; match && tokens[j]; j++ ) { + match = (comparison( compk, tokens[j] ) != NULL ); + } } - g_free ( compk ); + + if (not_ascii) g_free ( compk ); + return match; } -int token_match ( char **tokens, const char *input, int case_sensitive, +int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive, __attribute__( ( unused ) ) unsigned int index, __attribute__( ( unused ) ) Switcher *data ) { if ( config.fuzzy ) { - return fuzzy_token_match ( tokens, input, case_sensitive ); + return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive ); } - return normal_token_match ( tokens, input, case_sensitive ); + return normal_token_match ( tokens, input, not_ascii, case_sensitive ); } int execute_generator ( const char * cmd ) @@ -478,3 +485,12 @@ void config_sanity_check ( ) config.menu_bg_alt = config.menu_bg; } } + +int is_not_ascii ( const char * str ) +{ + while (*str > 0) { + str++; + } + if (*str) return 1; + return 0; +} |