path: root/source/helper.c
author    Tom Hinton <tom.hinton@cse.org.uk>  2015-10-01 12:16:41 +0100
committer Tom Hinton <tom.hinton@cse.org.uk>  2015-10-01 12:16:41 +0100
commit    af6a4b83ebdb83f24b6913b372e207bcf245ea0c (patch)
tree      f55afd8e2ccadb7829900d9d14eebc78e48a3b53 /source/helper.c
parent    574bf2da828b4e3ab0ce3ce7fccd58879db60430 (diff)
Make normal filtering of plain ASCII lines faster
This patch adds a lines_not_ascii field to the MenuState structure. The nth entry is 0 unless the nth member of MenuState.lines contains a non-ASCII codepoint.

All comparison functions (of the menu_match_cb type) take an additional argument telling them whether the string they are matching is non-ASCII. They use this to decide whether to collate and case-fold the input (for non-ASCII strings) or to use strstr/strcasestr directly (for ASCII strings).

The change is not currently implemented for flex matching, due to my laziness; however, it should be a simple enough matter to add.

For my large input of 400,000 lines, this reduces typical filtering time from about 2 seconds to about ten microseconds.
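The MenuState setup that fills in lines_not_ascii lives outside source/helper.c, so it is not part of this diff. As a rough sketch of the intended usage (the state->lines, state->num_lines and state->lines_not_ascii names are assumptions here; only is_not_ascii comes from this patch, at the bottom of the diff):

/* Illustrative sketch only -- the real MenuState setup is not in this diff. */
state->lines_not_ascii = g_malloc0_n ( state->num_lines, sizeof ( int ) );
for ( unsigned int i = 0; i < state->num_lines; i++ ) {
    state->lines_not_ascii[i] = is_not_ascii ( state->lines[i] );
}

Each comparison function can then skip the collate/case-fold pass whenever the flag for its line is 0 and fall back to plain strstr/strcasestr.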
Diffstat (limited to 'source/helper.c')
-rw-r--r--    source/helper.c    36
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/source/helper.c b/source/helper.c
index fd3ee577..3173055f 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -310,11 +310,13 @@ int find_arg_char ( const char * const key, char *val )
* Shared 'token_match' function.
* Matches tokenized.
*/
-static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive )
+static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii, int case_sensitive )
{
int match = 1;
char *compk = token_collate_key ( input, case_sensitive );
// Do a tokenized match.
+ // TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
+ // TODO this does not use the non-ascii speed-up either.
if ( tokens ) {
for ( int j = 0; match && tokens[j]; j++ ) {
char *t = compk;
@@ -331,28 +333,33 @@ static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive )
g_free ( compk );
return match;
}
-static int normal_token_match ( char **tokens, const char *input, int case_sensitive )
+static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
{
int match = 1;
- char *compk = token_collate_key ( input, case_sensitive );
+ char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
// Do a tokenized match.
+
if ( tokens ) {
- for ( int j = 0; match && tokens[j]; j++ ) {
- match = ( strstr ( compk, tokens[j] ) != NULL );
- }
+ char *(*comparison)(const char *, const char *);
+ comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
+ for ( int j = 0; match && tokens[j]; j++ ) {
+ match = (comparison( compk, tokens[j] ) != NULL );
+ }
}
- g_free ( compk );
+
+ if (not_ascii) g_free ( compk );
+
return match;
}
-int token_match ( char **tokens, const char *input, int case_sensitive,
+int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
__attribute__( ( unused ) ) unsigned int index,
__attribute__( ( unused ) ) Switcher *data )
{
if ( config.fuzzy ) {
- return fuzzy_token_match ( tokens, input, case_sensitive );
+ return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive );
}
- return normal_token_match ( tokens, input, case_sensitive );
+ return normal_token_match ( tokens, input, not_ascii, case_sensitive );
}
int execute_generator ( const char * cmd )
@@ -478,3 +485,12 @@ void config_sanity_check ( )
config.menu_bg_alt = config.menu_bg;
}
}
+
+int is_not_ascii ( const char * str )
+{
+ while (*str > 0) {
+ str++;
+ }
+ if (*str) return 1;
+ return 0;
+}
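Two portability notes on the new helper, as observations rather than changes to the patch:

is_not_ascii assumes plain char is signed: the loop stops when *str is negative (a byte with the high bit set) or zero, and the final test tells the two cases apart. On a platform where plain char is unsigned, *str > 0 also holds for non-ASCII bytes, so the loop would run to the terminating NUL and the function would always return 0. A signedness-independent sketch (hypothetical, not part of the patch) would test the high bit explicitly:

/* Hypothetical signedness-independent variant, not in the patch. */
int is_not_ascii ( const char *str )
{
    for ( const unsigned char *p = (const unsigned char *) str; *p != '\0'; p++ ) {
        if ( *p > 0x7F ) {
            return 1;
        }
    }
    return 0;
}

Separately, strcasestr is a GNU/BSD extension rather than standard C: with glibc it is only declared by <string.h> when _GNU_SOURCE is defined before the include, so builds against other C libraries may need a feature-test macro or a fallback.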