Make normal filtering of plain ASCII lines faster

This patch adds a field, lines_not_ascii, to the MenuState structure. Its nth entry is 0 unless the nth member of MenuState.lines contains a non-ASCII codepoint. All comparison functions (of type menu_match_cb) take an additional argument, not_ascii, that tells them whether the string they are matching contains non-ASCII characters. They can use this to decide whether to collate and case-fold the input (for non-ASCII strings) or to use strstr/strcasestr directly (for ASCII strings). The change is not currently implemented for flex matching, due to my laziness; however, it should be a simple enough matter to add. For my large input of 400,000 lines, this reduces typical filtering time from about 2 seconds to about 10 microseconds.
author: Tom Hinton <tom.hinton@cse.org.uk> 2015-10-01 12:16:41 +0100
committer: Tom Hinton <tom.hinton@cse.org.uk> 2015-10-01 12:16:41 +0100
commit: af6a4b83ebdb83f24b6913b372e207bcf245ea0c (patch)
tree: f55afd8e2ccadb7829900d9d14eebc78e48a3b53 /source/helper.c
parent: 574bf2da828b4e3ab0ce3ce7fccd58879db60430 (diff)
1 files changed, 26 insertions, 10 deletions
diff --git a/source/helper.c b/source/helper.c
index fd3ee577..3173055f 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -310,11 +310,13 @@ int find_arg_char ( const char * const key, char *val )
  * Shared 'token_match' function.
  * Matches tokenized.
  */
-static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive )
+static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii,  int case_sensitive )
 {
     int  match  = 1;
     char *compk = token_collate_key ( input, case_sensitive );
     // Do a tokenized match.
+    // TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
+    // TODO this does not use the non-ascii speed-up either.
     if ( tokens ) {
         for ( int j = 0; match && tokens[j]; j++ ) {
             char *t        = compk;
@@ -331,28 +333,33 @@ static int fuzzy_token_match ( char **tokens, const char *input, int case_sensit
     g_free ( compk );
     return match;
 }
-static int normal_token_match ( char **tokens, const char *input, int case_sensitive )
+static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
 {
     int  match  = 1;
-    char *compk = token_collate_key ( input, case_sensitive );
+    char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
 
     // Do a tokenized match.
+
     if ( tokens ) {
-        for ( int j = 0; match && tokens[j]; j++ ) {
-            match = ( strstr ( compk, tokens[j] ) != NULL );
-        }
+      char *(*comparison)(const char *, const char *);
+      comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
+      for ( int j = 0; match && tokens[j]; j++ ) {
+        match = (comparison( compk, tokens[j] ) != NULL );
+      }
     }
-    g_free ( compk );
+
+    if (not_ascii) g_free ( compk );
+
     return match;
 }
-int token_match ( char **tokens, const char *input, int case_sensitive,
+int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
                   __attribute__( ( unused ) ) unsigned int index,
                   __attribute__( ( unused ) ) Switcher *data )
 {
     if ( config.fuzzy ) {
-        return fuzzy_token_match ( tokens, input, case_sensitive );
+        return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive );
     }
-    return normal_token_match ( tokens, input, case_sensitive );
+    return normal_token_match ( tokens, input, not_ascii, case_sensitive );
 }
 
 int execute_generator ( const char * cmd )
@@ -478,3 +485,12 @@ void config_sanity_check (  )
         config.menu_bg_alt = config.menu_bg;
     }
 }
+
+int is_not_ascii ( const char * str )
+{
+   while (*str > 0) {
+     str++;
+   }
+   if (*str) return 1;
+   return 0;
+}
author	Tom Hinton <tom.hinton@cse.org.uk>	2015-10-01 12:16:41 +0100
committer	Tom Hinton <tom.hinton@cse.org.uk>	2015-10-01 12:16:41 +0100
commit	af6a4b83ebdb83f24b6913b372e207bcf245ea0c (patch)
tree	f55afd8e2ccadb7829900d9d14eebc78e48a3b53 /source/helper.c
parent	574bf2da828b4e3ab0ce3ce7fccd58879db60430 (diff)