summaryrefslogtreecommitdiffstats
path: root/source/helper.c
diff options
context:
space:
mode:
authorTom Hinton <tom.hinton@cse.org.uk>2015-10-01 13:45:23 +0100
committerTom Hinton <tom.hinton@cse.org.uk>2015-10-01 13:45:23 +0100
commit0a953a03b407dddcb6d73dc978ddb6e81a741981 (patch)
tree391fd6396c28b3eb858bb30ecbf59bc6598af544 /source/helper.c
parentaf6a4b83ebdb83f24b6913b372e207bcf245ea0c (diff)
Make fuzzy matching fast and unicode-happy
Diffstat (limited to 'source/helper.c')
-rw-r--r--source/helper.c72
1 files changed, 60 insertions, 12 deletions
diff --git a/source/helper.c b/source/helper.c
index 3173055f..40b0a09f 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -37,6 +37,7 @@
#include <sys/types.h>
#include <sys/file.h>
#include <sys/stat.h>
+#include <ctype.h>
#include "helper.h"
#include "rofi.h"
@@ -306,6 +307,35 @@ int find_arg_char ( const char * const key, char *val )
return FALSE;
}
+/*
+ * auxiliary to `fuzzy-token-match' below;
+ */
+static void advance_unicode_glyph( char** token_in, char** input_in ) {
+ // determine the end of the glyph from token
+
+ char *token = *token_in;
+ char *input = *input_in;
+
+ while (*token < 0) {
+ token++;
+ }
+
+ // now we know the glyph length, we can scan for that substring in input
+ // temporarily add a null-terminator in case:
+ char glyph_end = *token;
+ *token = 0;
+ char *match = strstr(input, *token_in);
+ *token = glyph_end;
+
+ if ( match ) {
+ *token_in = token;
+ *input_in = match;
+ } else {
+ // wind input along to the end so that we fail
+ while ( **input_in ) (*input_in)++;
+ }
+}
+
/**
* Shared 'token_match' function.
* Matches tokenized.
@@ -313,43 +343,61 @@ int find_arg_char ( const char * const key, char *val )
static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii, int case_sensitive )
{
int match = 1;
- char *compk = token_collate_key ( input, case_sensitive );
+
// Do a tokenized match.
+
// TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
- // TODO this does not use the non-ascii speed-up either.
+ // mind you, it didn't work before I fiddled with it.
+
+ // this could perhaps be a bit more efficient by iterating over all the tokens at once.
+
+ fprintf(stderr, "fz match %s %d\n", input, not_ascii);
+
if ( tokens ) {
+ char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
for ( int j = 0; match && tokens[j]; j++ ) {
char *t = compk;
- int token_len = strlen ( tokens[j] );
- for ( int id = 0; match && t != NULL && id < token_len; id++ ) {
- match = ( ( t = strchr ( t, tokens[j][id] ) ) != NULL );
- // next should match the next character.
- if ( t != NULL ) {
- t++;
+ char *token = tokens[j];
+
+ while (*t && *token) {
+ if ( *token > 0 ) // i.e. we are at an ascii codepoint
+ {
+ if ( ( case_sensitive && (*t == *token)) ||
+ (!case_sensitive && (tolower(*t) == tolower(*token))) )
+ token++;
+ }
+ else
+ {
+ // we are not at an ascii codepoint, and so we need to do something
+ // complicated
+ advance_unicode_glyph( &token, &t );
}
+ t++;
}
+
+ match = !(*token);
}
+ if (not_ascii) g_free ( compk );
}
- g_free ( compk );
+
return match;
}
static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
{
int match = 1;
- char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
// Do a tokenized match.
if ( tokens ) {
+ char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
char *(*comparison)(const char *, const char *);
comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
for ( int j = 0; match && tokens[j]; j++ ) {
match = (comparison( compk, tokens[j] ) != NULL );
}
+ if (not_ascii) g_free ( compk );
}
- if (not_ascii) g_free ( compk );
-
return match;
}
int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,