diff options
author | Dave Davenport <qball@gmpclient.org> | 2017-01-11 09:20:19 +0100 |
---|---|---|
committer | Dave Davenport <qball@gmpclient.org> | 2017-01-11 09:20:19 +0100 |
commit | 4452b08288c25fa833e22c97b57045ec5888b547 (patch) | |
tree | 6757fdbb677b585315beb9d6866573eff3f1a6f0 /source/helper.c | |
parent | 56c787690fdc8254ed1a6a0c205accfe490d8354 (diff) |
Move fzf matcher into helper
Diffstat (limited to 'source/helper.c')
-rw-r--r-- | source/helper.c | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/source/helper.c b/source/helper.c index cb30a22d..11073d66 100644 --- a/source/helper.c +++ b/source/helper.c @@ -29,6 +29,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <limits.h> #include <unistd.h> #include <errno.h> #include <fcntl.h> @@ -720,3 +721,177 @@ char * rofi_force_utf8 ( gchar *start, ssize_t length ) return g_string_free ( string, FALSE ); } + +/**** + * FZF like scorer + */ + +/** Max length of input to score. */ +#define FUZZY_SCORER_MAX_LENGTH 256 +/** minimum score */ +#define MIN_SCORE ( INT_MIN / 2 ) +/** Leading gap score */ +#define LEADING_GAP_SCORE -4 +/** gap score */ +#define GAP_SCORE -5 +/** start of word score */ +#define WORD_START_SCORE 50 +/** non-word score */ +#define NON_WORD_SCORE 40 +/** CamelCase score */ +#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 ) +/** Consecutive score */ +#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE ) +/** non-start multiplier */ +#define PATTERN_NON_START_MULTIPLIER 1 +/** start multiplier */ +#define PATTERN_START_MULTIPLIER 2 + +/** + * Character classification. + */ +enum CharClass +{ + /* Lower case */ + LOWER, + /* Upper case */ + UPPER, + /* Number */ + DIGIT, + /* non word character */ + NON_WORD +}; + +/** + * @param c The character to determine class of + * + * @returns the class of the character c. + */ +static enum CharClass rofi_scorer_get_character_class ( gunichar c ) +{ + if ( g_unichar_islower ( c ) ) { + return LOWER; + } + if ( g_unichar_isupper ( c ) ) { + return UPPER; + } + if ( g_unichar_isdigit ( c ) ) { + return DIGIT; + } + return NON_WORD; +} + +/** + * @param prev The previous character. + * @param curr The current character + * + * Scrore the transition. + * + * @returns score of the transition. + */ +static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr ) +{ + if ( prev == NON_WORD && curr != NON_WORD ) { + return WORD_START_SCORE; + } + if ( ( prev == LOWER && curr == UPPER ) || + ( prev != DIGIT && curr == DIGIT ) ) { + return CAMEL_SCORE; + } + if ( curr == NON_WORD ) { + return NON_WORD_SCORE; + } + return 0; +} + +/** + * @param pattern The user input to match against. + * @param plen Pattern length. + * @param str The input to match against pattern. + * @param slen Lenght of str. + * + * rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by + * aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`. + * + * Scoring criteria + * - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE. + * - Non-word characters matter. See NON_WORD_SCORE. + * - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest. + * See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER. + * - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE. + * - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE. + * + * The recurrence of the dynamic programming: + * dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j] + * dp[0][j] = leading_gap_penalty(0, j) + score[j] + * dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j)) + * + * The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from + * O(N*M) to O(M) + * + * @returns the sorting weight. + */ +int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen ) +{ + if ( slen > FUZZY_SCORER_MAX_LENGTH ) { + return -MIN_SCORE; + } + if ( plen == 5 ) { + plen = plen; + } + glong pi, si; + // whether we are aligning the first character of pattern + gboolean pfirst = TRUE; + // whether the start of a word in pattern + gboolean pstart = TRUE; + // score for each position + int *score = g_malloc_n ( slen, sizeof ( int ) ); + // dp[i]: maximum value by aligning pattern[0..pi] to str[0..si] + int *dp = g_malloc_n ( slen, sizeof ( int ) ); + // uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial + // values suppress warnings. + int uleft = 0, ulefts = 0, left, lefts; + const gchar *pit = pattern, *sit; + enum CharClass prev = NON_WORD, cur; + for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { + cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) ); + score[si] = rofi_scorer_get_score_for ( prev, cur ); + prev = cur; + dp[si] = MIN_SCORE; + } + for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) { + gunichar pc = g_utf8_get_char ( pit ), sc; + if ( g_unichar_isspace ( pc ) ) { + pstart = TRUE; + continue; + } + lefts = MIN_SCORE; + for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) { + left = dp[si]; + lefts = MAX ( lefts + GAP_SCORE, left ); + sc = g_utf8_get_char ( sit ); + if ( config.case_sensitive + ? pc == sc + : g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) { + int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER ); + dp[si] = pfirst + ? LEADING_GAP_SCORE * si + t + : MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t ); + } + else { + dp[si] = MIN_SCORE; + } + uleft = left; + ulefts = lefts; + } + pfirst = pstart = FALSE; + } + lefts = MIN_SCORE; + for ( si = 0; si < slen; si++ ) { + lefts = MAX ( lefts + GAP_SCORE, dp[si] ); + } + g_free ( score ); + g_free ( dp ); + return -lefts; +} + |