Move fzf matcher into helper

author: Dave Davenport <qball@gmpclient.org> 2017-01-11 09:20:19 +0100
committer: Dave Davenport <qball@gmpclient.org> 2017-01-11 09:20:19 +0100
commit: 4452b08288c25fa833e22c97b57045ec5888b547 (patch)
tree: 6757fdbb677b585315beb9d6866573eff3f1a6f0 /source/helper.c
parent: 56c787690fdc8254ed1a6a0c205accfe490d8354 (diff)
1 files changed, 175 insertions, 0 deletions
diff --git a/source/helper.c b/source/helper.c
index cb30a22d..11073d66 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -720,3 +721,177 @@ char * rofi_force_utf8 ( gchar *start, ssize_t length )
 
     return g_string_free ( string, FALSE );
 }
+
+/****
+ * FZF like scorer
+ */
+
+/** Max length (in characters) of input to score; rofi_scorer_fuzzy_evaluate() returns the worst weight for longer input. */
+#define FUZZY_SCORER_MAX_LENGTH         256
+/** Minimum score, used to mark "no alignment". Halved, presumably so that adding penalties or bonuses to it cannot overflow an int. */
+#define MIN_SCORE                       ( INT_MIN / 2 )
+/** Leading gap score: penalty for each character skipped before the first pattern character is matched. */
+#define LEADING_GAP_SCORE               -4
+/** Gap score: penalty for each character skipped once the first pattern character has been matched. */
+#define GAP_SCORE                       -5
+/** Start of word score: bonus for a word character that follows a non-word character. */
+#define WORD_START_SCORE                50
+/** Non-word score: bonus for a position that holds a non-word character. */
+#define NON_WORD_SCORE                  40
+/** CamelCase score: bonus for a lower->upper or non-digit->digit transition; one less than CONSECUTIVE_SCORE. */
+#define CAMEL_SCORE                     ( WORD_START_SCORE + GAP_SCORE - 1 )
+/** Consecutive score: bonus when a pattern character is matched directly after the previous matched one. */
+#define CONSECUTIVE_SCORE               ( WORD_START_SCORE + GAP_SCORE )
+/** Non-start multiplier: applied to the position bonus for pattern characters that do not start a word. */
+#define PATTERN_NON_START_MULTIPLIER    1
+/** Start multiplier: applied to the position bonus for pattern characters that start a word of the pattern. */
+#define PATTERN_START_MULTIPLIER        2
+
+/**
+ * Character classification used by the scorer to detect word and
+ * sub-word boundaries in the string being scored.
+ */
+enum CharClass
+{
+    /* Lower case letter */
+    LOWER,
+    /* Upper case letter */
+    UPPER,
+    /* Digit */
+    DIGIT,
+    /* Non-word character: anything that is not lower case, upper case or a digit */
+    NON_WORD
+};
+
+/**
+ * @param c The unicode character to classify.
+ *
+ * Classify c as lower case, upper case or digit; everything else is
+ * treated as a non-word character.
+ *
+ * @returns the CharClass of the character c.
+ */
+static enum CharClass rofi_scorer_get_character_class ( gunichar c )
+{
+    // Anything that is not recognised below is a non-word character.
+    enum CharClass klass = NON_WORD;
+
+    if ( g_unichar_islower ( c ) ) {
+        klass = LOWER;
+    }
+    else if ( g_unichar_isupper ( c ) ) {
+        klass = UPPER;
+    }
+    else if ( g_unichar_isdigit ( c ) ) {
+        klass = DIGIT;
+    }
+    return klass;
+}
+
+/**
+ * @param prev The class of the previous character.
+ * @param curr The class of the current character.
+ *
+ * Score the transition from prev to curr, i.e. the bonus for matching a
+ * pattern character at the position of curr.
+ *
+ * @returns NON_WORD_SCORE when curr is a non-word character,
+ * WORD_START_SCORE when curr starts a word, CAMEL_SCORE on a
+ * lower->upper or non-digit->digit transition, and 0 otherwise.
+ */
+static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr )
+{
+    int bonus = 0;
+
+    if ( curr == NON_WORD ) {
+        // Non-word characters always get their own fixed bonus.
+        bonus = NON_WORD_SCORE;
+    }
+    else if ( prev == NON_WORD ) {
+        // First word character after a non-word character: start of a word.
+        bonus = WORD_START_SCORE;
+    }
+    else if ( curr == UPPER ) {
+        // camelCase boundary.
+        if ( prev == LOWER ) {
+            bonus = CAMEL_SCORE;
+        }
+    }
+    else if ( curr == DIGIT ) {
+        // camel123 boundary: first digit after a letter.
+        if ( prev != DIGIT ) {
+            bonus = CAMEL_SCORE;
+        }
+    }
+    return bonus;
+}
+
+/**
+ * @param pattern   The user input to match against.
+ * @param plen      Length of pattern, in UTF-8 characters.
+ * @param str       The input to match against pattern.
+ * @param slen      Length of str, in UTF-8 characters.
+ *
+ *  rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by
+ *  aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`.
+ *  Characters are compared case sensitively or not depending on config.case_sensitive. White-space in `pattern` is not
+ *  aligned; it only marks the next pattern character as the start of a word.
+ *
+ *  Scoring criteria
+ *  - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE.
+ *  - Non-word characters matter. See NON_WORD_SCORE.
+ *  - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest.
+ *  See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER.
+ *  - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE.
+ *  - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE.
+ *
+ *  The recurrence of the dynamic programming:
+ *  dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j]
+ *  dp[0][j] = leading_gap_penalty(0, j) + score[j]
+ *  dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j))
+ *
+ *  The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from
+ *  O(N*M) to O(M)
+ *
+ * @returns the sorting weight: the negated best alignment score, so a better alignment gives a smaller value.
+ *  When `str` is longer than FUZZY_SCORER_MAX_LENGTH it is not scored and -MIN_SCORE is returned.
+ */
+int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen )
+{
+    if ( slen > FUZZY_SCORER_MAX_LENGTH ) {
+        return -MIN_SCORE;
+    }
+    glong    pi, si;
+    // whether we are aligning the first character of pattern
+    gboolean pfirst = TRUE;
+    // whether the start of a word in pattern
+    gboolean pstart = TRUE;
+    // score for each position
+    int      *score = g_malloc_n ( slen, sizeof ( int ) );
+    // dp[si]: maximum value by aligning pattern[0..pi] to str[0..si]
+    int      *dp = g_malloc_n ( slen, sizeof ( int ) );
+    // left: value of the cell above (previous row, same column); lefts: maximum value of left and the cells to its
+    // left, with the gap penalty applied.
+    int            left, lefts;
+    const gchar    *pit  = pattern, *sit;
+    enum CharClass prev  = NON_WORD, cur;
+    // Pre-compute the position bonus of every character of str and mark every cell as "no alignment".
+    for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
+        cur       = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) );
+        score[si] = rofi_scorer_get_score_for ( prev, cur );
+        prev      = cur;
+        dp[si]    = MIN_SCORE;
+    }
+    for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) {
+        gunichar pc = g_utf8_get_char ( pit ), sc;
+        if ( g_unichar_isspace ( pc ) ) {
+            pstart = TRUE;
+            continue;
+        }
+        // uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left.
+        // Column 0 has no upper left cell, so every row starts from MIN_SCORE instead of reusing the values left
+        // over from the last column of the previous row.
+        int uleft = MIN_SCORE, ulefts = MIN_SCORE;
+        lefts = MIN_SCORE;
+        for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
+            // Remember the previous row's value before this cell is overwritten.
+            left  = dp[si];
+            lefts = MAX ( lefts + GAP_SCORE, left );
+            sc    = g_utf8_get_char ( sit );
+            if ( config.case_sensitive
+                 ? pc == sc
+                 : g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) {
+                int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER );
+                dp[si] = pfirst
+                         ? LEADING_GAP_SCORE * si + t
+                         : MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t );
+            }
+            else {
+                dp[si] = MIN_SCORE;
+            }
+            uleft  = left;
+            ulefts = lefts;
+        }
+        pfirst = pstart = FALSE;
+    }
+    // Best value of the last row, charging the gap penalty for every character left after the last match.
+    lefts = MIN_SCORE;
+    for ( si = 0; si < slen; si++ ) {
+        lefts = MAX ( lefts + GAP_SCORE, dp[si] );
+    }
+    g_free ( score );
+    g_free ( dp );
+    return -lefts;
+}
+
author	Dave Davenport <qball@gmpclient.org>	2017-01-11 09:20:19 +0100
committer	Dave Davenport <qball@gmpclient.org>	2017-01-11 09:20:19 +0100
commit	4452b08288c25fa833e22c97b57045ec5888b547 (patch)
tree	6757fdbb677b585315beb9d6866573eff3f1a6f0 /source/helper.c
parent	56c787690fdc8254ed1a6a0c205accfe490d8354 (diff)