summaryrefslogtreecommitdiffstats
path: root/source/helper.c
diff options
context:
space:
mode:
authorDave Davenport <qball@gmpclient.org>2017-01-11 09:20:19 +0100
committerDave Davenport <qball@gmpclient.org>2017-01-11 09:20:19 +0100
commit4452b08288c25fa833e22c97b57045ec5888b547 (patch)
tree6757fdbb677b585315beb9d6866573eff3f1a6f0 /source/helper.c
parent56c787690fdc8254ed1a6a0c205accfe490d8354 (diff)
Move fzf matcher into helper
Diffstat (limited to 'source/helper.c')
-rw-r--r--source/helper.c175
1 files changed, 175 insertions, 0 deletions
diff --git a/source/helper.c b/source/helper.c
index cb30a22d..11073d66 100644
--- a/source/helper.c
+++ b/source/helper.c
@@ -29,6 +29,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <limits.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
@@ -720,3 +721,177 @@ char * rofi_force_utf8 ( gchar *start, ssize_t length )
return g_string_free ( string, FALSE );
}
+
+/****
+ * FZF like scorer
+ */
+
+/** Max length of input to score. */
+#define FUZZY_SCORER_MAX_LENGTH 256
+/** minimum score */
+#define MIN_SCORE ( INT_MIN / 2 )
+/** Leading gap score */
+#define LEADING_GAP_SCORE -4
+/** gap score */
+#define GAP_SCORE -5
+/** start of word score */
+#define WORD_START_SCORE 50
+/** non-word score */
+#define NON_WORD_SCORE 40
+/** CamelCase score */
+#define CAMEL_SCORE ( WORD_START_SCORE + GAP_SCORE - 1 )
+/** Consecutive score */
+#define CONSECUTIVE_SCORE ( WORD_START_SCORE + GAP_SCORE )
+/** non-start multiplier */
+#define PATTERN_NON_START_MULTIPLIER 1
+/** start multiplier */
+#define PATTERN_START_MULTIPLIER 2
+
+/**
+ * Character classification.
+ */
+enum CharClass
+{
+ /* Lower case */
+ LOWER,
+ /* Upper case */
+ UPPER,
+ /* Number */
+ DIGIT,
+ /* non word character */
+ NON_WORD
+};
+
+/**
+ * @param c The character to determine class of
+ *
+ * @returns the class of the character c.
+ */
+static enum CharClass rofi_scorer_get_character_class ( gunichar c )
+{
+ if ( g_unichar_islower ( c ) ) {
+ return LOWER;
+ }
+ if ( g_unichar_isupper ( c ) ) {
+ return UPPER;
+ }
+ if ( g_unichar_isdigit ( c ) ) {
+ return DIGIT;
+ }
+ return NON_WORD;
+}
+
+/**
+ * @param prev The previous character.
+ * @param curr The current character
+ *
+ * Scrore the transition.
+ *
+ * @returns score of the transition.
+ */
+static int rofi_scorer_get_score_for ( enum CharClass prev, enum CharClass curr )
+{
+ if ( prev == NON_WORD && curr != NON_WORD ) {
+ return WORD_START_SCORE;
+ }
+ if ( ( prev == LOWER && curr == UPPER ) ||
+ ( prev != DIGIT && curr == DIGIT ) ) {
+ return CAMEL_SCORE;
+ }
+ if ( curr == NON_WORD ) {
+ return NON_WORD_SCORE;
+ }
+ return 0;
+}
+
+/**
+ * @param pattern The user input to match against.
+ * @param plen Pattern length.
+ * @param str The input to match against pattern.
+ * @param slen Lenght of str.
+ *
+ * rofi_scorer_fuzzy_evaluate implements a global sequence alignment algorithm to find the maximum accumulated score by
+ * aligning `pattern` to `str`. It applies when `pattern` is a subsequence of `str`.
+ *
+ * Scoring criteria
+ * - Prefer matches at the start of a word, or the start of subwords in CamelCase/camelCase/camel123 words. See WORD_START_SCORE/CAMEL_SCORE.
+ * - Non-word characters matter. See NON_WORD_SCORE.
+ * - The first characters of words of `pattern` receive bonus because they usually have more significance than the rest.
+ * See PATTERN_START_MULTIPLIER/PATTERN_NON_START_MULTIPLIER.
+ * - Superfluous characters in `str` will reduce the score (gap penalty). See GAP_SCORE.
+ * - Prefer early occurrence of the first character. See LEADING_GAP_SCORE/GAP_SCORE.
+ *
+ * The recurrence of the dynamic programming:
+ * dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j]
+ * dp[0][j] = leading_gap_penalty(0, j) + score[j]
+ * dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j))
+ *
+ * The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from
+ * O(N*M) to O(M)
+ *
+ * @returns the sorting weight.
+ */
+int rofi_scorer_fuzzy_evaluate ( const char *pattern, glong plen, const char *str, glong slen )
+{
+ if ( slen > FUZZY_SCORER_MAX_LENGTH ) {
+ return -MIN_SCORE;
+ }
+ if ( plen == 5 ) {
+ plen = plen;
+ }
+ glong pi, si;
+ // whether we are aligning the first character of pattern
+ gboolean pfirst = TRUE;
+ // whether the start of a word in pattern
+ gboolean pstart = TRUE;
+ // score for each position
+ int *score = g_malloc_n ( slen, sizeof ( int ) );
+ // dp[i]: maximum value by aligning pattern[0..pi] to str[0..si]
+ int *dp = g_malloc_n ( slen, sizeof ( int ) );
+ // uleft: value of the upper left cell; ulefts: maximum value of uleft and cells on the left. The arbitrary initial
+ // values suppress warnings.
+ int uleft = 0, ulefts = 0, left, lefts;
+ const gchar *pit = pattern, *sit;
+ enum CharClass prev = NON_WORD, cur;
+ for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
+ cur = rofi_scorer_get_character_class ( g_utf8_get_char ( sit ) );
+ score[si] = rofi_scorer_get_score_for ( prev, cur );
+ prev = cur;
+ dp[si] = MIN_SCORE;
+ }
+ for ( pi = 0; pi < plen; pi++, pit = g_utf8_next_char ( pit ) ) {
+ gunichar pc = g_utf8_get_char ( pit ), sc;
+ if ( g_unichar_isspace ( pc ) ) {
+ pstart = TRUE;
+ continue;
+ }
+ lefts = MIN_SCORE;
+ for ( si = 0, sit = str; si < slen; si++, sit = g_utf8_next_char ( sit ) ) {
+ left = dp[si];
+ lefts = MAX ( lefts + GAP_SCORE, left );
+ sc = g_utf8_get_char ( sit );
+ if ( config.case_sensitive
+ ? pc == sc
+ : g_unichar_tolower ( pc ) == g_unichar_tolower ( sc ) ) {
+ int t = score[si] * ( pstart ? PATTERN_START_MULTIPLIER : PATTERN_NON_START_MULTIPLIER );
+ dp[si] = pfirst
+ ? LEADING_GAP_SCORE * si + t
+ : MAX ( uleft + CONSECUTIVE_SCORE, ulefts + t );
+ }
+ else {
+ dp[si] = MIN_SCORE;
+ }
+ uleft = left;
+ ulefts = lefts;
+ }
+ pfirst = pstart = FALSE;
+ }
+ lefts = MIN_SCORE;
+ for ( si = 0; si < slen; si++ ) {
+ lefts = MAX ( lefts + GAP_SCORE, dp[si] );
+ }
+ g_free ( score );
+ g_free ( dp );
+ return -lefts;
+}
+