summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2005-08-10 07:51:35 +0000
committerBram Moolenaar <Bram@vim.org>2005-08-10 07:51:35 +0000
commit329cc7e429db7a3fd77143ea3ea489911680944e (patch)
tree82b55f54b8fed76cd746a327fcc6447502ad5465
parent8af244281c3a1afe1ca39cbf6ffb72ad42488c56 (diff)
updated for version 7.0126
-rw-r--r--src/spell.c312
1 files changed, 281 insertions, 31 deletions
diff --git a/src/spell.c b/src/spell.c
index 77d6502098..9c3742f7a7 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -50,6 +50,16 @@
* See ":help develop-spell".
*/
+/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
+ * Only use for small word lists!
+ * SPELL_COMPRESS_CNT is in how many words we compress the tree. */
+#if 0
+# define SPELL_PRINTTREE
+# define SPELL_COMPRESS_CNT 1
+#else
+# define SPELL_COMPRESS_CNT 1000000
+#endif
+
/*
* Use this to adjust the score after finding suggestions, based on the
* suggested word sounding like the bad word. This is much faster than doing
@@ -719,6 +729,7 @@ static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round,
static char *e_format = N_("E759: Format error in spell file");
static char *e_spell_trunc = N_("E758: Truncated spell file");
+static char *msg_compressing = N_("Compressing word tree...");
/*
* Main spell-checking function.
@@ -3106,7 +3117,7 @@ struct wordnode_S
{
union /* shared to save space */
{
- char_u hashkey[6]; /* room for the hash key */
+ char_u hashkey[6]; /* the hash key, only used while compressing */
int index; /* index in written nodes (valid after first
round) */
} wn_u1;
@@ -3118,11 +3129,16 @@ struct wordnode_S
wordnode_T *wn_child; /* child (next byte in word) */
wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
always sorted) */
+ int wn_refs; /* nr of references to this node */
char_u wn_byte; /* Byte for this node. NUL for word end */
- short_u wn_flags; /* when wn_byte is NUL: WF_ flags */
- short wn_region; /* when wn_byte is NUL: region mask; for
+ char_u wn_prefixID; /* when "wn_byte" is NUL: supported/required
+ prefix ID or 0 */
+ short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */
+ short wn_region; /* when "wn_byte" is NUL: region mask; for
PREFIXTREE it's the prefcondnr */
- char_u wn_prefixID; /* supported/required prefix ID or 0 */
+#ifdef SPELL_PRINTTREE
+ int wn_nr; /* sequence nr for printing */
+#endif
};
#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
@@ -3180,7 +3196,9 @@ static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
static void free_blocks __ARGS((sblock_T *bl));
static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist));
-static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp));
+static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, spellinfo_T *spin));
+static wordnode_T *get_wordnode __ARGS((sblock_T **blp));
+static void free_wordnode __ARGS((wordnode_T *n));
static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
@@ -3195,6 +3213,109 @@ static void init_spellfile __ARGS((void));
* Use a negative number with the lower 8 bits zero. */
#define PFX_FLAGS -256
+static int words_added = 0; /* number of words added to tree */
+
+#ifdef SPELL_PRINTTREE
+/*
+ * For debugging the tree code: print the current tree in a (more or less)
+ * readable format, so that we can see what happens when adding a word and/or
+ * compressing the tree.
+ * Based on code from Olaf Seibert.
+ */
+#define PRINTLINESIZE 1000
+#define PRINTWIDTH 6
+
+#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
+ PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
+
+static char line1[PRINTLINESIZE];
+static char line2[PRINTLINESIZE];
+static char line3[PRINTLINESIZE];
+
+ static void
+spell_clear_flags(wordnode_T *node)
+{
+ wordnode_T *np;
+
+ for (np = node; np != NULL; np = np->wn_sibling)
+ {
+ np->wn_u1.index = FALSE;
+ spell_clear_flags(np->wn_child);
+ }
+}
+
+ static void
+spell_print_node(wordnode_T *node, int depth)
+{
+ if (node->wn_u1.index)
+ {
+ /* Done this node before, print the reference. */
+ PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
+ PRINTSOME(line2, depth, " ", 0, 0);
+ PRINTSOME(line3, depth, " ", 0, 0);
+ msg(line1);
+ msg(line2);
+ msg(line3);
+ }
+ else
+ {
+ node->wn_u1.index = TRUE;
+
+ if (node->wn_byte != NUL)
+ {
+ if (node->wn_child != NULL)
+ PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
+ else
+ /* Cannot happen? */
+ PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
+ }
+ else
+ PRINTSOME(line1, depth, " $ ", 0, 0);
+
+ PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
+
+ if (node->wn_sibling != NULL)
+ PRINTSOME(line3, depth, " | ", 0, 0);
+ else
+ PRINTSOME(line3, depth, " ", 0, 0);
+
+ if (node->wn_byte == NUL)
+ {
+ msg(line1);
+ msg(line2);
+ msg(line3);
+ }
+
+ /* do the children */
+ if (node->wn_byte != NUL && node->wn_child != NULL)
+ spell_print_node(node->wn_child, depth + 1);
+
+ /* do the siblings */
+ if (node->wn_sibling != NULL)
+ {
+ /* get rid of all parent details except | */
+ STRCPY(line1, line3);
+ STRCPY(line2, line3);
+ spell_print_node(node->wn_sibling, depth);
+ }
+ }
+}
+
+ static void
+spell_print_tree(wordnode_T *root)
+{
+ if (root != NULL)
+ {
+ /* Clear the "wn_u1.index" fields, used to remember what has been
+ * done. */
+ spell_clear_flags(root);
+
+ /* Recursively print the tree. */
+ spell_print_node(root, 0);
+ }
+}
+#endif /* SPELL_PRINTTREE */
+
/*
* Read the affix file "fname".
* Returns an afffile_T, NULL for complete failure.
@@ -3611,7 +3732,7 @@ spell_read_aff(fname, spin)
if (upper)
n |= WFP_UP;
tree_add_word(p, spin->si_prefroot, n,
- idx, cur_aff->ah_newID, &spin->si_blocks);
+ idx, cur_aff->ah_newID, spin);
}
}
}
@@ -4583,7 +4704,7 @@ store_word(word, spin, flags, region, pfxlist)
for (p = pfxlist; res == OK; ++p)
{
res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
- region, p == NULL ? 0 : *p, &spin->si_blocks);
+ region, p == NULL ? 0 : *p, spin);
if (p == NULL || *p == NUL)
break;
}
@@ -4594,7 +4715,7 @@ store_word(word, spin, flags, region, pfxlist)
for (p = pfxlist; res == OK; ++p)
{
res = tree_add_word(word, spin->si_keeproot, flags,
- region, p == NULL ? 0 : *p, &spin->si_blocks);
+ region, p == NULL ? 0 : *p, spin);
if (p == NULL || *p == NUL)
break;
}
@@ -4610,27 +4731,63 @@ store_word(word, spin, flags, region, pfxlist)
* Returns FAIL when out of memory.
*/
static int
-tree_add_word(word, root, flags, region, prefixID, blp)
+tree_add_word(word, root, flags, region, prefixID, spin)
char_u *word;
wordnode_T *root;
int flags;
int region;
int prefixID;
- sblock_T **blp;
+ spellinfo_T *spin;
{
wordnode_T *node = root;
wordnode_T *np;
+ wordnode_T *copyp, **copyprev;
wordnode_T **prev = NULL;
int i;
+ sblock_T **blp = &spin->si_blocks;
/* Add each byte of the word to the tree, including the NUL at the end. */
for (i = 0; ; ++i)
{
+ /* When there is more than one reference to this node we need to make
+ * a copy, so that we can modify it. Copy the whole list of siblings
+ * (we don't optimize for a partly shared list of siblings). */
+ if (node != NULL && node->wn_refs > 1)
+ {
+ --node->wn_refs;
+ copyprev = prev;
+ for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
+ {
+ /* Allocate a new node and copy the info. */
+ np = get_wordnode(blp);
+ if (np == NULL)
+ return FAIL;
+ np->wn_child = copyp->wn_child;
+ if (np->wn_child != NULL)
+ ++np->wn_child->wn_refs; /* child gets extra ref */
+ np->wn_byte = copyp->wn_byte;
+ if (np->wn_byte == NUL)
+ {
+ np->wn_flags = copyp->wn_flags;
+ np->wn_region = copyp->wn_region;
+ np->wn_prefixID = copyp->wn_prefixID;
+ }
+
+ /* Link the new node in the list, there will be one ref. */
+ np->wn_refs = 1;
+ *copyprev = np;
+ copyprev = &np->wn_sibling;
+
+ /* Let "node" point to the head of the copied list. */
+ if (copyp == node)
+ node = np;
+ }
+ }
+
/* Look for the sibling that has the same character. They are sorted
* on byte value, thus stop searching when a sibling is found with a
* higher byte value. For zero bytes (end of word) the sorting is
- * done on flags and then on prefixID
- */
+ * done on flags and then on prefixID. */
while (node != NULL
&& (node->wn_byte < word[i]
|| (node->wn_byte == NUL
@@ -4651,10 +4808,22 @@ tree_add_word(word, root, flags, region, prefixID, blp)
|| node->wn_prefixID != prefixID)))
{
/* Allocate a new node. */
- np = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE);
+ np = get_wordnode(blp);
if (np == NULL)
return FAIL;
np->wn_byte = word[i];
+
+ /* If "node" is NULL this is a new child or the end of the sibling
+ * list: ref count is one. Otherwise use ref count of sibling and
+ * make ref count of sibling one (matters when inserting in front
+ * of the list of siblings). */
+ if (node == NULL)
+ np->wn_refs = 1;
+ else
+ {
+ np->wn_refs = node->wn_refs;
+ node->wn_refs = 1;
+ }
*prev = np;
np->wn_sibling = node;
node = np;
@@ -4670,10 +4839,73 @@ tree_add_word(word, root, flags, region, prefixID, blp)
prev = &node->wn_child;
node = *prev;
}
+#ifdef SPELL_PRINTTREE
+ smsg("Added \"%s\"", word);
+ spell_print_tree(root->wn_sibling);
+#endif
+
+ /*
+ * Every so many words compress the tree, so that we don't use too much
+ * memory.
+ */
+ if (++words_added >= SPELL_COMPRESS_CNT)
+ {
+ words_added = 0;
+
+ msg_start();
+ msg_puts((char_u *)_(msg_compressing));
+ msg_clr_eos();
+ msg_didout = FALSE;
+ msg_col = 0;
+ out_flush();
+ wordtree_compress(root->wn_sibling, spin);
+ }
return OK;
}
+/* We keep a list of nodes that have been freed during compression. They are
+ * re-used when adding a new node. The "wn_child" fields links them. */
+static wordnode_T *first_free_node = NULL;
+#ifdef SPELL_PRINTTREE
+static int wordnode_nr = 0;
+#endif
+
+/*
+ * Get a wordnode_T, either from the list of previously freed nodes or
+ * allocate a new one.
+ */
+ static wordnode_T *
+get_wordnode(blp)
+ sblock_T **blp;
+{
+ wordnode_T *n;
+
+ if (first_free_node == NULL)
+ n = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE);
+ else
+ {
+ n = first_free_node;
+ first_free_node = n->wn_child;
+ vim_memset(n, 0, sizeof(wordnode_T));
+ }
+#ifdef SPELL_PRINTTREE
+ n->wn_nr = ++wordnode_nr;
+#endif
+ return n;
+}
+
+/*
+ * Free a wordnode_T for re-use later.
+ */
+ static void
+free_wordnode(n)
+ wordnode_T *n;
+{
+ n->wn_child = first_free_node;
+ first_free_node = n;
+}
+
/*
* Compress a tree: find tails that are identical and can be shared.
*/
@@ -4685,20 +4917,31 @@ wordtree_compress(root, spin)
hashtab_T ht;
int n;
int tot = 0;
+ int perc;
if (root != NULL)
{
hash_init(&ht);
n = node_compress(root, &ht, &tot);
+
+#ifndef SPELL_PRINTTREE
if (spin->si_verbose || p_verbose > 2)
+#endif
{
if (!spin->si_verbose)
verbose_enter();
+ if (tot > 1000000)
+ perc = (tot - n) / (tot / 100);
+ else
+ perc = (tot - n) * 100 / tot;
smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
- n, tot, (tot - n) * 100 / tot);
+ n, tot, perc);
if (p_verbose > 2)
verbose_leave();
}
+#ifdef SPELL_PRINTTREE
+ spell_print_tree(root);
+#endif
hash_clear(&ht);
}
}
@@ -4749,8 +4992,17 @@ node_compress(node, ht, tot)
if (node_equal(child, tp))
{
/* Found one! Now use that child in place of the
- * current one. This means the current child is
- * dropped from the tree. */
+ * current one. This means the current child and all
+ * its siblings is unlinked from the tree. */
+ ++tp->wn_refs;
+ --child->wn_refs;
+ if (child->wn_refs == 0)
+ for (; child != NULL; child = child->wn_sibling)
+ {
+ if (child->wn_child != NULL)
+ --child->wn_child->wn_refs;
+ free_wordnode(child);
+ }
np->wn_child = tp;
++compressed;
break;
@@ -5023,11 +5275,11 @@ write_vim_spell(fname, spin)
for (round = 1; round <= 3; ++round)
{
if (round == 1)
- tree = spin->si_foldroot;
+ tree = spin->si_foldroot->wn_sibling;
else if (round == 2)
- tree = spin->si_keeproot;
+ tree = spin->si_keeproot->wn_sibling;
else
- tree = spin->si_prefroot;
+ tree = spin->si_prefroot->wn_sibling;
/* Clear the index and wnode fields in the tree. */
clear_node(tree);
@@ -5271,6 +5523,11 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
+ first_free_node = NULL;
+#ifdef SPELL_PRINTTREE
+ wordnode_nr = 0;
+#endif
+ words_added = 0;
/* default: fnames[0] is output file, following are input files */
innames = &fnames[1];
@@ -5366,7 +5623,7 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
|| spin.si_keeproot == NULL
|| spin.si_prefroot == NULL)
{
- error = TRUE;
+ free_blocks(spin.si_blocks);
return;
}
@@ -5422,27 +5679,20 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
if (!error)
{
/*
- * Remove the dummy NUL from the start of the tree root.
- */
- spin.si_foldroot = spin.si_foldroot->wn_sibling;
- spin.si_keeproot = spin.si_keeproot->wn_sibling;
- spin.si_prefroot = spin.si_prefroot->wn_sibling;
-
- /*
* Combine tails in the tree.
*/
if (!added_word || p_verbose > 2)
{
if (added_word)
verbose_enter();
- MSG(_("Compressing word tree..."));
+ MSG(_(msg_compressing));
out_flush();
if (added_word)
verbose_leave();
}
- wordtree_compress(spin.si_foldroot, &spin);
- wordtree_compress(spin.si_keeproot, &spin);
- wordtree_compress(spin.si_prefroot, &spin);
+ wordtree_compress(spin.si_foldroot->wn_sibling, &spin);
+ wordtree_compress(spin.si_keeproot->wn_sibling, &spin);
+ wordtree_compress(spin.si_prefroot->wn_sibling, &spin);
}
if (!error)