summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2005-06-04 21:55:20 +0000
committerBram Moolenaar <Bram@vim.org>2005-06-04 21:55:20 +0000
commit51485f06246966898f7c00e2e53b1ba4c6855cf7 (patch)
treec1cfe02ab088ea7a4423f15829e4083303d62a89
parent4debb442bd885d182d7f77d1dfcdf143fd7cbf88 (diff)
updated for version 7.0079
-rw-r--r--runtime/doc/develop.txt10
-rw-r--r--runtime/doc/map.txt3
-rw-r--r--runtime/spell/en.utf-8.splbin1104055 -> 562278 bytes
-rw-r--r--src/buffer.c21
-rw-r--r--src/getchar.c2
-rw-r--r--src/mark.c12
-rw-r--r--src/normal.c3
-rw-r--r--src/proto/charset.pro1
-rw-r--r--src/spell.c4927
9 files changed, 1308 insertions, 3671 deletions
diff --git a/runtime/doc/develop.txt b/runtime/doc/develop.txt
index bc8b0e8c09..cceda32b97 100644
--- a/runtime/doc/develop.txt
+++ b/runtime/doc/develop.txt
@@ -1,4 +1,4 @@
-*develop.txt* For Vim version 7.0aa. Last change: 2005 Mar 29
+*develop.txt* For Vim version 7.0aa. Last change: 2005 Jun 04
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -381,10 +381,10 @@ checking engine in Vim, for various reasons:
fly (while redrawing), just like syntax highlighting. But the mechanisms
used by other code are much slower. Myspell uses a simplistic hashtable,
for example.
-- For a program like aspell a communication mechanism would have to be setup.
- That's complicated to do in a portable way (Unix-only would be relatively
- simple, but that's not good enough). And performance will become a problem
- (lots of process switching involved).
+- For using an external program like aspell a communication mechanism would
+ have to be set up. That's complicated to do in a portable way (Unix-only
+ would be relatively simple, but that's not good enough). And performance
+ will become a problem (lots of process switching involved).
- Missing support for words with non-word characters, such as "Etten-Leur" and
"et al.", would require marking the pieces of them OK, lowering the
reliability.
diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt
index 1119618d79..18da9ccc06 100644
--- a/runtime/doc/map.txt
+++ b/runtime/doc/map.txt
@@ -1,4 +1,4 @@
-*map.txt* For Vim version 7.0aa. Last change: 2005 Mar 29
+*map.txt* For Vim version 7.0aa. Last change: 2005 Jun 03
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -1032,6 +1032,7 @@ The valid escape sequences are
If the first two characters of an escape sequence are "q-" (for example,
<q-args>) then the value is quoted in such a way as to make it a valid value
for use in an expression. This uses the argument as one single value.
+When there is no argument, <q-args> is an empty string.
To allow commands to pass their arguments on to a user-defined function, there
is a special form <f-args> ("function args"). This splits the command
diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl
index 156c0979e9..300f6ce807 100644
--- a/runtime/spell/en.utf-8.spl
+++ b/runtime/spell/en.utf-8.spl
Binary files differ
diff --git a/src/buffer.c b/src/buffer.c
index 294bc9b166..c54df88175 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -854,11 +854,11 @@ do_bufdel(command, arg, addr_count, start_bnr, end_bnr, forceit)
if (deleted == 0)
{
if (command == DOBUF_UNLOAD)
- sprintf((char *)IObuff, _("E515: No buffers were unloaded"));
+ STRCPY(IObuff, _("E515: No buffers were unloaded"));
else if (command == DOBUF_DEL)
- sprintf((char *)IObuff, _("E516: No buffers were deleted"));
+ STRCPY(IObuff, _("E516: No buffers were deleted"));
else
- sprintf((char *)IObuff, _("E517: No buffers were wiped out"));
+ STRCPY(IObuff, _("E517: No buffers were wiped out"));
errormsg = IObuff;
}
else if (deleted >= p_report)
@@ -2450,7 +2450,7 @@ buflist_list(eap)
else
home_replace(buf, buf->b_fname, NameBuff, MAXPATHL, TRUE);
- sprintf((char *)IObuff, "%3d%c%c%c%c%c \"",
+ vim_snprintf((char *)IObuff, IOSIZE - 20, "%3d%c%c%c%c%c \"%s\"",
buf->b_fnum,
buf->b_p_bl ? ' ' : 'u',
buf == curbuf ? '%' :
@@ -2459,18 +2459,11 @@ buflist_list(eap)
(buf->b_nwindows == 0 ? 'h' : 'a'),
!buf->b_p_ma ? '-' : (buf->b_p_ro ? '=' : ' '),
(buf->b_flags & BF_READERR) ? 'x'
- : (bufIsChanged(buf) ? '+' : ' ')
- );
-
- len = (int)STRLEN(IObuff);
- STRNCPY(IObuff + len, NameBuff, IOSIZE - 20 - len);
- IObuff[IOSIZE - 20 - len] = NUL; /* make sure it's terminated */
-
- len = (int)STRLEN(IObuff);
- IObuff[len++] = '"';
+ : (bufIsChanged(buf) ? '+' : ' '),
+ NameBuff);
/* put "line 999" in column 40 or after the file name */
- IObuff[len] = NUL;
+ len = STRLEN(IObuff);
i = 40 - vim_strsize(IObuff);
do
{
diff --git a/src/getchar.c b/src/getchar.c
index 2ece91aa2e..6d26d6d557 100644
--- a/src/getchar.c
+++ b/src/getchar.c
@@ -4342,7 +4342,7 @@ put_escstr(fd, strstart, what)
if (p != NULL)
{
while (*p != NUL)
- if (putc(*p++, fd) < 0)
+ if (fputc(*p++, fd) < 0)
return FAIL;
--str;
continue;
diff --git a/src/mark.c b/src/mark.c
index 6a149bface..9d74b4a7c9 100644
--- a/src/mark.c
+++ b/src/mark.c
@@ -1445,6 +1445,7 @@ removable(name)
char_u *p;
char_u part[51];
int retval = FALSE;
+ int n;
name = home_replace_save(NULL, name);
if (name != NULL)
@@ -1452,11 +1453,14 @@ removable(name)
for (p = p_viminfo; *p; )
{
copy_option_part(&p, part, 51, ", ");
- if (part[0] == 'r'
- && MB_STRNICMP(part + 1, name, STRLEN(part + 1)) == 0)
+ if (part[0] == 'r')
{
- retval = TRUE;
- break;
+ n = STRLEN(part + 1);
+ if (MB_STRNICMP(part + 1, name, n) == 0)
+ {
+ retval = TRUE;
+ break;
+ }
}
}
vim_free(name);
diff --git a/src/normal.c b/src/normal.c
index 207576698e..c3c7627575 100644
--- a/src/normal.c
+++ b/src/normal.c
@@ -2823,6 +2823,7 @@ do_mouse(oap, c, dir, count, fixindent)
if ((mod_mask & MOD_MASK_MULTI_CLICK) == MOD_MASK_2CLICK)
{
pos_T *pos = NULL;
+ int gc;
if (is_click)
{
@@ -2830,7 +2831,7 @@ do_mouse(oap, c, dir, count, fixindent)
* not a word character, try finding a match and select a (),
* {}, [], #if/#endif, etc. block. */
end_visual = curwin->w_cursor;
- while (vim_iswhite(gchar_pos(&end_visual)))
+ while (gc = gchar_pos(&end_visual), vim_iswhite(gc))
inc(&end_visual);
if (oap != NULL)
oap->motion_type = MCHAR;
diff --git a/src/proto/charset.pro b/src/proto/charset.pro
index e839d6374c..9fae4063f4 100644
--- a/src/proto/charset.pro
+++ b/src/proto/charset.pro
@@ -40,6 +40,7 @@ void getvvcol __ARGS((win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, co
void getvcols __ARGS((win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, colnr_T *right));
char_u *skipwhite __ARGS((char_u *p));
char_u *skipdigits __ARGS((char_u *p));
+char_u *skiphex __ARGS((char_u *p));
char_u *skiptodigit __ARGS((char_u *p));
char_u *skiptohex __ARGS((char_u *p));
int vim_isdigit __ARGS((int c));
diff --git a/src/spell.c b/src/spell.c
index 9d010f912e..2d80389d96 100644
--- a/src/spell.c
+++ b/src/spell.c
@@ -10,25 +10,91 @@
/*
* spell.c: code for spell checking
*
- * The basic spell checking mechanism is:
- * 1. Isolate a word, up to the next non-word character.
- * 2. Find the word in the hashtable of basic words.
- * 3. If not found, look in the hashtable with "prewords". These are prefixes
- * with a non-word character following a word character, e.g., "de-".
- * 4. If still not found, for each matching a prefix try if the word matches
- * without the prefix (and with the "chop" string added back).
- * 5. If still still not found, for each matching suffix try if the word
- * matches without the suffix (and with the "chop" string added back).
+ * The spell checking mechanism uses a tree (aka trie). Each node in the tree
+ * has a list of bytes that can appear (siblings). For each byte there is a
+ * pointer to the node with the byte that follows in the word (child).
+ * A NUL byte is used where the word may end.
+ *
+ * There are two trees: one with case-folded words and one with words in
+ * original case. The second one is only used for keep-case words and is
+ * usually small.
+ *
+ * Thanks to Olaf Seibert for providing an example implementation of this tree
+ * and the compression mechanism.
*
* Matching involves checking the caps type: Onecap ALLCAP KeepCap.
- * After finding a matching word check for a leadstring (non-word characters
- * before the word) and addstring (more text following, starting with a
- * non-word character).
*
* Why doesn't Vim use aspell/ispell/myspell/etc.?
* See ":help develop-spell".
*/
+/*
+ * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
+ *
+ * <HEADER>: <fileID> <regioncnt> <regionname> ...
+ * <charflagslen> <charflags> <fcharslen> <fchars>
+ *
+ * <fileID> 10 bytes "VIMspell05"
+ * <regioncnt> 1 byte number of regions following (8 supported)
+ * <regionname> 2 bytes Region name: ca, au, etc.
+ * First <regionname> is region 1.
+ *
+ * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
+ * <charflags> N bytes List of flags (first one is for character 128):
+ * 0x01 word character
+ * 0x02 upper-case character
+ * <fcharslen> 2 bytes Number of bytes in <fchars>.
+ * <fchars> N bytes Folded characters, first one is for character 128.
+ *
+ *
+ * <SUGGEST> : <suggestlen> <more> ...
+ *
+ * <suggestlen> 4 bytes Length of <SUGGEST> in bytes, excluding
+ * <suggestlen>. MSB first.
+ * <more> To be defined.
+ *
+ *
+ * <LWORDTREE>: <wordtree>
+ *
+ * <wordtree>: <nodecount> <nodedata> ...
+ *
+ * <nodecount> 4 bytes Number of nodes following. MSB first.
+ *
+ * <nodedata>: <siblingcount> <sibling> ...
+ *
+ * <siblingcount> 1 byte Number of siblings in this node. The siblings
+ * follow in sorted order.
+ *
+ * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
+ *
+ * <byte> 1 byte Byte value of the sibling. Special cases:
+ * BY_NOFLAGS: End of word without flags and for all
+ * regions.
+ * BY_FLAGS: End of word, <flags> follow.
+ * BY_INDEX: Child of sibling is shared, <nodeidx>
+ * and <xbyte> follow.
+ *
+ * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
+ *
+ * <xbyte> 1 byte byte value of the sibling.
+ *
+ * <flags> 1 byte bitmask of:
+ * WF_ALLCAP word must have only capitals
+ * WF_ONECAP first char of word must be capital
+ * WF_RARE rare word
+ * WF_REGION <region> follows
+ *
+ * <region> 1 byte Bitmask for regions in which word is valid. When
+ * omitted it's valid in all regions.
+ * Lowest bit is for region 1.
+ *
+ * <KWORDTREE>: <wordtree>
+ *
+ *
+ * All text characters are in 'encoding', but stored as single bytes.
+ * The region name is ASCII.
+ */
+
#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
# include <io.h> /* for lseek(), must be before vim.h */
#endif
@@ -41,21 +107,20 @@
# include <fcntl.h>
#endif
-#define MAXWLEN 100 /* assume max. word len is this many bytes */
+#define MAXWLEN 250 /* assume max. word len is this many bytes */
-/*
- * Structure that is used to store the structures and strings from the
- * language file. This avoids the need to allocate space for each individual
- * word. It's allocated in big chunks for speed. It's freed all at once when
- * 'encoding' changes.
- */
-#define SBLOCKSIZE 4096 /* default size of sb_data */
-typedef struct sblock_S sblock_T;
-struct sblock_S
-{
- sblock_T *sb_next; /* next block in list */
- char_u sb_data[1]; /* data, actually longer */
-};
+/* Flags used for a word. */
+#define WF_REGION 0x01 /* region byte follows */
+#define WF_ONECAP 0x02 /* word with one capital (or all capitals) */
+#define WF_ALLCAP 0x04 /* word must be all capitals */
+#define WF_RARE 0x08 /* rare word */
+
+#define WF_KEEPCAP 0x100 /* keep-case word */
+
+#define BY_NOFLAGS 0 /* end of word without flags or region */
+#define BY_FLAGS 1 /* end of word, flag byte follows */
+#define BY_INDEX 2 /* child is shared, index follows */
+#define BY_SPECIAL BY_INDEX /* highest special byte value */
/* Info from "REP" entries in ".aff" file used in af_rep.
* TODO: This is not used yet. Either use it or remove it. */
@@ -66,58 +131,33 @@ typedef struct repentry_S
} repentry_T;
/*
- * Structure to store affix info.
- */
-typedef struct affitem_S affitem_T;
-struct affitem_S
-{
- affitem_T *ai_next; /* next affix with same ai_add[] or NULL */
- short_u ai_nr; /* affix number */
- char_u ai_flags; /* AFF_ flags */
- char_u ai_choplen; /* length of chop string in bytes */
- char_u ai_addlen; /* length of ai_add in bytes */
- char_u ai_leadlen; /* for AFF_PREWORD: length of lead string */
- char_u ai_taillen; /* for AFF_PREWORD: length of tail string */
- char_u ai_add[1]; /* Text added to basic word. This stores:
- * 0: word for AFF_PREWORD or whole addition
- * ai_addlen + 1: chop string
- * + ai_choplen + 1: lead string for AFF_PREWORD
- * + ai_leadlen + 1: trail string f. AFF_PREWORD
- */
-};
-
-/* Get affitem_T pointer from hashitem that uses ai_add */
-static affitem_T dumai;
-#define HI2AI(hi) ((affitem_T *)((hi)->hi_key - (dumai.ai_add - (char_u *)&dumai)))
-
-/* ai_flags: Affix item flags */
-#define AFF_COMBINE 0x01 /* prefix combines with suffix */
-#define AFF_PREWORD 0x02 /* prefix includes word */
-
-/*
* Structure used to store words and other info for one language, loaded from
* a .spl file.
- * The main access is through hashtable "sl_word", using the case-folded
- * word as the key. This finds a linked list of fword_T.
+ * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
+ * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
+ *
+ * The "byts" array stores the possible bytes in each tree node, preceded by
+ * the number of possible bytes, sorted on byte value:
+ * <len> <byte1> <byte2> ...
+ * The "idxs" array stores the index of the child node corresponding to the
+ * byte in "byts".
+ * Exception: when the byte is zero, the word may end here and "idxs" holds
+ * the flags and region for the word. There may be several zeros in sequence
+ * for alternative flag/region combinations.
*/
typedef struct slang_S slang_T;
struct slang_S
{
slang_T *sl_next; /* next language */
char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
- hashtab_T sl_words; /* main word table, fword_T */
- int sl_prefcnt; /* number of prefix NRs */
- garray_T sl_preftab; /* list of hashtables to lookup prefixes */
- affitem_T *sl_prefzero; /* list of prefixes with zero add length */
- hashtab_T sl_prewords; /* prefixes that include a word */
- int sl_suffcnt; /* number of suffix NRs */
- garray_T sl_sufftab; /* list of hashtables to lookup suffixes */
- affitem_T *sl_suffzero; /* list of suffixes with zero add length */
+ char_u *sl_fbyts; /* case-folded word bytes */
+ int *sl_fidxs; /* case-folded word indexes */
+ char_u *sl_kbyts; /* keep-case word bytes */
+ int *sl_kidxs; /* keep-case word indexes */
char_u *sl_try; /* "TRY" from .aff file TODO: not used */
garray_T sl_rep; /* list of repentry_T entries from REP lines
* TODO not used */
char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
- sblock_T *sl_block; /* list with allocated memory blocks */
int sl_error; /* error while loading */
};
@@ -125,57 +165,6 @@ struct slang_S
* languages. */
static slang_T *first_lang = NULL;
-/*
- * Structure to store an addition to a basic word.
- * There are many of these, keep it small!
- */
-typedef struct addword_S addword_T;
-struct addword_S
-{
- addword_T *aw_next; /* next addition */
- char_u aw_flags; /* ADD_ flags */
- char_u aw_region; /* region for word with this addition */
- char_u aw_leadlen; /* byte length of lead in aw_word */
- char_u aw_wordlen; /* byte length of first word in aw_word */
- char_u aw_saveb; /* saved byte where aw_word[] is truncated at
- end of hashtable key; NUL when not using
- hashtable */
- char_u aw_word[1]; /* text, actually longer: case-folded addition
- plus, with ADD_KEEPCAP: keep-case addition */
-};
-
-/* Get addword_T pointer from hashitem that uses aw_word */
-static addword_T dumaw;
-#define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw)))
-
-/*
- * Structure to store a basic word.
- * There are many of these, keep it small!
- * The list of prefix and suffix NRs is stored after "fw_word" to avoid the
- * need for two extra pointers.
- */
-typedef struct fword_S fword_T;
-struct fword_S
-{
- fword_T *fw_next; /* same basic word with different caps and/or
- * affixes */
- addword_T *fw_adds; /* first addword_T entry */
- short_u fw_flags; /* BWF_ flags */
- char_u fw_region; /* region bits */
- char_u fw_prefixcnt; /* number of prefix NRs */
- char_u fw_suffixcnt; /* number of suffix NRs */
- char_u fw_word[1]; /* actually longer:
- * 0: case folded word or keep-case word when
- * (flags & BWF_KEEPCAP)
- * + word length + 1: list of prefix NRs
- * + fw_prefixcnt [* 2]: list of suffix NRs
- */
-};
-
-/* Get fword_T pointer from hashitem that uses fw_word */
-static fword_T dumfw;
-#define HI2FWORD(hi) ((fword_T *)((hi)->hi_key - (dumfw.fw_word - (char_u *)&dumfw)))
-
#define REGION_ALL 0xff
@@ -195,39 +184,7 @@ typedef struct langp_S
#define SP_RARE 2
#define SP_LOCAL 3
-/* flags used for basic words in the spell file */
-#define BWF_VALID 0x01 /* word is valid without additions */
-#define BWF_REGION 0x02 /* region byte follows */
-#define BWF_ONECAP 0x04 /* first letter must be capital */
-#define BWF_SUFFIX 0x08 /* has suffix NR list */
-#define BWF_SECOND 0x10 /* second flags byte follows */
-
-#define BWF_ADDS 0x0100 /* there are additions */
-#define BWF_PREFIX 0x0200 /* has prefix NR list */
-#define BWF_ALLCAP 0x0400 /* all letters must be capital (not used
- for single-letter words) */
-#define BWF_KEEPCAP 0x0800 /* Keep case as-is */
-#define BWF_ADDS_M 0x1000 /* there are more than 255 additions */
-
-#define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */
-
-#define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without
- any word character */
-
-/* flags used for addition in the spell file */
-#define ADD_REGION 0x02 /* region byte follows */
-#define ADD_ONECAP 0x04 /* first letter must be capital */
-#define ADD_LEADLEN 0x10 /* there is a leadlen byte */
-#define ADD_COPYLEN 0x20 /* there is a copylen byte */
-#define ADD_ALLCAP 0x40 /* all letters must be capital (not used
- for single-letter words) */
-#define ADD_KEEPCAP 0x80 /* fixed case */
-
-/* Translate ADD_ flags to BWF_ flags.
- * (Needed to keep ADD_ flags in one byte.) */
-#define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4))
-
-#define VIMSPELLMAGIC "VIMspell04" /* string at start of Vim spell file */
+#define VIMSPELLMAGIC "VIMspell05" /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10
/*
@@ -239,49 +196,33 @@ typedef struct matchinf_S
slang_T *mi_slang; /* info for the language */
/* pointers to original text to be checked */
- char_u *mi_line; /* start of line containing word */
char_u *mi_word; /* start of word being checked */
- char_u *mi_end; /* first non-word char after mi_word */
- char_u *mi_wend; /* end of matching word (is mi_end
- * or further) */
+ char_u *mi_end; /* end of matching word */
char_u *mi_fend; /* next char to be added to mi_fword */
+ char_u *mi_cend; /* char after what was used for
+ mi_capflags */
/* case-folded text */
char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
- int mi_fendlen; /* byte length of first word in
- mi_fword */
- int mi_faddlen; /* byte length of text in mi_fword
- after first word */
- char_u *mi_cword; /* word to check, points in mi_fword */
- char_u *mi_awend; /* after next word, to check for
- addition (NULL when not done yet) */
- int mi_did_awend; /* did compute mi_awend */
+ int mi_fwordlen; /* nr of valid bytes in mi_fword */
/* others */
int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
- int mi_capflags; /* BWF_ONECAP BWF_ALLCAP BWF_KEEPCAP */
+ int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
} matchinf_T;
-static int word_match __ARGS((matchinf_T *mip));
-static int check_adds __ARGS((matchinf_T *mip, fword_T *fw, int req_pref, int req_suf));
-static void fill_awend __ARGS((matchinf_T *mip));
-static void fold_addchars __ARGS((matchinf_T *mip, int addlen));
-static int supports_affix __ARGS((int cnt, char_u *afflist, int afflistlen, int nr));
-static int prefix_match __ARGS((matchinf_T *mip));
-static int noprefix_match __ARGS((matchinf_T *mip, char_u *pword, char_u *cstart, affitem_T *ai));
-static int suffix_match __ARGS((matchinf_T *mip));
-static int match_caps __ARGS((int flags, char_u *caseword, matchinf_T *mip, char_u *cword, char_u *end));
static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
+static void find_word __ARGS((matchinf_T *mip, int keepcap));
static slang_T *spell_load_lang __ARGS((char_u *lang));
static void spell_load_file __ARGS((char_u *fname, void *cookie));
-static void *getroom __ARGS((slang_T *lp, int *bl_used, int len));
+static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx));
static int find_region __ARGS((char_u *rp, char_u *region));
static int captype __ARGS((char_u *word, char_u *end));
/*
* Main spell-checking function.
- * "ptr" points to the start of a word.
+ * "ptr" points to a character that could be the start of a word.
* "*attrp" is set to the attributes for a badly spelled word. For a non-word
* or when it's OK it remains unchanged.
* This must only be called when 'spelllang' is not empty.
@@ -289,885 +230,328 @@ static int captype __ARGS((char_u *word, char_u *end));
* caller can skip over the word.
*/
int
-spell_check(wp, line, ptr, attrp)
+spell_check(wp, ptr, attrp)
win_T *wp; /* current window */
- char_u *line; /* start of line where "ptr" points into */
char_u *ptr;
int *attrp;
{
matchinf_T mi; /* Most things are put in "mi" so that it can
be passed to functions quickly. */
- /* Find the end of the word. We already know that *ptr is a word char. */
+ /* Find the end of the word. */
mi.mi_word = ptr;
mi.mi_end = ptr;
- do
- {
- mb_ptr_adv(mi.mi_end);
- } while (*mi.mi_end != NUL && spell_iswordc(mi.mi_end));
- /* A word starting with a number is always OK. */
+ /* A word starting with a number is always OK. Also skip hexadecimal
+ * numbers 0xFF99 and 0X99FF. */
if (*ptr >= '0' && *ptr <= '9')
- return (int)(mi.mi_end - ptr);
-
- /* Make case-folded copy of the word. */
- (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
- mi.mi_cword = mi.mi_fword;
- mi.mi_fendlen = STRLEN(mi.mi_fword);
- mi.mi_faddlen = 0;
- mi.mi_fend = mi.mi_end;
-
- /* Check the caps type of the word. */
- mi.mi_capflags = captype(ptr, mi.mi_end);
-
- /* The word is bad unless we recognize it. */
- mi.mi_result = SP_BAD;
- mi.mi_wend = mi.mi_end;
-
- mi.mi_awend = NULL;
- mi.mi_did_awend = FALSE;
- mi.mi_line = line;
-
- /*
- * Loop over the languages specified in 'spelllang'.
- * We check them all, because a matching word may have additions that are
- * longer than an already found matching word.
- */
- for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
- mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
- {
- /*
- * Check for a matching word.
- * If not found or wrong region try removing prefixes (and then
- * suffixes).
- * If still not found or wrong region try removing suffixes.
- */
- mi.mi_slang = mi.mi_lp->lp_slang;
- if (!word_match(&mi) || mi.mi_result != SP_OK)
- if (!prefix_match(&mi) || mi.mi_result != SP_OK)
- suffix_match(&mi);
- }
-
- if (mi.mi_result != SP_OK)
{
- if (mi.mi_result == SP_BAD)
- *attrp = highlight_attr[HLF_SPB];
- else if (mi.mi_result == SP_RARE)
- *attrp = highlight_attr[HLF_SPR];
+ if (*ptr == '0' && (ptr[1] == 'x' || ptr[2] == 'X'))
+ mi.mi_end = skiphex(ptr);
else
- *attrp = highlight_attr[HLF_SPL];
+ mi.mi_end = skipdigits(ptr);
}
-
- return (int)(mi.mi_wend - ptr);
-}
-
-/*
- * Check if the word "mip->mi_word" matches.
- * "mip->mi_fword" is the same word case-folded;
- *
- * This checks the word as a whole and for prefixes that include a word.
- *
- * Note that when called mi_fword only contains the word up to mip->mi_end,
- * but when checking additions it gets longer.
- */
- static int
-word_match(mip)
- matchinf_T *mip;
-{
- hash_T fhash = hash_hash(mip->mi_fword);
- hashitem_T *hi;
- fword_T *fw;
- int valid = FALSE;
- char_u *p;
- char_u pword[MAXWLEN + 1];
- int charlen;
- int capflags_save;
- affitem_T *ai;
- char_u *cstart;
- int addlen;
- int n;
- char_u *save_end;
- int cc;
-
- hi = hash_lookup(&mip->mi_slang->sl_words, mip->mi_fword, fhash);
- if (!HASHITEM_EMPTY(hi))
- {
- /*
- * Find a basic word for which the case of "mi_word" is correct.
- * If it is, check additions and use the longest one.
- */
- for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next)
- if (match_caps(fw->fw_flags, fw->fw_word, mip,
- mip->mi_word, mip->mi_end))
- valid |= check_adds(mip, fw, -1, -1);
- }
-
- /*
- * Try finding a matching preword for "mip->mi_word". These are
- * prefixes that have a non-word character after a word character:
- * "d'", "de-", "'s-", "l'de-". But not "'s".
- * Also need to do this when a matching word was already found, because we
- * might find a longer match this way (French: "qu" and "qu'a-t-elle").
- * The check above may have added characters to mi_fword, thus we need to
- * truncate it after the basic word for the hash lookup.
- */
- cc = mip->mi_fword[mip->mi_fendlen];
- mip->mi_fword[mip->mi_fendlen] = NUL;
- hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash);
- mip->mi_fword[mip->mi_fendlen] = cc;
- if (!HASHITEM_EMPTY(hi))
+ else
{
- capflags_save = mip->mi_capflags;
-
- /* Go through the list of matching prewords. */
- for (ai = HI2AI(hi); ai != NULL; ai = ai->ai_next)
+ mi.mi_fend = ptr;
+ if (spell_iswordc(mi.mi_fend))
{
- /* Check that the lead string matches before the word. */
- p = ai->ai_add + ai->ai_addlen + ai->ai_choplen + 2;
- if (ai->ai_leadlen > 0)
+ /* Make case-folded copy of the characters until the next non-word
+ * character. */
+ do
{
- if (mip->mi_word - mip->mi_line < ai->ai_leadlen
- || STRNCMP(mip->mi_word - ai->ai_leadlen, p,
- ai->ai_leadlen) != 0)
- continue;
- p += ai->ai_leadlen + 1; /* advance "p" to tail */
- }
- else
- ++p; /* advance "p" to tail */
+ mb_ptr_adv(mi.mi_fend);
+ } while (*mi.mi_fend != NUL && spell_iswordc(mi.mi_fend));
- /* Check that the tail string matches after the word. Need
- * to fold case first. */
- if (ai->ai_taillen > 0)
- {
- if (ai->ai_taillen >= mip->mi_faddlen)
- {
- fold_addchars(mip, ai->ai_taillen);
- if (ai->ai_taillen > mip->mi_faddlen)
- continue; /* not enough chars, can't match */
- }
- if (STRNCMP(mip->mi_fword + mip->mi_fendlen,
- p, ai->ai_taillen) != 0)
- continue;
- }
+ (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
+ MAXWLEN + 1);
+ mi.mi_fwordlen = STRLEN(mi.mi_fword);
- /*
- * This preword matches. Remove the preword and check that
- * the resulting word exits.
- */
-
- /* Find the place in the original word where the tail ends,
- * needed for case checks. */
-#ifdef FEAT_MBYTE
- charlen = mb_charlen(p);
-#else
- charlen = ai->ai_taillen;
-#endif
- cstart = mip->mi_end;
- for (n = 0; n < charlen; ++n)
- mb_ptr_adv(cstart);
-
- /* The new word starts with the chop. Then add up to the next
- * non-word char. */
- mch_memmove(pword, ai->ai_add + ai->ai_addlen + 1,
- ai->ai_choplen);
- p = mip->mi_fword + mip->mi_fendlen + ai->ai_taillen;
- addlen = ai->ai_taillen;
- while (spell_iswordc(p))
- {
- ++charlen;
-#ifdef FEAT_MBYTE
- addlen += (*mb_ptr2len_check)(p);
-#else
- ++addlen;
-#endif
- mb_ptr_adv(p);
- if (addlen >= mip->mi_faddlen)
- {
- /* Get more folded characters in mip->mi_fword. */
- fold_addchars(mip, addlen);
- if (addlen >= mip->mi_faddlen)
- break; /* not enough chars, can't match */
- }
- }
- mch_memmove(pword + ai->ai_choplen,
- mip->mi_fword + mip->mi_fendlen + ai->ai_taillen,
- addlen - ai->ai_taillen);
- pword[ai->ai_choplen + addlen - ai->ai_taillen] = NUL;
-
- /* Need to set mi_end to find additions. Also set mi_fendlen
- * and mi_faddlen. */
- save_end = mip->mi_end;
- while (--charlen >= 0)
- mb_ptr_adv(mip->mi_end);
- mip->mi_fendlen += addlen;
- mip->mi_faddlen -= addlen;
-
- /* Find the word "pword", caseword "cstart". */
- n = noprefix_match(mip, pword, cstart, ai);
- mip->mi_end = save_end;
- mip->mi_fendlen -= addlen;
- mip->mi_faddlen += addlen;
- if (n)
- valid = TRUE;
+ /* Check the caps type of the word. */
+ mi.mi_capflags = captype(ptr, mi.mi_fend);
- /* If we found a valid word, we still need to try other
- * suffixes, because it may have an addition that's longer. */
+ /* We always use the characters up to the next non-word character,
+ * also for bad words. */
+ mi.mi_end = mi.mi_fend;
}
-
- mip->mi_capflags = capflags_save;
- }
-
- return valid;
-}
-
-/*
- * Check a matching basic word for additions.
- * Return TRUE if we have a valid match.
- */
- static int
-check_adds(mip, fw, req_pref, req_suf)
- matchinf_T *mip;
- fword_T *fw;
- int req_pref; /* required prefix nr, -1 if none */
- int req_suf; /* required suffix nr, -1 if none */
-{
- int valid = FALSE;
- addword_T *aw;
- addword_T *naw = NULL;
- char_u *p;
- int addlen;
- int cc;
- hashitem_T *hi;
- char_u *cp = NULL;
- int n;
-
- /* Check if required prefixes and suffixes are supported. These are on
- * the basic word, not on each addition. */
- if (req_pref >= 0 || req_suf >= 0)
- {
- /* Prefix NRs are stored just after the word in fw_word. */
- cp = fw->fw_word + STRLEN(fw->fw_word) + 1;
- if (req_pref >= 0 && !supports_affix(mip->mi_slang->sl_prefcnt,
- cp, fw->fw_prefixcnt, req_pref))
- return FALSE;
- if (req_suf >= 0)
+ else
{
- /* Suffix NRs are stored just after the Prefix NRs. */
- if (fw->fw_prefixcnt > 0)
- {
- if (mip->mi_slang->sl_prefcnt > 256)
- cp += fw->fw_prefixcnt * 2;
- else
- cp += fw->fw_prefixcnt;
- }
- if (!supports_affix(mip->mi_slang->sl_suffcnt,
- cp, fw->fw_suffixcnt, req_suf))
- return FALSE;
+ /* No word characters. Don't case-fold anything, we may quickly
+ * find out this is not a word (but it could be!). */
+ mi.mi_fwordlen = 0;
+ mi.mi_capflags = 0;
}
- }
- /* A word may be valid without an addition. */
- if (fw->fw_flags & BWF_VALID)
- {
- valid = TRUE;
- if (mip->mi_result != SP_OK)
- {
- if ((fw->fw_region & mip->mi_lp->lp_region) == 0)
- mip->mi_result = SP_LOCAL;
- else
- mip->mi_result = SP_OK;
- }
- /* Set word end, required when matching a word after a preword. */
- if (mip->mi_wend < mip->mi_end)
- mip->mi_wend = mip->mi_end;
- }
+ mi.mi_cend = mi.mi_fend;
- /*
- * Check additions, both before and after the word.
- * This may make the word longer, thus we also need to check
- * when we already found a matching word.
- * When the BWF_ADDHASH flag is present then fw_adds points to a hashtable
- * for quick lookup. Otherwise it points to the list of all possible
- * additions.
- */
- if (fw->fw_flags & BWF_ADDHASH)
- {
- /* Locate the text up to the next end-of-word. */
- if (!mip->mi_did_awend)
- fill_awend(mip);
- if (mip->mi_awend == NULL)
- return valid; /* there is no next word */
-
- cc = *mip->mi_awend;
- *mip->mi_awend = NUL;
- hi = hash_find((hashtab_T *)fw->fw_adds,
- mip->mi_fword + mip->mi_fendlen);
- *mip->mi_awend = cc;
- if (HASHITEM_EMPTY(hi))
- return valid; /* no matching addition */
- aw = HI2ADDWORD(hi);
-
- /* Also check additions without word characters. If they are there,
- * skip the first dummy entry. */
- hi = hash_find((hashtab_T *)fw->fw_adds, NOWC_KEY);
- if (!HASHITEM_EMPTY(hi))
- naw = HI2ADDWORD(hi)->aw_next;
- }
- else
- aw = fw->fw_adds;
+ /* The word is bad unless we recognize it. */
+ mi.mi_result = SP_BAD;
- for ( ; ; aw = aw->aw_next)
- {
- if (aw == NULL)
+ /*
+ * Loop over the languages specified in 'spelllang'.
+ * We check them all, because a matching word may be longer than an
+ * already found matching word.
+ */
+ for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
+ mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
{
- /* At end of list: may also try additions without word chars. */
- if (naw == NULL)
- break;
- aw = naw;
- naw = NULL;
- }
+ /* Check for a matching word in case-folded words. */
+ find_word(&mi, FALSE);
- if (aw->aw_leadlen > 0)
- {
- /* There is a leader, verify that it matches. */
- if (aw->aw_leadlen > mip->mi_word - mip->mi_line
- || STRNCMP(mip->mi_word - aw->aw_leadlen,
- aw->aw_word, aw->aw_leadlen) != 0)
- continue;
- if (mip->mi_word - aw->aw_leadlen > mip->mi_line)
- {
- /* There must not be a word character just before the
- * leader. */
- p = mip->mi_word - aw->aw_leadlen;
- mb_ptr_back(mip->mi_line, p);
- if (spell_iswordc(p))
- continue;
- }
- /* Leader matches. Addition is rest of "aw_word". */
- p = aw->aw_word + aw->aw_leadlen;
+ /* Try keep-case words