diff options
author | Bram Moolenaar <Bram@vim.org> | 2005-06-04 21:55:20 +0000 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2005-06-04 21:55:20 +0000 |
commit | 51485f06246966898f7c00e2e53b1ba4c6855cf7 (patch) | |
tree | c1cfe02ab088ea7a4423f15829e4083303d62a89 | |
parent | 4debb442bd885d182d7f77d1dfcdf143fd7cbf88 (diff) |
updated for version 7.0079
-rw-r--r-- | runtime/doc/develop.txt | 10 | ||||
-rw-r--r-- | runtime/doc/map.txt | 3 | ||||
-rw-r--r-- | runtime/spell/en.utf-8.spl | bin | 1104055 -> 562278 bytes | |||
-rw-r--r-- | src/buffer.c | 21 | ||||
-rw-r--r-- | src/getchar.c | 2 | ||||
-rw-r--r-- | src/mark.c | 12 | ||||
-rw-r--r-- | src/normal.c | 3 | ||||
-rw-r--r-- | src/proto/charset.pro | 1 | ||||
-rw-r--r-- | src/spell.c | 4927 |
9 files changed, 1308 insertions, 3671 deletions
diff --git a/runtime/doc/develop.txt b/runtime/doc/develop.txt index bc8b0e8c09..cceda32b97 100644 --- a/runtime/doc/develop.txt +++ b/runtime/doc/develop.txt @@ -1,4 +1,4 @@ -*develop.txt* For Vim version 7.0aa. Last change: 2005 Mar 29 +*develop.txt* For Vim version 7.0aa. Last change: 2005 Jun 04 VIM REFERENCE MANUAL by Bram Moolenaar @@ -381,10 +381,10 @@ checking engine in Vim, for various reasons: fly (while redrawing), just like syntax highlighting. But the mechanisms used by other code are much slower. Myspell uses a simplistic hashtable, for example. -- For a program like aspell a communication mechanism would have to be setup. - That's complicated to do in a portable way (Unix-only would be relatively - simple, but that's not good enough). And performance will become a problem - (lots of process switching involved). +- For using an external program like aspell a communication mechanism would + have to be setup. That's complicated to do in a portable way (Unix-only + would be relatively simple, but that's not good enough). And performance + will become a problem (lots of process switching involved). - Missing support for words with non-word characters, such as "Etten-Leur" and "et al.", would require marking the pieces of them OK, lowering the reliability. diff --git a/runtime/doc/map.txt b/runtime/doc/map.txt index 1119618d79..18da9ccc06 100644 --- a/runtime/doc/map.txt +++ b/runtime/doc/map.txt @@ -1,4 +1,4 @@ -*map.txt* For Vim version 7.0aa. Last change: 2005 Mar 29 +*map.txt* For Vim version 7.0aa. Last change: 2005 Jun 03 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1032,6 +1032,7 @@ The valid escape sequences are If the first two characters of an escape sequence are "q-" (for example, <q-args>) then the value is quoted in such a way as to make it a valid value for use in an expression. This uses the argument as one single value. +When there is no argument <q-args> is an empty string. To allow commands to pass their arguments on to a user-defined function, there is a special form <f-args> ("function args"). This splits the command diff --git a/runtime/spell/en.utf-8.spl b/runtime/spell/en.utf-8.spl Binary files differindex 156c0979e9..300f6ce807 100644 --- a/runtime/spell/en.utf-8.spl +++ b/runtime/spell/en.utf-8.spl diff --git a/src/buffer.c b/src/buffer.c index 294bc9b166..c54df88175 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -854,11 +854,11 @@ do_bufdel(command, arg, addr_count, start_bnr, end_bnr, forceit) if (deleted == 0) { if (command == DOBUF_UNLOAD) - sprintf((char *)IObuff, _("E515: No buffers were unloaded")); + STRCPY(IObuff, _("E515: No buffers were unloaded")); else if (command == DOBUF_DEL) - sprintf((char *)IObuff, _("E516: No buffers were deleted")); + STRCPY(IObuff, _("E516: No buffers were deleted")); else - sprintf((char *)IObuff, _("E517: No buffers were wiped out")); + STRCPY(IObuff, _("E517: No buffers were wiped out")); errormsg = IObuff; } else if (deleted >= p_report) @@ -2450,7 +2450,7 @@ buflist_list(eap) else home_replace(buf, buf->b_fname, NameBuff, MAXPATHL, TRUE); - sprintf((char *)IObuff, "%3d%c%c%c%c%c \"", + vim_snprintf((char *)IObuff, IOSIZE - 20, "%3d%c%c%c%c%c \"%s\"", buf->b_fnum, buf->b_p_bl ? ' ' : 'u', buf == curbuf ? '%' : @@ -2459,18 +2459,11 @@ buflist_list(eap) (buf->b_nwindows == 0 ? 'h' : 'a'), !buf->b_p_ma ? '-' : (buf->b_p_ro ? '=' : ' '), (buf->b_flags & BF_READERR) ? 'x' - : (bufIsChanged(buf) ? '+' : ' ') - ); - - len = (int)STRLEN(IObuff); - STRNCPY(IObuff + len, NameBuff, IOSIZE - 20 - len); - IObuff[IOSIZE - 20 - len] = NUL; /* make sure it's terminated */ - - len = (int)STRLEN(IObuff); - IObuff[len++] = '"'; + : (bufIsChanged(buf) ? '+' : ' '), + NameBuff); /* put "line 999" in column 40 or after the file name */ - IObuff[len] = NUL; + len = STRLEN(IObuff); i = 40 - vim_strsize(IObuff); do { diff --git a/src/getchar.c b/src/getchar.c index 2ece91aa2e..6d26d6d557 100644 --- a/src/getchar.c +++ b/src/getchar.c @@ -4342,7 +4342,7 @@ put_escstr(fd, strstart, what) if (p != NULL) { while (*p != NUL) - if (putc(*p++, fd) < 0) + if (fputc(*p++, fd) < 0) return FAIL; --str; continue; diff --git a/src/mark.c b/src/mark.c index 6a149bface..9d74b4a7c9 100644 --- a/src/mark.c +++ b/src/mark.c @@ -1445,6 +1445,7 @@ removable(name) char_u *p; char_u part[51]; int retval = FALSE; + int n; name = home_replace_save(NULL, name); if (name != NULL) @@ -1452,11 +1453,14 @@ removable(name) for (p = p_viminfo; *p; ) { copy_option_part(&p, part, 51, ", "); - if (part[0] == 'r' - && MB_STRNICMP(part + 1, name, STRLEN(part + 1)) == 0) + if (part[0] == 'r') { - retval = TRUE; - break; + n = STRLEN(part + 1); + if (MB_STRNICMP(part + 1, name, n) == 0) + { + retval = TRUE; + break; + } } } vim_free(name); diff --git a/src/normal.c b/src/normal.c index 207576698e..c3c7627575 100644 --- a/src/normal.c +++ b/src/normal.c @@ -2823,6 +2823,7 @@ do_mouse(oap, c, dir, count, fixindent) if ((mod_mask & MOD_MASK_MULTI_CLICK) == MOD_MASK_2CLICK) { pos_T *pos = NULL; + int gc; if (is_click) { @@ -2830,7 +2831,7 @@ do_mouse(oap, c, dir, count, fixindent) * not a word character, try finding a match and select a (), * {}, [], #if/#endif, etc. block. */ end_visual = curwin->w_cursor; - while (vim_iswhite(gchar_pos(&end_visual))) + while (gc = gchar_pos(&end_visual), vim_iswhite(gc)) inc(&end_visual); if (oap != NULL) oap->motion_type = MCHAR; diff --git a/src/proto/charset.pro b/src/proto/charset.pro index e839d6374c..9fae4063f4 100644 --- a/src/proto/charset.pro +++ b/src/proto/charset.pro @@ -40,6 +40,7 @@ void getvvcol __ARGS((win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, co void getvcols __ARGS((win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, colnr_T *right)); char_u *skipwhite __ARGS((char_u *p)); char_u *skipdigits __ARGS((char_u *p)); +char_u *skiphex __ARGS((char_u *p)); char_u *skiptodigit __ARGS((char_u *p)); char_u *skiptohex __ARGS((char_u *p)); int vim_isdigit __ARGS((int c)); diff --git a/src/spell.c b/src/spell.c index 9d010f912e..2d80389d96 100644 --- a/src/spell.c +++ b/src/spell.c @@ -10,25 +10,91 @@ /* * spell.c: code for spell checking * - * The basic spell checking mechanism is: - * 1. Isolate a word, up to the next non-word character. - * 2. Find the word in the hashtable of basic words. - * 3. If not found, look in the hashtable with "prewords". These are prefixes - * with a non-word character following a word character, e.g., "de-". - * 4. If still not found, for each matching a prefix try if the word matches - * without the prefix (and with the "chop" string added back). - * 5. If still still not found, for each matching suffix try if the word - * matches without the suffix (and with the "chop" string added back). + * The spell checking mechanism uses a tree (aka trie). Each node in the tree + * has a list of bytes that can appear (siblings). For each byte there is a + * pointer to the node with the byte that follows in the word (child). + * A NUL byte is used where the word may end. + * + * There are two trees: one with case-folded words and one with words in + * original case. The second one is only used for keep-case words and is + * usually small. + * + * Thanks to Olaf Seibert for providing an example implementation of this tree + * and the compression mechanism. * * Matching involves checking the caps type: Onecap ALLCAP KeepCap. - * After finding a matching word check for a leadstring (non-word characters - * before the word) and addstring (more text following, starting with a - * non-word character). * * Why doesn't Vim use aspell/ispell/myspell/etc.? * See ":help develop-spell". */ +/* + * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE> + * + * <HEADER>: <fileID> <regioncnt> <regionname> ... + * <charflagslen> <charflags> <fcharslen> <fchars> + * + * <fileID> 10 bytes "VIMspell05" + * <regioncnt> 1 byte number of regions following (8 supported) + * <regionname> 2 bytes Region name: ca, au, etc. + * First <regionname> is region 1. + * + * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). + * <charflags> N bytes List of flags (first one is for character 128): + * 0x01 word character + * 0x01 upper-case character + * <fcharslen> 2 bytes Number of bytes in <fchars>. + * <fchars> N bytes Folded characters, first one is for character 128. + * + * + * <SUGGEST> : <suggestlen> <more> ... + * + * <suggestlen> 4 bytes Length of <SUGGEST> in bytes, excluding + * <suggestlen>. MSB first. + * <more> To be defined. + * + * + * <LWORDTREE>: <wordtree> + * + * <wordtree>: <nodecount> <nodedata> ... + * + * <nodecount> 4 bytes Number of nodes following. MSB first. + * + * <nodedata>: <siblingcount> <sibling> ... + * + * <siblingcount> 1 byte Number of siblings in this node. The siblings + * follow in sorted order. + * + * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]] + * + * <byte> 1 byte Byte value of the sibling. Special cases: + * BY_NOFLAGS: End of word without flags and for all + * regions. + * BY_FLAGS: End of word, <flags> follow. + * BY_INDEX: Child of sibling is shared, <nodeidx> + * and <xbyte> follow. + * + * <nodeidx> 3 bytes Index of child for this sibling, MSB first. + * + * <xbyte> 1 byte byte value of the sibling. + * + * <flags> 1 byte bitmask of: + * WF_ALLCAP word must have only capitals + * WF_ONECAP first char of word must be capital + * WF_RARE rare word + * WF_REGION <region> follows + * + * <region> 1 byte Bitmask for regions in which word is valid. When + * omitted it's valid in all regions. + * Lowest bit is for region 1. + * + * <KWORDTREE>: <wordtree> + * + * + * All text characters are in 'encoding', but stored as single bytes. + * The region name is ASCII. + */ + #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) # include <io.h> /* for lseek(), must be before vim.h */ #endif @@ -41,21 +107,20 @@ # include <fcntl.h> #endif -#define MAXWLEN 100 /* assume max. word len is this many bytes */ +#define MAXWLEN 250 /* assume max. word len is this many bytes */ -/* - * Structure that is used to store the structures and strings from the - * language file. This avoids the need to allocate space for each individual - * word. It's allocated in big chunks for speed. It's freed all at once when - * 'encoding' changes. - */ -#define SBLOCKSIZE 4096 /* default size of sb_data */ -typedef struct sblock_S sblock_T; -struct sblock_S -{ - sblock_T *sb_next; /* next block in list */ - char_u sb_data[1]; /* data, actually longer */ -}; +/* Flags used for a word. */ +#define WF_REGION 0x01 /* region byte follows */ +#define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ +#define WF_ALLCAP 0x04 /* word must be all capitals */ +#define WF_RARE 0x08 /* rare word */ + +#define WF_KEEPCAP 0x100 /* keep-case word */ + +#define BY_NOFLAGS 0 /* end of word without flags or region */ +#define BY_FLAGS 1 /* end of word, flag byte follows */ +#define BY_INDEX 2 /* child is shared, index follows */ +#define BY_SPECIAL BY_INDEX /* hightest special byte value */ /* Info from "REP" entries in ".aff" file used in af_rep. * TODO: This is not used yet. Either use it or remove it. */ @@ -66,58 +131,33 @@ typedef struct repentry_S } repentry_T; /* - * Structure to store affix info. - */ -typedef struct affitem_S affitem_T; -struct affitem_S -{ - affitem_T *ai_next; /* next affix with same ai_add[] or NULL */ - short_u ai_nr; /* affix number */ - char_u ai_flags; /* AFF_ flags */ - char_u ai_choplen; /* length of chop string in bytes */ - char_u ai_addlen; /* length of ai_add in bytes */ - char_u ai_leadlen; /* for AFF_PREWORD: length of lead string */ - char_u ai_taillen; /* for AFF_PREWORD: length of tail string */ - char_u ai_add[1]; /* Text added to basic word. This stores: - * 0: word for AFF_PREWORD or whole addition - * ai_addlen + 1: chop string - * + ai_choplen + 1: lead string for AFF_PREWORD - * + ai_leadlen + 1: trail string f. AFF_PREWORD - */ -}; - -/* Get affitem_T pointer from hashitem that uses ai_add */ -static affitem_T dumai; -#define HI2AI(hi) ((affitem_T *)((hi)->hi_key - (dumai.ai_add - (char_u *)&dumai))) - -/* ai_flags: Affix item flags */ -#define AFF_COMBINE 0x01 /* prefix combines with suffix */ -#define AFF_PREWORD 0x02 /* prefix includes word */ - -/* * Structure used to store words and other info for one language, loaded from * a .spl file. - * The main access is through hashtable "sl_word", using the case-folded - * word as the key. This finds a linked list of fword_T. + * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the + * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. + * + * The "byts" array stores the possible bytes in each tree node, preceded by + * the number of possible bytes, sorted on byte value: + * <len> <byte1> <byte2> ... + * The "idxs" array stores the index of the child node corresponding to the + * byte in "byts". + * Exception: when the byte is zero, the word may end here and "idxs" holds + * the flags and region for the word. There may be several zeros in sequence + * for alternative flag/region combinations. */ typedef struct slang_S slang_T; struct slang_S { slang_T *sl_next; /* next language */ char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ - hashtab_T sl_words; /* main word table, fword_T */ - int sl_prefcnt; /* number of prefix NRs */ - garray_T sl_preftab; /* list of hashtables to lookup prefixes */ - affitem_T *sl_prefzero; /* list of prefixes with zero add length */ - hashtab_T sl_prewords; /* prefixes that include a word */ - int sl_suffcnt; /* number of suffix NRs */ - garray_T sl_sufftab; /* list of hashtables to lookup suffixes */ - affitem_T *sl_suffzero; /* list of suffixes with zero add length */ + char_u *sl_fbyts; /* case-folded word bytes */ + int *sl_fidxs; /* case-folded word indexes */ + char_u *sl_kbyts; /* keep-case word bytes */ + int *sl_kidxs; /* keep-case word indexes */ char_u *sl_try; /* "TRY" from .aff file TODO: not used */ garray_T sl_rep; /* list of repentry_T entries from REP lines * TODO not used */ char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ - sblock_T *sl_block; /* list with allocated memory blocks */ int sl_error; /* error while loading */ }; @@ -125,57 +165,6 @@ struct slang_S * languages. */ static slang_T *first_lang = NULL; -/* - * Structure to store an addition to a basic word. - * There are many of these, keep it small! - */ -typedef struct addword_S addword_T; -struct addword_S -{ - addword_T *aw_next; /* next addition */ - char_u aw_flags; /* ADD_ flags */ - char_u aw_region; /* region for word with this addition */ - char_u aw_leadlen; /* byte length of lead in aw_word */ - char_u aw_wordlen; /* byte length of first word in aw_word */ - char_u aw_saveb; /* saved byte where aw_word[] is truncated at - end of hashtable key; NUL when not using - hashtable */ - char_u aw_word[1]; /* text, actually longer: case-folded addition - plus, with ADD_KEEPCAP: keep-case addition */ -}; - -/* Get addword_T pointer from hashitem that uses aw_word */ -static addword_T dumaw; -#define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw))) - -/* - * Structure to store a basic word. - * There are many of these, keep it small! - * The list of prefix and suffix NRs is stored after "fw_word" to avoid the - * need for two extra pointers. - */ -typedef struct fword_S fword_T; -struct fword_S -{ - fword_T *fw_next; /* same basic word with different caps and/or - * affixes */ - addword_T *fw_adds; /* first addword_T entry */ - short_u fw_flags; /* BWF_ flags */ - char_u fw_region; /* region bits */ - char_u fw_prefixcnt; /* number of prefix NRs */ - char_u fw_suffixcnt; /* number of suffix NRs */ - char_u fw_word[1]; /* actually longer: - * 0: case folded word or keep-case word when - * (flags & BWF_KEEPCAP) - * + word length + 1: list of prefix NRs - * + fw_prefixcnt [* 2]: list of suffix NRs - */ -}; - -/* Get fword_T pointer from hashitem that uses fw_word */ -static fword_T dumfw; -#define HI2FWORD(hi) ((fword_T *)((hi)->hi_key - (dumfw.fw_word - (char_u *)&dumfw))) - #define REGION_ALL 0xff @@ -195,39 +184,7 @@ typedef struct langp_S #define SP_RARE 2 #define SP_LOCAL 3 -/* flags used for basic words in the spell file */ -#define BWF_VALID 0x01 /* word is valid without additions */ -#define BWF_REGION 0x02 /* region byte follows */ -#define BWF_ONECAP 0x04 /* first letter must be capital */ -#define BWF_SUFFIX 0x08 /* has suffix NR list */ -#define BWF_SECOND 0x10 /* second flags byte follows */ - -#define BWF_ADDS 0x0100 /* there are additions */ -#define BWF_PREFIX 0x0200 /* has prefix NR list */ -#define BWF_ALLCAP 0x0400 /* all letters must be capital (not used - for single-letter words) */ -#define BWF_KEEPCAP 0x0800 /* Keep case as-is */ -#define BWF_ADDS_M 0x1000 /* there are more than 255 additions */ - -#define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */ - -#define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without - any word character */ - -/* flags used for addition in the spell file */ -#define ADD_REGION 0x02 /* region byte follows */ -#define ADD_ONECAP 0x04 /* first letter must be capital */ -#define ADD_LEADLEN 0x10 /* there is a leadlen byte */ -#define ADD_COPYLEN 0x20 /* there is a copylen byte */ -#define ADD_ALLCAP 0x40 /* all letters must be capital (not used - for single-letter words) */ -#define ADD_KEEPCAP 0x80 /* fixed case */ - -/* Translate ADD_ flags to BWF_ flags. - * (Needed to keep ADD_ flags in one byte.) */ -#define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) - -#define VIMSPELLMAGIC "VIMspell04" /* string at start of Vim spell file */ +#define VIMSPELLMAGIC "VIMspell05" /* string at start of Vim spell file */ #define VIMSPELLMAGICL 10 /* @@ -239,49 +196,33 @@ typedef struct matchinf_S slang_T *mi_slang; /* info for the language */ /* pointers to original text to be checked */ - char_u *mi_line; /* start of line containing word */ char_u *mi_word; /* start of word being checked */ - char_u *mi_end; /* first non-word char after mi_word */ - char_u *mi_wend; /* end of matching word (is mi_end - * or further) */ + char_u *mi_end; /* end of matching word */ char_u *mi_fend; /* next char to be added to mi_fword */ + char_u *mi_cend; /* char after what was used for + mi_capflags */ /* case-folded text */ char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */ - int mi_fendlen; /* byte length of first word in - mi_fword */ - int mi_faddlen; /* byte length of text in mi_fword - after first word */ - char_u *mi_cword; /* word to check, points in mi_fword */ - char_u *mi_awend; /* after next word, to check for - addition (NULL when not done yet) */ - int mi_did_awend; /* did compute mi_awend */ + int mi_fwordlen; /* nr of valid bytes in mi_fword */ /* others */ int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ - int mi_capflags; /* BWF_ONECAP BWF_ALLCAP BWF_KEEPCAP */ + int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ } matchinf_T; -static int word_match __ARGS((matchinf_T *mip)); -static int check_adds __ARGS((matchinf_T *mip, fword_T *fw, int req_pref, int req_suf)); -static void fill_awend __ARGS((matchinf_T *mip)); -static void fold_addchars __ARGS((matchinf_T *mip, int addlen)); -static int supports_affix __ARGS((int cnt, char_u *afflist, int afflistlen, int nr)); -static int prefix_match __ARGS((matchinf_T *mip)); -static int noprefix_match __ARGS((matchinf_T *mip, char_u *pword, char_u *cstart, affitem_T *ai)); -static int suffix_match __ARGS((matchinf_T *mip)); -static int match_caps __ARGS((int flags, char_u *caseword, matchinf_T *mip, char_u *cword, char_u *end)); static slang_T *slang_alloc __ARGS((char_u *lang)); static void slang_free __ARGS((slang_T *lp)); +static void find_word __ARGS((matchinf_T *mip, int keepcap)); static slang_T *spell_load_lang __ARGS((char_u *lang)); static void spell_load_file __ARGS((char_u *fname, void *cookie)); -static void *getroom __ARGS((slang_T *lp, int *bl_used, int len)); +static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx)); static int find_region __ARGS((char_u *rp, char_u *region)); static int captype __ARGS((char_u *word, char_u *end)); /* * Main spell-checking function. - * "ptr" points to the start of a word. + * "ptr" points to a character that could be the start of a word. * "*attrp" is set to the attributes for a badly spelled word. For a non-word * or when it's OK it remains unchanged. * This must only be called when 'spelllang' is not empty. @@ -289,885 +230,328 @@ static int captype __ARGS((char_u *word, char_u *end)); * caller can skip over the word. */ int -spell_check(wp, line, ptr, attrp) +spell_check(wp, ptr, attrp) win_T *wp; /* current window */ - char_u *line; /* start of line where "ptr" points into */ char_u *ptr; int *attrp; { matchinf_T mi; /* Most things are put in "mi" so that it can be passed to functions quickly. */ - /* Find the end of the word. We already know that *ptr is a word char. */ + /* Find the end of the word. */ mi.mi_word = ptr; mi.mi_end = ptr; - do - { - mb_ptr_adv(mi.mi_end); - } while (*mi.mi_end != NUL && spell_iswordc(mi.mi_end)); - /* A word starting with a number is always OK. */ + /* A word starting with a number is always OK. Also skip hexadecimal + * numbers 0xFF99 and 0X99FF. */ if (*ptr >= '0' && *ptr <= '9') - return (int)(mi.mi_end - ptr); - - /* Make case-folded copy of the word. */ - (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); - mi.mi_cword = mi.mi_fword; - mi.mi_fendlen = STRLEN(mi.mi_fword); - mi.mi_faddlen = 0; - mi.mi_fend = mi.mi_end; - - /* Check the caps type of the word. */ - mi.mi_capflags = captype(ptr, mi.mi_end); - - /* The word is bad unless we recognize it. */ - mi.mi_result = SP_BAD; - mi.mi_wend = mi.mi_end; - - mi.mi_awend = NULL; - mi.mi_did_awend = FALSE; - mi.mi_line = line; - - /* - * Loop over the languages specified in 'spelllang'. - * We check them all, because a matching word may have additions that are - * longer than an already found matching word. - */ - for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); - mi.mi_lp->lp_slang != NULL; ++mi.mi_lp) - { - /* - * Check for a matching word. - * If not found or wrong region try removing prefixes (and then - * suffixes). - * If still not found or wrong region try removing suffixes. - */ - mi.mi_slang = mi.mi_lp->lp_slang; - if (!word_match(&mi) || mi.mi_result != SP_OK) - if (!prefix_match(&mi) || mi.mi_result != SP_OK) - suffix_match(&mi); - } - - if (mi.mi_result != SP_OK) { - if (mi.mi_result == SP_BAD) - *attrp = highlight_attr[HLF_SPB]; - else if (mi.mi_result == SP_RARE) - *attrp = highlight_attr[HLF_SPR]; + if (*ptr == '0' && (ptr[1] == 'x' || ptr[2] == 'X')) + mi.mi_end = skiphex(ptr); else - *attrp = highlight_attr[HLF_SPL]; + mi.mi_end = skipdigits(ptr); } - - return (int)(mi.mi_wend - ptr); -} - -/* - * Check if the word "mip->mi_word" matches. - * "mip->mi_fword" is the same word case-folded; - * - * This checks the word as a whole and for prefixes that include a word. - * - * Note that when called mi_fword only contains the word up to mip->mi_end, - * but when checking additions it gets longer. - */ - static int -word_match(mip) - matchinf_T *mip; -{ - hash_T fhash = hash_hash(mip->mi_fword); - hashitem_T *hi; - fword_T *fw; - int valid = FALSE; - char_u *p; - char_u pword[MAXWLEN + 1]; - int charlen; - int capflags_save; - affitem_T *ai; - char_u *cstart; - int addlen; - int n; - char_u *save_end; - int cc; - - hi = hash_lookup(&mip->mi_slang->sl_words, mip->mi_fword, fhash); - if (!HASHITEM_EMPTY(hi)) - { - /* - * Find a basic word for which the case of "mi_word" is correct. - * If it is, check additions and use the longest one. - */ - for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next) - if (match_caps(fw->fw_flags, fw->fw_word, mip, - mip->mi_word, mip->mi_end)) - valid |= check_adds(mip, fw, -1, -1); - } - - /* - * Try finding a matching preword for "mip->mi_word". These are - * prefixes that have a non-word character after a word character: - * "d'", "de-", "'s-", "l'de-". But not "'s". - * Also need to do this when a matching word was already found, because we - * might find a longer match this way (French: "qu" and "qu'a-t-elle"). - * The check above may have added characters to mi_fword, thus we need to - * truncate it after the basic word for the hash lookup. - */ - cc = mip->mi_fword[mip->mi_fendlen]; - mip->mi_fword[mip->mi_fendlen] = NUL; - hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash); - mip->mi_fword[mip->mi_fendlen] = cc; - if (!HASHITEM_EMPTY(hi)) + else { - capflags_save = mip->mi_capflags; - - /* Go through the list of matching prewords. */ - for (ai = HI2AI(hi); ai != NULL; ai = ai->ai_next) + mi.mi_fend = ptr; + if (spell_iswordc(mi.mi_fend)) { - /* Check that the lead string matches before the word. */ - p = ai->ai_add + ai->ai_addlen + ai->ai_choplen + 2; - if (ai->ai_leadlen > 0) + /* Make case-folded copy of the characters until the next non-word + * character. */ + do { - if (mip->mi_word - mip->mi_line < ai->ai_leadlen - || STRNCMP(mip->mi_word - ai->ai_leadlen, p, - ai->ai_leadlen) != 0) - continue; - p += ai->ai_leadlen + 1; /* advance "p" to tail */ - } - else - ++p; /* advance "p" to tail */ + mb_ptr_adv(mi.mi_fend); + } while (*mi.mi_fend != NUL && spell_iswordc(mi.mi_fend)); - /* Check that the tail string matches after the word. Need - * to fold case first. */ - if (ai->ai_taillen > 0) - { - if (ai->ai_taillen >= mip->mi_faddlen) - { - fold_addchars(mip, ai->ai_taillen); - if (ai->ai_taillen > mip->mi_faddlen) - continue; /* not enough chars, can't match */ - } - if (STRNCMP(mip->mi_fword + mip->mi_fendlen, - p, ai->ai_taillen) != 0) - continue; - } + (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, + MAXWLEN + 1); + mi.mi_fwordlen = STRLEN(mi.mi_fword); - /* - * This preword matches. Remove the preword and check that - * the resulting word exits. - */ - - /* Find the place in the original word where the tail ends, - * needed for case checks. */ -#ifdef FEAT_MBYTE - charlen = mb_charlen(p); -#else - charlen = ai->ai_taillen; -#endif - cstart = mip->mi_end; - for (n = 0; n < charlen; ++n) - mb_ptr_adv(cstart); - - /* The new word starts with the chop. Then add up to the next - * non-word char. */ - mch_memmove(pword, ai->ai_add + ai->ai_addlen + 1, - ai->ai_choplen); - p = mip->mi_fword + mip->mi_fendlen + ai->ai_taillen; - addlen = ai->ai_taillen; - while (spell_iswordc(p)) - { - ++charlen; -#ifdef FEAT_MBYTE - addlen += (*mb_ptr2len_check)(p); -#else - ++addlen; -#endif - mb_ptr_adv(p); - if (addlen >= mip->mi_faddlen) - { - /* Get more folded characters in mip->mi_fword. */ - fold_addchars(mip, addlen); - if (addlen >= mip->mi_faddlen) - break; /* not enough chars, can't match */ - } - } - mch_memmove(pword + ai->ai_choplen, - mip->mi_fword + mip->mi_fendlen + ai->ai_taillen, - addlen - ai->ai_taillen); - pword[ai->ai_choplen + addlen - ai->ai_taillen] = NUL; - - /* Need to set mi_end to find additions. Also set mi_fendlen - * and mi_faddlen. */ - save_end = mip->mi_end; - while (--charlen >= 0) - mb_ptr_adv(mip->mi_end); - mip->mi_fendlen += addlen; - mip->mi_faddlen -= addlen; - - /* Find the word "pword", caseword "cstart". */ - n = noprefix_match(mip, pword, cstart, ai); - mip->mi_end = save_end; - mip->mi_fendlen -= addlen; - mip->mi_faddlen += addlen; - if (n) - valid = TRUE; + /* Check the caps type of the word. */ + mi.mi_capflags = captype(ptr, mi.mi_fend); - /* If we found a valid word, we still need to try other - * suffixes, because it may have an addition that's longer. */ + /* We always use the characters up to the next non-word character, + * also for bad words. */ + mi.mi_end = mi.mi_fend; } - - mip->mi_capflags = capflags_save; - } - - return valid; -} - -/* - * Check a matching basic word for additions. - * Return TRUE if we have a valid match. - */ - static int -check_adds(mip, fw, req_pref, req_suf) - matchinf_T *mip; - fword_T *fw; - int req_pref; /* required prefix nr, -1 if none */ - int req_suf; /* required suffix nr, -1 if none */ -{ - int valid = FALSE; - addword_T *aw; - addword_T *naw = NULL; - char_u *p; - int addlen; - int cc; - hashitem_T *hi; - char_u *cp = NULL; - int n; - - /* Check if required prefixes and suffixes are supported. These are on - * the basic word, not on each addition. */ - if (req_pref >= 0 || req_suf >= 0) - { - /* Prefix NRs are stored just after the word in fw_word. */ - cp = fw->fw_word + STRLEN(fw->fw_word) + 1; - if (req_pref >= 0 && !supports_affix(mip->mi_slang->sl_prefcnt, - cp, fw->fw_prefixcnt, req_pref)) - return FALSE; - if (req_suf >= 0) + else { - /* Suffix NRs are stored just after the Prefix NRs. */ - if (fw->fw_prefixcnt > 0) - { - if (mip->mi_slang->sl_prefcnt > 256) - cp += fw->fw_prefixcnt * 2; - else - cp += fw->fw_prefixcnt; - } - if (!supports_affix(mip->mi_slang->sl_suffcnt, - cp, fw->fw_suffixcnt, req_suf)) - return FALSE; + /* No word characters. Don't case-fold anything, we may quickly + * find out this is not a word (but it could be!). */ + mi.mi_fwordlen = 0; + mi.mi_capflags = 0; } - } - /* A word may be valid without an addition. */ - if (fw->fw_flags & BWF_VALID) - { - valid = TRUE; - if (mip->mi_result != SP_OK) - { - if ((fw->fw_region & mip->mi_lp->lp_region) == 0) - mip->mi_result = SP_LOCAL; - else - mip->mi_result = SP_OK; - } - /* Set word end, required when matching a word after a preword. */ - if (mip->mi_wend < mip->mi_end) - mip->mi_wend = mip->mi_end; - } + mi.mi_cend = mi.mi_fend; - /* - * Check additions, both before and after the word. - * This may make the word longer, thus we also need to check - * when we already found a matching word. - * When the BWF_ADDHASH flag is present then fw_adds points to a hashtable - * for quick lookup. Otherwise it points to the list of all possible - * additions. - */ - if (fw->fw_flags & BWF_ADDHASH) - { - /* Locate the text up to the next end-of-word. */ - if (!mip->mi_did_awend) - fill_awend(mip); - if (mip->mi_awend == NULL) - return valid; /* there is no next word */ - - cc = *mip->mi_awend; - *mip->mi_awend = NUL; - hi = hash_find((hashtab_T *)fw->fw_adds, - mip->mi_fword + mip->mi_fendlen); - *mip->mi_awend = cc; - if (HASHITEM_EMPTY(hi)) - return valid; /* no matching addition */ - aw = HI2ADDWORD(hi); - - /* Also check additions without word characters. If they are there, - * skip the first dummy entry. */ - hi = hash_find((hashtab_T *)fw->fw_adds, NOWC_KEY); - if (!HASHITEM_EMPTY(hi)) - naw = HI2ADDWORD(hi)->aw_next; - } - else - aw = fw->fw_adds; + /* The word is bad unless we recognize it. */ + mi.mi_result = SP_BAD; - for ( ; ; aw = aw->aw_next) - { - if (aw == NULL) + /* + * Loop over the languages specified in 'spelllang'. + * We check them all, because a matching word may be longer than an + * already found matching word. + */ + for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); + mi.mi_lp->lp_slang != NULL; ++mi.mi_lp) { - /* At end of list: may also try additions without word chars. */ - if (naw == NULL) - break; - aw = naw; - naw = NULL; - } + /* Check for a matching word in case-folded words. */ + find_word(&mi, FALSE); - if (aw->aw_leadlen > 0) - { - /* There is a leader, verify that it matches. */ - if (aw->aw_leadlen > mip->mi_word - mip->mi_line - || STRNCMP(mip->mi_word - aw->aw_leadlen, - aw->aw_word, aw->aw_leadlen) != 0) - continue; - if (mip->mi_word - aw->aw_leadlen > mip->mi_line) - { - /* There must not be a word character just before the - * leader. */ - p = mip->mi_word - aw->aw_leadlen; - mb_ptr_back(mip->mi_line, p); - if (spell_iswordc(p)) - continue; - } - /* Leader matches. Addition is rest of "aw_word". */ - p = aw->aw_word + aw->aw_leadlen; + /* Try keep-case words |