/* vi:set ts=8 sts=4 sw=4:
*
* VIM - Vi IMproved by Bram Moolenaar
*
* Do ":help uganda" in Vim to read copying and usage conditions.
* Do ":help credits" in Vim to see a list of people who contributed.
* See README.txt for an overview of the Vim source code.
*/
/*
* spell.c: code for spell checking
*
* Terminology:
* "dword" is a dictionary word, made out of letters and digits.
* "nword" is a word with a character that's not a letter or digit.
* "word" is either a "dword" or an "nword".
*/
#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
# include <io.h> /* for lseek(), must be before vim.h */
#endif
#include "vim.h"
#if defined(FEAT_SYN_HL) || defined(PROTO)
#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
/* Assumed upper bound, in bytes, on the length of a single word.  The word
 * buffers declared in spell_check() are MAXWLEN + 1 bytes each. */
#define MAXWLEN 100 /* assume max. word len is this many bytes */
/*
 * Structure that is used to store the text from the language file.  This
 * avoids the need to allocate and copy each individual word.  It's
 * allocated in big chunks for speed.
 *
 * Blocks are chained through "sb_next"; slang_T.sl_block points to the list.
 * "sb_data" is declared with a single element but is meant to extend past
 * the end of the struct, so a block has to be allocated larger than
 * sizeof(sblock_T).
 */
#define SBLOCKSIZE 4096 /* default size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
sblock_T *sb_next; /* next block in list */
char_u sb_data[1]; /* data, actually longer */
};
/* Structure to store words and additions.  Used twice: once for case-folded
 * words and once for keep-case words (see sl_fwords and sl_kwords in
 * slang_T). */
typedef struct winfo_S
{
/* Hashtable with all words.  Items are either dword_T or nword_T; check the
 * flags for DW_NWORD to tell which one an item is. */
hashtab_T wi_ht;
/* Table with pointers to additions, each stored in a dword_T. */
garray_T wi_add;
int wi_addlen; /* longest addition length */
} winfo_T;
/*
 * Structure used to store words and other info for one language.
 * Languages are chained through "sl_next".
 */
typedef struct slang_S slang_T;
struct slang_S
{
slang_T *sl_next; /* next language */
/* Language name "en", "nl", etc.  Only two bytes: there is no room for a
 * terminating NUL. */
char_u sl_name[2];
winfo_T sl_fwords; /* case-folded words and additions */
winfo_T sl_kwords; /* keep-case words and additions */
/* Table with up to 8 region names plus NUL (17 bytes, which fits 8
 * two-byte names). */
char_u sl_regions[17];
sblock_T *sl_block; /* list with allocated memory blocks */
};
/* NOTE(review): presumably the head of the list of loaded languages, linked
 * through sl_next -- confirm against the loading code. */
static slang_T *first_lang = NULL;
/* Entry for a dword in the "wi_ht" hashtable of a winfo_T.  Also used for
 * part of an nword, starting with the first non-word character.  And used
 * for additions in wi_add.
 *
 * The member order matters: HI2DWORD() steps back from dw_word to the start
 * of the struct, and nword_T ends with the same three members so that both
 * kinds of entry can live in one hashtable. */
typedef struct dword_S
{
char_u dw_region; /* one bit per region where it's valid */
char_u dw_flags; /* DW_* flags (DW_CAP, DW_RARE, ...) */
char_u dw_word[1]; /* actually longer, NUL terminated */
} dword_T;
/* Region bitmask with all eight bits set; used where a region mask is
 * expected but no specific region is selected (see lp_region in langp_T). */
#define REGION_ALL 0xff

/*
 * Get the dword_T pointer from a hashitem whose key points at dw_word.
 * The constant 2 is the offset of dw_word in dword_T: it is preceded by the
 * two members dw_region and dw_flags (assumes char_u is one byte wide).
 *
 * Both the argument and the whole expansion are parenthesized, so that
 * HI2DWORD(hi)->dw_flags and HI2DWORD(&item) parse as intended.
 */
#define HI2DWORD(hi) ((dword_T *)((hi)->hi_key - 2))
/* Entry for an nword in the "wi_ht" hashtable of a winfo_T.  Note that the
 * last three items must be identical to dword_T, so that they can be in the
 * same hashtable. */
typedef struct nword_S
{
/* Table with pointers to dword_T, one for each part that starts with a
 * non-word character. */
garray_T nw_ga;
int nw_maxlen; /* longest nword length (after the dword) */
char_u nw_region; /* one bit per region where it's valid */
char_u nw_flags; /* DW_* flags (DW_NWORD is set for an nword_T) */
char_u nw_word[1]; /* actually longer, NUL terminated */
} nword_T;
/* Get nword_T pointer from hashitem that uses nw_word.
 * "dumnw" is a dummy instance that is only used to compute the byte offset
 * of nw_word inside nword_T; that offset is subtracted from the key pointer
 * to get back to the start of the struct. */
static nword_T dumnw;
#define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
/* Bit flags stored in dw_flags of dword_T and nw_flags of nword_T. */
#define DW_CAP 0x01 /* word must start with capital */
#define DW_RARE 0x02 /* rare word */
#define DW_NWORD 0x04 /* this is an nword_T */
#define DW_DWORD 0x08 /* (also) use as dword without nword */
/*
 * Structure used in "b_langp", filled from 'spelllang'.
 * One entry per language; the entry after the last language has lp_slang
 * set to NULL.
 */
typedef struct langp_S
{
slang_T *lp_slang; /* info for this language (NULL for last one) */
int lp_region; /* bitmask for region or REGION_ALL */
} langp_T;
/* Pointer to the langp_T at index "i" in growarray "ga".  "ga" is the
 * garray_T itself, not a pointer to it. */
#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))

/* The dword_T pointer at index "i" in the growarray that "gap" points to.
 * Expands to an lvalue, so it can also be assigned to.  The index and the
 * whole expansion are parenthesized, so that an expression can be used as
 * the index and "->" can be applied to the result. */
#define DWORD_ENTRY(gap, i) (*(((dword_T **)(gap)->ga_data) + (i)))
/* NOTE(review): presumably the possible results of checking one word (OK,
 * badly spelled, rare, only valid in another region) -- the code that uses
 * these values is not visible here, confirm before relying on this. */
#define SP_OK 0
#define SP_BAD 1
#define SP_RARE 2
#define SP_LOCAL 3
/* Error message E753; the "%s" is to be filled in by the code reporting the
 * error. */
static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
/* Prototypes for functions that are local to this file. */
static slang_T *spell_load_lang __ARGS((char_u *lang));
static void spell_load_file __ARGS((char_u *fname));
static int find_region __ARGS((char_u *rp, char_u *region));
/*
* Main spell-checking function.
* "ptr" points to the start of a word.
* "*attrp" is set to the attributes for a badly spelled word. For a non-word
* or when it's OK it remains unchanged.
* This must only be called when 'spelllang' is not empty.
* Returns the length of the word in bytes, also when it's OK, so that the
* caller can skip over the word.
*/
int
spell_check(wp, ptr, attrp)
win_T *wp; /* current window */
char_u *ptr;
int *attrp;
{
char_u *e; /* end of word */
char_u *ne; /* new end of word */
char_u *me; /* max. end of match */
langp_T *lp;
int result;
int len = 0;
hashitem_T *hi;
int round;
char_u kword[MAXWLEN + 1]; /* word copy */
char_u fword[MAXWLEN + 1]; /* word with case folded */
char_u match[MAXWLEN + 1]; /* fword with additional chars */
char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
char_u *clen;
int cidx = 0; /* char index in xwordclen[] */
hash_T fhash; /* hash for fword */
hash_T khash; /* hash for kword */
int match_len = 0; /* length of match[] */
int fmatch_len = 0; /* length of nword match in chars */
garray_T *gap;
int l, t;
char_u *p, *tp;
int n;