summaryrefslogtreecommitdiffstats
path: root/match.c
AgeCommit message (Expand)Author
2002-03-05 - markus@cvs.openbsd.org 2002/03/01 13:12:10Ben Lindstrom
2002-03-05 - stevesk@cvs.openbsd.org 2002/02/28 19:36:28Ben Lindstrom
2002-02-13 - markus@cvs.openbsd.org 2002/02/11 16:21:42Damien Miller
2001-12-21 - deraadt@cvs.openbsd.org 2001/12/19 07:18:56Damien Miller
2001-12-06 - markus@cvs.openbsd.org 2001/12/05 16:54:51Ben Lindstrom
2001-07-04 - markus@cvs.openbsd.org 2001/06/27 04:48:53Ben Lindstrom
2001-06-25 - markus@cvs.openbsd.org 2001/06/24 05:25:10Ben Lindstrom
2001-03-11 - markus@cvs.openbsd.org 2001/03/10 17:51:04Ben Lindstrom
2001-01-22Hopefully things did not get mixed around too much. It compiles underBen Lindstrom
2000-12-22One way to massive patch. <sigh> It compiles and works under Linux..Ben Lindstrom
2000-09-16 - (djm) Merge OpenBSD changes:Damien Miller
2000-06-22 - OpenBSD CVS Updates:Damien Miller
2000-06-07 - (djm) Fix rsh path in RPMs. Report from Jason L Tibbitts IIIDamien Miller
2000-04-16 - OpenBSD CVS updates.Damien Miller
2000-03-26 - OpenBSD CVS updateDamien Miller
1999-11-25 - More reformatting merged from OpenBSD CVSDamien Miller
1999-11-25 - Merged very large OpenBSD source code reformatDamien Miller
1999-10-27Initial revisionDamien Miller
='n222' href='#n222'>222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved    by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * arabic.c: functions for Arabic language
 *
 * Author: Nadim Shaikli & Isam Bayazidi
 * Farsi support and restructuring to make adding new letters easier by Ali
 * Gholami Rudi.  Further work by Ameretat Reith.
 */

/*
 * Sorted list of unicode Arabic characters.  Each entry holds the
 * presentation forms of a letter.
 *
 * Arabic characters are categorized into the following types:
 *
 * Isolated	- iso-8859-6 form
 * Initial	- unicode form-B start
 * Medial	- unicode form-B middle
 * Final	- unicode form-B final
 * Stand-Alone	- unicode form-B isolated
 */

#include "vim.h"

#if defined(FEAT_ARABIC) || defined(PROTO)

// Unicode values for Arabic characters.
// These names are the lookup keys in the first column of achars[] below, so
// any constant added here also needs an achars[] row to take part in shaping.

// 0x0621 - 0x063a
#define a_HAMZA				0x0621
#define a_ALEF_MADDA			0x0622
#define a_ALEF_HAMZA_ABOVE		0x0623
#define a_WAW_HAMZA			0x0624
#define a_ALEF_HAMZA_BELOW		0x0625
#define a_YEH_HAMZA			0x0626
#define a_ALEF				0x0627
#define a_BEH				0x0628
#define a_TEH_MARBUTA			0x0629
#define a_TEH				0x062a
#define a_THEH				0x062b
#define a_JEEM				0x062c
#define a_HAH				0x062d
#define a_KHAH				0x062e
#define a_DAL				0x062f
#define a_THAL				0x0630
#define a_REH				0x0631
#define a_ZAIN				0x0632
#define a_SEEN				0x0633
#define a_SHEEN				0x0634
#define a_SAD				0x0635
#define a_DAD				0x0636
#define a_TAH				0x0637
#define a_ZAH				0x0638
#define a_AIN				0x0639
#define a_GHAIN				0x063a
// 0x063b - 0x063f have no name here; the list resumes at 0x0640
#define a_TATWEEL			0x0640
#define a_FEH				0x0641
#define a_QAF				0x0642
#define a_KAF				0x0643
#define a_LAM				0x0644
#define a_MEEM				0x0645
#define a_NOON				0x0646
#define a_HEH				0x0647
#define a_WAW				0x0648
#define a_ALEF_MAKSURA			0x0649
#define a_YEH				0x064a
// 0x064b - 0x0655: none of these has an initial or final form in achars[]
#define a_FATHATAN			0x064b
#define a_DAMMATAN			0x064c
#define a_KASRATAN			0x064d
#define a_FATHA				0x064e
#define a_DAMMA				0x064f
#define a_KASRA				0x0650
#define a_SHADDA			0x0651
#define a_SUKUN				0x0652
#define a_MADDA_ABOVE			0x0653
#define a_HAMZA_ABOVE			0x0654
#define a_HAMZA_BELOW			0x0655

// NOTE(review): presumably the letters added for the Farsi support mentioned
// in the file header - confirm.
#define a_PEH				0x067e
#define a_TCHEH				0x0686
#define a_JEH				0x0698
#define a_FKAF				0x06a9
#define a_GAF				0x06af
#define a_FYEH				0x06cc

// Single code points standing for a LAM + ALEF pair.  The a_s_ values are
// what chg_c_laa2i() returns, the a_f_ values what chg_c_laa2f() returns.
#define a_s_LAM_ALEF_MADDA_ABOVE	0xfef5
#define a_f_LAM_ALEF_MADDA_ABOVE	0xfef6
#define a_s_LAM_ALEF_HAMZA_ABOVE	0xfef7
#define a_f_LAM_ALEF_HAMZA_ABOVE	0xfef8
#define a_s_LAM_ALEF_HAMZA_BELOW	0xfef9
#define a_f_LAM_ALEF_HAMZA_BELOW	0xfefa
#define a_s_LAM_ALEF			0xfefb
#define a_f_LAM_ALEF			0xfefc

/*
 * Presentation forms of one Arabic character.
 *
 * A form value of 0 means the table lists no such form for the character;
 * can_join() relies on this when it tests "initial", "medial" and "final" for
 * being non-zero.
 *
 * achars[] MUST stay ordered by ascending "c": find_achar() locates entries
 * with a binary search on that field.
 */
static struct achar {
    unsigned c;		// base code point, the search key
    unsigned isolated;	// form-B isolated (stand-alone) form
    unsigned initial;	// form-B initial form
    unsigned medial;	// form-B medial form
    unsigned final;	// form-B final form
} achars[] = {
    {a_HAMZA, 0xfe80, 0, 0, 0},
    {a_ALEF_MADDA, 0xfe81, 0, 0, 0xfe82},
    {a_ALEF_HAMZA_ABOVE, 0xfe83, 0, 0, 0xfe84},
    {a_WAW_HAMZA, 0xfe85, 0, 0, 0xfe86},
    {a_ALEF_HAMZA_BELOW, 0xfe87, 0, 0, 0xfe88},
    {a_YEH_HAMZA, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},
    {a_ALEF, 0xfe8d, 0, 0, 0xfe8e},
    {a_BEH, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},
    {a_TEH_MARBUTA, 0xfe93, 0, 0, 0xfe94},
    {a_TEH, 0xfe95, 0xfe97, 0xfe98, 0xfe96},
    {a_THEH, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},
    {a_JEEM, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},
    {a_HAH, 0xfea1, 0xfea3, 0xfea4, 0xfea2},
    {a_KHAH, 0xfea5, 0xfea7, 0xfea8, 0xfea6},
    {a_DAL, 0xfea9, 0, 0, 0xfeaa},
    {a_THAL, 0xfeab, 0, 0, 0xfeac},
    {a_REH, 0xfead, 0, 0, 0xfeae},
    {a_ZAIN, 0xfeaf, 0, 0, 0xfeb0},
    {a_SEEN, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},
    {a_SHEEN, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},
    {a_SAD, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},
    {a_DAD, 0xfebd, 0xfebf, 0xfec0, 0xfebe},
    {a_TAH, 0xfec1, 0xfec3, 0xfec4, 0xfec2},
    {a_ZAH, 0xfec5, 0xfec7, 0xfec8, 0xfec6},
    {a_AIN, 0xfec9, 0xfecb, 0xfecc, 0xfeca},
    {a_GHAIN, 0xfecd, 0xfecf, 0xfed0, 0xfece},
    // TATWEEL has no presentation form of its own: it stays 0x0640 when joined
    {a_TATWEEL, 0, 0x0640, 0x0640, 0x0640},
    {a_FEH, 0xfed1, 0xfed3, 0xfed4, 0xfed2},
    {a_QAF, 0xfed5, 0xfed7, 0xfed8, 0xfed6},
    {a_KAF, 0xfed9, 0xfedb, 0xfedc, 0xfeda},
    {a_LAM, 0xfedd, 0xfedf, 0xfee0, 0xfede},
    {a_MEEM, 0xfee1, 0xfee3, 0xfee4, 0xfee2},
    {a_NOON, 0xfee5, 0xfee7, 0xfee8, 0xfee6},
    {a_HEH, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},
    {a_WAW, 0xfeed, 0, 0, 0xfeee},
    {a_ALEF_MAKSURA, 0xfeef, 0, 0, 0xfef0},
    {a_YEH, 0xfef1, 0xfef3, 0xfef4, 0xfef2},
    {a_FATHATAN, 0xfe70, 0, 0, 0},
    {a_DAMMATAN, 0xfe72, 0, 0, 0},
    {a_KASRATAN, 0xfe74, 0, 0, 0},
    {a_FATHA, 0xfe76, 0, 0xfe77, 0},
    {a_DAMMA, 0xfe78, 0, 0xfe79, 0},
    {a_KASRA, 0xfe7a, 0, 0xfe7b, 0},
    // U+FE7D is the medial form of SHADDA; U+FE7C is the isolated form and
    // must not be repeated in the medial column.
    {a_SHADDA, 0xfe7c, 0, 0xfe7d, 0},
    {a_SUKUN, 0xfe7e, 0, 0xfe7f, 0},
    {a_MADDA_ABOVE, 0, 0, 0, 0},
    {a_HAMZA_ABOVE, 0, 0, 0, 0},
    {a_HAMZA_BELOW, 0, 0, 0, 0},
    {a_PEH, 0xfb56, 0xfb58, 0xfb59, 0xfb57},
    {a_TCHEH, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},
    {a_JEH, 0xfb8a, 0, 0, 0xfb8b},
    {a_FKAF, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},
    {a_GAF, 0xfb92, 0xfb94, 0xfb95, 0xfb93},
    {a_FYEH, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},
};

// Unicode code point U+FEFF, the byte order mark.
#define a_BYTE_ORDER_MARK		0xfeff

/*
 * Look up "c" in achars[], which is ordered by ascending code point.
 * Returns a pointer to the matching entry, or NULL when "c" is not listed.
 */
    static struct achar *
find_achar(int c)
{
    unsigned	key = (unsigned)c;
    int		low = 0;
    int		high = ARRAY_LENGTH(achars);	// exclusive upper bound

    // bisect the half-open index range [low, high)
    while (low < high)
    {
	int mid = (high + low) / 2;

	if (key < achars[mid].c)
	    high = mid;
	else if (key > achars[mid].c)
	    low = mid + 1;
	else
	    return &achars[mid];
    }
    return NULL;
}

/*
 * Map the ALEF variant "hid_c" of a LAM + ALEF pair to the single isolated
 * code point that stands for the whole pair.
 * Returns 0 when "hid_c" is not one of the four ALEF variants.
 */
    static int
chg_c_laa2i(int hid_c)
{
    switch (hid_c)
    {
	case a_ALEF:
	    return a_s_LAM_ALEF;
	case a_ALEF_MADDA:
	    return a_s_LAM_ALEF_MADDA_ABOVE;
	case a_ALEF_HAMZA_ABOVE:
	    return a_s_LAM_ALEF_HAMZA_ABOVE;
	case a_ALEF_HAMZA_BELOW:
	    return a_s_LAM_ALEF_HAMZA_BELOW;
	default:
	    break;
    }

    // not an ALEF variant: no combined form
    return 0;
}

/*
 * Map the ALEF variant "hid_c" of a LAM + ALEF pair to the single final-form
 * code point that stands for the whole pair.
 * Returns 0 when "hid_c" is not one of the four ALEF variants.
 */
    static int
chg_c_laa2f(int hid_c)
{
    switch (hid_c)
    {
	case a_ALEF:
	    return a_f_LAM_ALEF;
	case a_ALEF_MADDA:
	    return a_f_LAM_ALEF_MADDA_ABOVE;
	case a_ALEF_HAMZA_ABOVE:
	    return a_f_LAM_ALEF_HAMZA_ABOVE;
	case a_ALEF_HAMZA_BELOW:
	    return a_f_LAM_ALEF_HAMZA_BELOW;
	default:
	    break;
    }

    // not an ALEF variant: no combined form
    return 0;
}

/*
 * Return non-zero when "c1" can connect to a following "c2".
 * Both must be listed in achars[]; "c1" needs an initial or medial form and
 * "c2" needs a final or medial form.
 */
    static int
can_join(int c1, int c2)
{
    struct achar *lead = find_achar(c1);
    struct achar *follow = find_achar(c2);

    // either character unknown to the table: nothing to join
    if (!lead || !follow)
	return 0;

    // "c1" has no form that connects to what comes after it
    if (!lead->initial && !lead->medial)
	return 0;

    return follow->final || follow->medial;
}

/*
 * Return TRUE when "two" is one of the ALEF variants that may combine with
 * the character before it; the caller still has to check that character.
 * Always FALSE unless p_arshape is set and p_tbidi is not.
 */
    int
arabic_maycombine(int two)
{
    int	    is_alef_variant;

    if (!p_arshape || p_tbidi)
	return FALSE;

    is_alef_variant = (two == a_ALEF_MADDA
			|| two == a_ALEF_HAMZA_ABOVE
			|| two == a_ALEF_HAMZA_BELOW
			|| two == a_ALEF);
    return is_alef_variant;
}

/*
 * Return TRUE when "one" followed by "two" is a LAM + ALEF-variant pair that
 * is to be treated as combining.
 * Note: these are NOT really composing characters!
 */
    int
arabic_combine(
    int		one,	    // first character
    int		two)	    // character just after "one"
{
    // only LAM can take part; the ALEF side is checked by arabic_maycombine()
    return one == a_LAM ? arabic_maycombine(two) : FALSE;
}

/*
 * A_is_iso returns true if 'c' is an Arabic ISO-8859-6 character