From b86f10ee10bdf932df02bdaf601dffa671518a47 Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Mon, 21 Mar 2016 22:09:44 +0100 Subject: patch 7.4.1629 Problem: Handling emoji characters as full width has problems with backwards compatibility. Solution: Remove ambiguous and double width characters from the emoji table. Use a separate table for the character class. (partly by Yasuhiro Matsumoto) --- runtime/tools/unicode.vim | 196 ++++++++++++++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 66 deletions(-) (limited to 'runtime/tools') diff --git a/runtime/tools/unicode.vim b/runtime/tools/unicode.vim index dfe9cef417..e0627b644d 100644 --- a/runtime/tools/unicode.vim +++ b/runtime/tools/unicode.vim @@ -32,8 +32,8 @@ func! ParseFoldProps() if line !~ '^#' && line !~ '^\s*$' let l = split(line, '\s*;\s*', 1) if len(l) != 4 - echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4' - return + echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4' + return endif call add(s:foldprops, l) endif @@ -50,8 +50,8 @@ func! ParseWidthProps() if line !~ '^#' && line !~ '^\s*$' let l = split(line, '\s*;\s*', 1) if len(l) != 2 - echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2' - return + echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2' + return endif call add(s:widthprops, l) endif @@ -72,18 +72,18 @@ func! BuildCaseTable(name, index) let n = ('0x' . p[0]) + 0 let nl = ('0x' . p[a:index]) + 0 if start >= 0 && add == nl - n && (step == 0 || n - end == step) - " continue with same range. - let step = n - end - let end = n + " continue with same range. + let step = n - end + let end = n else - if start >= 0 - " produce previous range - call Range(ranges, start, end, step, add) - endif - let start = n - let end = n - let step = 0 - let add = nl - n + if start >= 0 + " produce previous range + call Range(ranges, start, end, step, add) + endif + let start = n + let end = n + let step = 0 + let add = nl - n endif endif endfor @@ -115,18 +115,18 @@ func! BuildFoldTable() let n = ('0x' . p[0]) + 0 let nl = ('0x' . p[2]) + 0 if start >= 0 && add == nl - n && (step == 0 || n - end == step) - " continue with same range. - let step = n - end - let end = n + " continue with same range. + let step = n - end + let end = n else - if start >= 0 - " produce previous range - call Range(ranges, start, end, step, add) - endif - let start = n - let end = n - let step = 0 - let add = nl - n + if start >= 0 + " produce previous range + call Range(ranges, start, end, step, add) + endif + let start = n + let end = n + let step = 0 + let add = nl - n endif endif endfor @@ -160,15 +160,15 @@ func! BuildCombiningTable() if p[2] == 'Mn' || p[2] == 'Mc' || p[2] == 'Me' let n = ('0x' . p[0]) + 0 if start >= 0 && end + 1 == n - " continue with same range. - let end = n + " continue with same range. + let end = n else - if start >= 0 - " produce previous range - call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) - endif - let start = n - let end = n + if start >= 0 + " produce previous range + call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + endif + let start = n + let end = n endif endif endfor @@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName) for p in s:widthprops if p[1][0] =~ a:pattern if p[0] =~ '\.\.' - " It is a range. we don't check for composing char then. - let rng = split(p[0], '\.\.') - if len(rng) != 2 - echoerr "Cannot parse range: '" . p[0] . "' in width table" - endif - let n = ('0x' . rng[0]) + 0 - let n_last = ('0x' . rng[1]) + 0 + " It is a range. we don't check for composing char then. + let rng = split(p[0], '\.\.') + if len(rng) != 2 + echoerr "Cannot parse range: '" . p[0] . "' in width table" + endif + let n = ('0x' . rng[0]) + 0 + let n_last = ('0x' . rng[1]) + 0 else - let n = ('0x' . p[0]) + 0 - let n_last = n + let n = ('0x' . p[0]) + 0 + let n_last = n endif " Find this char in the data table. while 1 - let dn = ('0x' . s:dataprops[dataidx][0]) + 0 - if dn >= n - break - endif - let dataidx += 1 + let dn = ('0x' . s:dataprops[dataidx][0]) + 0 + if dn >= n + break + endif + let dataidx += 1 endwhile if dn != n && n_last == n - echoerr "Cannot find character " . n . " in data table" + echoerr "Cannot find character " . n . " in data table" endif " Only use the char when it's not a composing char. " But use all chars from a range. let dp = s:dataprops[dataidx] if n_last > n || (dp[2] != 'Mn' && dp[2] != 'Mc' && dp[2] != 'Me') - if start >= 0 && end + 1 == n - " continue with same range. - else - if start >= 0 - " produce previous range - call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) - endif - let start = n - endif - let end = n_last + if start >= 0 && end + 1 == n + " continue with same range. + else + if start >= 0 + " produce previous range + call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + if a:pattern == 'A' + call add(s:ambitable, [start, end]) + else + call add(s:doubletable, [start, end]) + endif + endif + let start = n + endif + let end = n_last endif endif endfor if start >= 0 call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) + if a:pattern == 'A' + call add(s:ambitable, [start, end]) + else + call add(s:doubletable, [start, end]) + endif endif " New buffer to put the result in. @@ -253,21 +263,72 @@ endfunc " Build the amoji width table in a new buffer. func! BuildEmojiTable(pattern, tableName) - let ranges = [] - for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")') + let alltokens = [] + let widthtokens = [] + let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")') + for n in range(len(lines)) + let line = lines[n] let token = split(line, '\.\.') + let first = ('0x' . token[0]) + 0 if len(token) == 1 - call add(token, token[0]) + let last = first + else + let last = ('0x' . token[1]) + 0 + endif + + let token = [first, last] + if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1]) + let alltokens[-1][1] = token[1] + else + call add(alltokens, token) + endif + + " exclude characters that are in the "ambiguous" or "doublewidth" table + for ambi in s:ambitable + if first >= ambi[0] && first <= ambi[1] + let first = ambi[1] + 1 + endif + if last >= ambi[0] && last <= ambi[1] + let last = ambi[0] - 1 + endif + endfor + for double in s:doubletable + if first >= double[0] && first <= double[1] + let first = double[1] + 1 + endif + if last >= double[0] && last <= double[1] + let last = double[0] - 1 + endif + endfor + + if first <= last + let token = [first, last] + if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1]) + let widthtokens[-1][1] = token[1] + else + call add(widthtokens, token) + endif endif - call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1])) endfor + let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') + let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') " New buffer to put the result in. new - exe "file " . a:tableName - call setline(1, " static struct interval " . a:tableName . "[] =") + exe "file " . a:tableName . '_all' + call setline(1, " static struct interval " . a:tableName . "_all[] =") call setline(2, " {") - call append('$', ranges) + call append('$', allranges) + call setline('$', getline('$')[:-2]) " remove last comma + call setline(line('$') + 1, " };") + wincmd p + + " New buffer to put the result in. + new + exe "file " . a:tableName . '_width' + call setline(1, " static struct interval " . a:tableName . "_width[] =") + call setline(2, " {") + call append('$', widthranges) call setline('$', getline('$')[:-2]) " remove last comma call setline(line('$') + 1, " };") wincmd p @@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt call ParseWidthProps() " Build the double width table. +let s:doubletable = [] call BuildWidthTable('[WF]', 'doublewidth') " Build the ambiguous width table. +let s:ambitable = [] call BuildWidthTable('A', 'ambiguous') " Edit the emoji text file. Requires the netrw plugin. edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt " Build the emoji table. Ver. 1.0 - 6.0 +" Must come after the "ambiguous" table call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji') -- cgit v1.2.3