From 56a573de86a9d6257f426cee4d8ca895880c9681 Mon Sep 17 00:00:00 2001 From: Jesse Duffield Date: Tue, 9 Feb 2021 09:47:57 +1100 Subject: support wide characters in the editor --- go.mod | 6 +- go.sum | 12 + vendor/github.com/jesseduffield/gocui/edit.go | 19 +- vendor/github.com/mattn/go-runewidth/go.mod | 2 + vendor/github.com/mattn/go-runewidth/go.sum | 2 + vendor/github.com/mattn/go-runewidth/runewidth.go | 80 +- vendor/github.com/rivo/uniseg/LICENSE.txt | 21 + vendor/github.com/rivo/uniseg/README.md | 62 + vendor/github.com/rivo/uniseg/doc.go | 8 + vendor/github.com/rivo/uniseg/go.mod | 3 + vendor/github.com/rivo/uniseg/grapheme.go | 268 ++++ vendor/github.com/rivo/uniseg/properties.go | 1658 +++++++++++++++++++++ vendor/modules.txt | 9 +- 13 files changed, 2093 insertions(+), 57 deletions(-) create mode 100644 vendor/github.com/mattn/go-runewidth/go.sum create mode 100644 vendor/github.com/rivo/uniseg/LICENSE.txt create mode 100644 vendor/github.com/rivo/uniseg/README.md create mode 100644 vendor/github.com/rivo/uniseg/doc.go create mode 100644 vendor/github.com/rivo/uniseg/go.mod create mode 100644 vendor/github.com/rivo/uniseg/grapheme.go create mode 100644 vendor/github.com/rivo/uniseg/properties.go diff --git a/go.mod b/go.mod index 84b3ce319..9e453f0f9 100644 --- a/go.mod +++ b/go.mod @@ -19,17 +19,19 @@ require ( github.com/imdario/mergo v0.3.11 github.com/integrii/flaggy v1.4.0 github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4 - github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7 + github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe github.com/jesseduffield/yaml v2.1.0+incompatible github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/mattn/go-colorable v0.1.7 // indirect - github.com/mattn/go-runewidth v0.0.9 + github.com/mattn/go-runewidth v0.0.10 github.com/mgutz/str v1.2.0 + github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8 // indirect github.com/onsi/ginkgo v1.10.3 // indirect github.com/onsi/gomega v1.7.1 // indirect + github.com/rivo/uniseg v0.2.0 // indirect github.com/sahilm/fuzzy v0.1.0 github.com/sirupsen/logrus v1.4.2 github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad diff --git a/go.sum b/go.sum index 0a1ddfe94..48561c786 100644 --- a/go.sum +++ b/go.sum @@ -65,8 +65,12 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOl github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4 h1:GOQrmaE8i+KEdB8NzAegKYd4tPn/inM0I1uo0NXFerg= github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4/go.mod h1:nGNEErzf+NRznT+N2SWqmHnDnF9aLgANB1CUNEan09o= +github.com/jesseduffield/gocui v0.3.1-0.20161105104656-d666c9f652af h1:9ZI/QyVOerYYeqMt4svycU2Lz0WvxNHCpHHbsFsi/oA= +github.com/jesseduffield/gocui v0.3.1-0.20161105104656-d666c9f652af/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7 h1:K3MGrjmpPtIhfXmKh/zsIF0CdmNKOkjpIwcUfAa/J2A= github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= +github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d h1:Jto9W9w8CFwZiAYXa7LsHDEOb5cKCA1f5LOL1A3jva4= +github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe h1:qsVhCf2RFyyKIUe/+gJblbCpXMUki9rZrHuEctg6M/E= github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe/go.mod h1:anMibpZtqNxjDbxrcDEAwSdaJ37vyUeM1f/M4uekib4= github.com/jesseduffield/yaml v2.1.0+incompatible h1:HWQJ1gIv2zHKbDYNp0Jwjlj24K8aqpFHnMCynY1EpmE= @@ -103,12 +107,16 @@ github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHX github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg= +github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mgutz/str v1.2.0 h1:4IzWSdIz9qPQWLfKZ0rJcV0jcUDpxvP4JVZ4GXQyvSw= github.com/mgutz/str v1.2.0/go.mod h1:w1v0ofgLaJdoD0HpQ3fycxKD1WtxpjSo151pK/31q6w= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8 h1:3vzIuru1svOK2sXlg4XcrO3KkGRneIejmfQfR+ptSW8= +github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8/go.mod h1:T0cTdVuOwf7pHQNtfhnEbzHbcNyCEcVU4YPpouCbVxo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3 h1:OoxbjfXVZyod1fmWYhI7SEyaD8B00ynP3T+D5GiyHOY= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -118,6 +126,10 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/sahilm/fuzzy v0.1.0 h1:FzWGaw2Opqyu+794ZQ9SYifWv2EIXpwP4q8dY1kDAwI= github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= diff --git a/vendor/github.com/jesseduffield/gocui/edit.go b/vendor/github.com/jesseduffield/gocui/edit.go index f8eb08a57..93717f69e 100644 --- a/vendor/github.com/jesseduffield/gocui/edit.go +++ b/vendor/github.com/jesseduffield/gocui/edit.go @@ -343,17 +343,18 @@ func (v *View) writeRune(x, y int, ch rune) error { } olen := len(v.lines[y]) + w := runewidth.RuneWidth(ch) var s []cell if x >= len(v.lines[y]) { - s = make([]cell, x-len(v.lines[y])+1) + s = make([]cell, x-len(v.lines[y])+w) } else if !v.Overwrite { - s = make([]cell, 1) + s = make([]cell, w) } v.lines[y] = append(v.lines[y], s...) - if !v.Overwrite || (v.Overwrite && x >= olen-1) { - copy(v.lines[y][x+1:], v.lines[y][x:]) + if !v.Overwrite || (v.Overwrite && x >= olen-w) { + copy(v.lines[y][x+w:], v.lines[y][x:]) } v.lines[y][x] = cell{ fgColor: v.FgColor, @@ -361,6 +362,14 @@ func (v *View) writeRune(x, y int, ch rune) error { chr: ch, } + for i := 1; i < w; i++ { + v.lines[y][x+i] = cell{ + fgColor: v.FgColor, + bgColor: v.BgColor, + chr: '\x00', + } + } + return nil } @@ -384,7 +393,7 @@ func (v *View) deleteRune(x, y int) (int, error) { w := runewidth.RuneWidth(v.lines[y][i].chr) tw += w if tw > x { - v.lines[y] = append(v.lines[y][:i], v.lines[y][i+1:]...) + v.lines[y] = append(v.lines[y][:i], v.lines[y][i+w:]...) return w, nil } diff --git a/vendor/github.com/mattn/go-runewidth/go.mod b/vendor/github.com/mattn/go-runewidth/go.mod index fa7f4d864..8a9d524ec 100644 --- a/vendor/github.com/mattn/go-runewidth/go.mod +++ b/vendor/github.com/mattn/go-runewidth/go.mod @@ -1,3 +1,5 @@ module github.com/mattn/go-runewidth go 1.9 + +require github.com/rivo/uniseg v0.1.0 diff --git a/vendor/github.com/mattn/go-runewidth/go.sum b/vendor/github.com/mattn/go-runewidth/go.sum new file mode 100644 index 000000000..02135660b --- /dev/null +++ b/vendor/github.com/mattn/go-runewidth/go.sum @@ -0,0 +1,2 @@ +github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= diff --git a/vendor/github.com/mattn/go-runewidth/runewidth.go b/vendor/github.com/mattn/go-runewidth/runewidth.go index 19f8e0449..f3871a624 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth.go @@ -2,6 +2,8 @@ package runewidth import ( "os" + + "github.com/rivo/uniseg" ) //go:generate go run script/generate.go @@ -10,9 +12,6 @@ var ( // EastAsianWidth will be set true if the current locale is CJK EastAsianWidth bool - // ZeroWidthJoiner is flag to set to use UTR#51 ZWJ - ZeroWidthJoiner bool - // DefaultCondition is a condition in current locale DefaultCondition = &Condition{} ) @@ -30,7 +29,6 @@ func handleEnv() { } // update DefaultCondition DefaultCondition.EastAsianWidth = EastAsianWidth - DefaultCondition.ZeroWidthJoiner = ZeroWidthJoiner } type interval struct { @@ -85,15 +83,13 @@ var nonprint = table{ // Condition have flag EastAsianWidth whether the current locale is CJK or not. type Condition struct { - EastAsianWidth bool - ZeroWidthJoiner bool + EastAsianWidth bool } // NewCondition return new instance of Condition which is current locale. func NewCondition() *Condition { return &Condition{ - EastAsianWidth: EastAsianWidth, - ZeroWidthJoiner: ZeroWidthJoiner, + EastAsianWidth: EastAsianWidth, } } @@ -110,38 +106,20 @@ func (c *Condition) RuneWidth(r rune) int { } } -func (c *Condition) stringWidth(s string) (width int) { - for _, r := range []rune(s) { - width += c.RuneWidth(r) - } - return width -} - -func (c *Condition) stringWidthZeroJoiner(s string) (width int) { - r1, r2 := rune(0), rune(0) - for _, r := range []rune(s) { - if r == 0xFE0E || r == 0xFE0F { - continue - } - w := c.RuneWidth(r) - if r2 == 0x200D && inTables(r, emoji) && inTables(r1, emoji) { - if width < w { - width = w - } - } else { - width += w - } - r1, r2 = r2, r - } - return width -} - // StringWidth return width as you can see func (c *Condition) StringWidth(s string) (width int) { - if c.ZeroWidthJoiner { - return c.stringWidthZeroJoiner(s) + g := uniseg.NewGraphemes(s) + for g.Next() { + var chWidth int + for _, r := range g.Runes() { + chWidth = c.RuneWidth(r) + if chWidth > 0 { + break // Our best guess at this point is to use the width of the first non-zero-width rune. + } + } + width += chWidth } - return c.stringWidth(s) + return } // Truncate return string truncated with w cells @@ -149,19 +127,25 @@ func (c *Condition) Truncate(s string, w int, tail string) string { if c.StringWidth(s) <= w { return s } - r := []rune(s) - tw := c.StringWidth(tail) - w -= tw - width := 0 - i := 0 - for ; i < len(r); i++ { - cw := c.RuneWidth(r[i]) - if width+cw > w { + w -= c.StringWidth(tail) + var width int + pos := len(s) + g := uniseg.NewGraphemes(s) + for g.Next() { + var chWidth int + for _, r := range g.Runes() { + chWidth = c.RuneWidth(r) + if chWidth > 0 { + break // See StringWidth() for details. + } + } + if width+chWidth > w { + pos, _ = g.Positions() break } - width += cw + width += chWidth } - return string(r[0:i]) + tail + return s[:pos] + tail } // Wrap return string wrapped with w cells @@ -169,7 +153,7 @@ func (c *Condition) Wrap(s string, w int) string { width := 0 out := "" for _, r := range []rune(s) { - cw := RuneWidth(r) + cw := c.RuneWidth(r) if r == '\n' { out += string(r) width = 0 diff --git a/vendor/github.com/rivo/uniseg/LICENSE.txt b/vendor/github.com/rivo/uniseg/LICENSE.txt new file mode 100644 index 000000000..5040f1ef8 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Oliver Kuederle + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/rivo/uniseg/README.md b/vendor/github.com/rivo/uniseg/README.md new file mode 100644 index 000000000..f8da293e1 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/README.md @@ -0,0 +1,62 @@ +# Unicode Text Segmentation for Go + +[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg) +[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg) + +This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0). + +At this point, only the determination of grapheme cluster boundaries is implemented. + +## Background + +In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples: + +|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters| +|-|-|-|-| +|Käse|6 bytes: `4b 61 cc 88 73 65`|5 code points: `4b 61 308 73 65`|4 clusters: `[4b],[61 308],[73],[65]`| +|🏳️‍🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`| +|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`| + +This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit. + +## Installation + +```bash +go get github.com/rivo/uniseg +``` + +## Basic Example + +```go +package uniseg + +import ( + "fmt" + + "github.com/rivo/uniseg" +) + +func main() { + gr := uniseg.NewGraphemes("👍🏼!") + for gr.Next() { + fmt.Printf("%x ", gr.Runes()) + } + // Output: [1f44d 1f3fc] [21] +} +``` + +## Documentation + +Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation. + +## Dependencies + +This package does not depend on any packages outside the standard library. + +## Your Feedback + +Add your issue here on GitHub. Feel free to get in touch if you have any questions. + +## Version + +Version tags will be introduced once Golang modules are official. Consider this version 0.1. diff --git a/vendor/github.com/rivo/uniseg/doc.go b/vendor/github.com/rivo/uniseg/doc.go new file mode 100644 index 000000000..60c737d7b --- /dev/null +++ b/vendor/github.com/rivo/uniseg/doc.go @@ -0,0 +1,8 @@ +/* +Package uniseg implements Unicode Text Segmentation according to Unicode +Standard Annex #29 (http://unicode.org/reports/tr29/). + +At this point, only the determination of grapheme cluster boundaries is +implemented. +*/ +package uniseg diff --git a/vendor/github.com/rivo/uniseg/go.mod b/vendor/github.com/rivo/uniseg/go.mod new file mode 100644 index 000000000..a54280b2d --- /dev/null +++ b/vendor/github.com/rivo/uniseg/go.mod @@ -0,0 +1,3 @@ +module github.com/rivo/uniseg + +go 1.12 diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go new file mode 100644 index 000000000..207157f5e --- /dev/null +++ b/vendor/github.com/rivo/uniseg/grapheme.go @@ -0,0 +1,268 @@ +package uniseg + +import "unicode/utf8" + +// The states of the grapheme cluster parser. +const ( + grAny = iota + grCR + grControlLF + grL + grLVV + grLVTT + grPrepend + grExtendedPictographic + grExtendedPictographicZWJ + grRIOdd + grRIEven +) + +// The grapheme cluster parser's breaking instructions. +const ( + grNoBoundary = iota + grBoundary +) + +// The grapheme cluster parser's state transitions. Maps (state, property) to +// (new state, breaking instruction, rule number). The breaking instruction +// always refers to the boundary between the last and next code point. +// +// This map is queried as follows: +// +// 1. Find specific state + specific property. Stop if found. +// 2. Find specific state + any property. +// 3. Find any state + specific property. +// 4. If only (2) or (3) (but not both) was found, stop. +// 5. If both (2) and (3) were found, use state and breaking instruction from +// the transition with the lower rule number, prefer (3) if rule numbers +// are equal. Stop. +// 6. Assume grAny and grBoundary. +var grTransitions = map[[2]int][3]int{ + // GB5 + {grAny, prCR}: {grCR, grBoundary, 50}, + {grAny, prLF}: {grControlLF, grBoundary, 50}, + {grAny, prControl}: {grControlLF, grBoundary, 50}, + + // GB4 + {grCR, prAny}: {grAny, grBoundary, 40}, + {grControlLF, prAny}: {grAny, grBoundary, 40}, + + // GB3. + {grCR, prLF}: {grAny, grNoBoundary, 30}, + + // GB6. + {grAny, prL}: {grL, grBoundary, 9990}, + {grL, prL}: {grL, grNoBoundary, 60}, + {grL, prV}: {grLVV, grNoBoundary, 60}, + {grL, prLV}: {grLVV, grNoBoundary, 60}, + {grL, prLVT}: {grLVTT, grNoBoundary, 60}, + + // GB7. + {grAny, prLV}: {grLVV, grBoundary, 9990}, + {grAny, prV}: {grLVV, grBoundary, 9990}, + {grLVV, prV}: {grLVV, grNoBoundary, 70}, + {grLVV, prT}: {grLVTT, grNoBoundary, 70}, + + // GB8. + {grAny, prLVT}: {grLVTT, grBoundary, 9990}, + {grAny, prT}: {grLVTT, grBoundary, 9990}, + {grLVTT, prT}: {grLVTT, grNoBoundary, 80}, + + // GB9. + {grAny, prExtend}: {grAny, grNoBoundary, 90}, + {grAny, prZWJ}: {grAny, grNoBoundary, 90}, + + // GB9a. + {grAny, prSpacingMark}: {grAny, grNoBoundary, 91}, + + // GB9b. + {grAny, prPreprend}: {grPrepend, grBoundary, 9990}, + {grPrepend, prAny}: {grAny, grNoBoundary, 92}, + + // GB11. + {grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990}, + {grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110}, + {grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110}, + {grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110}, + + // GB12 / GB13. + {grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990}, + {grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120}, + {grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120}, +} + +// Graphemes implements an iterator over Unicode extended grapheme clusters, +// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to +// "user-perceived characters". These characters often consist of multiple +// code points (e.g. the "woman kissing woman" emoji consists of 8 code points: +// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ + +// woman) and the rules described in Annex #29 must be applied to group those +// code points into clusters perceived by the user as one character. +type Graphemes struct { + // The code points over which this class iterates. + codePoints []rune + + // The (byte-based) indices of the code points into the original string plus + // len(original string). Thus, len(indices) = len(codePoints) + 1. + indices []int + + // The current grapheme cluster to be returned. These are indices into + // codePoints/indices. If start == end, we either haven't started iterating + // yet (0) or the iteration has already completed (1). + start, end int + + // The index of the next code point to be parsed. + pos int + + // The current state of the code point parser. + state int +} + +// NewGraphemes returns a new grapheme cluster iterator. +func NewGraphemes(s string) *Graphemes { + l := utf8.RuneCountInString(s) + codePoints := make([]rune, l) + indices := make([]int, l+1) + i := 0 + for pos, r := range s { + codePoints[i] = r + indices[i] = pos + i++ + } + indices[l] = len(s) + g := &Graphemes{ + codePoints: codePoints, + indices: indices, + } + g.Next() // Parse ahead. + return g +} + +// Next advances the iterator by one grapheme cluster and returns false if no +// clusters are left. This function must be called before the first cluster is +// accessed. +func (g *Graphemes) Next() bool { + g.start = g.end + + // The state transition gives us a boundary instruction BEFORE the next code + // point so we always need to stay ahead by one code point. + + // Parse the next code point. + for g.pos <= len(g.codePoints) { + // GB2. + if g.pos == len(g.codePoints) { + g.end = g.pos + g.pos++ + break + } + + // Determine the property of the next character. + nextProperty := property(g.codePoints[g.pos]) + g.pos++ + + // Find the applicable transition. + var boundary bool + transition, ok := grTransitions[[2]int{g.state, nextProperty}] + if ok { + // We have a specific transition. We'll use it. + g.state = transition[0] + boundary = transition[1] == grBoundary + } else { + // No specific transition found. Try the less specific ones. + transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}] + transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}] + if okAnyProp && okAnyState { + // Both apply. We'll use a mix (see comments for grTransitions). + g.state = transAnyState[0] + boundary = transAnyState[1] == grBoundary + if transAnyProp[2] < transAnyState[2] { + g.state = transAnyProp[0] + boundary = transAnyProp[1] == grBoundary + } + } else if okAnyProp { + // We only have a specific state. + g.state = transAnyProp[0] + boundary = transAnyProp[1] == grBoundary + // This branch will probably never be reached because okAnyState will + // always be true given the current transition map. But we keep it here + // for future modifications to the transition map where this may not be + // true anymore. + } else if okAnyState { + // We only have a specific property. + g.state = transAnyState[0] + boundary = transAnyState[1] == grBoundary + } else { + // No known transition. GB999: Any x Any. + g.state = grAny + boundary = true + } + } + + // If we found a cluster boundary, let's stop here. The current cluster will + // be the one that just ended. + if g.pos-1 == 0 /* GB1 */ || boundary { + g.end = g.pos - 1 + break + } + } + + return g.start != g.end +} + +// Runes returns a slice of runes (code points) which corresponds to the current +// grapheme cluster. If the iterator is already past the end or Next() has not +// yet been called, nil is returned. +func (g *Graphemes) Runes() []rune { + if g.start == g.end { + return nil + } + return g.codePoints[g.start:g.end] +} + +// Str returns a substring of the original string which corresponds to the +// current grapheme cluster. If the iterator is already past the end or Next() +// has not yet been called, an empty string is returned. +func (g *Graphemes) Str() string { + if g.start == g.end { + return "" + } + return string(g.codePoints[g.start:g.end]) +} + +// Bytes returns a byte slice which corresponds to the current grapheme cluster. +// If the iterator is already past the end or Next() has not yet been called, +// nil is returned. +func (g *Graphemes) Bytes() []byte { + if g.start == g.end { + return nil + } + return []byte(string(g.codePoints[g.start:g.end])) +} + +// Positions returns the interval of the current grapheme cluster as byte +// positions into the original string. The first returned value "from" indexes +// the first byte and the second returned value "to" indexes the first byte that +// is not included anymore, i.e. str[from:to] is the current grapheme cluster of +// the original string "str". If Next() has not yet been called, both values are +// 0. If the iterator is already past the end, both values are 1. +func (g *Graphemes) Positions() (int, int) { + return g.indices[g.start], g.indices[g.end] +} + +// Reset puts the iterator into its initial state such that the next call to +// Next() sets it to the first grapheme cluster again. +func (g *Graphemes) Reset() { + g.start, g.end, g.pos, g.state = 0, 0, 0, grAny + g.Next() // Parse ahead again. +} + +// GraphemeClusterCount returns the number of user-perceived characters +// (grapheme clusters) for the given string. To calculate this number, it +// iterates through the string using the Graphemes iterator. +func GraphemeClusterCount(s string) (n int) { + g := NewGraphemes(s) + for g.Next() { + n++ + } + return +} diff --git a/vendor/github.com/rivo/uniseg/properties.go b/vendor/github.com/rivo/uniseg/properties.go new file mode 100644 index 000000000..a75ab5883 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/properties.go @@ -0,0 +1,1658 @@ +package uniseg + +// The unicode properties. Only the ones needed in the context of this package +// are included. +const ( + prAny = iota + prPreprend + prCR + prLF + prControl + prExtend + prRegionalIndicator + prSpacingMark + prL + prV + prT + prLV + prLVT + prZWJ + prExtendedPictographic +) + +// Maps code point ranges to their properties. In the context of this package, +// any code point that is not contained may map to "prAny". The code point +// ranges in this slice are numerically sorted. +// +// These ranges were taken from +// http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt +// as well as +// https://unicode.org/Public/emoji/latest/emoji-data.txt +// ("Extended_Pictographic" only) on March 11, 2019. See +// https://www.unicode.org/license.html for the Unicode license agreement. +var codePoints = [][3]int{ + {0x0000, 0x0009, prControl}, // Cc [10] .. + {0x000A, 0x000A, prLF}, // Cc + {0x000B, 0x000C, prControl}, // Cc [2] .. + {0x000D, 0x000D, prCR}, // Cc + {0x000E, 0x001F, prControl}, // Cc [18] .. + {0x007F, 0x009F, prControl}, // Cc [33] .. + {0x00A9, 0x00A9, prExtendedPictographic}, // 1.1 [1] (©️) copyright + {0x00AD, 0x00AD, prControl}, // Cf SOFT HYPHEN + {0x00AE, 0x00AE, prExtendedPictographic}, // 1.1 [1] (®️) registered + {0x0300, 0x036F, prExtend}, // Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X + {0x0483, 0x0487, prExtend}, // Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE + {0x0488, 0x0489, prExtend}, // Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN + {0x0591, 0x05BD, prExtend}, // Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG + {0x05BF, 0x05BF, prExtend}, // Mn HEBREW POINT RAFE + {0x05C1, 0x05C2, prExtend}, // Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT + {0x05C4, 0x05C5, prExtend}, // Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT + {0x05C7, 0x05C7, prExtend}, // Mn HEBREW POINT QAMATS QATAN + {0x0600, 0x0605, prPreprend}, // Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE + {0x0610, 0x061A, prExtend}, // Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA + {0x061C, 0x061C, prControl}, // Cf ARABIC LETTER MARK + {0x064B, 0x065F, prExtend}, // Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW + {0x0670, 0x0670, prExtend}, // Mn ARABIC LETTER SUPERSCRIPT ALEF + {0x06D6, 0x06DC, prExtend}, // Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN + {0x06DD, 0x06DD, prPreprend}, // Cf ARABIC END OF AYAH + {0x06DF, 0x06E4, prExtend}, // Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA + {0x06E7, 0x06E8, prExtend}, // Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON + {0x06EA, 0x06ED, prExtend}, // Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM + {0x070F, 0x070F, prPreprend}, // Cf SYRIAC ABBREVIATION MARK + {0x0711, 0x0711, prExtend}, // Mn SYRIAC LETTER SUPERSCRIPT ALAPH + {0x0730, 0x074A, prExtend}, // Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH + {0x07A6, 0x07B0, prExtend}, // Mn [11] THAANA ABAFILI..THAANA SUKUN + {0x07EB, 0x07F3, prExtend}, // Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE + {0x07FD, 0x07FD, prExtend}, // Mn NKO DANTAYALAN + {0x0816, 0x0819, prExtend}, // Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH + {0x081B, 0x0823, prExtend}, // Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A + {0x0825, 0x0827, prExtend}, // Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U + {0x0829, 0x082D, prExtend}, // Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA + {0x0859, 0x085B, prExtend}, // Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK + {0x08D3, 0x08E1, prExtend}, // Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA + {0x08E2, 0x08E2, prPreprend}, // Cf ARABIC DISPUTED END OF AYAH + {0x08E3, 0x0902, prExtend}, // Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA + {0x0903, 0x0903, prSpacingMark}, // Mc DEVANAGARI SIGN VISARGA + {0x093A, 0x093A, prExtend}, // Mn DEVANAGARI VOWEL SIGN OE + {0x093B, 0x093B, prSpacingMark}, // Mc DEVANAGARI VOWEL SIGN OOE + {0x093C, 0x093C, prExtend}, // Mn DEVANAGARI SIGN NUKTA + {0x093E, 0x0940, prSpacingMark}, // Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II + {0x0941, 0x0948, prExtend}, // Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI + {0x0949, 0x094C, prSpacingMark}, // Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU + {0x094D, 0x094D, prExtend}, // Mn DEVANAGARI SIGN VIRAMA + {0x094E, 0x094F, prSpacingMark}, // Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW + {0x0951, 0x0957, prExtend}, // Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE + {0x0962, 0x0963, prExtend}, // Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL + {0x0981, 0x0981, prExtend}, // Mn BENGALI SIGN CANDRABINDU + {0x0982, 0x0983, prSpacingMark}, // Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA + {0x09BC, 0x09BC, prExtend}, // Mn BENGALI SIGN NUKTA + {0x09BE, 0x09BE, prExtend}, // Mc BENGALI VOWEL SIGN AA + {0x09BF, 0x09C0, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II + {0x09C1, 0x09C4, prExtend}, // Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR + {0x09C7, 0x09C8, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI + {0x09CB, 0x09CC, prSpacingMark}, // Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU + {0x09CD, 0x09CD, prExtend}, // Mn BENGALI SIGN VIRAMA + {0x09D7, 0x09D7, prExtend}, // Mc BENGALI AU LENGTH MARK + {0x09E2, 0x09E3, prExtend}, // Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL + {0x09FE, 0x09FE, prExtend}, // Mn BENGALI SANDHI MARK + {0x0A01, 0x0A02, prExtend}, // Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI + {0x0A03, 0x0A03, prSpacingMark}, // Mc GURMUKHI SIGN VISARGA + {0x0A3C, 0x0A3C, prExtend}, // Mn GURMUKHI SIGN NUKTA + {0x0A3E, 0x0A40, prSpacingMark}, // Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II + {0x0A41, 0x0A42, prExtend}, // Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU + {0x0A47, 0x0A48, prExtend}, // Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI + {0x0A4B, 0x0A4D, prExtend}, // Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA + {0x0A51, 0x0A51, prExtend}, // Mn GURMUKHI SIGN UDAAT + {0x0A70, 0x0A71, prExtend}, // Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK + {0x0A75, 0x0A75, prExtend}, // Mn GURMUKHI SIGN YAKASH + {0x0A81, 0x0A82, prExtend}, // Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA + {0x0A83, 0x0A83, prSpacingMark}, // Mc GUJARATI SIGN VISARGA + {0x0ABC, 0x0ABC, prExtend}, // Mn GUJARATI SIGN NUKTA + {0x0ABE, 0x0AC0, prSpacingMark}, // Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II + {0x0AC1, 0x0AC5, prExtend}, // Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E + {0x0AC7, 0x0AC8, prExtend}, // Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI + {0x0AC9, 0x0AC9, prSpacingMark}, // Mc GUJARATI VOWEL SIGN CANDRA O + {0x0ACB, 0x0ACC, prSpacingMark}, // Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU + {0x0ACD, 0x0ACD, prExtend}, // Mn GUJARATI SIGN VIRAMA + {0x0AE2, 0x0AE3, prExtend}, // Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL + {0x0AFA, 0x0AFF, prExtend}, // Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE + {0x0B01, 0x0B01, prExtend}, // Mn ORIYA SIGN CANDRABINDU + {0x0B02, 0x0B03, prSpacingMark}, // Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA + {0x0B3C, 0x0B3C, prExtend}, // Mn ORIYA SIGN NUKTA + {0x0B3E, 0x0B3E, prExtend}, // Mc ORIYA VOWEL SIGN AA + {0x0B3F, 0x0B3F, prExtend}, // Mn ORIYA VOWEL SIGN I + {0x0B40, 0x0B40, prSpacingMark}, // Mc ORIYA VOWEL SIGN II + {0x0B41, 0x0B44, prExtend}, // Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR + {0x0B47, 0x0B48, prSpacingMark}, // Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI + {0x0B4B, 0x0B4C, prSpacingMark}, // Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU + {0x0B4D, 0x0B4D, prExtend}, // Mn ORIYA SIGN VIRAMA + {0x0B56, 0x0B56, prExtend}, // Mn ORIYA AI LENGTH MARK + {0x0B57, 0x0B57, prExtend}, // Mc ORIYA AU LENGTH MARK + {0x0B62, 0x0B63, prExtend}, // Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL + {0x0B82, 0x0B82, prExtend}, // Mn TAMIL SIGN ANUSVARA + {0x0BBE, 0x0BBE, prExtend}, // Mc TAMIL VOWEL SIGN AA + {0x0BBF, 0x0BBF, prSpacingMark}, // Mc TAMIL VOWEL SIGN I + {0x0BC0, 0x0BC0, prExtend}, // Mn TAMIL VOWEL SIGN II + {0x0BC1, 0x0BC2, prSpacingMark}, // Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU + {0x0BC6, 0x0BC8, prSpacingMark}, // Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI + {0x0BCA, 0x0BCC, prSpacingMark}, // Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU + {0x0BCD, 0x0BCD, prExtend}, // Mn TAMIL SIGN VIRAMA + {0x0BD7, 0x0BD7, prExtend}, // Mc TAMIL AU LENGTH MARK + {0x0C00, 0x0C00, prExtend}, // Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE + {0x0C01, 0x0C03, prSpacingMark}, // Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA + {0x0C04, 0x0C04, prExtend}, // Mn TELUGU SIGN COMBINING ANUSVARA ABOVE + {0x0C3E, 0x0C40, prExtend}, // Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II + {0x0C41, 0x0C44, prSpacingMark}, // Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR + {0x0C46, 0x0C48, prExtend}, // Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI + {0x0C4A, 0x0C4D, prExtend}, // Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA + {0x0C55, 0x0C56, prExtend}, // Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK + {0x0C62, 0x0C63, prExtend}, // Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL + {0x0C81, 0x0C81, prExtend}, // Mn KANNADA SIGN CANDRABINDU + {0x0C82, 0x0C83, prSpacingMark}, // Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA + {0x0CBC, 0x0CBC, prExtend}, // Mn KANNADA SIGN NUKTA + {0x0CBE, 0x0CBE, prSpacingMark}, // Mc KANNADA VOWEL SIGN AA + {0x0CBF, 0x0CBF, prExtend}, // Mn KANNADA VOWEL SIGN I + {0x0CC0, 0x0CC1, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U + {0x0CC2, 0x0CC2, prExtend}, // Mc KANNADA VOWEL SIGN UU + {0x0CC3, 0x0CC4, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR + {0x0CC6, 0x0CC6, prExtend}, // Mn KANNADA VOWEL SIGN E + {0x0CC7, 0x0CC8, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI + {0x0CCA, 0x0CCB, prSpacingMark}, // Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO + {0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA + {0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK + {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL + {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU + {0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA + {0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA + {0x0D3E, 0x0D3E, prExtend}, // Mc MALAYALAM VOWEL SIGN AA + {0x0D3F, 0x0D40, prSpacingMark}, // Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II + {0x0D41, 0x0D44, prExtend}, // Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR + {0x0D46, 0x0D48, prSpacingMark}, // Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI + {0x0D4A, 0x0D4C, prSpacingMark}, // Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU + {0x0D4D, 0x0D4D, prExtend}, // Mn MALAYALAM SIGN VIRAMA + {0x0D4E, 0x0D4E, prPreprend}, // Lo MALAYALAM LETTER DOT REPH + {0x0D57, 0x0D57, prExtend}, // Mc MALAYALAM AU LENGTH MARK + {0x0D62, 0x0D63, prExtend}, // Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL + {0x0D82, 0x0D83, prSpacingMark}, // Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA + {0x0DCA, 0x0DCA, prExtend}, // Mn SINHALA SIGN AL-LAKUNA + {0x0DCF, 0x0DCF, prExtend}, // Mc SINHALA VOWEL SIGN AELA-PILLA + {0x0DD0, 0x0DD1, prSpacingMark}, // Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA + {0x0DD2, 0x0DD4, prExtend}, // Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA + {0x0DD6, 0x0DD6, prExtend}, // Mn SINHALA VOWEL SIGN DIGA PAA-PILLA + {0x0DD8, 0x0DDE, prSpacingMark}, // Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA + {0x0DDF, 0x0DDF, prExtend}, // Mc SINHALA VOWEL SIGN GAYANUKITTA + {0x0DF2, 0x0DF3, prSpacingMark}, // Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA + {0x0E31, 0x0E31, prExtend}, // Mn THAI CHARACTER MAI HAN-AKAT + {0x0E33, 0x0E33, prSpacingMark}, // Lo THAI CHARACTER SARA AM + {0x0E34, 0x0E3A, prExtend}, // Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU + {0x0E47, 0x0E4E, prExtend}, // Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN + {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN + {0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM + {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO + {0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS + {0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA + {0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS + {0x0F39, 0x0F39, prExtend}, // Mn TIBETAN MARK TSA -PHRU + {0x0F3E, 0x0F3F, prSpacingMark}, // Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES + {0x0F71, 0x0F7E, prExtend}, // Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO + {0x0F7F, 0x0F7F, prSpacingMark}, // Mc TIBETAN SIGN RNAM BCAD + {0x0F80, 0x0F84, prExtend}, // Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA + {0x0F86, 0x0F87, prExtend}, // Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS + {0x0F8D, 0x0F97, prExtend}, // Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA + {0x0F99, 0x0FBC, prExtend}, // Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA + {0x0FC6, 0x0FC6, prExtend}, // Mn TIBETAN SYMBOL PADMA GDAN + {0x102D, 0x1030, prExtend}, // Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU + {0x1031, 0x1031, prSpacingMark}, // Mc MYANMAR VOWEL SIGN E + {0x1032, 0x1037, prExtend}, // Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW + {0x1039, 0x103A, prExtend}, // Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT + {0x103B, 0x103C, prSpacingMark}, // Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA + {0x103D, 0x103E, prExtend}, // Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA + {0x1056, 0x1057, prSpacingMark}, // Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR + {0x1058, 0x1059, prExtend}, // Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL + {0x105E, 0x1060, prExtend}, // Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA + {0x1071, 0x1074, prExtend}, // Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE + {0x1082, 0x1082, prExtend}, // Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA + {0x1084, 0x1084, prSpacingMark}, // Mc MYANMAR VOWEL SIGN SHAN E + {0x1085, 0x1086, prExtend}, // Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y + {0x108D, 0x108D, prExtend}, // Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE + {0x109D, 0x109D, prExtend}, // Mn MYANMAR VOWEL SIGN AITON AI + {0x1100, 0x115F, prL}, // Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER + {0x1160, 0x11A7, prV}, // Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE + {0x11A8, 0x11FF, prT}, // Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN + {0x135D, 0x135F, prExtend}, // Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK + {0x1712, 0x1714, prExtend}, // Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA + {0x1732, 0x1734, prExtend}, // Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD + {0x1752, 0x1753, prExtend}, // Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U + {0x1772, 0x1773, prExtend}, // Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U + {0x17B4, 0x17B5, prExtend}, // Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA + {0x17B6, 0x17B6, prSpacingMark}, // Mc KHMER VOWEL SIGN AA + {0x17B7, 0x17BD, prExtend}, // Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA + {0x17BE, 0x17C5, prSpacingMark}, // Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU + {0x17C6, 0x17C6, prExtend}, // Mn KHMER SIGN NIKAHIT + {0x17C7, 0x17C8, prSpacingMark}, // Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU + {0x17C9, 0x17D3, prExtend}, // Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT + {0x17DD, 0x17DD, prExtend}, // Mn KHMER SIGN ATTHACAN + {0x180B, 0x180D, prExtend}, // Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE + {0x180E, 0x180E, prControl}, // Cf MONGOLIAN VOWEL SEPARATOR + {0x1885, 0x1886, prExtend}, // Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA + {0x18A9, 0x18A9, prExtend}, // Mn MONGOLIAN LETTER ALI GALI DAGALGA + {0x1920, 0x1922, prExtend}, // Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U + {0x1923, 0x1926, prSpacingMark}, // Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU + {0x1927, 0x1928, prExtend}, // Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O + {0x1929, 0x192B, prSpacingMark}, // Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA + {0x1930, 0x1931, prSpacingMark}, // Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA + {0x1932, 0x1932, prExtend}, // Mn LIMBU SMALL LETTER ANUSVARA + {0x1933, 0x1938, prSpacingMark}, // Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA + {0x1939, 0x193B, prExtend}, // Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I + {0x1A17, 0x1A18, prExtend}, // Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U + {0x1A19, 0x1A1A, prSpacingMark}, // Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O + {0x1A1B, 0x1A1B, prExtend}, // Mn BUGINESE VOWEL SIGN AE + {0x1A55, 0x1A55, prSpacingMark}, // Mc TAI THAM CONSONANT SIGN MEDIAL RA + {0x1A56, 0x1A56, prExtend}, // Mn TAI THAM CONSONANT SIGN MEDIAL LA + {0x1A57, 0x1A57, prSpacingMark}, // Mc TAI THAM CONSONANT SIGN LA TANG LAI + {0x1A58, 0x1A5E, prExtend}, // Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA + {0x1A60, 0x1A60, prExtend}, // Mn TAI THAM SIGN SAKOT + {0x1A62, 0x1A62, prExtend}, // Mn TAI THAM VOWEL SIGN MAI SAT + {0x1A65, 0x1A6C, prExtend}, // Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW + {0x1A6D, 0x1A72, prSpacingMark}, // Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI + {0x1A73, 0x1A7C, prExtend}, // Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN + {0x1A7F, 0x1A7F, prExtend}, // Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT + {0x1AB0, 0x1ABD, prExtend}, // Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW + {0x1ABE, 0x1ABE, prExtend}, // Me COMBINING PARENTHESES OVERLAY + {0x1B00, 0x1B03, prExtend}, // Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG + {0x1B04, 0x1B04, prSpacingMark}, // Mc BALINESE SIGN BISAH + {0x1B34, 0x1B34, prExtend}, // Mn BALINESE SIGN REREKAN + {0x1B35, 0x1B35, prExtend}, // Mc BALINESE VOWEL SIGN TEDUNG + {0x1B36, 0x1B3A, prExtend}, // Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA + {0x1B3B, 0x1B3B, prSpacingMark}, // Mc BALINESE VOWEL SIGN RA REPA TEDUNG + {0x1B3C, 0x1B3C, prExtend}, // Mn BALINESE VOWEL SIGN LA LENGA + {0x1B3D, 0x1B41, prSpacingMark}, // Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG + {0x1B42, 0x1B42, prExtend}, // Mn BALINESE VOWEL SIGN PEPET + {0x1B43, 0x1B44, prSpacingMark}, // Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG + {0x1B6B, 0x1B73, prExtend}, // Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG + {0x1B80, 0x1B81, prExtend}, // Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR + {0x1B82, 0x1B82, prSpacingMark}, // Mc SUNDANESE SIGN PANGWISAD + {0x1BA1, 0x1BA1, prSpacingMark}, // Mc SUNDANESE CONSONANT SIGN PAMINGKAL + {0x1BA2, 0x1BA5, prExtend}, // Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU + {0x1BA6, 0x1BA7, prSpacingMark}, // Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG + {0x1BA8, 0x1BA9, prExtend}, // Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG + {0x1BAA, 0x1BAA, prSpacingMark}, // Mc SUNDANESE SIGN PAMAAEH + {0x1BAB, 0x1BAD, prExtend}, // Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA + {0x1BE6, 0x1BE6, prExtend}, // Mn BATAK SIGN TOMPI + {0x1BE7, 0x1BE7, prSpacingMark}, // Mc BATAK VOWEL SIGN E + {0x1BE8, 0x1BE9, prExtend}, // Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE + {0x1BEA, 0x1BEC, prSpacingMark}, // Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O + {0x1BED, 0x1BED, prExtend}, // Mn BATAK VOWEL SIGN KARO O + {0x1BEE, 0x1BEE, prSpacingMark}, // Mc BATAK VOWEL SIGN U + {0x1BEF, 0x1BF1, prExtend}, // Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H + {0x1BF2, 0x1BF3, prSpacingMark}, // Mc [2] BATAK PANGOLAT..BATAK PANONGONAN + {0x1C24, 0x1C2B, prSpacingMark}, // Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU + {0x1C2C, 0x1C33, prExtend}, // Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T + {0x1C34, 0x1C35, prSpacingMark}, // Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG + {0x1C36, 0x1C37, prExtend}, // Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA + {0x1CD0, 0x1CD2, prExtend}, // Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA + {0x1CD4, 0x1CE0, prExtend}, // Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA + {0x1CE1, 0x1CE1, prSpacingMark}, // Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA + {0x1CE2, 0x1CE8, prExtend}, // Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL + {0x1CED, 0x1CED, prExtend}, // Mn VEDIC SIGN TIRYAK + {0x1CF4, 0x1CF4, prExtend}, // Mn VEDIC TONE CANDRA ABOVE + {0x1CF7, 0x1CF7, prSpacingMark}, // Mc VEDIC SIGN ATIKRAMA + {0x1CF8, 0x1CF9, prExtend}, // Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE + {0x1DC0, 0x1DF9, prExtend}, // Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW + {0x1DFB, 0x1DFF, prExtend}, // Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW + {0x200B, 0x200B, prControl}, // Cf ZERO WIDTH SPACE + {0x200C, 0x200C, prExtend}, // Cf ZERO WIDTH NON-JOINER + {0x200D, 0x200D, prZWJ}, // Cf ZERO WIDTH JOINER + {0x200E, 0x200F, prControl}, // Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK + {0x2028, 0x2028, prControl}, // Zl LINE SEPARATOR + {0x2029, 0x2029, prControl}, // Zp PARAGRAPH SEPARATOR + {0x202A, 0x202E, prControl}, // Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE + {0x203C, 0x203C, prExtendedPictographic}, // 1.1 [1] (‼️) double exclamation mark + {0x2049, 0x2049, prExtendedPictographic}, // 3.0 [1] (⁉️) exclamation question mark + {0x2060, 0x2064, prControl}, // Cf [5] WORD JOINER..INVISIBLE PLUS + {0x2065, 0x2065, prControl}, // Cn + {0x2066, 0x206F, prControl}, // Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES + {0x20D0, 0x20DC, prExtend}, // Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE + {0x20DD, 0x20E0, prExtend}, // Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH + {0x20E1, 0x20E1, prExtend}, // Mn COMBINING LEFT RIGHT ARROW ABOVE + {0x20E2, 0x20E4, prExtend}, // Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE + {0x20E5, 0x20F0, prExtend}, // Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE + {0x2122, 0x2122, prExtendedPictographic}, // 1.1 [1] (™️) trade mark + {0x2139, 0x2139, prExtendedPictographic}, // 3.0 [1] (ℹ️) information + {0x2194, 0x2199, prExtendedPictographic}, // 1.1 [6] (↔️..↙️) left-right arrow..down-left arrow + {0x21A9, 0x21AA, prExtendedPictographic}, // 1.1 [2] (↩️..↪️) right arrow curving left..left arrow curving right + {0x231A, 0x231B, prExtendedPictographic}, // 1.1 [2] (⌚..⌛) watch..hourglass done + {0x2328, 0x2328, prExtendedPictographic}, // 1.1 [1] (⌨️) keyboard + {0x2388, 0x2388, prExtendedPictographic}, // 3.0 [1] (⎈) HELM SYMBOL + {0x23CF, 0x23CF, prExtendedPictographic}, // 4.0 [1] (⏏️) eject button + {0x23E9, 0x23F3, prExtendedPictographic}, // 6.0 [11] (⏩..⏳) fast-forward button..hourglass not done + {0x23F8, 0x23FA, prExtendedPictographic}, // 7.0 [3] (⏸️..⏺️) pause button..record button + {0x24C2, 0x24C2, prExtendedPictographic}, // 1.1 [1] (Ⓜ️) circled M + {0x25AA, 0x25AB, prExtendedPictographic}, // 1.1 [2] (▪️..▫️) black small square..white small square + {0x25B6, 0x25B6, prExtendedPictographic}, // 1.1 [1] (▶️) play button + {0x25C0, 0x25C0, prExtendedPictographic}, // 1.1 [1] (◀️) reverse button + {0x25FB, 0x25FE, prExtendedPictographic}, // 3.2 [4] (◻️..◾) white medium square..black medium-small square + {0x2600, 0x2605, prExtendedPictographic}, // 1.1 [6] (☀️..★) sun..BLACK STAR + {0x2607, 0x2612, prExtendedPictographic}, // 1.1 [12] (☇..☒) LIGHTNING..BALLOT BOX WITH X + {0x2614, 0x2615, prExtendedPictographic}, // 4.0 [2] (☔..☕) umbrella with rain drops..hot beverage + {0x2616, 0x2617, prExtendedPictographic}, // 3.2 [2] (☖..☗) WHITE SHOGI PIECE..BLACK SHOGI PIECE + {0x2618, 0x2618, prExtendedPictographic}, // 4.1 [1] (☘️) shamrock + {0x2619, 0x2619, prExtendedPictographic}, // 3.0 [1] (☙) REVERSED ROTATED FLORAL HEART BULLET + {0x261A, 0x266F, prExtendedPictographic}, // 1.1 [86] (☚..♯) BLACK LEFT POINTING INDEX..MUSIC SHARP SIGN + {0x2670, 0x2671, prExtendedPictographic}, // 3.0 [2] (♰..♱) WEST SYRIAC CROSS..EAST SYRIAC CROSS + {0x2672, 0x267D, prExtendedPictographic}, // 3.2 [12] (♲..♽) UNIVERSAL RECYCLING SYMBOL..PARTIALLY-RECYCLED PAPER SYMBOL + {0x267E, 0x267F, prExtendedPictographic}, // 4.1 [2] (♾️..♿) infinity..wheelchair symbol + {0x2680, 0x2685, prExtendedPictographic}, // 3.2 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6 + {0x2690, 0x2691, prExtendedPictographic}, // 4.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG + {0x2692, 0x269C, prExtendedPictographic}, // 4.1 [11] (⚒️..⚜️) hammer and pick..fleur-de-lis + {0x269D, 0x269D, prExtendedPictographic}, // 5.1 [1] (⚝) OUTLINED WHITE STAR + {0x269E, 0x269F, prExtendedPictographic}, // 5.2 [2] (⚞..⚟) THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT + {0x26A0, 0x26A1, prExtendedPictographic}, // 4.0 [2] (⚠️..⚡) warning..high voltage + {0x26A2, 0x26B1, prExtendedPictographic}, // 4.1 [16] (⚢..⚱️) DOUBLED FEMALE SIGN..funeral urn + {0x26B2, 0x26B2, prExtendedPictographic}, // 5.0 [1] (⚲) NEUTER + {0x26B3, 0x26BC, prExtendedPictographic}, // 5.1 [10] (⚳..⚼) CERES..SESQUIQUADRATE + {0x26BD, 0x26BF, prExtendedPictographic}, // 5.2 [3] (⚽..⚿) soccer ball..SQUARED KEY + {0x26C0, 0x26C3, prExtendedPictographic}, // 5.1 [4] (⛀..⛃) WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING + {0x26C4, 0x26CD, prExtendedPictographic}, // 5.2 [10] (⛄..⛍) snowman without snow..DISABLED CAR + {0x26CE, 0x26CE, prExtendedPictographic}, // 6.0 [1] (⛎) Ophiuchus + {0x26CF, 0x26E1, prExtendedPictographic}, // 5.2 [19] (⛏️..⛡) pick..RESTRICTED LEFT ENTRY-2 + {0x26E2, 0x26E2, prExtendedPictographic}, // 6.0 [1] (⛢) ASTRONOMICAL SYMBOL FOR URANUS + {0x26E3, 0x26E3, prExtendedPictographic}, // 5.2 [1] (⛣) HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE + {0x26E4, 0x26E7, prExtendedPictographic}, // 6.0 [4] (⛤..⛧) PENTAGRAM..INVERTED PENTAGRAM + {0x26E8, 0x26FF, prExtendedPictographic}, // 5.2 [24] (⛨..⛿) BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE + {0x2700, 0x2700, prExtendedPictographic}, // 7.0 [1] (✀) BLACK SAFETY SCISSORS + {0x2701, 0x2704, prExtendedPictographic}, // 1.1 [4] (✁..✄) UPPER BLADE SCISSORS..WHITE SCISSORS + {0x2705, 0x2705, prExtendedPictographic}, // 6.0 [1] (✅) check mark button + {0x2708, 0x2709, prExtendedPictographic}, // 1.1 [2] (✈️..✉️) airplane..envelope + {0x270A, 0x270B, prExtendedPictographic}, // 6.0 [2] (✊..✋) raised fist..raised hand + {0x270C, 0x2712, prExtendedPictographic}, // 1.1 [7] (✌️..✒️) victory hand..black nib + {0x2714, 0x2714, prExtendedPictographic}, // 1.1 [1] (✔️) check mark + {0x2716, 0x2716, prExtendedPictographic}, // 1.1 [1] (✖️) multiplication sign + {0x271D, 0x271D, prExtendedPictographic}, // 1.1 [1] (✝️) latin cross + {0x2721, 0x2721, prExtendedPictographic}, // 1.1 [1] (✡️) star of David + {0x2728, 0x2728, prExtendedPictographic}, // 6.0 [1] (✨) sparkles + {0x2733, 0x2734, prExtendedPictographic}, // 1.1 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star + {0x2744, 0x2744, prExtendedPictographic}, // 1.1 [1] (❄️) snowflake + {0x2747, 0x2747, prExtendedPictographic}, // 1.1 [1] (❇️) sparkle + {0x274C, 0x274C, prExtendedPictographic}, // 6.0 [1] (❌) cross mark + {0x274E, 0x274E, prExtendedPictographic}, // 6.0 [1] (❎) cross mark button + {0x2753, 0x2755, prExtendedPictographic}, // 6.0 [3] (❓..❕) question mark..white exclamation mark + {0x2757, 0x2757, prExtendedPictographic}, // 5.2 [1] (❗) exclamation mark + {0x2763, 0x2767, prExtendedPictographic}, // 1.1 [5] (❣️..❧) heart exclamation..ROTATED FLORAL HEART BULLET + {0x2795, 0x2797, prExtendedPictographic}, // 6.0 [3] (➕..➗) plus sign..division sign + {0x27A1, 0x27A1, prExtendedPictographic}, // 1.1 [1] (➡️) right arrow + {0x27B0, 0x27B0, prExtendedPictographic}, // 6.0 [1] (➰) curly loop + {0x27BF, 0x27BF, prExtendedPictographic}, // 6.0 [1] (➿) double curly loop + {0x2934, 0x2935, prExtendedPictographic}, // 3.2 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down + {0x2B05, 0x2B07, prExtendedPictographic}, // 4.0 [3] (⬅️..⬇️) left arrow..down arrow + {0x2B1B, 0x2B1C, prExtendedPictographic}, // 5.1 [2] (⬛..⬜) black large square..white large square + {0x2B50, 0x2B50, prExtendedPictographic}, // 5.1 [1] (⭐) star + {0x2B55, 0x2B55, prExtendedPictographic}, // 5.2 [1] (⭕) hollow red circle + {0x2CEF, 0x2CF1, prExtend}, // Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS + {0x2D7F, 0x2D7F, prExtend}, // Mn TIFINAGH CONSONANT JOINER + {0x2DE0, 0x2DFF, prExtend}, // Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS + {0x302A, 0x302D, prExtend}, // Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK + {0x302E, 0x302F, prExtend}, // Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK + {0x3030, 0x3030, prExtendedPictographic}, // 1.1 [1] (〰️) wavy dash + {0x303D, 0x303D, prExtendedPictographic}, // 3.2 [1] (〽️) part alternation mark + {0x3099, 0x309A, prExtend}, // Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + {0x3297, 0x3297, prExtendedPictographic}, // 1.1 [1] (㊗️) Japanese “congratulations” button + {0x3299, 0x3299, prExtendedPictographic}, // 1.1 [1] (㊙️) Japanese “secret” button + {0xA66F, 0xA66F, prExtend}, // Mn COMBINING CYRILLIC VZMET + {0xA670, 0xA672, prExtend}, // Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN + {0xA674, 0xA67D, prExtend}, // Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK + {0xA69E, 0xA69F, prExtend}, // Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E + {0xA6F0, 0xA6F1, prExtend}, // Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS + {0xA802, 0xA802, prExtend}, // Mn SYLOTI NAGRI SIGN DVISVARA + {0xA806, 0xA806, prExtend}, // Mn SYLOTI NAGRI SIGN HASANTA + {0xA80B, 0xA80B, prExtend}, // Mn SYLOTI NAGRI SIGN ANUSVARA + {0xA823, 0xA824, prSpacingMark}, // Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I + {0xA825, 0xA826, prExtend}, // Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E + {0xA827, 0xA827, prSpacingMark}, // Mc SYLOTI NAGRI VOWEL SIGN OO + {0xA880, 0xA881, prSpacingMark}, // Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA + {0xA8B4, 0xA8C3, prSpacingMark}, // Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU + {0xA8C4, 0xA8C5, prExtend},