diff options
author | Jesse Duffield <jessedduffield@gmail.com> | 2021-02-09 09:47:57 +1100 |
---|---|---|
committer | github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> | 2021-02-08 22:55:11 +0000 |
commit | 56a573de86a9d6257f426cee4d8ca895880c9681 (patch) | |
tree | b600999e2c1c85abb45779e06f639a463df48600 | |
parent | e7fff2529ce7243d3d1b690d2ce553dd0de040ec (diff) |
support wide characters in the editor
-rw-r--r-- | go.mod | 6 | ||||
-rw-r--r-- | go.sum | 12 | ||||
-rw-r--r-- | vendor/github.com/jesseduffield/gocui/edit.go | 19 | ||||
-rw-r--r-- | vendor/github.com/mattn/go-runewidth/go.mod | 2 | ||||
-rw-r--r-- | vendor/github.com/mattn/go-runewidth/go.sum | 2 | ||||
-rw-r--r-- | vendor/github.com/mattn/go-runewidth/runewidth.go | 80 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/LICENSE.txt | 21 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/README.md | 62 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/doc.go | 8 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/go.mod | 3 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/grapheme.go | 268 | ||||
-rw-r--r-- | vendor/github.com/rivo/uniseg/properties.go | 1658 | ||||
-rw-r--r-- | vendor/modules.txt | 9 |
13 files changed, 2093 insertions, 57 deletions
@@ -19,17 +19,19 @@ require ( github.com/imdario/mergo v0.3.11 github.com/integrii/flaggy v1.4.0 github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4 - github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7 + github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe github.com/jesseduffield/yaml v2.1.0+incompatible github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/mattn/go-colorable v0.1.7 // indirect - github.com/mattn/go-runewidth v0.0.9 + github.com/mattn/go-runewidth v0.0.10 github.com/mgutz/str v1.2.0 + github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8 // indirect github.com/onsi/ginkgo v1.10.3 // indirect github.com/onsi/gomega v1.7.1 // indirect + github.com/rivo/uniseg v0.2.0 // indirect github.com/sahilm/fuzzy v0.1.0 github.com/sirupsen/logrus v1.4.2 github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad @@ -65,8 +65,12 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOl github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4 h1:GOQrmaE8i+KEdB8NzAegKYd4tPn/inM0I1uo0NXFerg= github.com/jesseduffield/go-git/v5 v5.1.2-0.20201006095850-341962be15a4/go.mod h1:nGNEErzf+NRznT+N2SWqmHnDnF9aLgANB1CUNEan09o= +github.com/jesseduffield/gocui v0.3.1-0.20161105104656-d666c9f652af h1:9ZI/QyVOerYYeqMt4svycU2Lz0WvxNHCpHHbsFsi/oA= +github.com/jesseduffield/gocui v0.3.1-0.20161105104656-d666c9f652af/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7 h1:K3MGrjmpPtIhfXmKh/zsIF0CdmNKOkjpIwcUfAa/J2A= github.com/jesseduffield/gocui v0.3.1-0.20201010224802-8a6768078fd7/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= +github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d h1:Jto9W9w8CFwZiAYXa7LsHDEOb5cKCA1f5LOL1A3jva4= +github.com/jesseduffield/gocui v0.3.1-0.20210208224444-2eecee85583d/go.mod h1:2RtZznzYKt8RLRwvFiSkXjU0Ei8WwHdubgnlaYH47dw= github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe h1:qsVhCf2RFyyKIUe/+gJblbCpXMUki9rZrHuEctg6M/E= github.com/jesseduffield/termbox-go v0.0.0-20200823212418-a2289ed6aafe/go.mod h1:anMibpZtqNxjDbxrcDEAwSdaJ37vyUeM1f/M4uekib4= github.com/jesseduffield/yaml v2.1.0+incompatible h1:HWQJ1gIv2zHKbDYNp0Jwjlj24K8aqpFHnMCynY1EpmE= @@ -103,12 +107,16 @@ github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHX github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg= +github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mgutz/str v1.2.0 h1:4IzWSdIz9qPQWLfKZ0rJcV0jcUDpxvP4JVZ4GXQyvSw= github.com/mgutz/str v1.2.0/go.mod h1:w1v0ofgLaJdoD0HpQ3fycxKD1WtxpjSo151pK/31q6w= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8 h1:3vzIuru1svOK2sXlg4XcrO3KkGRneIejmfQfR+ptSW8= +github.com/nsf/termbox-go v0.0.0-20210114135735-d04385b850e8/go.mod h1:T0cTdVuOwf7pHQNtfhnEbzHbcNyCEcVU4YPpouCbVxo= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.10.3 h1:OoxbjfXVZyod1fmWYhI7SEyaD8B00ynP3T+D5GiyHOY= github.com/onsi/ginkgo v1.10.3/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -118,6 +126,10 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/sahilm/fuzzy v0.1.0 h1:FzWGaw2Opqyu+794ZQ9SYifWv2EIXpwP4q8dY1kDAwI= github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= diff --git a/vendor/github.com/jesseduffield/gocui/edit.go b/vendor/github.com/jesseduffield/gocui/edit.go index f8eb08a57..93717f69e 100644 --- a/vendor/github.com/jesseduffield/gocui/edit.go +++ b/vendor/github.com/jesseduffield/gocui/edit.go @@ -343,17 +343,18 @@ func (v *View) writeRune(x, y int, ch rune) error { } olen := len(v.lines[y]) + w := runewidth.RuneWidth(ch) var s []cell if x >= len(v.lines[y]) { - s = make([]cell, x-len(v.lines[y])+1) + s = make([]cell, x-len(v.lines[y])+w) } else if !v.Overwrite { - s = make([]cell, 1) + s = make([]cell, w) } v.lines[y] = append(v.lines[y], s...) - if !v.Overwrite || (v.Overwrite && x >= olen-1) { - copy(v.lines[y][x+1:], v.lines[y][x:]) + if !v.Overwrite || (v.Overwrite && x >= olen-w) { + copy(v.lines[y][x+w:], v.lines[y][x:]) } v.lines[y][x] = cell{ fgColor: v.FgColor, @@ -361,6 +362,14 @@ func (v *View) writeRune(x, y int, ch rune) error { chr: ch, } + for i := 1; i < w; i++ { + v.lines[y][x+i] = cell{ + fgColor: v.FgColor, + bgColor: v.BgColor, + chr: '\x00', + } + } + return nil } @@ -384,7 +393,7 @@ func (v *View) deleteRune(x, y int) (int, error) { w := runewidth.RuneWidth(v.lines[y][i].chr) tw += w if tw > x { - v.lines[y] = append(v.lines[y][:i], v.lines[y][i+1:]...) + v.lines[y] = append(v.lines[y][:i], v.lines[y][i+w:]...) return w, nil } diff --git a/vendor/github.com/mattn/go-runewidth/go.mod b/vendor/github.com/mattn/go-runewidth/go.mod index fa7f4d864..8a9d524ec 100644 --- a/vendor/github.com/mattn/go-runewidth/go.mod +++ b/vendor/github.com/mattn/go-runewidth/go.mod @@ -1,3 +1,5 @@ module github.com/mattn/go-runewidth go 1.9 + +require github.com/rivo/uniseg v0.1.0 diff --git a/vendor/github.com/mattn/go-runewidth/go.sum b/vendor/github.com/mattn/go-runewidth/go.sum new file mode 100644 index 000000000..02135660b --- /dev/null +++ b/vendor/github.com/mattn/go-runewidth/go.sum @@ -0,0 +1,2 @@ +github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= diff --git a/vendor/github.com/mattn/go-runewidth/runewidth.go b/vendor/github.com/mattn/go-runewidth/runewidth.go index 19f8e0449..f3871a624 100644 --- a/vendor/github.com/mattn/go-runewidth/runewidth.go +++ b/vendor/github.com/mattn/go-runewidth/runewidth.go @@ -2,6 +2,8 @@ package runewidth import ( "os" + + "github.com/rivo/uniseg" ) //go:generate go run script/generate.go @@ -10,9 +12,6 @@ var ( // EastAsianWidth will be set true if the current locale is CJK EastAsianWidth bool - // ZeroWidthJoiner is flag to set to use UTR#51 ZWJ - ZeroWidthJoiner bool - // DefaultCondition is a condition in current locale DefaultCondition = &Condition{} ) @@ -30,7 +29,6 @@ func handleEnv() { } // update DefaultCondition DefaultCondition.EastAsianWidth = EastAsianWidth - DefaultCondition.ZeroWidthJoiner = ZeroWidthJoiner } type interval struct { @@ -85,15 +83,13 @@ var nonprint = table{ // Condition have flag EastAsianWidth whether the current locale is CJK or not. type Condition struct { - EastAsianWidth bool - ZeroWidthJoiner bool + EastAsianWidth bool } // NewCondition return new instance of Condition which is current locale. func NewCondition() *Condition { return &Condition{ - EastAsianWidth: EastAsianWidth, - ZeroWidthJoiner: ZeroWidthJoiner, + EastAsianWidth: EastAsianWidth, } } @@ -110,38 +106,20 @@ func (c *Condition) RuneWidth(r rune) int { } } -func (c *Condition) stringWidth(s string) (width int) { - for _, r := range []rune(s) { - width += c.RuneWidth(r) - } - return width -} - -func (c *Condition) stringWidthZeroJoiner(s string) (width int) { - r1, r2 := rune(0), rune(0) - for _, r := range []rune(s) { - if r == 0xFE0E || r == 0xFE0F { - continue - } - w := c.RuneWidth(r) - if r2 == 0x200D && inTables(r, emoji) && inTables(r1, emoji) { - if width < w { - width = w - } - } else { - width += w - } - r1, r2 = r2, r - } - return width -} - // StringWidth return width as you can see func (c *Condition) StringWidth(s string) (width int) { - if c.ZeroWidthJoiner { - return c.stringWidthZeroJoiner(s) + g := uniseg.NewGraphemes(s) + for g.Next() { + var chWidth int + for _, r := range g.Runes() { + chWidth = c.RuneWidth(r) + if chWidth > 0 { + break // Our best guess at this point is to use the width of the first non-zero-width rune. + } + } + width += chWidth } - return c.stringWidth(s) + return } // Truncate return string truncated with w cells @@ -149,19 +127,25 @@ func (c *Condition) Truncate(s string, w int, tail string) string { if c.StringWidth(s) <= w { return s } - r := []rune(s) - tw := c.StringWidth(tail) - w -= tw - width := 0 - i := 0 - for ; i < len(r); i++ { - cw := c.RuneWidth(r[i]) - if width+cw > w { + w -= c.StringWidth(tail) + var width int + pos := len(s) + g := uniseg.NewGraphemes(s) + for g.Next() { + var chWidth int + for _, r := range g.Runes() { + chWidth = c.RuneWidth(r) + if chWidth > 0 { + break // See StringWidth() for details. + } + } + if width+chWidth > w { + pos, _ = g.Positions() break } - width += cw + width += chWidth } - return string(r[0:i]) + tail + return s[:pos] + tail } // Wrap return string wrapped with w cells @@ -169,7 +153,7 @@ func (c *Condition) Wrap(s string, w int) string { width := 0 out := "" for _, r := range []rune(s) { - cw := RuneWidth(r) + cw := c.RuneWidth(r) if r == '\n' { out += string(r) width = 0 diff --git a/vendor/github.com/rivo/uniseg/LICENSE.txt b/vendor/github.com/rivo/uniseg/LICENSE.txt new file mode 100644 index 000000000..5040f1ef8 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Oliver Kuederle + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/rivo/uniseg/README.md b/vendor/github.com/rivo/uniseg/README.md new file mode 100644 index 000000000..f8da293e1 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/README.md @@ -0,0 +1,62 @@ +# Unicode Text Segmentation for Go + +[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg) +[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg) + +This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0). + +At this point, only the determination of grapheme cluster boundaries is implemented. + +## Background + +In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples: + +|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters| +|-|-|-|-| +|Käse|6 bytes: `4b 61 cc 88 73 65`|5 code points: `4b 61 308 73 65`|4 clusters: `[4b],[61 308],[73],[65]`| +|🏳️🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`| +|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`| + +This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit. + +## Installation + +```bash +go get github.com/rivo/uniseg +``` + +## Basic Example + +```go +package uniseg + +import ( + "fmt" + + "github.com/rivo/uniseg" +) + +func main() { + gr := uniseg.NewGraphemes("👍🏼!") + for gr.Next() { + fmt.Printf("%x ", gr.Runes()) + } + // Output: [1f44d 1f3fc] [21] +} +``` + +## Documentation + +Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation. + +## Dependencies + +This package does not depend on any packages outside the standard library. + +## Your Feedback + +Add your issue here on GitHub. Feel free to get in touch if you have any questions. + +## Version + +Version tags will be introduced once Golang modules are official. Consider this version 0.1. diff --git a/vendor/github.com/rivo/uniseg/doc.go b/vendor/github.com/rivo/uniseg/doc.go new file mode 100644 index 000000000..60c737d7b --- /dev/null +++ b/vendor/github.com/rivo/uniseg/doc.go @@ -0,0 +1,8 @@ +/* +Package uniseg implements Unicode Text Segmentation according to Unicode +Standard Annex #29 (http://unicode.org/reports/tr29/). + +At this point, only the determination of grapheme cluster boundaries is +implemented. +*/ +package uniseg diff --git a/vendor/github.com/rivo/uniseg/go.mod b/vendor/github.com/rivo/uniseg/go.mod new file mode 100644 index 000000000..a54280b2d --- /dev/null +++ b/vendor/github.com/rivo/uniseg/go.mod @@ -0,0 +1,3 @@ +module github.com/rivo/uniseg + +go 1.12 diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go new file mode 100644 index 000000000..207157f5e --- /dev/null +++ b/vendor/github.com/rivo/uniseg/grapheme.go @@ -0,0 +1,268 @@ +package uniseg + +import "unicode/utf8" + +// The states of the grapheme cluster parser. +const ( + grAny = iota + grCR + grControlLF + grL + grLVV + grLVTT + grPrepend + grExtendedPictographic + grExtendedPictographicZWJ + grRIOdd + grRIEven +) + +// The grapheme cluster parser's breaking instructions. +const ( + grNoBoundary = iota + grBoundary +) + +// The grapheme cluster parser's state transitions. Maps (state, property) to +// (new state, breaking instruction, rule number). The breaking instruction +// always refers to the boundary between the last and next code point. +// +// This map is queried as follows: +// +// 1. Find specific state + specific property. Stop if found. +// 2. Find specific state + any property. +// 3. Find any state + specific property. +// 4. If only (2) or (3) (but not both) was found, stop. +// 5. If both (2) and (3) were found, use state and breaking instruction from +// the transition with the lower rule number, prefer (3) if rule numbers +// are equal. Stop. +// 6. Assume grAny and grBoundary. +var grTransitions = map[[2]int][3]int{ + // GB5 + {grAny, prCR}: {grCR, grBoundary, 50}, + {grAny, prLF}: {grControlLF, grBoundary, 50}, + {grAny, prControl}: {grControlLF, grBoundary, 50}, + + // GB4 + {grCR, prAny}: {grAny, grBoundary, 40}, + {grControlLF, prAny}: {grAny, grBoundary, 40}, + + // GB3. + {grCR, prLF}: {grAny, grNoBoundary, 30}, + + // GB6. + {grAny, prL}: {grL, grBoundary, 9990}, + {grL, prL}: {grL, grNoBoundary, 60}, + {grL, prV}: {grLVV, grNoBoundary, 60}, + {grL, prLV}: {grLVV, grNoBoundary, 60}, + {grL, prLVT}: {grLVTT, grNoBoundary, 60}, + + // GB7. + {grAny, prLV}: {grLVV, grBoundary, 9990}, + {grAny, prV}: {grLVV, grBoundary, 9990}, + {grLVV, prV}: {grLVV, grNoBoundary, 70}, + {grLVV, prT}: {grLVTT, grNoBoundary, 70}, + + // GB8. + {grAny, prLVT}: {grLVTT, grBoundary, 9990}, + {grAny, prT}: {grLVTT, grBoundary, 9990}, + {grLVTT, prT}: {grLVTT, grNoBoundary, 80}, + + // GB9. + {grAny, prExtend}: {grAny, grNoBoundary, 90}, + {grAny, prZWJ}: {grAny, grNoBoundary, 90}, + + // GB9a. + {grAny, prSpacingMark}: {grAny, grNoBoundary, 91}, + + // GB9b. + {grAny, prPreprend}: {grPrepend, grBoundary, 9990}, + {grPrepend, prAny}: {grAny, grNoBoundary, 92}, + + // GB11. + {grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990}, + {grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110}, + {grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110}, + {grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110}, + + // GB12 / GB13. + {grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990}, + {grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120}, + {grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120}, +} + +// Graphemes implements an iterator over Unicode extended grapheme clusters, +// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to +// "user-perceived characters". These characters often consist of multiple +// code points (e.g. the "woman kissing woman" emoji consists of 8 code points: +// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ + +// woman) and the rules described in Annex #29 must be applied to group those +// code points into clusters perceived by the user as one character. +type Graphemes struct { + // The code points over which this class iterates. + codePoints []rune + + // The (byte-based) indices of the code points into the original string plus + // len(original string). Thus, len(indices) = len(codePoints) + 1. + indices []int + + // The current grapheme cluster to be returned. These are indices into + // codePoints/indices. If start == end, we either haven't started iterating + // yet (0) or the iteration has already completed (1). + start, end int + + // The index of the next code point to be parsed. + pos int + + // The current state of the code point parser. + state int +} + +// NewGraphemes returns a new grapheme cluster iterator. +func NewGraphemes(s string) *Graphemes { + l := utf8.RuneCountInString(s) + codePoints := make([]rune, l) + indices := make([]int, l+1) + i := 0 + for pos, r := range s { + codePoints[i] = r + indices[i] = pos + i++ + } + indices[l] = len(s) + g := &Graphemes{ + codePoints: codePoints, + indices: indices, + } + g.Next() // Parse ahead. + return g +} + +// Next advances the iterator by one grapheme cluster and returns false if no +// clusters are left. This function must be called before the first cluster is +// accessed. +func (g *Graphemes) Next() bool { + g.start = g.end + + // The state transition gives us a boundary instruction BEFORE the next code + // point so we always need to stay ahead by one code point. + + // Parse the next code point. + for g.pos <= len(g.codePoints) { + // GB2. + if g.pos == len(g.codePoints) { + g.end = g.pos + g.pos++ + break + } + + // Determine the property of the next character. + nextProperty := property(g.codePoints[g.pos]) + g.pos++ + + // Find the applicable transition. + var boundary bool + transition, ok := grTransitions[[2]int{g.state, nextProperty}] + if ok { + // We have a specific transition. We'll use it. + g.state = transition[0] + boundary = transition[1] == grBoundary + } else { + // No specific transition found. Try the less specific ones. + transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}] + transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}] + if okAnyProp && okAnyState { + // Both apply. We'll use a mix (see comments for grTransitions). + g.state = transAnyState[0] + boundary = transAnyState[1] == grBoundary + if transAnyProp[2] < transAnyState[2] { + g.state = transAnyProp[0] + boundary = transAnyProp[1] == grBoundary + } + } else if okAnyProp { + // We only have a specific state. + g.state = transAnyProp[0] + boundary = transAnyProp[1] == grBoundary + // This branch will probably never be reached because okAnyState will + // always be true given the current transition map. But we keep it here + // for future modifications to the transition map where this may not be + // true anymore. + } else if okAnyState { + // We only have a specific property. + g.state = transAnyState[0] + boundary = transAnyState[1] == grBoundary + } else { + // No known transition. GB999: Any x Any. + g.state = grAny + boundary = true + } + } + + // If we found a cluster boundary, let's stop here. The current cluster will + // be the one that just ended. + if g.pos-1 == 0 /* GB1 */ || boundary { + g.end = g.pos - 1 + break + } + } + + return g.start != g.end +} + +// Runes returns a slice of runes (code points) which corresponds to the current +// grapheme cluster. If the iterator is already past the end or Next() has not +// yet been called, nil is returned. +func (g *Graphemes) Runes() []rune { + if g.start == g.end { + return nil + } + return g.codePoints[g.start:g.end] +} + +// Str returns a substring of the original string which corresponds to the +// current grapheme cluster. If the iterator is already past the end or Next() +// has not yet been called, an empty string is returned. +func (g *Graphemes) Str() string { + if g.start == g.end { + return "" + } + return string(g.codePoints[g.start:g.end]) +} + +// Bytes returns a byte slice which corresponds to the current grapheme cluster. +// If the iterator is already past the end or Next() has not yet been called, +// nil is returned. +func (g *Graphemes) Bytes() []byte { + if g.start == g.end { + return nil + } + return []byte(string(g.codePoints[g.start:g.end])) +} + +// Positions returns the interval of the current grapheme cluster as byte +// positions into the original string. The first returned value "from" indexes +// the first byte and the second returned value "to" indexes the first byte that +// is not included anymore, i.e. str[from:to] is the current grapheme cluster of +// the original string "str". If Next() has not yet been called, both values are +// 0. If the iterator is already past the end, both values are 1. +func (g *Graphemes) Positions() (int, int) { + return g.indices[g.start], g.indices[g.end] +} + +// Reset puts the iterator into its initial state such that the next call to +// Next() sets it to the first grapheme cluster again. +func (g *Graphemes) Reset() { + g.start, g.end, g.pos, g.state = 0, 0, 0, grAny + g.Next() // Parse ahead again. +} + +// GraphemeClusterCount returns the number of user-perceived characters +// (grapheme clusters) for the given string. To calculate this number, it +// iterates through the string using the Graphemes iterator. +func GraphemeClusterCount(s string) (n int) { + g := NewGraphemes(s) + for g.Next() { + n++ + } + return +} diff --git a/vendor/github.com/rivo/uniseg/properties.go b/vendor/github.com/rivo/uniseg/properties.go new file mode 100644 index 000000000..a75ab5883 --- /dev/null +++ b/vendor/github.com/rivo/uniseg/properties.go @@ -0,0 +1,1658 @@ +package uniseg + +// The unicode properties. Only the ones needed in the context of this package +// are included. +const ( + prAny = iota + prPreprend + prCR + prLF + prControl + prExtend + prRegionalIndicator + prSpacingMark + prL + prV + prT + prLV + prLVT + prZWJ + prExtendedPictographic +) + +// Maps code point ranges to their properties. In the context of this package, +// any code point that is not contained may map to "prAny". The code point +// ranges in this slice are numerically sorted. +// +// These ranges were taken from +// http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt +// as well as +// https://unicode.org/Public/emoji/latest/emoji-data.txt +// ("Extended_Pictographic" only) on March 11, 2019. See +// https://www.unicode.org/license.html for the Unicode license agreement. +var codePoints = [][3]int{ + {0x0000, 0x0009, prControl}, // Cc [10] <control-0000>..<control-0009> + {0x000A, 0x000A, prLF}, // Cc <control-000A> + {0x000B, 0x000C, prControl}, // Cc [2] <control-000B>..<control-000C> + {0x000D, 0x000D, prCR}, // Cc <control-000D> + {0x000E, 0x001F, prControl}, // Cc [18] <control-000E>..<control-001F> + {0x007F, 0x009F, prControl}, // Cc [33] <control-007F>..<control-009F> + {0x00A9, 0x00A9, prExtendedPictographic}, // 1.1 [1] (©️) copyright + {0x00AD, 0x00AD, prControl}, // Cf SOFT HYPHEN + {0x00AE, 0x00AE, prExtendedPictographic}, // 1.1 [1] (®️) registered + {0x0300, 0x036F, prExtend}, // Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X + {0x0483, 0x0487, prExtend}, // Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE + {0x0488, 0x0489, prExtend}, // Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN + {0x0591, 0x05BD, prExtend}, // Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG + {0x05BF, 0x05BF, prExtend}, // Mn HEBREW POINT RAFE + {0x05C1, 0x05C2, prExtend}, // Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT + {0x05C4, 0x05C5, prExtend}, // Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT + {0x05C7, 0x05C7, prExtend}, // Mn HEBREW POINT QAMATS QATAN + {0x0600, 0x0605, prPreprend}, // Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE + {0x0610, 0x061A, prExtend}, // Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA + {0x061C, 0x061C, prControl}, // Cf ARABIC LETTER MARK + {0x064B, 0x065F, prExtend}, // Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW + {0x0670, 0x0670, prExtend}, // Mn ARABIC LETTER SUPERSCRIPT ALEF + {0x06D6, 0x06DC, prExtend}, // Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN + {0x06DD, 0x06DD, prPreprend}, // Cf ARABIC END OF AYAH + {0x06DF, 0x06E4, prExtend}, // Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA + {0x06E7, 0x06E8, prExtend}, // Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON + {0x06EA, 0x06ED, prExtend}, // Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM + {0x070F, 0x070F, prPreprend}, // Cf SYRIAC ABBREVIATION MARK + {0x0711, 0x0711, prExtend}, // Mn SYRIAC LETTER SUPERSCRIPT ALAPH + {0x0730, 0x074A, prExtend}, // Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH + {0x07A6, 0x07B0, prExtend}, // Mn [11] THAANA ABAFILI..THAANA SUKUN + {0x07EB, 0x07F3, prExtend}, // Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE + {0x07FD, 0x07FD, prExtend}, // Mn NKO DANTAYALAN + {0x0816, 0x0819, prExtend}, // Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH + {0x081B, 0x0823, prExtend}, // Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A + {0x0825, 0x0827, prExtend}, // Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U + {0x0829, 0x082D, prExtend}, // Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA + {0x0859, 0x085B, prExtend}, // Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK + {0x08D3, 0x08E1, prExtend}, // Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA + {0x08E2, 0x08E2, prPreprend}, // Cf ARABIC DISPUTED END OF AYAH + {0x08E3, 0x0902, prExtend}, // Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA + {0x0903, 0x0903, prSpacingMark}, // Mc DEVANAGARI SIGN VISARGA + {0x093A, 0x093A, prExtend}, // Mn DEVANAGARI VOWEL SIGN OE + {0x093B, 0x093B, prSpacingMark}, // Mc DEVANAGARI VOWEL SIGN OOE + {0x093C, 0x093C, prExtend}, // Mn DEVANAGARI SIGN NUKTA + {0x093E, 0x0940, prSpacingMark}, // Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II + {0x0941, 0x0948, prExtend}, // Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI + {0x0949, 0x094C, prSpacingMark}, // Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU + {0x094D, 0x094D, prExtend}, // Mn DEVANAGARI SIGN VIRAMA + {0x094E, 0x094F, prSpacingMark}, // Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW + {0x0951, 0x0957, prExtend}, // Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE + {0x0962, 0x0963, prExtend}, // Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL + {0x0981, 0x0981, prExtend}, // Mn BENGALI SIGN CANDRABINDU |