summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/rivo/uniseg/wordrules.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/rivo/uniseg/wordrules.go')
-rw-r--r--vendor/github.com/rivo/uniseg/wordrules.go160
1 files changed, 98 insertions, 62 deletions
diff --git a/vendor/github.com/rivo/uniseg/wordrules.go b/vendor/github.com/rivo/uniseg/wordrules.go
index 325407e40..57a8c6831 100644
--- a/vendor/github.com/rivo/uniseg/wordrules.go
+++ b/vendor/github.com/rivo/uniseg/wordrules.go
@@ -22,82 +22,121 @@ const (
wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c).
)
-// The word break parser's breaking instructions.
-const (
- wbDontBreak = iota
- wbBreak
-)
-
-// The word break parser's state transitions. It's anologous to grTransitions,
-// see comments there for details. Unicode version 14.0.0.
-var wbTransitions = map[[2]int][3]int{
+// wbTransitions implements the word break parser's state transitions. It's
+// anologous to [grTransitions], see comments there for details.
+//
+// Unicode version 15.0.0.
+func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) {
+ switch uint64(state) | uint64(prop)<<32 {
// WB3b.
- {wbAny, prNewline}: {wbNewline, wbBreak, 32},
- {wbAny, prCR}: {wbCR, wbBreak, 32},
- {wbAny, prLF}: {wbLF, wbBreak, 32},
+ case wbAny | prNewline<<32:
+ return wbNewline, true, 32
+ case wbAny | prCR<<32:
+ return wbCR, true, 32
+ case wbAny | prLF<<32:
+ return wbLF, true, 32
// WB3a.
- {wbNewline, prAny}: {wbAny, wbBreak, 31},
- {wbCR, prAny}: {wbAny, wbBreak, 31},
- {wbLF, prAny}: {wbAny, wbBreak, 31},
+ case wbNewline | prAny<<32:
+ return wbAny, true, 31
+ case wbCR | prAny<<32:
+ return wbAny, true, 31
+ case wbLF | prAny<<32:
+ return wbAny, true, 31
// WB3.
- {wbCR, prLF}: {wbLF, wbDontBreak, 30},
+ case wbCR | prLF<<32:
+ return wbLF, false, 30
// WB3d.
- {wbAny, prWSegSpace}: {wbWSegSpace, wbBreak, 9990},
- {wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34},
+ case wbAny | prWSegSpace<<32:
+ return wbWSegSpace, true, 9990
+ case wbWSegSpace | prWSegSpace<<32:
+ return wbWSegSpace, false, 34
// WB5.
- {wbAny, prALetter}: {wbALetter, wbBreak, 9990},
- {wbAny, prHebrewLetter}: {wbHebrewLetter, wbBreak, 9990},
- {wbALetter, prALetter}: {wbALetter, wbDontBreak, 50},
- {wbALetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},
- {wbHebrewLetter, prALetter}: {wbALetter, wbDontBreak, 50},
- {wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},
+ case wbAny | prALetter<<32:
+ return wbALetter, true, 9990
+ case wbAny | prHebrewLetter<<32:
+ return wbHebrewLetter, true, 9990
+ case wbALetter | prALetter<<32:
+ return wbALetter, false, 50
+ case wbALetter | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 50
+ case wbHebrewLetter | prALetter<<32:
+ return wbALetter, false, 50
+ case wbHebrewLetter | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 50
// WB7. Transitions to wbWB7 handled by transitionWordBreakState().
- {wbWB7, prALetter}: {wbALetter, wbDontBreak, 70},
- {wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70},
+ case wbWB7 | prALetter<<32:
+ return wbALetter, false, 70
+ case wbWB7 | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 70
// WB7a.
- {wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71},
+ case wbHebrewLetter | prSingleQuote<<32:
+ return wbAny, false, 71
// WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
- {wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73},
+ case wbWB7c | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 73
// WB8.
- {wbAny, prNumeric}: {wbNumeric, wbBreak, 9990},
- {wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80},
+ case wbAny | prNumeric<<32:
+ return wbNumeric, true, 9990
+ case wbNumeric | prNumeric<<32:
+ return wbNumeric, false, 80
// WB9.
- {wbALetter, prNumeric}: {wbNumeric, wbDontBreak, 90},
- {wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90},
+ case wbALetter | prNumeric<<32:
+ return wbNumeric, false, 90
+ case wbHebrewLetter | prNumeric<<32:
+ return wbNumeric, false, 90
// WB10.
- {wbNumeric, prALetter}: {wbALetter, wbDontBreak, 100},
- {wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100},
+ case wbNumeric | prALetter<<32:
+ return wbALetter, false, 100
+ case wbNumeric | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 100
// WB11. Transitions to wbWB11 handled by transitionWordBreakState().
- {wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110},
+ case wbWB11 | prNumeric<<32:
+ return wbNumeric, false, 110
// WB13.
- {wbAny, prKatakana}: {wbKatakana, wbBreak, 9990},
- {wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130},
+ case wbAny | prKatakana<<32:
+ return wbKatakana, true, 9990
+ case wbKatakana | prKatakana<<32:
+ return wbKatakana, false, 130
// WB13a.
- {wbAny, prExtendNumLet}: {wbExtendNumLet, wbBreak, 9990},
- {wbALetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
- {wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
- {wbNumeric, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
- {wbKatakana, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
- {wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
+ case wbAny | prExtendNumLet<<32:
+ return wbExtendNumLet, true, 9990
+ case wbALetter | prExtendNumLet<<32:
+ return wbExtendNumLet, false, 131
+ case wbHebrewLetter | prExtendNumLet<<32:
+ return wbExtendNumLet, false, 131
+ case wbNumeric | prExtendNumLet<<32:
+ return wbExtendNumLet, false, 131
+ case wbKatakana | prExtendNumLet<<32:
+ return wbExtendNumLet, false, 131
+ case wbExtendNumLet | prExtendNumLet<<32:
+ return wbExtendNumLet, false, 131
// WB13b.
- {wbExtendNumLet, prALetter}: {wbALetter, wbDontBreak, 132},
- {wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132},
- {wbExtendNumLet, prNumeric}: {wbNumeric, wbDontBreak, 132},
- {wbExtendNumLet, prKatakana}: {prKatakana, wbDontBreak, 132},
+ case wbExtendNumLet | prALetter<<32:
+ return wbALetter, false, 132
+ case wbExtendNumLet | prHebrewLetter<<32:
+ return wbHebrewLetter, false, 132
+ case wbExtendNumLet | prNumeric<<32:
+ return wbNumeric, false, 132
+ case wbExtendNumLet | prKatakana<<32:
+ return wbKatakana, false, 132
+
+ default:
+ return -1, false, -1
+ }
}
// transitionWordBreakState determines the new state of the word break parser
@@ -141,30 +180,27 @@ func transitionWordBreakState(state int, r rune, b []byte, str string) (newState
// Find the applicable transition in the table.
var rule int
- transition, ok := wbTransitions[[2]int{state, nextProperty}]
- if ok {
- // We have a specific transition. We'll use it.
- newState, wordBreak, rule = transition[0], transition[1] == wbBreak, transition[2]
- } else {
+ newState, wordBreak, rule = wbTransitions(state, nextProperty)
+ if newState < 0 {
// No specific transition found. Try the less specific ones.
- transAnyProp, okAnyProp := wbTransitions[[2]int{state, prAny}]
- transAnyState, okAnyState := wbTransitions[[2]int{wbAny, nextProperty}]
- if okAnyProp && okAnyState {
+ anyPropState, anyPropWordBreak, anyPropRule := wbTransitions(state, prAny)
+ anyStateState, anyStateWordBreak, anyStateRule := wbTransitions(wbAny, nextProperty)
+ if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
- newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2]
- if transAnyProp[2] < transAnyState[2] {
- wordBreak, rule = transAnyProp[1] == wbBreak, transAnyProp[2]
+ newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
+ if anyPropRule < anyStateRule {
+ wordBreak, rule = anyPropWordBreak, anyPropRule
}
- } else if okAnyProp {
+ } else if anyPropState >= 0 {
// We only have a specific state.
- newState, wordBreak, rule = transAnyProp[0], transAnyProp[1] == wbBreak, transAnyProp[2]
+ newState, wordBreak, rule = anyPropState, anyPropWordBreak, anyPropRule
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
- } else if okAnyState {
+ } else if anyStateState >= 0 {
// We only have a specific property.
- newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2]
+ newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
} else {
// No known transition. WB999: Any รท Any.
newState, wordBreak, rule = wbAny, true, 9990