summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/rivo/uniseg/sentencerules.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/rivo/uniseg/sentencerules.go')
-rw-r--r--vendor/github.com/rivo/uniseg/sentencerules.go265
1 files changed, 168 insertions, 97 deletions
diff --git a/vendor/github.com/rivo/uniseg/sentencerules.go b/vendor/github.com/rivo/uniseg/sentencerules.go
index 58c04794e..0b29c7bdb 100644
--- a/vendor/github.com/rivo/uniseg/sentencerules.go
+++ b/vendor/github.com/rivo/uniseg/sentencerules.go
@@ -18,104 +18,178 @@ const (
sbSB8aSp
)
-// The sentence break parser's breaking instructions.
-const (
- sbDontBreak = iota
- sbBreak
-)
-
-// The sentence break parser's state transitions. It's anologous to
-// grTransitions, see comments there for details. Unicode version 14.0.0.
-var sbTransitions = map[[2]int][3]int{
+// sbTransitions implements the sentence break parser's state transitions. It's
+// analogous to [grTransitions], see comments there for details.
+//
+// Unicode version 15.0.0.
+func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
+ switch uint64(state) | uint64(prop)<<32 {
// SB3.
- {sbAny, prCR}: {sbCR, sbDontBreak, 9990},
- {sbCR, prLF}: {sbParaSep, sbDontBreak, 30},
+ case sbAny | prCR<<32:
+ return sbCR, false, 9990
+ case sbCR | prLF<<32:
+ return sbParaSep, false, 30
// SB4.
- {sbAny, prSep}: {sbParaSep, sbDontBreak, 9990},
- {sbAny, prLF}: {sbParaSep, sbDontBreak, 9990},
- {sbParaSep, prAny}: {sbAny, sbBreak, 40},
- {sbCR, prAny}: {sbAny, sbBreak, 40},
+ case sbAny | prSep<<32:
+ return sbParaSep, false, 9990
+ case sbAny | prLF<<32:
+ return sbParaSep, false, 9990
+ case sbParaSep | prAny<<32:
+ return sbAny, true, 40
+ case sbCR | prAny<<32:
+ return sbAny, true, 40
// SB6.
- {sbAny, prATerm}: {sbATerm, sbDontBreak, 9990},
- {sbATerm, prNumeric}: {sbAny, sbDontBreak, 60},
- {sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7.
+ case sbAny | prATerm<<32:
+ return sbATerm, false, 9990
+ case sbATerm | prNumeric<<32:
+ return sbAny, false, 60
+ case sbSB7 | prNumeric<<32:
+ return sbAny, false, 60 // Because ATerm also appears in SB7.
// SB7.
- {sbAny, prUpper}: {sbUpper, sbDontBreak, 9990},
- {sbAny, prLower}: {sbLower, sbDontBreak, 9990},
- {sbUpper, prATerm}: {sbSB7, sbDontBreak, 70},
- {sbLower, prATerm}: {sbSB7, sbDontBreak, 70},
- {sbSB7, prUpper}: {sbUpper, sbDontBreak, 70},
+ case sbAny | prUpper<<32:
+ return sbUpper, false, 9990
+ case sbAny | prLower<<32:
+ return sbLower, false, 9990
+ case sbUpper | prATerm<<32:
+ return sbSB7, false, 70
+ case sbLower | prATerm<<32:
+ return sbSB7, false, 70
+ case sbSB7 | prUpper<<32:
+ return sbUpper, false, 70
// SB8a.
- {sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990},
- {sbATerm, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbATerm, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSB7, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSB7, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81},
- {sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81},
- {sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81},
- {sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81},
+ case sbAny | prSTerm<<32:
+ return sbSTerm, false, 9990
+ case sbATerm | prSContinue<<32:
+ return sbAny, false, 81
+ case sbATerm | prATerm<<32:
+ return sbATerm, false, 81
+ case sbATerm | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB7 | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB7 | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB7 | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8Close | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8Close | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8Close | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8Sp | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8Sp | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8Sp | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSTerm | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSTerm | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSTerm | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8aClose | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8aClose | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8aClose | prSTerm<<32:
+ return sbSTerm, false, 81
+ case sbSB8aSp | prSContinue<<32:
+ return sbAny, false, 81
+ case sbSB8aSp | prATerm<<32:
+ return sbATerm, false, 81
+ case sbSB8aSp | prSTerm<<32:
+ return sbSTerm, false, 81
// SB9.
- {sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90},
- {sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90},
- {sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90},
- {sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90},
- {sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90},
- {sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90},
- {sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90},
- {sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90},
- {sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90},
- {sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90},
- {sbATerm, prSep}: {sbParaSep, sbDontBreak, 90},
- {sbATerm, prCR}: {sbParaSep, sbDontBreak, 90},
- {sbATerm, prLF}: {sbParaSep, sbDontBreak, 90},
- {sbSB7, prSep}: {sbParaSep, sbDontBreak, 90},
- {sbSB7, prCR}: {sbParaSep, sbDontBreak, 90},
- {sbSB7, prLF}: {sbParaSep, sbDontBreak, 90},
- {sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90},
- {sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90},
- {sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90},
- {sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90},
- {sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90},
- {sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90},
- {sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90},
- {sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90},
- {sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90},
+ case sbATerm | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbSB7 | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbSB8Close | prClose<<32:
+ return sbSB8Close, false, 90
+ case sbATerm | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSB7 | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSB8Close | prSp<<32:
+ return sbSB8Sp, false, 90
+ case sbSTerm | prClose<<32:
+ return sbSB8aClose, false, 90
+ case sbSB8aClose | prClose<<32:
+ return sbSB8aClose, false, 90
+ case sbSTerm | prSp<<32:
+ return sbSB8aSp, false, 90
+ case sbSB8aClose | prSp<<32:
+ return sbSB8aSp, false, 90
+ case sbATerm | prSep<<32:
+ return sbParaSep, false, 90
+ case sbATerm | prCR<<32:
+ return sbParaSep, false, 90
+ case sbATerm | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB7 | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB8Close | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSTerm | prLF<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prSep<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prCR<<32:
+ return sbParaSep, false, 90
+ case sbSB8aClose | prLF<<32:
+ return sbParaSep, false, 90
// SB10.
- {sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100},
- {sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100},
- {sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100},
- {sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100},
- {sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100},
+ case sbSB8Sp | prSp<<32:
+ return sbSB8Sp, false, 100
+ case sbSB8aSp | prSp<<32:
+ return sbSB8aSp, false, 100
+ case sbSB8Sp | prSep<<32:
+ return sbParaSep, false, 100
+ case sbSB8Sp | prCR<<32:
+ return sbParaSep, false, 100
+ case sbSB8Sp | prLF<<32:
+ return sbParaSep, false, 100
// SB11.
- {sbATerm, prAny}: {sbAny, sbBreak, 110},
- {sbSB7, prAny}: {sbAny, sbBreak, 110},
- {sbSB8Close, prAny}: {sbAny, sbBreak, 110},
- {sbSB8Sp, prAny}: {sbAny, sbBreak, 110},
- {sbSTerm, prAny}: {sbAny, sbBreak, 110},
- {sbSB8aClose, prAny}: {sbAny, sbBreak, 110},
- {sbSB8aSp, prAny}: {sbAny, sbBreak, 110},
+ case sbATerm | prAny<<32:
+ return sbAny, true, 110
+ case sbSB7 | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8Close | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8Sp | prAny<<32:
+ return sbAny, true, 110
+ case sbSTerm | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8aClose | prAny<<32:
+ return sbAny, true, 110
+ case sbSB8aSp | prAny<<32:
+ return sbAny, true, 110
// We'll always break after ParaSep due to SB4.
+
+ default:
+ return -1, false, -1
+ }
}
// transitionSentenceBreakState determines the new state of the sentence break
@@ -141,30 +215,27 @@ func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newS
// Find the applicable transition in the table.
var rule int
- transition, ok := sbTransitions[[2]int{state, nextProperty}]
- if ok {
- // We have a specific transition. We'll use it.
- newState, sentenceBreak, rule = transition[0], transition[1] == sbBreak, transition[2]
- } else {
+ newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
+ if newState < 0 {
// No specific transition found. Try the less specific ones.
- transAnyProp, okAnyProp := sbTransitions[[2]int{state, prAny}]
- transAnyState, okAnyState := sbTransitions[[2]int{sbAny, nextProperty}]
- if okAnyProp && okAnyState {
+ anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny)
+ anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty)
+ if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
- newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2]
- if transAnyProp[2] < transAnyState[2] {
- sentenceBreak, rule = transAnyProp[1] == sbBreak, transAnyProp[2]
+ newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
+ if anyPropRule < anyStateRule {
+ sentenceBreak, rule = anyPropProp, anyPropRule
}
- } else if okAnyProp {
+ } else if anyPropState >= 0 {
// We only have a specific state.
- newState, sentenceBreak, rule = transAnyProp[0], transAnyProp[1] == sbBreak, transAnyProp[2]
+ newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule
// This branch will probably never be reached because anyStateState will
// always be non-negative given the current sbTransitions function. But we
// keep it here for future modifications to the transitions where this may
// not be true anymore.
- } else if okAnyState {
+ } else if anyStateState >= 0 {
// We only have a specific property.
- newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2]
+ newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
} else {
// No known transition. SB999: Any × Any.
newState, sentenceBreak, rule = sbAny, false, 9990