diff options
Diffstat (limited to 'vendor/github.com/rivo/uniseg/sentencerules.go')
-rw-r--r-- | vendor/github.com/rivo/uniseg/sentencerules.go | 265 |
1 files changed, 168 insertions, 97 deletions
diff --git a/vendor/github.com/rivo/uniseg/sentencerules.go b/vendor/github.com/rivo/uniseg/sentencerules.go index 58c04794e..0b29c7bdb 100644 --- a/vendor/github.com/rivo/uniseg/sentencerules.go +++ b/vendor/github.com/rivo/uniseg/sentencerules.go @@ -18,104 +18,178 @@ const ( sbSB8aSp ) -// The sentence break parser's breaking instructions. -const ( - sbDontBreak = iota - sbBreak -) - -// The sentence break parser's state transitions. It's anologous to -// grTransitions, see comments there for details. Unicode version 14.0.0. -var sbTransitions = map[[2]int][3]int{ +// sbTransitions implements the sentence break parser's state transitions. It's +// anologous to [grTransitions], see comments there for details. +// +// Unicode version 15.0.0. +func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) { + switch uint64(state) | uint64(prop)<<32 { // SB3. - {sbAny, prCR}: {sbCR, sbDontBreak, 9990}, - {sbCR, prLF}: {sbParaSep, sbDontBreak, 30}, + case sbAny | prCR<<32: + return sbCR, false, 9990 + case sbCR | prLF<<32: + return sbParaSep, false, 30 // SB4. - {sbAny, prSep}: {sbParaSep, sbDontBreak, 9990}, - {sbAny, prLF}: {sbParaSep, sbDontBreak, 9990}, - {sbParaSep, prAny}: {sbAny, sbBreak, 40}, - {sbCR, prAny}: {sbAny, sbBreak, 40}, + case sbAny | prSep<<32: + return sbParaSep, false, 9990 + case sbAny | prLF<<32: + return sbParaSep, false, 9990 + case sbParaSep | prAny<<32: + return sbAny, true, 40 + case sbCR | prAny<<32: + return sbAny, true, 40 // SB6. - {sbAny, prATerm}: {sbATerm, sbDontBreak, 9990}, - {sbATerm, prNumeric}: {sbAny, sbDontBreak, 60}, - {sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7. + case sbAny | prATerm<<32: + return sbATerm, false, 9990 + case sbATerm | prNumeric<<32: + return sbAny, false, 60 + case sbSB7 | prNumeric<<32: + return sbAny, false, 60 // Because ATerm also appears in SB7. // SB7. - {sbAny, prUpper}: {sbUpper, sbDontBreak, 9990}, - {sbAny, prLower}: {sbLower, sbDontBreak, 9990}, - {sbUpper, prATerm}: {sbSB7, sbDontBreak, 70}, - {sbLower, prATerm}: {sbSB7, sbDontBreak, 70}, - {sbSB7, prUpper}: {sbUpper, sbDontBreak, 70}, + case sbAny | prUpper<<32: + return sbUpper, false, 9990 + case sbAny | prLower<<32: + return sbLower, false, 9990 + case sbUpper | prATerm<<32: + return sbSB7, false, 70 + case sbLower | prATerm<<32: + return sbSB7, false, 70 + case sbSB7 | prUpper<<32: + return sbUpper, false, 70 // SB8a. - {sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990}, - {sbATerm, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbATerm, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB7, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB7, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81}, + case sbAny | prSTerm<<32: + return sbSTerm, false, 9990 + case sbATerm | prSContinue<<32: + return sbAny, false, 81 + case sbATerm | prATerm<<32: + return sbATerm, false, 81 + case sbATerm | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB7 | prSContinue<<32: + return sbAny, false, 81 + case sbSB7 | prATerm<<32: + return sbATerm, false, 81 + case sbSB7 | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8Close | prSContinue<<32: + return sbAny, false, 81 + case sbSB8Close | prATerm<<32: + return sbATerm, false, 81 + case sbSB8Close | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8Sp | prSContinue<<32: + return sbAny, false, 81 + case sbSB8Sp | prATerm<<32: + return sbATerm, false, 81 + case sbSB8Sp | prSTerm<<32: + return sbSTerm, false, 81 + case sbSTerm | prSContinue<<32: + return sbAny, false, 81 + case sbSTerm | prATerm<<32: + return sbATerm, false, 81 + case sbSTerm | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8aClose | prSContinue<<32: + return sbAny, false, 81 + case sbSB8aClose | prATerm<<32: + return sbATerm, false, 81 + case sbSB8aClose | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8aSp | prSContinue<<32: + return sbAny, false, 81 + case sbSB8aSp | prATerm<<32: + return sbATerm, false, 81 + case sbSB8aSp | prSTerm<<32: + return sbSTerm, false, 81 // SB9. - {sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90}, - {sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90}, - {sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90}, - {sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90}, - {sbATerm, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbATerm, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbATerm, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90}, + case sbATerm | prClose<<32: + return sbSB8Close, false, 90 + case sbSB7 | prClose<<32: + return sbSB8Close, false, 90 + case sbSB8Close | prClose<<32: + return sbSB8Close, false, 90 + case sbATerm | prSp<<32: + return sbSB8Sp, false, 90 + case sbSB7 | prSp<<32: + return sbSB8Sp, false, 90 + case sbSB8Close | prSp<<32: + return sbSB8Sp, false, 90 + case sbSTerm | prClose<<32: + return sbSB8aClose, false, 90 + case sbSB8aClose | prClose<<32: + return sbSB8aClose, false, 90 + case sbSTerm | prSp<<32: + return sbSB8aSp, false, 90 + case sbSB8aClose | prSp<<32: + return sbSB8aSp, false, 90 + case sbATerm | prSep<<32: + return sbParaSep, false, 90 + case sbATerm | prCR<<32: + return sbParaSep, false, 90 + case sbATerm | prLF<<32: + return sbParaSep, false, 90 + case sbSB7 | prSep<<32: + return sbParaSep, false, 90 + case sbSB7 | prCR<<32: + return sbParaSep, false, 90 + case sbSB7 | prLF<<32: + return sbParaSep, false, 90 + case sbSB8Close | prSep<<32: + return sbParaSep, false, 90 + case sbSB8Close | prCR<<32: + return sbParaSep, false, 90 + case sbSB8Close | prLF<<32: + return sbParaSep, false, 90 + case sbSTerm | prSep<<32: + return sbParaSep, false, 90 + case sbSTerm | prCR<<32: + return sbParaSep, false, 90 + case sbSTerm | prLF<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prSep<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prCR<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prLF<<32: + return sbParaSep, false, 90 // SB10. - {sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100}, - {sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100}, - {sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100}, - {sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100}, - {sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100}, + case sbSB8Sp | prSp<<32: + return sbSB8Sp, false, 100 + case sbSB8aSp | prSp<<32: + return sbSB8aSp, false, 100 + case sbSB8Sp | prSep<<32: + return sbParaSep, false, 100 + case sbSB8Sp | prCR<<32: + return sbParaSep, false, 100 + case sbSB8Sp | prLF<<32: + return sbParaSep, false, 100 // SB11. - {sbATerm, prAny}: {sbAny, sbBreak, 110}, - {sbSB7, prAny}: {sbAny, sbBreak, 110}, - {sbSB8Close, prAny}: {sbAny, sbBreak, 110}, - {sbSB8Sp, prAny}: {sbAny, sbBreak, 110}, - {sbSTerm, prAny}: {sbAny, sbBreak, 110}, - {sbSB8aClose, prAny}: {sbAny, sbBreak, 110}, - {sbSB8aSp, prAny}: {sbAny, sbBreak, 110}, + case sbATerm | prAny<<32: + return sbAny, true, 110 + case sbSB7 | prAny<<32: + return sbAny, true, 110 + case sbSB8Close | prAny<<32: + return sbAny, true, 110 + case sbSB8Sp | prAny<<32: + return sbAny, true, 110 + case sbSTerm | prAny<<32: + return sbAny, true, 110 + case sbSB8aClose | prAny<<32: + return sbAny, true, 110 + case sbSB8aSp | prAny<<32: + return sbAny, true, 110 // We'll always break after ParaSep due to SB4. + + default: + return -1, false, -1 + } } // transitionSentenceBreakState determines the new state of the sentence break @@ -141,30 +215,27 @@ func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newS // Find the applicable transition in the table. var rule int - transition, ok := sbTransitions[[2]int{state, nextProperty}] - if ok { - // We have a specific transition. We'll use it. - newState, sentenceBreak, rule = transition[0], transition[1] == sbBreak, transition[2] - } else { + newState, sentenceBreak, rule = sbTransitions(state, nextProperty) + if newState < 0 { // No specific transition found. Try the less specific ones. - transAnyProp, okAnyProp := sbTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := sbTransitions[[2]int{sbAny, nextProperty}] - if okAnyProp && okAnyState { + anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny) + anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty) + if anyPropState >= 0 && anyStateState >= 0 { // Both apply. We'll use a mix (see comments for grTransitions). - newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] - if transAnyProp[2] < transAnyState[2] { - sentenceBreak, rule = transAnyProp[1] == sbBreak, transAnyProp[2] + newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule + if anyPropRule < anyStateRule { + sentenceBreak, rule = anyPropProp, anyPropRule } - } else if okAnyProp { + } else if anyPropState >= 0 { // We only have a specific state. - newState, sentenceBreak, rule = transAnyProp[0], transAnyProp[1] == sbBreak, transAnyProp[2] + newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be // true anymore. - } else if okAnyState { + } else if anyStateState >= 0 { // We only have a specific property. - newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] + newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule } else { // No known transition. SB999: Any × Any. newState, sentenceBreak, rule = sbAny, false, 9990 |