diff options
author | Junegunn Choi <junegunn.c@gmail.com> | 2017-07-16 23:31:19 +0900 |
---|---|---|
committer | Junegunn Choi <junegunn.c@gmail.com> | 2017-07-16 23:34:32 +0900 |
commit | 9e85cba0d06025983a1a747bfc06c9955388d9c0 (patch) | |
tree | 8fe8dc1fd62ad3ecfbfd02e440fac6cfedcd313c /src/util | |
parent | 4b59ced08f1d417530a25af8fe13aa5d40579220 (diff) |
Reduce memory footprint of Item struct
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/chars.go | 99 | ||||
-rw-r--r-- | src/util/chars_test.go | 20 |
2 files changed, 70 insertions, 49 deletions
diff --git a/src/util/chars.go b/src/util/chars.go index 061120e0..8325cf44 100644 --- a/src/util/chars.go +++ b/src/util/chars.go @@ -3,63 +3,81 @@ package util import ( "unicode" "unicode/utf8" + "unsafe" ) type Chars struct { - runes []rune - bytes []byte + slice []byte // or []rune + inBytes bool + trimLengthKnown bool + trimLength uint16 + + // XXX Piggybacking item index here is a horrible idea. But I'm trying to + // minimize the memory footprint by not wasting padded spaces. + Index int32 } // ToChars converts byte array into rune array -func ToChars(bytea []byte) Chars { +func ToChars(bytes []byte) Chars { var runes []rune - ascii := true - numBytes := len(bytea) + inBytes := true + numBytes := len(bytes) for i := 0; i < numBytes; { - if bytea[i] < utf8.RuneSelf { - if !ascii { - runes = append(runes, rune(bytea[i])) + if bytes[i] < utf8.RuneSelf { + if !inBytes { + runes = append(runes, rune(bytes[i])) } i++ } else { - if ascii { - ascii = false + if inBytes { + inBytes = false runes = make([]rune, i, numBytes) for j := 0; j < i; j++ { - runes[j] = rune(bytea[j]) + runes[j] = rune(bytes[j]) } } - r, sz := utf8.DecodeRune(bytea[i:]) + r, sz := utf8.DecodeRune(bytes[i:]) i += sz runes = append(runes, r) } } - if ascii { - return Chars{bytes: bytea} + if inBytes { + return Chars{slice: bytes, inBytes: inBytes} } - return Chars{runes: runes} + return RunesToChars(runes) } func RunesToChars(runes []rune) Chars { - return Chars{runes: runes} + return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false} +} + +func (chars *Chars) optionalRunes() []rune { + if chars.inBytes { + return nil + } + return *(*[]rune)(unsafe.Pointer(&chars.slice)) } func (chars *Chars) Get(i int) rune { - if chars.runes != nil { - return chars.runes[i] + if runes := chars.optionalRunes(); runes != nil { + return runes[i] } - return rune(chars.bytes[i]) + return rune(chars.slice[i]) } func (chars *Chars) Length() int { - if chars.runes != nil { - return len(chars.runes) + if runes := chars.optionalRunes(); runes != nil { + return len(runes) } - return len(chars.bytes) + return len(chars.slice) } // TrimLength returns the length after trimming leading and trailing whitespaces -func (chars *Chars) TrimLength() int { +func (chars *Chars) TrimLength() uint16 { + if chars.trimLengthKnown { + return chars.trimLength + } + chars.trimLengthKnown = true var i int len := chars.Length() for i = len - 1; i >= 0; i-- { @@ -80,7 +98,8 @@ func (chars *Chars) TrimLength() int { break } } - return i - j + 1 + chars.trimLength = AsUint16(i - j + 1) + return chars.trimLength } func (chars *Chars) TrailingWhitespaces() int { @@ -96,28 +115,40 @@ func (chars *Chars) TrailingWhitespaces() int { } func (chars *Chars) ToString() string { - if chars.runes != nil { - return string(chars.runes) + if runes := chars.optionalRunes(); runes != nil { + return string(runes) } - return string(chars.bytes) + return string(chars.slice) } func (chars *Chars) ToRunes() []rune { - if chars.runes != nil { - return chars.runes + if runes := chars.optionalRunes(); runes != nil { + return runes } - runes := make([]rune, len(chars.bytes)) - for idx, b := range chars.bytes { + bytes := chars.slice + runes := make([]rune, len(bytes)) + for idx, b := range bytes { runes[idx] = rune(b) } return runes } +func (chars *Chars) CopyRunes(dest []rune) { + if runes := chars.optionalRunes(); runes != nil { + copy(dest, runes) + return + } + for idx, b := range chars.slice { + dest[idx] = rune(b) + } + return +} + func (chars *Chars) Slice(b int, e int) Chars { - if chars.runes != nil { - return Chars{runes: chars.runes[b:e]} + if runes := chars.optionalRunes(); runes != nil { + return RunesToChars(runes[b:e]) } - return Chars{bytes: chars.bytes[b:e]} + return Chars{slice: chars.slice[b:e], inBytes: true} } func (chars *Chars) Split(delimiter string) []Chars { diff --git a/src/util/chars_test.go b/src/util/chars_test.go index 12c629d5..07b8dea5 100644 --- a/src/util/chars_test.go +++ b/src/util/chars_test.go @@ -2,27 +2,16 @@ package util import "testing" -func TestToCharsNil(t *testing.T) { - bs := Chars{bytes: []byte{}} - if bs.bytes == nil || bs.runes != nil { - t.Error() - } - rs := RunesToChars([]rune{}) - if rs.bytes != nil || rs.runes == nil { - t.Error() - } -} - func TestToCharsAscii(t *testing.T) { chars := ToChars([]byte("foobar")) - if chars.ToString() != "foobar" || chars.runes != nil { + if !chars.inBytes || chars.ToString() != "foobar" || !chars.inBytes { t.Error() } } func TestCharsLength(t *testing.T) { chars := ToChars([]byte("\tabc한글 ")) - if chars.Length() != 8 || chars.TrimLength() != 5 { + if chars.inBytes || chars.Length() != 8 || chars.TrimLength() != 5 { t.Error() } } @@ -36,7 +25,7 @@ func TestCharsToString(t *testing.T) { } func TestTrimLength(t *testing.T) { - check := func(str string, exp int) { + check := func(str string, exp uint16) { chars := ToChars([]byte(str)) trimmed := chars.TrimLength() if trimmed != exp { @@ -61,7 +50,8 @@ func TestSplit(t *testing.T) { input := ToChars([]byte(str)) result := input.Split(delim) if len(result) != len(tokens) { - t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s", + t.Errorf( + "Invalid Split result for '%s': %d tokens found (expected %d): %s", str, len(result), len(tokens), result) } for idx, token := range tokens { |