Avoid splitting words for summary

For people using autogenerated summaries, this is one of the hot spots in the memory department. We don't need to split all the content into words to do proper summary truncation. This is obviously more effective:

```
BenchmarkTestTruncateWordsToWholeSentence-4      300000    4720 ns/op       0 B/op    0 allocs/op
BenchmarkTestTruncateWordsToWholeSentenceOld-4   100000   17699 ns/op    3072 B/op    3 allocs/op
```
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2016-08-16 22:50:15 +0200
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2016-09-14 10:50:55 +0200
commit: bcd434794a28ff75a6e6504c6c3bada554ba88ce (patch)
tree: 7ebb624045cdd6236cfdfd0f3620bb5bb8bf88d2 /helpers/content.go
parent: 74ffb45fbe2c121881b2386fc3210f8b1c6bd952 (diff)
1 file changed, 50 insertions, 4 deletions
diff --git a/helpers/content.go b/helpers/content.go
index 53176de64..bb7819175 100644
--- a/helpers/content.go
+++ b/helpers/content.go
@@ -21,6 +21,7 @@ import (
 	"bytes"
 	"html/template"
 	"os/exec"
+	"unicode"
 	"unicode/utf8"
 
 	"github.com/miekg/mmark"
@@ -424,10 +425,55 @@ func TruncateWordsByRune(words []string, max int) (string, bool) {
 	return strings.Join(words, " "), false
 }
 
-// TruncateWordsToWholeSentence takes content and an int
-// and returns entire sentences from content, delimited by the int
-// and whether it's truncated or not.
-func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
+// TruncateWordsToWholeSentence takes content and truncates to whole sentence
+// limited by max number of words. It also returns whether it is truncated.
+func TruncateWordsToWholeSentence(s string, max int) (string, bool) {
+
+	var (
+		wordCount     = 0
+		lastWordIndex = -1
+	)
+
+	for i, r := range s {
+		if unicode.IsSpace(r) {
+			wordCount++
+			lastWordIndex = i
+
+			if wordCount >= max {
+				break
+			}
+
+		}
+	}
+
+	if lastWordIndex == -1 {
+		return s, false
+	}
+
+	endIndex := -1
+
+	for j, r := range s[lastWordIndex:] {
+		if isEndOfSentence(r) {
+			endIndex = j + lastWordIndex + utf8.RuneLen(r)
+			break
+		}
+	}
+
+	if endIndex == -1 {
+		return s, false
+	}
+
+	return strings.TrimSpace(s[:endIndex]), endIndex < len(s)
+}
+
+func isEndOfSentence(r rune) bool {
+	return r == '.' || r == '?' || r == '!' || r == '"' || r == '\n'
+}
+
+// Kept only for benchmark.
+func truncateWordsToWholeSentenceOld(content string, max int) (string, bool) {
+	words := strings.Fields(content)
+
 	if max >= len(words) {
 		return strings.Join(words, " "), false
 	}
author	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2016-08-16 22:50:15 +0200
committer	Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>	2016-09-14 10:50:55 +0200
commit	bcd434794a28ff75a6e6504c6c3bada554ba88ce (patch)
tree	7ebb624045cdd6236cfdfd0f3620bb5bb8bf88d2 /helpers/content.go
parent	74ffb45fbe2c121881b2386fc3210f8b1c6bd952 (diff)