summary | refs | log | tree | commit | diff | stats
path: root/parser
diff options
context:
space:
mode:
author: Mattias Wadman <mattias.wadman@gmail.com>, 2016-04-13 00:14:00 +0200
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>, 2016-04-14 10:46:27 +0200
commit: 5d50c46482d231efa26c95e4705e720fb9bf753c (patch)
tree: 413d6bfc1f632d6e0f398471baf0f3a56b392522 /parser
parent: d48b986c45f8d5a5fef7c98da17af8ca82867b25 (diff)
Chomp Unicode BOM if present
Useful when using, or sharing files with users of, editors that prepend a Unicode byte order mark (BOM) to the file (like Windows Notepad). Files are still assumed to be UTF-8 encoded. Closes #2075
Diffstat (limited to 'parser')
-rw-r--r-- parser/page.go 19
1 file changed, 19 insertions, 0 deletions
diff --git a/parser/page.go b/parser/page.go
index 949bfdac6..5092caddc 100644
--- a/parser/page.go
+++ b/parser/page.go
@@ -50,6 +50,8 @@ const (
HTMLCommentStart = "<!--"
// HTMLCommentEnd identifies the end of HTML comment.
HTMLCommentEnd = "-->"
+ // BOM Unicode byte order marker
+ BOM = '\ufeff'
)
var (
@@ -101,6 +103,10 @@ func (p *page) Metadata() (meta interface{}, err error) {
func ReadFrom(r io.Reader) (p Page, err error) {
reader := bufio.NewReader(r)
+ // chomp BOM and assume UTF-8
+ if err = chompBOM(reader); err != nil && err != io.EOF {
+ return
+ }
if err = chompWhitespace(reader); err != nil && err != io.EOF {
return
}
@@ -135,6 +141,19 @@ func ReadFrom(r io.Reader) (p Page, err error) {
return newp, nil
}
+func chompBOM(r io.RuneScanner) (err error) {
+ for {
+ c, _, err := r.ReadRune()
+ if err != nil {
+ return err
+ }
+ if c != BOM {
+ r.UnreadRune()
+ return nil
+ }
+ }
+}
+
func chompWhitespace(r io.RuneScanner) (err error) {
for {
c, _, err := r.ReadRune()