summaryrefslogtreecommitdiffstats
path: root/parser
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-12-23 10:40:32 +0100
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2018-12-23 16:33:21 +0100
commita5744697971d296eb973e04e4259fe9e516b908f (patch)
tree488ed37ebfc8916b5cfcdaade249884aec7105c3 /parser
parent822dc627a1cfdf1f97882f27761675ac6ace7669 (diff)
Add CSV support to transform.Unmarshal
Fixes #5555
Diffstat (limited to 'parser')
-rw-r--r--parser/metadecoders/decoder.go64
-rw-r--r--parser/metadecoders/decoder_test.go10
-rw-r--r--parser/metadecoders/format.go10
-rw-r--r--parser/metadecoders/format_test.go3
4 files changed, 75 insertions, 12 deletions
diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go
index 6da791c73..0ca8575fe 100644
--- a/parser/metadecoders/decoder.go
+++ b/parser/metadecoders/decoder.go
@@ -14,6 +14,8 @@
package metadecoders
import (
+ "bytes"
+ "encoding/csv"
"encoding/json"
"fmt"
@@ -27,22 +29,37 @@ import (
yaml "gopkg.in/yaml.v2"
)
+// Decoder provides some configuration options for the decoders.
+type Decoder struct {
+ // Comma is the field delimiter used in the CSV decoder. It defaults to ','.
+ Comma rune
+
+ // Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
+ // Comment character without preceding whitespace are ignored.
+ Comment rune
+}
+
+// Default is a Decoder in its default configuration.
+var Default = Decoder{
+ Comma: ',',
+}
+
// UnmarshalToMap will unmarshal data in format f into a new map. This is
// what's needed for Hugo's front matter decoding.
-func UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) {
+func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) {
m := make(map[string]interface{})
if data == nil {
return m, nil
}
- err := unmarshal(data, f, &m)
+ err := d.unmarshal(data, f, &m)
return m, err
}
// UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
// the given filename.
-func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) {
+func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) {
format := FormatFromString(filename)
if format == "" {
return nil, errors.Errorf("%q is not a valid configuration format", filename)
@@ -52,23 +69,29 @@ func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, e
if err != nil {
return nil, err
}
- return UnmarshalToMap(data, format)
+ return d.UnmarshalToMap(data, format)
}
// Unmarshal will unmarshal data in format f into an interface{}.
// This is what's needed for Hugo's /data handling.
-func Unmarshal(data []byte, f Format) (interface{}, error) {
+func (d Decoder) Unmarshal(data []byte, f Format) (interface{}, error) {
if data == nil {
- return make(map[string]interface{}), nil
+ switch f {
+ case CSV:
+ return make([][]string, 0), nil
+ default:
+ return make(map[string]interface{}), nil
+ }
+
}
var v interface{}
- err := unmarshal(data, f, &v)
+ err := d.unmarshal(data, f, &v)
return v, err
}
// unmarshal unmarshals data in format f into v.
-func unmarshal(data []byte, f Format, v interface{}) error {
+func (d Decoder) unmarshal(data []byte, f Format, v interface{}) error {
var err error
@@ -116,6 +139,9 @@ func unmarshal(data []byte, f Format, v interface{}) error {
*v.(*interface{}) = mm
}
}
+ case CSV:
+ return d.unmarshalCSV(data, v)
+
default:
return errors.Errorf("unmarshal of format %q is not supported", f)
}
@@ -128,6 +154,28 @@ func unmarshal(data []byte, f Format, v interface{}) error {
}
+func (d Decoder) unmarshalCSV(data []byte, v interface{}) error {
+ r := csv.NewReader(bytes.NewReader(data))
+ r.Comma = d.Comma
+ r.Comment = d.Comment
+
+ records, err := r.ReadAll()
+ if err != nil {
+ return err
+ }
+
+ switch v.(type) {
+ case *interface{}:
+ *v.(*interface{}) = records
+ default:
+ return errors.Errorf("CSV cannot be unmarshaled into %T", v)
+
+ }
+
+ return nil
+
+}
+
func toFileError(f Format, err error) error {
return herrors.ToFileError(string(f), err)
}
diff --git a/parser/metadecoders/decoder_test.go b/parser/metadecoders/decoder_test.go
index 94cfd5a9a..38d002dd8 100644
--- a/parser/metadecoders/decoder_test.go
+++ b/parser/metadecoders/decoder_test.go
@@ -26,6 +26,8 @@ func TestUnmarshalToMap(t *testing.T) {
expect := map[string]interface{}{"a": "b"}
+ d := Default
+
for i, test := range []struct {
data string
format Format
@@ -40,9 +42,10 @@ func TestUnmarshalToMap(t *testing.T) {
{`#+a: b`, ORG, expect},
// errors
{`a = b`, TOML, false},
+ {`a,b,c`, CSV, false}, // Use Unmarshal for CSV
} {
msg := fmt.Sprintf("%d: %s", i, test.format)
- m, err := UnmarshalToMap([]byte(test.data), test.format)
+ m, err := d.UnmarshalToMap([]byte(test.data), test.format)
if b, ok := test.expect.(bool); ok && !b {
assert.Error(err, msg)
} else {
@@ -57,6 +60,8 @@ func TestUnmarshalToInterface(t *testing.T) {
expect := map[string]interface{}{"a": "b"}
+ d := Default
+
for i, test := range []struct {
data string
format Format
@@ -67,12 +72,13 @@ func TestUnmarshalToInterface(t *testing.T) {
{`#+a: b`, ORG, expect},
{`a = "b"`, TOML, expect},
{`a: "b"`, YAML, expect},
+ {`a,b,c`, CSV, [][]string{[]string{"a", "b", "c"}}},
{"a: Easy!\nb:\n c: 2\n d: [3, 4]", YAML, map[string]interface{}{"a": "Easy!", "b": map[string]interface{}{"c": 2, "d": []interface{}{3, 4}}}},
// errors
{`a = "`, TOML, false},
} {
msg := fmt.Sprintf("%d: %s", i, test.format)
- m, err := Unmarshal([]byte(test.data), test.format)
+ m, err := d.Unmarshal([]byte(test.data), test.format)
if b, ok := test.expect.(bool); ok && !b {
assert.Error(err, msg)
} else {
diff --git a/parser/metadecoders/format.go b/parser/metadecoders/format.go
index 4a30898fe..719fbf100 100644
--- a/parser/metadecoders/format.go
+++ b/parser/metadecoders/format.go
@@ -31,6 +31,7 @@ const (
JSON Format = "json"
TOML Format = "toml"
YAML Format = "yaml"
+ CSV Format = "csv"
)
// FormatFromString turns formatStr, typically a file extension without any ".",
@@ -51,6 +52,8 @@ func FormatFromString(formatStr string) Format {
return TOML
case "org":
return ORG
+ case "csv":
+ return CSV
}
return ""
@@ -88,11 +91,16 @@ func FormatFromFrontMatterType(typ pageparser.ItemType) Format {
// FormatFromContentString tries to detect the format (JSON, YAML or TOML)
// in the given string.
// It returns an empty string if no format could be detected.
-func FormatFromContentString(data string) Format {
+func (d Decoder) FormatFromContentString(data string) Format {
+ csvIdx := strings.IndexRune(data, d.Comma)
jsonIdx := strings.Index(data, "{")
yamlIdx := strings.Index(data, ":")
tomlIdx := strings.Index(data, "=")
+ if isLowerIndexThan(csvIdx, jsonIdx, yamlIdx, tomlIdx) {
+ return CSV
+ }
+
if isLowerIndexThan(jsonIdx, yamlIdx, tomlIdx) {
return JSON
}
diff --git a/parser/metadecoders/format_test.go b/parser/metadecoders/format_test.go
index 6243b3f1e..7794843b1 100644
--- a/parser/metadecoders/format_test.go
+++ b/parser/metadecoders/format_test.go
@@ -88,12 +88,13 @@ func TestFormatFromContentString(t *testing.T) {
{`foo: "bar"`, YAML},
{`foo:"bar"`, YAML},
{`{ "foo": "bar"`, JSON},
+ {`a,b,c"`, CSV},
{`asdfasdf`, Format("")},
{``, Format("")},
} {
errMsg := fmt.Sprintf("[%d] %s", i, test.data)
- result := FormatFromContentString(test.data)
+ result := Default.FormatFromContentString(test.data)
assert.Equal(test.expect, result, errMsg)
}