From a5744697971d296eb973e04e4259fe9e516b908f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sun, 23 Dec 2018 10:40:32 +0100 Subject: Add CSV support to transform.Unmarshal Fixes #5555 --- parser/metadecoders/decoder.go | 64 ++++++++++++++++++++++++++++++++----- parser/metadecoders/decoder_test.go | 10 ++++-- parser/metadecoders/format.go | 10 +++++- parser/metadecoders/format_test.go | 3 +- 4 files changed, 75 insertions(+), 12 deletions(-) (limited to 'parser') diff --git a/parser/metadecoders/decoder.go b/parser/metadecoders/decoder.go index 6da791c73..0ca8575fe 100644 --- a/parser/metadecoders/decoder.go +++ b/parser/metadecoders/decoder.go @@ -14,6 +14,8 @@ package metadecoders import ( + "bytes" + "encoding/csv" "encoding/json" "fmt" @@ -27,22 +29,37 @@ import ( yaml "gopkg.in/yaml.v2" ) +// Decoder provides some configuration options for the decoders. +type Decoder struct { + // Comma is the field delimiter used in the CSV decoder. It defaults to ','. + Comma rune + + // Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the + // Comment character without preceding whitespace are ignored. + Comment rune +} + +// Default is a Decoder in its default configuration. +var Default = Decoder{ + Comma: ',', +} + // UnmarshalToMap will unmarshall data in format f into a new map. This is // what's needed for Hugo's front matter decoding. -func UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) { +func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]interface{}, error) { m := make(map[string]interface{}) if data == nil { return m, nil } - err := unmarshal(data, f, &m) + err := d.unmarshal(data, f, &m) return m, err } // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from // the given filename. 
-func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) { +func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, error) { format := FormatFromString(filename) if format == "" { return nil, errors.Errorf("%q is not a valid configuration format", filename) @@ -52,23 +69,29 @@ func UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]interface{}, e if err != nil { return nil, err } - return UnmarshalToMap(data, format) + return d.UnmarshalToMap(data, format) } // Unmarshal will unmarshall data in format f into an interface{}. // This is what's needed for Hugo's /data handling. -func Unmarshal(data []byte, f Format) (interface{}, error) { +func (d Decoder) Unmarshal(data []byte, f Format) (interface{}, error) { if data == nil { - return make(map[string]interface{}), nil + switch f { + case CSV: + return make([][]string, 0), nil + default: + return make(map[string]interface{}), nil + } + } var v interface{} - err := unmarshal(data, f, &v) + err := d.unmarshal(data, f, &v) return v, err } // unmarshal unmarshals data in format f into v. 
-func unmarshal(data []byte, f Format, v interface{}) error { +func (d Decoder) unmarshal(data []byte, f Format, v interface{}) error { var err error @@ -116,6 +139,9 @@ func unmarshal(data []byte, f Format, v interface{}) error { *v.(*interface{}) = mm } } + case CSV: + return d.unmarshalCSV(data, v) + default: return errors.Errorf("unmarshal of format %q is not supported", f) } @@ -128,6 +154,28 @@ func unmarshal(data []byte, f Format, v interface{}) error { } +func (d Decoder) unmarshalCSV(data []byte, v interface{}) error { + r := csv.NewReader(bytes.NewReader(data)) + r.Comma = d.Comma + r.Comment = d.Comment + + records, err := r.ReadAll() + if err != nil { + return err + } + + switch v.(type) { + case *interface{}: + *v.(*interface{}) = records + default: + return errors.Errorf("CSV cannot be unmarshaled into %T", v) + + } + + return nil + +} + func toFileError(f Format, err error) error { return herrors.ToFileError(string(f), err) } diff --git a/parser/metadecoders/decoder_test.go b/parser/metadecoders/decoder_test.go index 94cfd5a9a..38d002dd8 100644 --- a/parser/metadecoders/decoder_test.go +++ b/parser/metadecoders/decoder_test.go @@ -26,6 +26,8 @@ func TestUnmarshalToMap(t *testing.T) { expect := map[string]interface{}{"a": "b"} + d := Default + for i, test := range []struct { data string format Format @@ -40,9 +42,10 @@ func TestUnmarshalToMap(t *testing.T) { {`#+a: b`, ORG, expect}, // errors {`a = b`, TOML, false}, + {`a,b,c`, CSV, false}, // Use Unmarshal for CSV } { msg := fmt.Sprintf("%d: %s", i, test.format) - m, err := UnmarshalToMap([]byte(test.data), test.format) + m, err := d.UnmarshalToMap([]byte(test.data), test.format) if b, ok := test.expect.(bool); ok && !b { assert.Error(err, msg) } else { @@ -57,6 +60,8 @@ func TestUnmarshalToInterface(t *testing.T) { expect := map[string]interface{}{"a": "b"} + d := Default + for i, test := range []struct { data string format Format @@ -67,12 +72,13 @@ func TestUnmarshalToInterface(t *testing.T) { 
{`#+a: b`, ORG, expect}, {`a = "b"`, TOML, expect}, {`a: "b"`, YAML, expect}, + {`a,b,c`, CSV, [][]string{[]string{"a", "b", "c"}}}, {"a: Easy!\nb:\n c: 2\n d: [3, 4]", YAML, map[string]interface{}{"a": "Easy!", "b": map[string]interface{}{"c": 2, "d": []interface{}{3, 4}}}}, // errors {`a = "`, TOML, false}, } { msg := fmt.Sprintf("%d: %s", i, test.format) - m, err := Unmarshal([]byte(test.data), test.format) + m, err := d.Unmarshal([]byte(test.data), test.format) if b, ok := test.expect.(bool); ok && !b { assert.Error(err, msg) } else { diff --git a/parser/metadecoders/format.go b/parser/metadecoders/format.go index 4a30898fe..719fbf100 100644 --- a/parser/metadecoders/format.go +++ b/parser/metadecoders/format.go @@ -31,6 +31,7 @@ const ( JSON Format = "json" TOML Format = "toml" YAML Format = "yaml" + CSV Format = "csv" ) // FormatFromString turns formatStr, typically a file extension without any ".", @@ -51,6 +52,8 @@ func FormatFromString(formatStr string) Format { return TOML case "org": return ORG + case "csv": + return CSV } return "" @@ -88,11 +91,16 @@ func FormatFromFrontMatterType(typ pageparser.ItemType) Format { // FormatFromContentString tries to detect the format (JSON, YAML or TOML) // in the given string. // It return an empty string if no format could be detected. 
-func FormatFromContentString(data string) Format { +func (d Decoder) FormatFromContentString(data string) Format { + csvIdx := strings.IndexRune(data, d.Comma) jsonIdx := strings.Index(data, "{") yamlIdx := strings.Index(data, ":") tomlIdx := strings.Index(data, "=") + if isLowerIndexThan(csvIdx, jsonIdx, yamlIdx, tomlIdx) { + return CSV + } + if isLowerIndexThan(jsonIdx, yamlIdx, tomlIdx) { return JSON } diff --git a/parser/metadecoders/format_test.go b/parser/metadecoders/format_test.go index 6243b3f1e..7794843b1 100644 --- a/parser/metadecoders/format_test.go +++ b/parser/metadecoders/format_test.go @@ -88,12 +88,13 @@ func TestFormatFromContentString(t *testing.T) { {`foo: "bar"`, YAML}, {`foo:"bar"`, YAML}, {`{ "foo": "bar"`, JSON}, + {`a,b,c"`, CSV}, {`asdfasdf`, Format("")}, {``, Format("")}, } { errMsg := fmt.Sprintf("[%d] %s", i, test.data) - result := FormatFromContentString(test.data) + result := Default.FormatFromContentString(test.data) assert.Equal(test.expect, result, errMsg) } -- cgit v1.2.3