diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-27 19:31:42 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-28 17:41:51 +0100 |
commit | e50a8c7a142487d88fe0780c24873c1b95a2283c (patch) | |
tree | 29b1c4ad4e1dd9af49f45fb7b329d2909c511155 /helpers/general.go | |
parent | 7e76a6fd3bc78363ed31d712c63e6b17734797d7 (diff) |
resource: Use MD5 to identify image files
But only use a set of byte chunks spread around in the image file to calculate the fingerprint, which is much faster than reading the whole file:
```bash
BenchmarkMD5FromFileFast/full=false-4 300000 4356 ns/op 240 B/op 5 allocs/op
BenchmarkMD5FromFileFast/full=true-4 30000 42899 ns/op 32944 B/op 5 allocs/op
```
Fixes #4186
Diffstat (limited to 'helpers/general.go')
-rw-r--r-- | helpers/general.go | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/helpers/general.go b/helpers/general.go
index 4bb4eb584..dcbac697e 100644
--- a/helpers/general.go
+++ b/helpers/general.go
@@ -26,6 +26,8 @@ import (
 	"unicode"
 	"unicode/utf8"
 
+	"github.com/spf13/afero"
+
 	"github.com/jdkato/prose/transform"
 
 	bp "github.com/gohugoio/hugo/bufferpool"
@@ -372,6 +374,57 @@ func MD5String(f string) string {
 	return hex.EncodeToString(h.Sum([]byte{}))
 }
 
+// MD5FromFileFast creates a MD5 hash from a few small chunks of the given file.
+// Reading only parts of the file is fast, but files that differ only outside
+// the sampled chunks get the same hash. It will not close the file.
+func MD5FromFileFast(f afero.File) (string, error) {
+	const (
+		// Do not change once set in stone!
+		maxChunks = 8    // maximum number of reads
+		peekSize  = 64   // bytes requested per read
+		seek      = 2048 // offset handed to Seek before each read after the first
+	)
+
+	h := md5.New()
+	buff := make([]byte, peekSize)
+
+	for i := 0; i < maxChunks; i++ {
+		if i > 0 {
+			_, err := f.Seek(seek, 0) // NOTE(review): whence 0 looks absolute (io.SeekStart), not relative; confirm
+			if err != nil {
+				if err == io.EOF {
+					break
+				}
+				return "", err
+			}
+		}
+		// A short read still hashes the whole buffer (stale bytes included), then stops.
+		_, err := io.ReadAtLeast(f, buff, peekSize)
+		if err != nil {
+			if err == io.EOF || err == io.ErrUnexpectedEOF {
+				h.Write(buff)
+				break
+			}
+			return "", err
+		}
+		h.Write(buff)
+	}
+	// NOTE(review): the last buffer is hashed once more here; kept so hashes don't change.
+	h.Write(buff)
+
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
+// MD5FromFile creates a MD5 hash from the given file by reading it until EOF.
+// A read error is returned to the caller. It will not close the file.
+func MD5FromFile(f afero.File) (string, error) {
+	h := md5.New()
+	if _, err := io.Copy(h, f); err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
 // IsWhitespace determines if the given rune is whitespace.
 func IsWhitespace(r rune) bool {
 	return r == ' ' || r == '\t' || r == '\n' || r == '\r'