summaryrefslogtreecommitdiffstats
path: root/helpers/general.go
diff options
context:
space:
mode:
authorBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-12-27 19:31:42 +0100
committerBjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>2017-12-28 17:41:51 +0100
commite50a8c7a142487d88fe0780c24873c1b95a2283c (patch)
tree29b1c4ad4e1dd9af49f45fb7b329d2909c511155 /helpers/general.go
parent7e76a6fd3bc78363ed31d712c63e6b17734797d7 (diff)
resource: Use MD5 to identify image files
But only use a set of byte chunks spread around in the image file to calculate the fingerprint, which is much faster than reading the whole file:
```bash
BenchmarkMD5FromFileFast/full=false-4 300000 4356 ns/op 240 B/op 5 allocs/op
BenchmarkMD5FromFileFast/full=true-4 30000 42899 ns/op 32944 B/op 5 allocs/op
```
Fixes #4186
Diffstat (limited to 'helpers/general.go')
-rw-r--r--helpers/general.go53
1 files changed, 53 insertions, 0 deletions
diff --git a/helpers/general.go b/helpers/general.go
index 4bb4eb584..dcbac697e 100644
--- a/helpers/general.go
+++ b/helpers/general.go
@@ -26,6 +26,8 @@ import (
"unicode"
"unicode/utf8"
+ "github.com/spf13/afero"
+
"github.com/jdkato/prose/transform"
bp "github.com/gohugoio/hugo/bufferpool"
@@ -372,6 +374,57 @@ func MD5String(f string) string {
return hex.EncodeToString(h.Sum([]byte{}))
}
+// MD5FromFileFast creates a MD5 hash from the given file. It only reads parts of
+// the file for speed, so don't use it if the files are very subtly different.
+//
+// Starting at the file's current offset it samples up to maxChunks chunks of
+// peekSize bytes each, skipping seek bytes forward after every chunk, and
+// stops at the first short read. Only bytes actually read are hashed.
+//
+// It returns the hex-encoded digest, or the first non-EOF error reported by
+// Seek or Read. It will not close the file.
+func MD5FromFileFast(f afero.File) (string, error) {
+	const (
+		// Do not change once set in stone!
+		maxChunks = 8
+		peekSize  = 64
+		seek      = 2048
+	)
+
+	h := md5.New()
+	buff := make([]byte, peekSize)
+
+	for i := 0; i < maxChunks; i++ {
+		if i > 0 {
+			// Skip ahead relative to the current offset. Seeking from the
+			// start of the file would land on the same offset every time and
+			// hash one chunk repeatedly instead of sampling across the file.
+			if _, err := f.Seek(seek, io.SeekCurrent); err != nil {
+				if err == io.EOF {
+					break
+				}
+				return "", err
+			}
+		}
+
+		n, err := io.ReadAtLeast(f, buff, peekSize)
+		// Hash only what this read produced; the rest of buff may still hold
+		// zero padding or bytes left over from an earlier chunk.
+		h.Write(buff[:n])
+		if err != nil {
+			if err == io.EOF || err == io.ErrUnexpectedEOF {
+				// End of file reached: nothing more to sample.
+				break
+			}
+			return "", err
+		}
+	}
+
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
+// MD5FromFile creates a MD5 hash from the given file.
+// It copies everything the file yields into the hash and returns the
+// hex-encoded digest, or the error reported by the copy.
+// It will not close the file.
+func MD5FromFile(f afero.File) (string, error) {
+	h := md5.New()
+	if _, err := io.Copy(h, f); err != nil {
+		// Propagate the failure; returning nil here would hand callers an
+		// empty hash that looks like a successful result.
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
// IsWhitespace determines if the given rune is whitespace.
func IsWhitespace(r rune) bool {
return r == ' ' || r == '\t' || r == '\n' || r == '\r'