summary | refs | log | tree | commit | diff | stats
path: root/resource
diff options
context:
space:
mode:
author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2017-12-27 19:31:42 +0100
committer: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> 2017-12-28 17:41:51 +0100
commit: e50a8c7a142487d88fe0780c24873c1b95a2283c (patch)
tree: 29b1c4ad4e1dd9af49f45fb7b329d2909c511155 /resource
parent: 7e76a6fd3bc78363ed31d712c63e6b17734797d7 (diff)
resource: Use MD5 to identify image files
But only use a set of byte chunks spread around in the image file to calculate the fingerprint, which is much faster than reading the whole file:

```bash
BenchmarkMD5FromFileFast/full=false-4    300000     4356 ns/op      240 B/op    5 allocs/op
BenchmarkMD5FromFileFast/full=true-4      30000    42899 ns/op    32944 B/op    5 allocs/op
```

Fixes #4186
Diffstat (limited to 'resource')
-rw-r--r-- resource/image.go 27
-rw-r--r-- resource/image_test.go 12
-rw-r--r-- resource/resource.go 12
3 files changed, 29 insertions, 22 deletions
diff --git a/resource/image.go b/resource/image.go
index c039f68b6..916f89a48 100644
--- a/resource/image.go
+++ b/resource/image.go
@@ -112,6 +112,8 @@ type Image struct {
imaging *Imaging
+ hash string
+
*genericResource
}
@@ -129,6 +131,7 @@ func (i *Image) Height() int {
func (i *Image) WithNewBase(base string) Resource {
return &Image{
imaging: i.imaging,
+ hash: i.hash,
genericResource: i.genericResource.WithNewBase(base).(*genericResource)}
}
@@ -490,6 +493,7 @@ func (i *Image) clone() *Image {
return &Image{
imaging: i.imaging,
+ hash: i.hash,
genericResource: &g}
}
@@ -497,20 +501,11 @@ func (i *Image) setBasePath(conf imageConfig) {
i.rel = i.filenameFromConfig(conf)
}
-// We need to set this to something static during tests.
-var fiModTimeFunc = func(fi os.FileInfo) int64 {
- return fi.ModTime().Unix()
-}
-
func (i *Image) filenameFromConfig(conf imageConfig) string {
p1, p2 := helpers.FileAndExt(i.rel)
- sizeModeStr := fmt.Sprintf("_S%d_T%d", i.osFileInfo.Size(), fiModTimeFunc(i.osFileInfo))
- // On scaling an already scaled image, we get the file info from the original.
- // Repeating the same info in the filename makes it stuttery for no good reason.
- if strings.Contains(p1, sizeModeStr) {
- sizeModeStr = ""
- }
+ idStr := fmt.Sprintf("_H%s_%d", i.hash, i.osFileInfo.Size())
+ // Do not change for no good reason.
const md5Threshold = 100
key := conf.key()
@@ -518,12 +513,16 @@ func (i *Image) filenameFromConfig(conf imageConfig) string {
// It is useful to have the key in clear text, but when nesting transforms, it
// can easily be too long to read, and maybe even too long
// for the different OSes to handle.
- if len(p1)+len(sizeModeStr)+len(p2) > md5Threshold {
+ if len(p1)+len(idStr)+len(p2) > md5Threshold {
key = helpers.MD5String(p1 + key + p2)
- p1 = p1[:strings.Index(p1, "_S")]
+ p1 = p1[:strings.Index(p1, "_H")]
+ } else if strings.Contains(p1, idStr) {
+ // On scaling an already scaled image, we get the file info from the original.
+ // Repeating the same info in the filename makes it stuttery for no good reason.
+ idStr = ""
}
- return fmt.Sprintf("%s%s_%s%s", p1, sizeModeStr, key, p2)
+ return fmt.Sprintf("%s%s_%s%s", p1, idStr, key, p2)
}
func decodeImaging(m map[string]interface{}) (Imaging, error) {
diff --git a/resource/image_test.go b/resource/image_test.go
index 3543abb37..61a9ef844 100644
--- a/resource/image_test.go
+++ b/resource/image_test.go
@@ -15,7 +15,6 @@ package resource
import (
"fmt"
- "os"
"testing"
"github.com/stretchr/testify/require"
@@ -52,9 +51,6 @@ func TestParseImageConfig(t *testing.T) {
}
func TestImageTransform(t *testing.T) {
- fiModTimeFunc = func(fi os.FileInfo) int64 {
- return int64(10111213)
- }
assert := require.New(t)
@@ -86,13 +82,13 @@ func TestImageTransform(t *testing.T) {
assert.Equal(200, resizedAndRotated.Height())
assertFileCache(assert, image.spec.Fs, resizedAndRotated.RelPermalink(), 125, 200)
- assert.Equal("/a/sunset_S90587_T10111213_300x200_resize_q75_box_center.jpg", resized.RelPermalink())
+ assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_300x200_resize_q75_box_center.jpg", resized.RelPermalink())
assert.Equal(300, resized.Width())
assert.Equal(200, resized.Height())
fitted, err := resized.Fit("50x50")
assert.NoError(err)
- assert.Equal("/a/sunset_S90587_T10111213_300x200_resize_q75_box_center_50x50_fit_q75_box_center.jpg", fitted.RelPermalink())
+ assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_9b37eba4e4e6ea0cc56a59bb5aa98143.jpg", fitted.RelPermalink())
assert.Equal(50, fitted.Width())
assert.Equal(31, fitted.Height())
@@ -100,13 +96,13 @@ func TestImageTransform(t *testing.T) {
fittedAgain, _ := fitted.Fit("10x20")
fittedAgain, err = fittedAgain.Fit("10x20")
assert.NoError(err)
- assert.Equal("/a/sunset_f1fb715a17c42d5d4602a1870424d590.jpg", fittedAgain.RelPermalink())
+ assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_9a8be1402216c385e0dfd73e267c6827.jpg", fittedAgain.RelPermalink())
assert.Equal(10, fittedAgain.Width())
assert.Equal(6, fittedAgain.Height())
filled, err := image.Fill("200x100 bottomLeft")
assert.NoError(err)
- assert.Equal("/a/sunset_S90587_T10111213_200x100_fill_q75_box_bottomleft.jpg", filled.RelPermalink())
+ assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_200x100_fill_q75_box_bottomleft.jpg", filled.RelPermalink())
assert.Equal(200, filled.Width())
assert.Equal(100, filled.Height())
assertFileCache(assert, image.spec.Fs, filled.RelPermalink(), 200, 100)
diff --git a/resource/resource.go b/resource/resource.go
index 2c934d031..19392f3d3 100644
--- a/resource/resource.go
+++ b/resource/resource.go
@@ -153,7 +153,19 @@ func (r *Spec) newResource(
gr := r.newGenericResource(linker, fi, absPublishDir, absSourceFilename, filepath.ToSlash(relTargetFilename), mimeType)
if mimeType == "image" {
+ f, err := r.Fs.Source.Open(absSourceFilename)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ hash, err := helpers.MD5FromFileFast(f)
+ if err != nil {
+ return nil, err
+ }
+
return &Image{
+ hash: hash,
imaging: r.imaging,
genericResource: gr}, nil
}