diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-27 19:31:42 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-28 17:41:51 +0100 |
commit | e50a8c7a142487d88fe0780c24873c1b95a2283c (patch) | |
tree | 29b1c4ad4e1dd9af49f45fb7b329d2909c511155 /resource | |
parent | 7e76a6fd3bc78363ed31d712c63e6b17734797d7 (diff) |
resource: Use MD5 to identify image files
But only use a set of byte chunks spread around the image file to calculate the fingerprint, which is much faster than reading the whole file:
```bash
BenchmarkMD5FromFileFast/full=false-4 300000 4356 ns/op 240 B/op 5 allocs/op
BenchmarkMD5FromFileFast/full=true-4 30000 42899 ns/op 32944 B/op 5 allocs/op
```
Fixes #4186
Diffstat (limited to 'resource')
-rw-r--r-- | resource/image.go | 27 | ||||
-rw-r--r-- | resource/image_test.go | 12 | ||||
-rw-r--r-- | resource/resource.go | 12 |
3 files changed, 29 insertions, 22 deletions
diff --git a/resource/image.go b/resource/image.go index c039f68b6..916f89a48 100644 --- a/resource/image.go +++ b/resource/image.go @@ -112,6 +112,8 @@ type Image struct { imaging *Imaging + hash string + *genericResource } @@ -129,6 +131,7 @@ func (i *Image) Height() int { func (i *Image) WithNewBase(base string) Resource { return &Image{ imaging: i.imaging, + hash: i.hash, genericResource: i.genericResource.WithNewBase(base).(*genericResource)} } @@ -490,6 +493,7 @@ func (i *Image) clone() *Image { return &Image{ imaging: i.imaging, + hash: i.hash, genericResource: &g} } @@ -497,20 +501,11 @@ func (i *Image) setBasePath(conf imageConfig) { i.rel = i.filenameFromConfig(conf) } -// We need to set this to something static during tests. -var fiModTimeFunc = func(fi os.FileInfo) int64 { - return fi.ModTime().Unix() -} - func (i *Image) filenameFromConfig(conf imageConfig) string { p1, p2 := helpers.FileAndExt(i.rel) - sizeModeStr := fmt.Sprintf("_S%d_T%d", i.osFileInfo.Size(), fiModTimeFunc(i.osFileInfo)) - // On scaling an already scaled image, we get the file info from the original. - // Repeating the same info in the filename makes it stuttery for no good reason. - if strings.Contains(p1, sizeModeStr) { - sizeModeStr = "" - } + idStr := fmt.Sprintf("_H%s_%d", i.hash, i.osFileInfo.Size()) + // Do not change for no good reason. const md5Threshold = 100 key := conf.key() @@ -518,12 +513,16 @@ func (i *Image) filenameFromConfig(conf imageConfig) string { // It is useful to have the key in clear text, but when nesting transforms, it // can easily be too long to read, and maybe even too long // for the different OSes to handle. - if len(p1)+len(sizeModeStr)+len(p2) > md5Threshold { + if len(p1)+len(idStr)+len(p2) > md5Threshold { key = helpers.MD5String(p1 + key + p2) - p1 = p1[:strings.Index(p1, "_S")] + p1 = p1[:strings.Index(p1, "_H")] + } else if strings.Contains(p1, idStr) { + // On scaling an already scaled image, we get the file info from the original. 
+ // Repeating the same info in the filename makes it stuttery for no good reason. + idStr = "" } - return fmt.Sprintf("%s%s_%s%s", p1, sizeModeStr, key, p2) + return fmt.Sprintf("%s%s_%s%s", p1, idStr, key, p2) } func decodeImaging(m map[string]interface{}) (Imaging, error) { diff --git a/resource/image_test.go b/resource/image_test.go index 3543abb37..61a9ef844 100644 --- a/resource/image_test.go +++ b/resource/image_test.go @@ -15,7 +15,6 @@ package resource import ( "fmt" - "os" "testing" "github.com/stretchr/testify/require" @@ -52,9 +51,6 @@ func TestParseImageConfig(t *testing.T) { } func TestImageTransform(t *testing.T) { - fiModTimeFunc = func(fi os.FileInfo) int64 { - return int64(10111213) - } assert := require.New(t) @@ -86,13 +82,13 @@ func TestImageTransform(t *testing.T) { assert.Equal(200, resizedAndRotated.Height()) assertFileCache(assert, image.spec.Fs, resizedAndRotated.RelPermalink(), 125, 200) - assert.Equal("/a/sunset_S90587_T10111213_300x200_resize_q75_box_center.jpg", resized.RelPermalink()) + assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_300x200_resize_q75_box_center.jpg", resized.RelPermalink()) assert.Equal(300, resized.Width()) assert.Equal(200, resized.Height()) fitted, err := resized.Fit("50x50") assert.NoError(err) - assert.Equal("/a/sunset_S90587_T10111213_300x200_resize_q75_box_center_50x50_fit_q75_box_center.jpg", fitted.RelPermalink()) + assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_9b37eba4e4e6ea0cc56a59bb5aa98143.jpg", fitted.RelPermalink()) assert.Equal(50, fitted.Width()) assert.Equal(31, fitted.Height()) @@ -100,13 +96,13 @@ func TestImageTransform(t *testing.T) { fittedAgain, _ := fitted.Fit("10x20") fittedAgain, err = fittedAgain.Fit("10x20") assert.NoError(err) - assert.Equal("/a/sunset_f1fb715a17c42d5d4602a1870424d590.jpg", fittedAgain.RelPermalink()) + assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_9a8be1402216c385e0dfd73e267c6827.jpg", fittedAgain.RelPermalink()) 
assert.Equal(10, fittedAgain.Width()) assert.Equal(6, fittedAgain.Height()) filled, err := image.Fill("200x100 bottomLeft") assert.NoError(err) - assert.Equal("/a/sunset_S90587_T10111213_200x100_fill_q75_box_bottomleft.jpg", filled.RelPermalink()) + assert.Equal("/a/sunset_H47566bb0ca0462db92c65f4033d77175_90587_200x100_fill_q75_box_bottomleft.jpg", filled.RelPermalink()) assert.Equal(200, filled.Width()) assert.Equal(100, filled.Height()) assertFileCache(assert, image.spec.Fs, filled.RelPermalink(), 200, 100) diff --git a/resource/resource.go b/resource/resource.go index 2c934d031..19392f3d3 100644 --- a/resource/resource.go +++ b/resource/resource.go @@ -153,7 +153,19 @@ func (r *Spec) newResource( gr := r.newGenericResource(linker, fi, absPublishDir, absSourceFilename, filepath.ToSlash(relTargetFilename), mimeType) if mimeType == "image" { + f, err := r.Fs.Source.Open(absSourceFilename) + if err != nil { + return nil, err + } + defer f.Close() + + hash, err := helpers.MD5FromFileFast(f) + if err != nil { + return nil, err + } + return &Image{ + hash: hash, imaging: r.imaging, genericResource: gr}, nil } |