diff options
author | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-27 19:31:42 +0100 |
---|---|---|
committer | Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com> | 2017-12-28 17:41:51 +0100 |
commit | e50a8c7a142487d88fe0780c24873c1b95a2283c (patch) | |
tree | 29b1c4ad4e1dd9af49f45fb7b329d2909c511155 /helpers/general_test.go | |
parent | 7e76a6fd3bc78363ed31d712c63e6b17734797d7 (diff) |
resource: Use MD5 to identify image files
But use only a set of byte chunks spread around the image file to calculate the fingerprint, which is much faster than reading the whole file:
```bash
BenchmarkMD5FromFileFast/full=false-4 300000 4356 ns/op 240 B/op 5 allocs/op
BenchmarkMD5FromFileFast/full=true-4 30000 42899 ns/op 32944 B/op 5 allocs/op
```
Fixes #4186
Diffstat (limited to 'helpers/general_test.go')
-rw-r--r-- | helpers/general_test.go | 90 |
1 file changed, 90 insertions, 0 deletions
diff --git a/helpers/general_test.go b/helpers/general_test.go index 2bca632e0..6a7dd4883 100644 --- a/helpers/general_test.go +++ b/helpers/general_test.go @@ -14,10 +14,12 @@ package helpers import ( + "fmt" "reflect" "strings" "testing" + "github.com/spf13/afero" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -270,3 +272,91 @@ func TestToLowerMap(t *testing.T) { } } } + +func TestFastMD5FromFile(t *testing.T) { + fs := afero.NewMemMapFs() + + if err := afero.WriteFile(fs, "small.txt", []byte("abc"), 0777); err != nil { + t.Fatal(err) + } + + if err := afero.WriteFile(fs, "small2.txt", []byte("abd"), 0777); err != nil { + t.Fatal(err) + } + + if err := afero.WriteFile(fs, "bigger.txt", []byte(strings.Repeat("a bc d e", 100)), 0777); err != nil { + t.Fatal(err) + } + + if err := afero.WriteFile(fs, "bigger2.txt", []byte(strings.Repeat("c d e f g", 100)), 0777); err != nil { + t.Fatal(err) + } + + req := require.New(t) + + sf1, err := fs.Open("small.txt") + req.NoError(err) + sf2, err := fs.Open("small2.txt") + req.NoError(err) + + bf1, err := fs.Open("bigger.txt") + req.NoError(err) + bf2, err := fs.Open("bigger2.txt") + req.NoError(err) + + defer sf1.Close() + defer sf2.Close() + defer bf1.Close() + defer bf2.Close() + + m1, err := MD5FromFileFast(sf1) + req.NoError(err) + req.Equal("308d8a1127b46524b51507424071c22c", m1) + + m2, err := MD5FromFileFast(sf2) + req.NoError(err) + req.NotEqual(m1, m2) + + m3, err := MD5FromFileFast(bf1) + req.NoError(err) + req.NotEqual(m2, m3) + + m4, err := MD5FromFileFast(bf2) + req.NoError(err) + req.NotEqual(m3, m4) + + m5, err := MD5FromFile(bf2) + req.NoError(err) + req.NotEqual(m4, m5) +} + +func BenchmarkMD5FromFileFast(b *testing.B) { + fs := afero.NewMemMapFs() + + for _, full := range []bool{false, true} { + b.Run(fmt.Sprintf("full=%t", full), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + if err := afero.WriteFile(fs, "file.txt", []byte(strings.Repeat("1234567890", 
2000)), 0777); err != nil { + b.Fatal(err) + } + f, err := fs.Open("file.txt") + if err != nil { + b.Fatal(err) + } + b.StartTimer() + if full { + if _, err := MD5FromFile(f); err != nil { + b.Fatal(err) + } + } else { + if _, err := MD5FromFileFast(f); err != nil { + b.Fatal(err) + } + } + f.Close() + } + }) + } + +} |