summaryrefslogtreecommitdiffstats
path: root/deploy
diff options
context:
space:
mode:
authorDavid Jones <david@davidejones.com>2022-04-05 09:42:54 +0100
committerGitHub <noreply@github.com>2022-04-05 10:42:54 +0200
commitd0657a436ed9d629ecc2186558504521c30132ee (patch)
treeb44f30e9b2145fd552b703a33c621e70db90d446 /deploy
parenta6e2e38bb2283cf0d9d682a22e1f47e2615652c0 (diff)
deploy: Set an MD5 attribute and use that if eTag not available
During Hugo deploy, when a remote MD5 is invalid (e.g. due to a multipart eTag), Hugo reads the entire remote file and calculates the MD5 again, which can be slow. This commit updates the file upload so that it also stores an MD5 hash in the cloud provider's attributes; e.g. in AWS this looks like x-amz-meta-md5chksum: 26fe392386a8123bf8956a16e08cb841.
Diffstat (limited to 'deploy')
-rw-r--r--deploy/deploy.go28
1 files changed, 22 insertions, 6 deletions
diff --git a/deploy/deploy.go b/deploy/deploy.go
index 123cbe566..e33bbf3f2 100644
--- a/deploy/deploy.go
+++ b/deploy/deploy.go
@@ -21,6 +21,7 @@ import (
"compress/gzip"
"context"
"crypto/md5"
+ "encoding/hex"
"fmt"
"io"
"io/ioutil"
@@ -73,6 +74,8 @@ type deploySummary struct {
NumLocal, NumRemote, NumUploads, NumDeletes int
}
+const metaMD5Hash = "md5chksum" // the meta key to store md5hash in
+
// New constructs a new *Deployer.
func New(cfg config.Provider, localFs afero.Fs) (*Deployer, error) {
targetName := cfg.GetString("target")
@@ -314,6 +317,7 @@ func doSingleUpload(ctx context.Context, bucket *blob.Bucket, upload *fileToUplo
CacheControl: upload.Local.CacheControl(),
ContentEncoding: upload.Local.ContentEncoding(),
ContentType: upload.Local.ContentType(),
+ Metadata: map[string]string{metaMD5Hash: hex.EncodeToString(upload.Local.MD5())},
}
w, err := bucket.NewWriter(ctx, upload.Local.SlashPath, opts)
if err != nil {
@@ -566,7 +570,7 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.
jww.INFO.Printf(" remote dropping %q due to exclude\n", obj.Key)
continue
}
- // If the remote didn't give us an MD5, compute one.
+ // If the remote didn't give us an MD5, use remote attributes MD5, if that doesn't exist compute one.
// This can happen for some providers (e.g., fileblob, which uses the
// local filesystem), but not for the most common Cloud providers
// (S3, GCS, Azure). Although, it can happen for S3 if the blob was uploaded
@@ -574,13 +578,25 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.
// Although it's unfortunate to have to read the file, it's likely better
// than assuming a delta and re-uploading it.
if len(obj.MD5) == 0 {
- r, err := bucket.NewReader(ctx, obj.Key, nil)
+ var attrMD5 []byte
+ attrs, err := bucket.Attributes(ctx, obj.Key)
if err == nil {
- h := md5.New()
- if _, err := io.Copy(h, r); err == nil {
- obj.MD5 = h.Sum(nil)
+ md5String, exists := attrs.Metadata[metaMD5Hash]
+ if exists {
+ attrMD5, _ = hex.DecodeString(md5String)
}
- r.Close()
+ }
+ if len(attrMD5) == 0 {
+ r, err := bucket.NewReader(ctx, obj.Key, nil)
+ if err == nil {
+ h := md5.New()
+ if _, err := io.Copy(h, r); err == nil {
+ obj.MD5 = h.Sum(nil)
+ }
+ r.Close()
+ }
+ } else {
+ obj.MD5 = attrMD5
}
}
retval[obj.Key] = obj