diff options
author | Simon Frei <freisim93@gmail.com> | 2024-09-28 17:16:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-28 17:16:44 +0200 |
commit | 605fd6d726ec20c9e70f3a63e9e31995339f9a01 (patch) | |
tree | 56f4f2316dd30383f3bd9d30d5be5988eb457842 | |
parent | 3c476542d26bce7ff7a454dceed9173a4aec40b6 (diff) |
fix(ignore): ensure normalization of patterns and paths match (fixes #9597) (#9717)
In ignores, normalize the input when parsing it.
When scanning, normalize earlier such that the path is already
normalized when checking ignores. This requires splitting normalization
of the string from normalization of the file, as we don't want to
attempt the latter if the file is ignored.
Closes #9598
---------
Co-authored-by: Jakob Borg <jakob@kastelo.net>
-rw-r--r-- | lib/ignore/ignore.go | 18 | ||||
-rw-r--r-- | lib/ignore/ignore_test.go | 5 | ||||
-rw-r--r-- | lib/scanner/walk.go | 71 |
3 files changed, 59 insertions, 35 deletions
diff --git a/lib/ignore/ignore.go b/lib/ignore/ignore.go index f0510d8254..c564a0a5f9 100644 --- a/lib/ignore/ignore.go +++ b/lib/ignore/ignore.go @@ -18,7 +18,9 @@ import ( "time" "github.com/gobwas/glob" + "golang.org/x/text/unicode/norm" + "github.com/syncthing/syncthing/lib/build" "github.com/syncthing/syncthing/lib/fs" "github.com/syncthing/syncthing/lib/ignore/ignoreresult" "github.com/syncthing/syncthing/lib/osutil" @@ -212,6 +214,11 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error { } // Match matches the patterns plus temporary and internal files. +// +// The "file" parameter must be in the OS' native unicode format (NFD on macos, +// NFC everywhere else). This is always the case in real usage in syncthing, as +// we ensure native unicode normalisation on all entry points (scanning and from +// protocol) - so no need to normalize when calling this, except e.g. in tests. func (m *Matcher) Match(file string) (result ignoreresult.R) { switch { case fs.IsTemporary(file): @@ -387,6 +394,10 @@ func loadParseIncludeFile(filesystem fs.Filesystem, file string, cd ChangeDetect } func parseLine(line string) ([]Pattern, error) { + // We use native normalization internally, thus the patterns must match + // that to avoid false negative matches. + line = nativeUnicodeNorm(line) + pattern := Pattern{ result: ignoreresult.Ignored, } @@ -464,6 +475,13 @@ func parseLine(line string) ([]Pattern, error) { return patterns, nil } +func nativeUnicodeNorm(s string) string { + if build.IsDarwin || build.IsIOS { + return norm.NFD.String(s) + } + return norm.NFC.String(s) +} + func parseIgnoreFile(fs fs.Filesystem, fd io.Reader, currentFile string, cd ChangeDetector, linesSeen map[string]struct{}) ([]string, []Pattern, error) { var patterns []Pattern diff --git a/lib/ignore/ignore_test.go b/lib/ignore/ignore_test.go index 5d0a8b02cb..72a0c4db70 100644 --- a/lib/ignore/ignore_test.go +++ b/lib/ignore/ignore_test.go @@ -805,7 +805,10 @@ func TestIssue3174(t *testing.T) { t.Fatal(err) } - if !pats.Match("åäö").IsIgnored() { + // The pattern above is normalized when parsing, and in order for this + // string to match the pattern, it needs to use the same normalization. And + // Go always uses NFC regardless of OS, while we use NFD on macos. + if !pats.Match(nativeUnicodeNorm("åäö")).IsIgnored() { t.Error("Should match") } } diff --git a/lib/scanner/walk.go b/lib/scanner/walk.go index 3c34ca2854..7146f9ae06 100644 --- a/lib/scanner/walk.go +++ b/lib/scanner/walk.go @@ -293,6 +293,11 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco return skip } + // Just in case the filesystem doesn't produce the normalization the OS + // uses, and we use internally. + nonNormPath := path + path = normalizePath(path) + if m := w.Matcher.Match(path); m.IsIgnored() { l.Debugln(w, "ignored (patterns):", path) // Only descend if matcher says so and the current file is not a symlink. @@ -319,9 +324,23 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco return nil } + if path != nonNormPath { + if !w.AutoNormalize { + // We're not authorized to do anything about it, so complain and skip. + handleError(ctx, "normalizing path", nonNormPath, errUTF8Normalization, finishedChan) + return skip + } + + path, err = w.applyNormalization(nonNormPath, path, info) + if err != nil { + handleError(ctx, "normalizing path", nonNormPath, err, finishedChan) + return skip + } + } + if ignoredParent == "" { // parent isn't ignored, nothing special - return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip) + return w.handleItem(ctx, path, info, toHashChan, finishedChan) } // Part of current path below the ignored (potential) parent @@ -330,7 +349,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco // ignored path isn't actually a parent of the current path if rel == path { ignoredParent = "" - return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip) + return w.handleItem(ctx, path, info, toHashChan, finishedChan) } // The previously ignored parent directories of the current, not @@ -345,7 +364,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco handleError(ctx, "scan", ignoredParent, err, finishedChan) return skip } - if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan, skip); err != nil { + if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan); err != nil { return err } } @@ -355,14 +374,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco } } -func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult, skip error) error { - oldPath := path - path, err := w.normalizePath(path, info) - if err != nil { - handleError(ctx, "normalizing path", oldPath, err, finishedChan) - return skip - } - +func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult) error { switch { case info.IsSymlink(): if err := w.walkSymlink(ctx, path, info, finishedChan); err != nil { @@ -375,13 +387,13 @@ func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, return nil case info.IsDir(): - err = w.walkDir(ctx, path, info, finishedChan) + return w.walkDir(ctx, path, info, finishedChan) case info.IsRegular(): - err = w.walkRegular(ctx, path, info, toHashChan) + return w.walkRegular(ctx, path, info, toHashChan) } - return err + return fmt.Errorf("bug: file info for %v is neither symlink, dir nor regular", path) } func (w *walker) walkRegular(ctx context.Context, relPath string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo) error { @@ -550,30 +562,21 @@ func (w *walker) walkSymlink(ctx context.Context, relPath string, info fs.FileIn return nil } -// normalizePath returns the normalized relative path (possibly after fixing -// it on disk), or skip is true. -func (w *walker) normalizePath(path string, info fs.FileInfo) (normPath string, err error) { +func normalizePath(path string) string { if build.IsDarwin || build.IsIOS { // Mac OS X file names should always be NFD normalized. - normPath = norm.NFD.String(path) - } else { - // Every other OS in the known universe uses NFC or just plain - // doesn't bother to define an encoding. In our case *we* do care, - // so we enforce NFC regardless. - normPath = norm.NFC.String(path) - } - - if path == normPath { - // The file name is already normalized: nothing to do - return path, nil - } - - if !w.AutoNormalize { - // We're not authorized to do anything about it, so complain and skip. - - return "", errUTF8Normalization + return norm.NFD.String(path) } + // Every other OS in the known universe uses NFC or just plain + // doesn't bother to define an encoding. In our case *we* do care, + // so we enforce NFC regardless. + return norm.NFC.String(path) +} +// applyNormalization fixes the normalization of the file on disk, i.e. ensures +// the file at path ends up named normPath. It shouldn't but may happen that the +// file ends up with a different name, in which case that one should be scanned. +func (w *walker) applyNormalization(path, normPath string, info fs.FileInfo) (string, error) { // We will attempt to normalize it. normInfo, err := w.Filesystem.Lstat(normPath) if fs.IsNotExist(err) { |