diff options
-rw-r--r-- | lib/ignore/ignore.go | 18 | ||||
-rw-r--r-- | lib/ignore/ignore_test.go | 5 | ||||
-rw-r--r-- | lib/scanner/walk.go | 71 |
3 files changed, 59 insertions, 35 deletions
diff --git a/lib/ignore/ignore.go b/lib/ignore/ignore.go index f0510d8254..c564a0a5f9 100644 --- a/lib/ignore/ignore.go +++ b/lib/ignore/ignore.go @@ -18,7 +18,9 @@ import ( "time" "github.com/gobwas/glob" + "golang.org/x/text/unicode/norm" + "github.com/syncthing/syncthing/lib/build" "github.com/syncthing/syncthing/lib/fs" "github.com/syncthing/syncthing/lib/ignore/ignoreresult" "github.com/syncthing/syncthing/lib/osutil" @@ -212,6 +214,11 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error { } // Match matches the patterns plus temporary and internal files. +// +// The "file" parameter must be in the OS' native unicode format (NFD on macos, +// NFC everywhere else). This is always the case in real usage in syncthing, as +// we ensure native unicode normalisation on all entry points (scanning and from +// protocol) - so no need to normalize when calling this, except e.g. in tests. func (m *Matcher) Match(file string) (result ignoreresult.R) { switch { case fs.IsTemporary(file): @@ -387,6 +394,10 @@ func loadParseIncludeFile(filesystem fs.Filesystem, file string, cd ChangeDetect } func parseLine(line string) ([]Pattern, error) { + // We use native normalization internally, thus the patterns must match + // that to avoid false negative matches. + line = nativeUnicodeNorm(line) + pattern := Pattern{ result: ignoreresult.Ignored, } @@ -464,6 +475,13 @@ func parseLine(line string) ([]Pattern, error) { return patterns, nil } +func nativeUnicodeNorm(s string) string { + if build.IsDarwin || build.IsIOS { + return norm.NFD.String(s) + } + return norm.NFC.String(s) +} + func parseIgnoreFile(fs fs.Filesystem, fd io.Reader, currentFile string, cd ChangeDetector, linesSeen map[string]struct{}) ([]string, []Pattern, error) { var patterns []Pattern diff --git a/lib/ignore/ignore_test.go b/lib/ignore/ignore_test.go index 5d0a8b02cb..72a0c4db70 100644 --- a/lib/ignore/ignore_test.go +++ b/lib/ignore/ignore_test.go @@ -805,7 +805,10 @@ func TestIssue3174(t *testing.T) { t.Fatal(err) } - if !pats.Match("åäö").IsIgnored() { + // The pattern above is normalized when parsing, and in order for this + // string to match the pattern, it needs to use the same normalization. And + // Go always uses NFC regardless of OS, while we use NFD on macos. + if !pats.Match(nativeUnicodeNorm("åäö")).IsIgnored() { t.Error("Should match") } } diff --git a/lib/scanner/walk.go b/lib/scanner/walk.go index 3c34ca2854..7146f9ae06 100644 --- a/lib/scanner/walk.go +++ b/lib/scanner/walk.go @@ -293,6 +293,11 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco return skip } + // Just in case the filesystem doesn't produce the normalization the OS + // uses, and we use internally. + nonNormPath := path + path = normalizePath(path) + if m := w.Matcher.Match(path); m.IsIgnored() { l.Debugln(w, "ignored (patterns):", path) // Only descend if matcher says so and the current file is not a symlink. @@ -319,9 +324,23 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco return nil } + if path != nonNormPath { + if !w.AutoNormalize { + // We're not authorized to do anything about it, so complain and skip. + handleError(ctx, "normalizing path", nonNormPath, errUTF8Normalization, finishedChan) + return skip + } + + path, err = w.applyNormalization(nonNormPath, path, info) + if err != nil { + handleError(ctx, "normalizing path", nonNormPath, err, finishedChan) + return skip + } + } + if ignoredParent == "" { // parent isn't ignored, nothing special - return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip) + return w.handleItem(ctx, path, info, toHashChan, finishedChan) } // Part of current path below the ignored (potential) parent @@ -330,7 +349,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco // ignored path isn't actually a parent of the current path if rel == path { ignoredParent = "" - return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip) + return w.handleItem(ctx, path, info, toHashChan, finishedChan) } // The previously ignored parent directories of the current, not @@ -345,7 +364,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco handleError(ctx, "scan", ignoredParent, err, finishedChan) return skip } - if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan, skip); err != nil { + if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan); err != nil { return err } } @@ -355,14 +374,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco } } -func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult, skip error) error { - oldPath := path - path, err := w.normalizePath(path, info) - if err != nil { - handleError(ctx, "normalizing path", oldPath, err, finishedChan) - return skip - } - +func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult) error { switch { case info.IsSymlink(): if err := w.walkSymlink(ctx, path, info, finishedChan); err != nil { @@ -375,13 +387,13 @@ func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, return nil case info.IsDir(): - err = w.walkDir(ctx, path, info, finishedChan) + return w.walkDir(ctx, path, info, finishedChan) case info.IsRegular(): - err = w.walkRegular(ctx, path, info, toHashChan) + return w.walkRegular(ctx, path, info, toHashChan) } - return err + return fmt.Errorf("bug: file info for %v is neither symlink, dir nor regular", path) } func (w *walker) walkRegular(ctx context.Context, relPath string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo) error { @@ -550,30 +562,21 @@ func (w *walker) walkSymlink(ctx context.Context, relPath string, info fs.FileIn return nil } -// normalizePath returns the normalized relative path (possibly after fixing -// it on disk), or skip is true. -func (w *walker) normalizePath(path string, info fs.FileInfo) (normPath string, err error) { +func normalizePath(path string) string { if build.IsDarwin || build.IsIOS { // Mac OS X file names should always be NFD normalized. - normPath = norm.NFD.String(path) - } else { - // Every other OS in the known universe uses NFC or just plain - // doesn't bother to define an encoding. In our case *we* do care, - // so we enforce NFC regardless. - normPath = norm.NFC.String(path) - } - - if path == normPath { - // The file name is already normalized: nothing to do - return path, nil - } - - if !w.AutoNormalize { - // We're not authorized to do anything about it, so complain and skip. - - return "", errUTF8Normalization + return norm.NFD.String(path) } + // Every other OS in the known universe uses NFC or just plain + // doesn't bother to define an encoding. In our case *we* do care, + // so we enforce NFC regardless. + return norm.NFC.String(path) +} +// applyNormalization fixes the normalization of the file on disk, i.e. ensures +// the file at path ends up named normPath. It shouldn't but may happen that the +// file ends up with a different name, in which case that one should be scanned. +func (w *walker) applyNormalization(path, normPath string, info fs.FileInfo) (string, error) { // We will attempt to normalize it. normInfo, err := w.Filesystem.Lstat(normPath) if fs.IsNotExist(err) { |