summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgreatroar <61184462+greatroar@users.noreply.github.com>2020-04-14 20:26:26 +0200
committerGitHub <noreply@github.com>2020-04-14 20:26:26 +0200
commit81ff31b8fc142a09c622fbcbe71fb3653d045bec (patch)
tree01dffc3cd5ecfe9ab7720f7f62d999be67698c00
parent82fbcb96f801729dc9cd4cd4981f5155e4deec21 (diff)
lib/model: Harden sanitizePath (#6533)
In particular, non-printable runes and non-UTF8 sequences are no longer allowed. Linux systems will happily creates filenames containing these.
-rw-r--r--lib/model/model.go27
-rw-r--r--lib/model/model_test.go27
2 files changed, 49 insertions, 5 deletions
diff --git a/lib/model/model.go b/lib/model/model.go
index c8a3b3955f..39943c958c 100644
--- a/lib/model/model.go
+++ b/lib/model/model.go
@@ -14,11 +14,11 @@ import (
"net"
"path/filepath"
"reflect"
- "regexp"
"runtime"
"strings"
stdsync "sync"
"time"
+ "unicode"
"github.com/pkg/errors"
"github.com/thejerf/suture"
@@ -2697,8 +2697,11 @@ func (m *syncMutexMap) Get(key string) sync.Mutex {
// sanitizePath takes a string that might contain all kinds of special
// characters and makes a valid, similar, path name out of it.
//
-// Spans of invalid characters are replaced by a single space. Invalid
-// characters are control characters, the things not allowed in file names
+// Spans of invalid characters, whitespace and/or non-UTF-8 sequences are
+// replaced by a single space. The result is always UTF-8 and contains only
+// printable characters, as determined by unicode.IsPrint.
+//
+// Invalid characters are non-printing runes, things not allowed in file names
// in Windows, and common shell metacharacters. Even if asterisks and pipes
// and stuff are allowed on Unixes in general they might not be allowed by
// the filesystem and may surprise the user and cause shell oddness. This
@@ -2709,6 +2712,20 @@ func (m *syncMutexMap) Get(key string) sync.Mutex {
// whitespace is collapsed to a single space. Additionally, whitespace at
// either end is removed.
func sanitizePath(path string) string {
- invalid := regexp.MustCompile(`([[:cntrl:]]|[<>:"'/\\|?*\n\r\t \[\]\{\};:!@$%&^#])+`)
- return strings.TrimSpace(invalid.ReplaceAllString(path, " "))
+ var b strings.Builder
+
+ prev := ' '
+ for _, c := range path {
+ if !unicode.IsPrint(c) || c == unicode.ReplacementChar ||
+ strings.ContainsRune(`<>:"'/\|?*[]{};:!@$%&^#`, c) {
+ c = ' '
+ }
+
+ if !(c == ' ' && prev == ' ') {
+ b.WriteRune(c)
+ }
+ prev = c
+ }
+
+ return strings.TrimSpace(b.String())
}
diff --git a/lib/model/model_test.go b/lib/model/model_test.go
index e307d2c0e6..20447767d9 100644
--- a/lib/model/model_test.go
+++ b/lib/model/model_test.go
@@ -24,6 +24,8 @@ import (
"sync/atomic"
"testing"
"time"
+ "unicode"
+ "unicode/utf8"
"github.com/syncthing/syncthing/lib/config"
"github.com/syncthing/syncthing/lib/db"
@@ -3306,6 +3308,9 @@ func TestSanitizePath(t *testing.T) {
{`Räk \/ smörgås`, "Räk smörgås"},
{"هذا هو *\x07?اسم الملف", "هذا هو اسم الملف"},
{`../foo.txt`, `.. foo.txt`},
+ {" \t \n filename in \t space\r", "filename in space"},
+ {"你\xff好", `你 好`},
+ {"\000 foo", "foo"},
}
for _, tc := range cases {
@@ -3316,6 +3321,28 @@ func TestSanitizePath(t *testing.T) {
}
}
+// Fuzz test: sanitizePath must always return strings of printable UTF-8
+// characters when fed random data.
+//
+// Note that space is considered printable, but other whitespace runes are not.
+func TestSanitizePathFuzz(t *testing.T) {
+ buf := make([]byte, 128)
+
+ for i := 0; i < 100; i++ {
+ rand.Read(buf)
+ path := sanitizePath(string(buf))
+ if !utf8.ValidString(path) {
+ t.Errorf("sanitizePath(%q) => %q, not valid UTF-8", buf, path)
+ continue
+ }
+ for _, c := range path {
+ if !unicode.IsPrint(c) {
+ t.Errorf("non-printable rune %q in sanitized path", c)
+ }
+ }
+ }
+}
+
// TestConnCloseOnRestart checks that there is no deadlock when calling Close
// on a protocol connection that has a blocking reader (blocking writer can't
// be done as the test requires clusterconfigs to go through).