7 files changed, 1207 insertions, 91 deletions
diff --git a/common/paths/path.go b/common/paths/path.go
index 5d211c5e0..da99b16ac 100644
--- a/common/paths/path.go
+++ b/common/paths/path.go
@@ -16,14 +16,18 @@ package paths
 import (
 	"errors"
 	"fmt"
+	"net/url"
 	"path"
 	"path/filepath"
-	"regexp"
 	"strings"
+	"unicode"
 )
 
 // FilePathSeparator as defined by os.Separator.
-const FilePathSeparator = string(filepath.Separator)
+const (
+	FilePathSeparator = string(filepath.Separator)
+	slash             = "/"
+)
 
 // filepathPathBridge is a bridge for common functionality in filepath vs path
 type filepathPathBridge interface {
@@ -72,6 +76,30 @@ func AbsPathify(workingDir, inPath string) string {
 	return filepath.Join(workingDir, inPath)
 }
 
+// AddTrailingSlash adds a trailing Unix styled slash (/) if not already
+// there.
+func AddTrailingSlash(path string) string {
+	if !strings.HasSuffix(path, "/") {
+		path += "/"
+	}
+	return path
+}
+
+// AddLeadingSlash adds a leading Unix styled slash (/) if not already
+// there.
+func AddLeadingSlash(path string) string {
+	if !strings.HasPrefix(path, "/") {
+		path = "/" + path
+	}
+	return path
+}
+
+// AddLeadingAndTrailingSlash adds a leading and a trailing Unix styled slash (/)
+// to path where either is not already there.
+func AddLeadingAndTrailingSlash(path string) string {
+	return AddTrailingSlash(AddLeadingSlash(path))
+}
+
 // MakeTitle converts the path given to a suitable title, trimming whitespace
 // and replacing hyphens with whitespace.
 func MakeTitle(inpath string) string {
@@ -94,43 +122,6 @@ func makePathRelative(inPath string, possibleDirectories ...string) (string, err
 	return inPath, errors.New("can't extract relative path, unknown prefix")
 }
 
-// Should be good enough for Hugo.
-var isFileRe = regexp.MustCompile(`.*\..{1,6}$`)
-
-// GetDottedRelativePath expects a relative path starting after the content directory.
-// It returns a relative path with dots ("..") navigating up the path structure.
-func GetDottedRelativePath(inPath string) string {
-	inPath = path.Clean(filepath.ToSlash(inPath))
-
-	if inPath == "." {
-		return "./"
-	}
-
-	if !isFileRe.MatchString(inPath) && !strings.HasSuffix(inPath, "/") {
-		inPath += "/"
-	}
-
-	if !strings.HasPrefix(inPath, "/") {
-		inPath = "/" + inPath
-	}
-
-	dir, _ := filepath.Split(inPath)
-
-	sectionCount := strings.Count(dir, "/")
-
-	if sectionCount == 0 || dir == "/" {
-		return "./"
-	}
-
-	var dottedPath string
-
-	for i := 1; i < sectionCount; i++ {
-		dottedPath += "../"
-	}
-
-	return dottedPath
-}
-
 // ExtNoDelimiter takes a path and returns the extension, excluding the delimiter, i.e. "md".
 func ExtNoDelimiter(in string) string {
 	return strings.TrimPrefix(Ext(in), ".")
@@ -167,12 +158,6 @@ func Filename(in string) (name string) {
 	return
 }
 
-// PathNoExt takes a path, strips out the extension,
-// and returns the name of the file.
-func PathNoExt(in string) string {
-	return strings.TrimSuffix(in, path.Ext(in))
-}
-
 // FileAndExt returns the filename and any extension of a file path as
 // two separate strings.
 //
@@ -252,16 +237,125 @@ func prettifyPath(in string, b filepathPathBridge) string {
 	return b.Join(b.Dir(in), name, "index"+ext)
 }
 
-type NamedSlice struct {
-	Name  string
-	Slice []string
+// CommonDir returns the common directory of the given paths.
+func CommonDir(path1, path2 string) string {
+	if path1 == "" || path2 == "" {
+		return ""
+	}
+
+	p1 := strings.Split(path1, "/")
+	p2 := strings.Split(path2, "/")
+
+	var common []string
+
+	for i := 0; i < len(p1) && i < len(p2); i++ {
+		if p1[i] == p2[i] {
+			common = append(common, p1[i])
+		} else {
+			break
+		}
+	}
+
+	return strings.Join(common, "/")
+}
+
+// Sanitize sanitizes string to be used in Hugo's file paths and URLs, allowing only
+// a predefined set of special Unicode characters.
+//
+// Spaces will be replaced with a single hyphen.
+//
+// This function is the core function used to normalize paths in Hugo.
+//
+// Note that this is the first common step for URL/path sanitation,
+// the final URL/path may end up looking different if the user has stricter rules defined (e.g. removePathAccents=true).
+func Sanitize(s string) string {
+	var willChange bool
+	for i, r := range s {
+		willChange = !isAllowedPathCharacter(s, i, r)
+		if willChange {
+			break
+		}
+	}
+
+	if !willChange {
+		// Prevent allocation when nothing changes.
+		return s
+	}
+
+	target := make([]rune, 0, len(s))
+	var (
+		prependHyphen bool
+		wasHyphen     bool
+	)
+
+	for i, r := range s {
+		isAllowed := isAllowedPathCharacter(s, i, r)
+
+		if isAllowed {
+			// track explicit hyphen in input; no need to add a new hyphen if
+			// we just saw one.
+			wasHyphen = r == '-'
+
+			if prependHyphen {
+				// if currently have a hyphen, don't prepend an extra one
+				if !wasHyphen {
+					target = append(target, '-')
+				}
+				prependHyphen = false
+			}
+			target = append(target, r)
+		} else if len(target) > 0 && !wasHyphen && unicode.IsSpace(r) {
+			prependHyphen = true
+		}
+	}
+
+	return string(target)
+}
+
+func isAllowedPathCharacter(s string, i int, r rune) bool {
+	if r == ' ' {
+		return false
+	}
+	// Check for the most likely first (faster).
+	isAllowed := unicode.IsLetter(r) || unicode.IsDigit(r)
+	isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' || r == '-' || r == '@'
+	isAllowed = isAllowed || unicode.IsMark(r)
+	isAllowed = isAllowed || (r == '%' && i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]))
+	return isAllowed
 }
 
-func (n NamedSlice) String() string {
-	if len(n.Slice) == 0 {
-		return n.Name
+// From https://golang.org/src/net/url/url.go
+func ishex(c byte) bool {
+	switch {
+	case '0' <= c && c <= '9':
+		return true
+	case 'a' <= c && c <= 'f':
+		return true
+	case 'A' <= c && c <= 'F':
+		return true
 	}
-	return fmt.Sprintf("%s%s{%s}", n.Name, FilePathSeparator, strings.Join(n.Slice, ","))
+	return false
+}
+
+var slashFunc = func(r rune) bool {
+	return r == '/'
+}
+
+// Dir behaves like path.Dir without the path.Clean step.
+//
+// One trailing slash is removed from the directory part, unless that part is just "/".
+func Dir(s string) string {
+	dir, _ := path.Split(s)
+	if len(dir) > 1 && dir[len(dir)-1] == '/' {
+		return dir[:len(dir)-1]
+	}
+	return dir
+}
+
+// FieldsSlash cuts s into fields separated with '/'.
+func FieldsSlash(s string) []string {
+	f := strings.FieldsFunc(s, slashFunc)
+	return f
 }
 
 // DirFile holds the result from path.Split.
@@ -274,3 +368,27 @@ type DirFile struct {
 func (df DirFile) String() string {
 	return fmt.Sprintf("%s|%s", df.Dir, df.File)
 }
+
+// PathEscape escapes unicode letters in pth.
+// Use URLEscape to escape full URLs including scheme, query etc.
+// This is slightly faster for the common case.
+// Note, there is a url.PathEscape function, but that also
+// escapes /.
+func PathEscape(pth string) string {
+	u, err := url.Parse(pth)
+	if err != nil {
+		panic(err)
+	}
+	return u.EscapedPath()
+}
+
+// ToSlashTrimLeading is just a filepath.ToSlash with an added / prefix trimmer.
+func ToSlashTrimLeading(s string) string {
+	return strings.TrimPrefix(filepath.ToSlash(s), "/")
+}
+
+// ToSlashPreserveLeading converts the path given to a forward slash separated path
+// that always starts with exactly one slash, trimming any other leading or trailing slashes.
+func ToSlashPreserveLeading(s string) string {
+	return "/" + strings.Trim(filepath.ToSlash(s), "/")
+}
diff --git a/common/paths/path_test.go b/common/paths/path_test.go
index 2400f16ab..3605bfc43 100644
--- a/common/paths/path_test.go
+++ b/common/paths/path_test.go
@@ -1,4 +1,4 @@
-// Copyright 2021 The Hugo Authors. All rights reserved.
+// Copyright 2024 The Hugo Authors. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -75,44 +75,6 @@ func TestMakePathRelative(t *testing.T) {
 	}
 }
 
-func TestGetDottedRelativePath(t *testing.T) {
-	// on Windows this will receive both kinds, both country and western ...
-	for _, f := range []func(string) string{filepath.FromSlash, func(s string) string { return s }} {
-		doTestGetDottedRelativePath(f, t)
-	}
-}
-
-func doTestGetDottedRelativePath(urlFixer func(string) string, t *testing.T) {
-	type test struct {
-		input, expected string
-	}
-	data := []test{
-		{"", "./"},
-		{urlFixer("/"), "./"},
-		{urlFixer("post"), "../"},
-		{urlFixer("/post"), "../"},
-		{urlFixer("post/"), "../"},
-		{urlFixer("tags/foo.html"), "../"},
-		{urlFixer("/tags/foo.html"), "../"},
-		{urlFixer("/post/"), "../"},
-		{urlFixer("////post/////"), "../"},
-		{urlFixer("/foo/bar/index.html"), "../../"},
-		{urlFixer("/foo/bar/foo/"), "../../../"},
-		{urlFixer("/foo/bar/foo"), "../../../"},
-		{urlFixer("foo/bar/foo/"), "../../../"},
-		{urlFixer("foo/bar/foo/bar"), "../../../../"},
-		{"404.html", "./"},
-		{"404.xml", "./"},
-		{"/404.html", "./"},
-	}
-	for i, d := range data {
-		output := GetDottedRelativePath(d.input)
-		if d.expected != output {
-			t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
-		}
-	}
-}
-
 func TestMakeTitle(t *testing.T) {
 	type test struct {
 		input, expected string
@@ -226,3 +188,77 @@ func TestFileAndExt(t *testing.T) {
 		}
 	}
 }
+
+func TestSanitize(t *testing.T) {
+	c := qt.New(t)
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"  Foo bar  ", "Foo-bar"},
+		{"Foo.Bar/foo_Bar-Foo", "Foo.Bar/foo_Bar-Foo"},
+		{"fOO,bar:foobAR", "fOObarfoobAR"},
+		{"FOo/BaR.html", "FOo/BaR.html"},
+		{"FOo/Ba---R.html", "FOo/Ba---R.html"}, /// See #10104
+		{"FOo/Ba       R.html", "FOo/Ba-R.html"},
+		{"трям/трям", "трям/трям"},
+		{"은행", "은행"},
+		{"Банковский кассир", "Банковский-кассир"},
+		// Issue #1488
+		{"संस्कृत", "संस्कृत"},
+		{"a%C3%B1ame", "a%C3%B1ame"},         // Issue #1292
+		{"this+is+a+test", "this+is+a+test"}, // Issue #1290
+		{"~foo", "~foo"},                     // Issue #2177
+
+	}
+
+	for _, test := range tests {
+		c.Assert(Sanitize(test.input), qt.Equals, test.expected)
+	}
+}
+
+func BenchmarkSanitize(b *testing.B) {
+	const (
+		allAlowedPath = "foo/bar"
+		spacePath     = "foo bar"
+	)
+
+	// This should not allocate any memory.
+	b.Run("All allowed", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := Sanitize(allAlowedPath)
+			if got != allAlowedPath {
+				b.Fatal(got)
+			}
+		}
+	})
+
+	// This will allocate some memory.
+	b.Run("Spaces", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			got := Sanitize(spacePath)
+			if got != "foo-bar" {
+				b.Fatal(got)
+			}
+		}
+	})
+}
+
+func TestDir(t *testing.T) {
+	c := qt.New(t)
+	c.Assert(Dir("/a/b/c/d"), qt.Equals, "/a/b/c")
+	c.Assert(Dir("/a"), qt.Equals, "/")
+	c.Assert(Dir("/"), qt.Equals, "/")
+	c.Assert(Dir(""), qt.Equals, "")
+}
+
+func TestFieldsSlash(t *testing.T) {
+	c := qt.New(t)
+
+	c.Assert(FieldsSlash("a/b/c"), qt.DeepEquals, []string{"a", "b", "c"})
+	c.Assert(FieldsSlash("/a/b/c"), qt.DeepEquals, []string{"a", "b", "c"})
+	c.Assert(FieldsSlash("/a/b/c/"), qt.DeepEquals, []string{"a", "b", "c"})
+	c.Assert(FieldsSlash("a/b/c/"), qt.DeepEquals, []string{"a", "b", "c"})
+	c.Assert(FieldsSlash("/"), qt.DeepEquals, []string{})
+	c.Assert(FieldsSlash(""), qt.DeepEquals, []string{})
+}
diff --git a/common/paths/pathparser.go b/common/paths/pathparser.go
new file mode 100644
index 000000000..842d9307b
--- /dev/null
+++ b/common/paths/pathparser.go
@@ -0,0 +1,494 @@
+// Copyright 2024 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package paths
+
+import (
+	"path"
+	"path/filepath"
+	"runtime"
+	"strings"
+
+	"github.com/gohugoio/hugo/common/types"
+	"github.com/gohugoio/hugo/hugofs/files"
+)
+
+var defaultPathParser PathParser
+
+// PathParser parses a path into a Path.
+type PathParser struct {
+	// Maps the language code to its index in the languages/sites slice.
+	LanguageIndex map[string]int
+}
+
+// Parse parses component c with path s into Path using the default path parser.
+func Parse(c, s string) *Path {
+	return defaultPathParser.Parse(c, s)
+}
+
+// NormalizePathStringBasic returns a normalized path string using the very basic Hugo rules.
+func NormalizePathStringBasic(s string) string {
+	// All lower case.
+	s = strings.ToLower(s)
+
+	// Replace spaces with hyphens.
+	s = strings.ReplaceAll(s, " ", "-")
+
+	return s
+}
+
+// Parse parses component c with path s into Path using Hugo's content path rules.
+func (parser PathParser) Parse(c, s string) *Path {
+	p, err := parser.parse(c, s)
+	if err != nil {
+		panic(err)
+	}
+	return p
+}
+
+func (pp *PathParser) parse(component, s string) (*Path, error) {
+	ss := NormalizePathStringBasic(s)
+
+	p, err := pp.doParse(component, ss)
+	if err != nil {
+		return nil, err
+	}
+
+	if s != ss {
+		var err error
+		// Preserve the original case for titles etc.
+		p.unnormalized, err = pp.doParse(component, s)
+
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		p.unnormalized = p
+	}
+
+	return p, nil
+}
+
+func (pp *PathParser) doParse(component, s string) (*Path, error) {
+	p := &Path{
+		component:             component,
+		posContainerLow:       -1,
+		posContainerHigh:      -1,
+		posSectionHigh:        -1,
+		posIdentifierLanguage: -1,
+	}
+
+	hasLang := pp.LanguageIndex != nil
+	hasLang = hasLang && (component == files.ComponentFolderContent || component == files.ComponentFolderLayouts)
+
+	if runtime.GOOS == "windows" {
+		s = path.Clean(filepath.ToSlash(s))
+		if s == "." {
+			s = ""
+		}
+	}
+
+	if s == "" {
+		s = "/"
+	}
+
+	// Leading slash, no trailing slash.
+	if !strings.HasPrefix(s, "/") {
+		s = "/" + s
+	}
+
+	if s != "/" && s[len(s)-1] == '/' {
+		s = s[:len(s)-1]
+	}
+
+	p.s = s
+	slashCount := 0
+
+	for i := len(s) - 1; i >= 0; i-- {
+		c := s[i]
+
+		switch c {
+		case '.':
+			if p.posContainerHigh == -1 {
+				var high int
+				if len(p.identifiers) > 0 {
+					high = p.identifiers[len(p.identifiers)-1].Low - 1
+				} else {
+					high = len(p.s)
+				}
+				id := types.LowHigh{Low: i + 1, High: high}
+				if len(p.identifiers) == 0 {
+					p.identifiers = append(p.identifiers, id)
+				} else if len(p.identifiers) == 1 {
+					// Check for a valid language.
+					s := p.s[id.Low:id.High]
+
+					if hasLang {
+						if _, found := pp.LanguageIndex[s]; found {
+							p.posIdentifierLanguage = 1
+							p.identifiers = append(p.identifiers, id)
+						}
+					}
+				}
+			}
+		case '/':
+			slashCount++
+			if p.posContainerHigh == -1 {
+				p.posContainerHigh = i + 1
+			} else if p.posContainerLow == -1 {
+				p.posContainerLow = i + 1
+			}
+			if i > 0 {
+				p.posSectionHigh = i
+			}
+		}
+	}
+
+	isContentComponent := p.component == files.ComponentFolderContent || p.component == files.ComponentFolderArchetypes
+	isContent := isContentComponent && files.IsContentExt(p.Ext())
+
+	if isContent {
+		id := p.identifiers[len(p.identifiers)-1]
+		b := p.s[p.posContainerHigh : id.Low-1]
+		switch b {
+		case "index":
+			p.bundleType = PathTypeLeaf
+		case "_index":
+			p.bundleType = PathTypeBranch
+		default:
+			p.bundleType = PathTypeContentSingle
+		}
+
+		if slashCount == 2 && p.IsLeafBundle() {
+			p.posSectionHigh = 0
+		}
+	}
+
+	return p, nil
+}
+
+func ModifyPathBundleTypeResource(p *Path) {
+	if p.IsContent() {
+		p.bundleType = PathTypeContentResource
+	} else {
+		p.bundleType = PathTypeFile
+	}
+}
+
+type PathType int
+
+const (
+	// A generic resource, e.g. a JSON file.
+	PathTypeFile PathType = iota
+
+	// All below are content files; the order matters, as IsContent, isContentPage and IsBundle compare with >=.
+	// A resource of a content type with front matter.
+	PathTypeContentResource
+
+	// E.g. /blog/my-post.md
+	PathTypeContentSingle
+
+	// All below are bundled content files.
+
+	// Leaf bundles, e.g. /blog/my-post/index.md
+	PathTypeLeaf
+
+	// Branch bundles, e.g. /blog/_index.md
+	PathTypeBranch
+)
+
+type Path struct {
+	s string
+
+	posContainerLow  int
+	posContainerHigh int
+	posSectionHigh   int
+
+	component  string
+	bundleType PathType
+
+	identifiers []types.LowHigh
+
+	posIdentifierLanguage int
+
+	trimLeadingSlash bool
+
+	unnormalized *Path
+}
+
+// TrimLeadingSlash returns a copy of the Path with the leading slash removed.
+func (p Path) TrimLeadingSlash() *Path {
+	p.trimLeadingSlash = true
+	return &p
+}
+
+func (p *Path) norm(s string) string {
+	if p.trimLeadingSlash {
+		s = strings.TrimPrefix(s, "/")
+	}
+	return s
+}
+
+// IdentifierBase satisfies identity.Identity. It returns Base() with its first byte removed.
+func (p *Path) IdentifierBase() string {
+	return p.Base()[1:]
+}
+
+// Component returns the component for this path (e.g. "content").
+func (p *Path) Component() string {
+	return p.component
+}
+
+// Container returns the base name of the container directory for this path.
+func (p *Path) Container() string {
+	if p.posContainerLow == -1 {
+		return ""
+	}
+	return p.norm(p.s[p.posContainerLow : p.posContainerHigh-1])
+}
+
+// ContainerDir returns the container directory for this path.
+// For content bundles this will be the parent directory.
+func (p *Path) ContainerDir() string {
+	if p.posContainerLow == -1 || !p.IsBundle() {
+		return p.Dir()
+	}
+	return p.norm(p.s[:p.posContainerLow-1])
+}
+
+// Section returns the first path element (section).
+func (p *Path) Section() string {
+	if p.posSectionHigh <= 0 {
+		return ""
+	}
+	return p.norm(p.s[1:p.posSectionHigh])
+}
+
+// IsContent returns true if the path is a content file (e.g. mypost.md).
+// Note that this will also return true for content files in a bundle.
+func (p *Path) IsContent() bool {
+	return p.BundleType() >= PathTypeContentResource
+}
+
+// isContentPage returns true if the path is a content file (e.g. mypost.md),
+// but not if it is a content resource (PathTypeContentResource), e.g. inside a leaf bundle.
+func (p *Path) isContentPage() bool {
+	return p.BundleType() >= PathTypeContentSingle
+}
+
+// Name returns the last element of path.
+func (p *Path) Name() string {
+	if p.posContainerHigh > 0 {
+		return p.s[p.posContainerHigh:]
+	}
+	return p.s
+}
+
+// NameNoExt returns the last element of path without any extension.
+func (p *Path) NameNoExt() string {
+	if i := p.identifierIndex(0); i != -1 {
+		return p.s[p.posContainerHigh : p.identifiers[i].Low-1]
+	}
+	return p.s[p.posContainerHigh:]
+}
+
+// NameNoLang returns the last element of path without any language identifier.
+func (p *Path) NameNoLang() string {
+	i := p.identifierIndex(p.posIdentifierLanguage)
+	if i == -1 {
+		return p.Name()
+	}
+
+	return p.s[p.posContainerHigh:p.identifiers[i].Low-1] + p.s[p.identifiers[i].High:]
+}
+
+// BaseNameNoIdentifier returns the logical base name for a resource without any identifier (e.g. no extension).
+// For bundles this will be the containing directory's name, e.g. "blog".
+func (p *Path) BaseNameNoIdentifier() string {
+	if p.IsBundle() {
+		return p.Container()
+	}
+	return p.NameNoIdentifier()
+}
+
+// NameNoIdentifier returns the last element of path without any identifier (e.g. no extension).
+func (p *Path) NameNoIdentifier() string {
+	if len(p.identifiers) > 0 {
+		return p.s[p.posContainerHigh : p.identifiers[len(p.identifiers)-1].Low-1]
+	}
+	return p.s[p.posContainerHigh:]
+}
+
+// Dir returns all but the last element of path, typically the path's directory.
+func (p *Path) Dir() (d string) {
+	if p.posContainerHigh > 0 {
+		d = p.s[:p.posContainerHigh-1]
+	}
+	if d == "" {
+		d = "/"
+	}
+	d = p.norm(d)
+	return
+}
+
+// Path returns the full path.
+func (p *Path) Path() (d string) {
+	return p.norm(p.s)
+}
+
+// Unmormalized returns the Path with the original case preserved.
+func (p *Path) Unmormalized() *Path {
+	return p.unnormalized
+}
+
+// PathNoLang returns the Path but with any language identifier removed.
+func (p *Path) PathNoLang() string {
+	return p.base(true, false)
+}
+
+// PathNoIdentifier returns the Path but with any identifier (ext, lang) removed.
+func (p *Path) PathNoIdentifier() string {
+	return p.base(false, false)
+}
+
+// PathRel returns the path relative to the given owner.
+func (p *Path) PathRel(owner *Path) string {
+	ob := owner.Base()
+	if !strings.HasSuffix(ob, "/") {
+		ob += "/"
+	}
+	return strings.TrimPrefix(p.Path(), ob)
+}
+
+// BaseRel returns the base path relative to the given owner.
+func (p *Path) BaseRel(owner *Path) string {
+	ob := owner.Base()
+	if ob == "/" {
+		ob = ""
+	}
+	return p.Base()[len(ob)+1:]
+}
+
+// For content files, Base returns the path without any identifiers (extension, language code etc.).
+// Any 'index' as the last path element is ignored.
+//
+// For other files (Resources), any extension is kept.
+func (p *Path) Base() string {
+	return p.base(!p.isContentPage(), p.IsBundle())
+}
+
+// BaseNoLeadingSlash returns the base path without the leading slash.
+func (p *Path) BaseNoLeadingSlash() string {
+	return p.Base()[1:]
+}
+
+func (p *Path) base(preserveExt, isBundle bool) string {
+	if len(p.identifiers) == 0 {
+		return p.norm(p.s)
+	}
+
+	if preserveExt && len(p.identifiers) == 1 {
+		// Preserve extension.
+		return p.norm(p.s)
+	}
+
+	id := p.identifiers[len(p.identifiers)-1]
+	high := id.Low - 1
+
+	if isBundle {
+		high = p.posContainerHigh - 1
+	}
+
+	if high == 0 {
+		high++
+	}
+
+	if !preserveExt {
+		return p.norm(p.s[:high])
+	}
+
+	// For txt files etc. we want to preserve the extension.
+	id = p.identifiers[0]
+
+	return p.norm(p.s[:high] + p.s[id.Low-1:id.High])
+}
+
+func (p *Path) Ext() string {
+	return p.identifierAsString(0)
+}
+
+func (p *Path) Lang() string {
+	return p.identifierAsString(1)
+}
+
+func (p *Path) Identifier(i int) string {
+	return p.identifierAsString(i)
+}
+
+func (p *Path) Identifiers() []string {
+	ids := make([]string, len(p.identifiers))
+	for i, id := range p.identifiers {
+		ids[i] = p.s[id.Low:id.High]
+	}
+	return ids
+}
+
+func (p *Path) IsHTML() bool {
+	return files.IsHTML(p.Ext())
+}
+
+func (p *Path) BundleType() PathType {
+	return p.bundleType
+}
+
+func (p *Path) IsBundle() bool {
+	return p.bundleType >= PathTypeLeaf
+}
+
+func (p *Path) IsBranchBundle() bool {
+	return p.bundleType == PathTypeBranch
+}
+
+func (p *Path) IsLeafBundle() bool {
+	return p.bundleType == PathTypeLeaf
+}
+
+func (p *Path) identifierAsString(i int) string {
+	i = p.identifierIndex(i)
+	if i == -1 {
+		return ""
+	}
+
+	id := p.identifiers[i]
+	return p.s[id.Low:id.High]
+}
+
+func (p *Path) identifierIndex(i int) int {
+	if i < 0 || i >= len(p.identifiers) {
+		return -1
+	}
+	return i
+}
+
+// HasExt returns true if the Unix styled path has an extension.
+func HasExt(p string) bool {
+	for i := len(p) - 1; i >= 0; i-- {
+		if p[i] == '.' {
+			return true
+		}
+		if p[i] == '/' {
+			return false
+		}
+	}
+	return false
+}
diff --git a/common/paths/pathparser_test.go b/common/paths/pathparser_test.go
new file mode 100644
index 000000000..3546b6605
--- /dev/null
+++ b/common/paths/pathparser_test.go
@@ -0,0 +1,351 @@
+// Copyright 2024 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package paths
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/gohugoio/hugo/hugofs/files"
+
+	qt "github.com/frankban/quicktest"
+)
+
+var testParser = &PathParser{
+	LanguageIndex: map[string]int{
+		"no": 0,
+		"en": 1,
+	},
+}
+
+func TestParse(t *testing.T) {
+	c := qt.New(t)
+
+	tests := []struct {
+		name   string
+		path   string
+		assert func(c *qt.C, p *Path)
+	}{
+		{
+			"Basic text file",
+			"/a/b.txt",
+			func(c *qt.C, p *Path) {
+				c.Assert(p.Name(), qt.Equals, "b.txt")
+				c.Assert(p.Base(), qt.Equals, "/a/b.txt")
+				c.Assert(p.Container(), qt.Equals, "a")
+				c.Assert(p.Dir(), qt.Equals, "/a")
+				c.Assert(p.Ext(), qt.Equals, "txt")
+				c.Assert(p.IsContent(), qt.IsFalse)
+			},
+		},
+		{
+			"Basic text file, upper case",
+			"/A/B.txt",
+			func(c *qt.C, p *Path) {
+				c.Assert(p.Name(), qt.Equals, "b.txt")
+				c.Assert(p.NameNoExt(), qt.Equals, "b")
+				c.Assert(p.NameNoIdentifier(), qt.Equals, "b")
+				c.Assert(p.BaseNameNoIdentifier(), qt.Equals, "b")
+				c.Assert(p.Base(), qt.Equals, "/a/b.txt")
+				c.Assert(p.Ext(), qt.Equals, "txt")
+			},
+		},
+		{
+			"Basic text file, 1 space in dir",
+			"/a b/c.txt",
+			func(c *qt.C, p *Path) {
+				c.Assert(p.Base(), qt.Equals, "/a-b/c.txt")
+			},
+		},
+		{
+			"Basic text file, 2 spaces in dir",
+			"/a  b/c.txt",
+			func(c *qt.C, p *Path) {
+				c.Assert(p.Base(), qt.Equals, "/a--b/c.txt")
+			},
+		},
+		{
+			"Basic text file, 1 space in filename",