summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Milde <daniel@milde.cz> 2024-02-18 21:37:30 +0100
committer Daniel Milde <daniel@milde.cz> 2024-02-18 21:49:09 +0100
commit 8b03bc9b076410b29557bd45eadf3b7bd089ed17 (patch)
tree 7f13d93fce45bde1884e0248417c84dcbdf019c6
parent 8d695cce6a459f2ea9270756cc412050ca5d5e8c (diff)
feat: sequential analyzer
intended for rotating HDDs
-rw-r--r-- cmd/gdu/app/app.go 66
-rw-r--r-- cmd/gdu/main.go 1
-rw-r--r-- pkg/analyze/dir_linux_test.go 26
-rw-r--r-- pkg/analyze/parallel.go (renamed from pkg/analyze/dir.go) 0
-rw-r--r-- pkg/analyze/sequential.go 176
-rw-r--r-- pkg/analyze/sequential_test.go 206
6 files changed, 444 insertions, 31 deletions
diff --git a/cmd/gdu/app/app.go b/cmd/gdu/app/app.go
index 472cecb..c0ecd2b 100644
--- a/cmd/gdu/app/app.go
+++ b/cmd/gdu/app/app.go
@@ -43,37 +43,38 @@ type UI interface {
// Flags define flags accepted by Run
type Flags struct {
- CfgFile string `yaml:"-"`
- LogFile string `yaml:"log-file"`
- InputFile string `yaml:"input-file"`
- OutputFile string `yaml:"output-file"`
- IgnoreDirs []string `yaml:"ignore-dirs"`
- IgnoreDirPatterns []string `yaml:"ignore-dir-patterns"`
- IgnoreFromFile string `yaml:"ignore-from-file"`
- MaxCores int `yaml:"max-cores"`
- ShowDisks bool `yaml:"-"`
- ShowApparentSize bool `yaml:"show-apparent-size"`
- ShowRelativeSize bool `yaml:"show-relative-size"`
- ShowVersion bool `yaml:"-"`
- NoColor bool `yaml:"no-color"`
- NoMouse bool `yaml:"no-mouse"`
- NonInteractive bool `yaml:"non-interactive"`
- NoProgress bool `yaml:"no-progress"`
- NoCross bool `yaml:"no-cross"`
- NoHidden bool `yaml:"no-hidden"`
- FollowSymlinks bool `yaml:"follow-symlinks"`
- Profiling bool `yaml:"profiling"`
- ConstGC bool `yaml:"const-gc"`
- UseStorage bool `yaml:"use-storage"`
- StoragePath string `yaml:"storage-path"`
- ReadFromStorage bool `yaml:"read-from-storage"`
- Summarize bool `yaml:"summarize"`
- UseSIPrefix bool `yaml:"use-si-prefix"`
- NoPrefix bool `yaml:"no-prefix"`
- WriteConfig bool `yaml:"-"`
- ChangeCwd bool `yaml:"change-cwd"`
- Style Style `yaml:"style"`
- Sorting Sorting `yaml:"sorting"`
+ CfgFile string `yaml:"-"`
+ LogFile string `yaml:"log-file"`
+ InputFile string `yaml:"input-file"`
+ OutputFile string `yaml:"output-file"`
+ IgnoreDirs []string `yaml:"ignore-dirs"`
+ IgnoreDirPatterns []string `yaml:"ignore-dir-patterns"`
+ IgnoreFromFile string `yaml:"ignore-from-file"`
+ MaxCores int `yaml:"max-cores"`
+ SequentialScanning bool `yaml:"sequential-scanning"`
+ ShowDisks bool `yaml:"-"`
+ ShowApparentSize bool `yaml:"show-apparent-size"`
+ ShowRelativeSize bool `yaml:"show-relative-size"`
+ ShowVersion bool `yaml:"-"`
+ NoColor bool `yaml:"no-color"`
+ NoMouse bool `yaml:"no-mouse"`
+ NonInteractive bool `yaml:"non-interactive"`
+ NoProgress bool `yaml:"no-progress"`
+ NoCross bool `yaml:"no-cross"`
+ NoHidden bool `yaml:"no-hidden"`
+ FollowSymlinks bool `yaml:"follow-symlinks"`
+ Profiling bool `yaml:"profiling"`
+ ConstGC bool `yaml:"const-gc"`
+ UseStorage bool `yaml:"use-storage"`
+ StoragePath string `yaml:"storage-path"`
+ ReadFromStorage bool `yaml:"read-from-storage"`
+ Summarize bool `yaml:"summarize"`
+ UseSIPrefix bool `yaml:"use-si-prefix"`
+ NoPrefix bool `yaml:"no-prefix"`
+ WriteConfig bool `yaml:"-"`
+ ChangeCwd bool `yaml:"change-cwd"`
+ Style Style `yaml:"style"`
+ Sorting Sorting `yaml:"sorting"`
}
// Style define style config
@@ -144,6 +145,9 @@ func (a *App) Run() (err error) {
if a.Flags.UseStorage {
ui.SetAnalyzer(analyze.CreateStoredAnalyzer(a.Flags.StoragePath))
}
+ if a.Flags.SequentialScanning {
+ ui.SetAnalyzer(analyze.CreateSeqAnalyzer())
+ }
if a.Flags.FollowSymlinks {
ui.SetFollowSymlinks(true)
}
diff --git a/cmd/gdu/main.go b/cmd/gdu/main.go
index 04bca33..2336fbb 100644
--- a/cmd/gdu/main.go
+++ b/cmd/gdu/main.go
@@ -43,6 +43,7 @@ func init() {
flags.StringVarP(&af.OutputFile, "output-file", "o", "", "Export all info into file as JSON")
flags.StringVarP(&af.InputFile, "input-file", "f", "", "Import analysis from JSON file")
flags.IntVarP(&af.MaxCores, "max-cores", "m", runtime.NumCPU(), fmt.Sprintf("Set max cores that GDU will use. %d cores available", runtime.NumCPU()))
+ flags.BoolVar(&af.SequentialScanning, "sequential", false, "Use sequential scanning (intended for rotating HDDs)")
flags.BoolVarP(&af.ShowVersion, "version", "v", false, "Print version")
flags.StringSliceVarP(&af.IgnoreDirs, "ignore-dirs", "i", []string{"/proc", "/dev", "/sys", "/run"}, "Absolute paths to ignore (separated by comma)")
diff --git a/pkg/analyze/dir_linux_test.go b/pkg/analyze/dir_linux_test.go
index f555be7..3fd2615 100644
--- a/pkg/analyze/dir_linux_test.go
+++ b/pkg/analyze/dir_linux_test.go
@@ -37,3 +37,29 @@ func TestErr(t *testing.T) {
assert.Equal(t, "nested", dir.Files[0].GetName())
assert.Equal(t, '!', dir.Files[0].GetFlag())
}
+
+// TestSeqErr checks that a subdirectory whose permissions were cleared is
+// still listed by the sequential analyzer and carries the '!' flag, while the
+// scanned root carries the '.' flag.
+func TestSeqErr(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	const lockedDir = "test_dir/nested"
+
+	err := os.Chmod(lockedDir, 0)
+	assert.Nil(t, err)
+	// Deferred after cleanup, so the permissions are restored before it runs.
+	defer func() {
+		err = os.Chmod(lockedDir, 0755)
+		assert.Nil(t, err)
+	}()
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+	analyzer.GetDone().Wait()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	assert.Equal(t, "test_dir", root.GetName())
+	assert.Equal(t, 2, root.ItemCount)
+	assert.Equal(t, '.', root.GetFlag())
+
+	first := root.Files[0]
+	assert.Equal(t, "nested", first.GetName())
+	assert.Equal(t, '!', first.GetFlag())
+}
diff --git a/pkg/analyze/dir.go b/pkg/analyze/parallel.go
index eedce4d..eedce4d 100644
--- a/pkg/analyze/dir.go
+++ b/pkg/analyze/parallel.go
diff --git a/pkg/analyze/sequential.go b/pkg/analyze/sequential.go
new file mode 100644
index 0000000..77fbd44
--- /dev/null
+++ b/pkg/analyze/sequential.go
@@ -0,0 +1,176 @@
+package analyze
+
+import (
+ "os"
+ "path/filepath"
+ "runtime/debug"
+
+ "github.com/dundee/gdu/v5/internal/common"
+ "github.com/dundee/gdu/v5/pkg/fs"
+ log "github.com/sirupsen/logrus"
+)
+
+// SequentialAnalyzer implements Analyzer. It walks the directory tree
+// recursively inside the AnalyzeDir call instead of spawning a goroutine
+// per directory.
+type SequentialAnalyzer struct {
+ progress *common.CurrentProgress // running totals maintained by updateProgress
+ progressChan chan common.CurrentProgress // per-directory records sent by processDir
+ progressOutChan chan common.CurrentProgress // snapshots handed out via GetProgressChan
+ progressDoneChan chan struct{} // tells updateProgress to return
+ doneChan common.SignalGroup // broadcast by AnalyzeDir when the scan is finished
+ wait *WaitGroup // NOTE(review): not referenced by the methods shown in this file - confirm it is needed
+ ignoreDir common.ShouldDirBeIgnored // set by AnalyzeDir; directories it matches are skipped
+ followSymlinks bool // set by SetFollowSymlinks; read by processDir
+}
+
+// CreateSeqAnalyzer builds a SequentialAnalyzer with zeroed progress
+// counters, single-slot progress channels and a fresh done signal.
+func CreateSeqAnalyzer() *SequentialAnalyzer {
+	analyzer := new(SequentialAnalyzer)
+
+	// The zero value already has ItemCount == 0 and TotalSize == 0.
+	analyzer.progress = &common.CurrentProgress{}
+
+	analyzer.progressChan = make(chan common.CurrentProgress, 1)
+	analyzer.progressOutChan = make(chan common.CurrentProgress, 1)
+	analyzer.progressDoneChan = make(chan struct{})
+	analyzer.doneChan = make(common.SignalGroup)
+	analyzer.wait = (&WaitGroup{}).Init()
+
+	return analyzer
+}
+
+// SetFollowSymlinks sets whether symlinks to files should be followed.
+// processDir consults this setting for every non-directory entry.
+func (a *SequentialAnalyzer) SetFollowSymlinks(v bool) {
+ a.followSymlinks = v
+}
+
+// GetProgressChan returns the channel on which progress snapshots are
+// published. ResetProgress replaces this channel, so a value obtained
+// before a reset refers to the old channel.
+func (a *SequentialAnalyzer) GetProgressChan() chan common.CurrentProgress {
+ return a.progressOutChan
+}
+
+// GetDone returns the signal group that AnalyzeDir broadcasts on once the
+// analysis is done.
+func (a *SequentialAnalyzer) GetDone() common.SignalGroup {
+ return a.doneChan
+}
+
+// ResetProgress discards the accumulated progress and replaces the progress
+// channels and the done signal with newly allocated ones. Values previously
+// obtained from GetProgressChan or GetDone keep referring to the old ones.
+func (a *SequentialAnalyzer) ResetProgress() {
+ a.progress = &common.CurrentProgress{}
+ a.progressChan = make(chan common.CurrentProgress, 1)
+ a.progressOutChan = make(chan common.CurrentProgress, 1)
+ a.progressDoneChan = make(chan struct{})
+ a.doneChan = make(common.SignalGroup)
+}
+
+// AnalyzeDir analyzes given path and returns the resulting directory tree.
+//
+// The scan runs synchronously: the call returns only after the whole tree
+// below path has been processed and the done signal has been broadcast.
+// Directories for which ignore returns true are skipped. Unless constGC is
+// set, the GC percentage is set to -1 for the duration of the call and
+// manageMemoryUsage is started alongside it.
+func (a *SequentialAnalyzer) AnalyzeDir(
+ path string, ignore common.ShouldDirBeIgnored, constGC bool,
+) fs.Item {
+ if !constGC {
+ // The inner call runs now and switches the GC percentage to -1; the
+ // deferred outer call restores the previous value on return.
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ // NOTE(review): presumably triggers collections manually until doneChan
+ // fires - confirm against manageMemoryUsage.
+ go manageMemoryUsage(a.doneChan)
+ }
+
+ a.ignoreDir = ignore
+
+ // The progress aggregator must be running before processDir starts
+ // sending on progressChan.
+ go a.updateProgress()
+ dir := a.processDir(path)
+
+ dir.BasePath = filepath.Dir(path)
+
+ // progressDoneChan is unbuffered, so this blocks until updateProgress
+ // has received the stop signal.
+ a.progressDoneChan <- struct{}{}
+ a.doneChan.Broadcast()
+
+ return dir
+}
+
+// processDir reads the directory at path, recurses into every subdirectory
+// that is not ignored and returns the resulting Dir.
+//
+// Errors are logged instead of returned. A failed os.ReadDir is reflected in
+// the directory flag via getDirFlag. An entry whose info cannot be read, or
+// whose symlink cannot be followed, is skipped and the directory is flagged
+// '!'. Once all entries are handled, one progress record for this directory
+// is sent on progressChan.
+func (a *SequentialAnalyzer) processDir(path string) *Dir {
+	files, err := os.ReadDir(path)
+	if err != nil {
+		log.Print(err.Error())
+	}
+
+	dir := &Dir{
+		File: &File{
+			Name: filepath.Base(path),
+			Flag: getDirFlag(err, len(files)),
+		},
+		ItemCount: 1,
+		Files:     make(fs.Files, 0, len(files)),
+	}
+	setDirPlatformSpecificAttrs(dir, path)
+
+	// Sum of the sizes of the files located directly in this directory.
+	var totalSize int64
+
+	for _, f := range files {
+		name := f.Name()
+		entryPath := filepath.Join(path, name)
+
+		if f.IsDir() {
+			if a.ignoreDir(name, entryPath) {
+				continue
+			}
+
+			subdir := a.processDir(entryPath)
+			subdir.Parent = dir
+			dir.AddFile(subdir)
+			continue
+		}
+
+		info, infoErr := f.Info()
+		if infoErr != nil {
+			log.Print(infoErr.Error())
+			dir.Flag = '!'
+			continue
+		}
+		if a.followSymlinks && info.Mode()&os.ModeSymlink != 0 {
+			// followSymlink receives a pointer to info and may replace it.
+			if linkErr := followSymlink(entryPath, &info); linkErr != nil {
+				log.Print(linkErr.Error())
+				dir.Flag = '!'
+				continue
+			}
+		}
+
+		file := &File{
+			Name:   name,
+			Flag:   getFlag(info),
+			Size:   info.Size(),
+			Parent: dir,
+		}
+		setPlatformSpecificAttrs(file, info)
+
+		totalSize += info.Size()
+
+		dir.AddFile(file)
+	}
+
+	// ItemCount is the number of entries returned by os.ReadDir, so it also
+	// counts ignored directories and entries skipped because of errors.
+	a.progressChan <- common.CurrentProgress{
+		CurrentItemName: path,
+		ItemCount:       len(files),
+		TotalSize:       totalSize,
+	}
+	return dir
+}
+
+// updateProgress folds the per-directory records arriving on progressChan
+// into a.progress and offers the updated totals on progressOutChan. It
+// returns as soon as a value is received on progressDoneChan.
+func (a *SequentialAnalyzer) updateProgress() {
+	for {
+		select {
+		case <-a.progressDoneChan:
+			return
+		case update := <-a.progressChan:
+			totals := a.progress
+			totals.CurrentItemName = update.CurrentItemName
+			totals.ItemCount += update.ItemCount
+			totals.TotalSize += update.TotalSize
+
+			// Non-blocking publish: if the previous snapshot has not been
+			// consumed yet, this one is dropped.
+			select {
+			case a.progressOutChan <- *totals:
+			default:
+			}
+		}
+	}
+}
diff --git a/pkg/analyze/sequential_test.go b/pkg/analyze/sequential_test.go
new file mode 100644
index 0000000..a791e4a
--- /dev/null
+++ b/pkg/analyze/sequential_test.go
@@ -0,0 +1,206 @@
+package analyze
+
+import (
+ "os"
+ "sort"
+ "testing"
+
+ log "github.com/sirupsen/logrus"
+
+ "github.com/dundee/gdu/v5/internal/testdir"
+ "github.com/dundee/gdu/v5/pkg/fs"
+ "github.com/stretchr/testify/assert"
+)
+
+// init sets the logrus level to Warn for the tests in this package.
+func init() {
+ log.SetLevel(log.WarnLevel)
+}
+
+// TestAnalyzeDirSeq scans the test fixture with the sequential analyzer and
+// verifies totals, tree layout, file sizes and parent links.
+func TestAnalyzeDirSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+
+	snapshot := <-analyzer.GetProgressChan()
+	assert.GreaterOrEqual(t, snapshot.TotalSize, int64(0))
+
+	analyzer.GetDone().Wait()
+	analyzer.ResetProgress()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	// root directory
+	assert.Equal(t, "test_dir", root.Name)
+	assert.Equal(t, int64(7+4096*3), root.Size)
+	assert.Equal(t, 5, root.ItemCount)
+	assert.True(t, root.IsDir())
+
+	// tree layout
+	assert.Equal(t, "nested", root.Files[0].GetName())
+	nested := root.Files[0].(*Dir)
+	assert.Equal(t, "subnested", nested.Files[1].GetName())
+
+	// file directly inside nested
+	file2 := nested.Files[0]
+	assert.Equal(t, "file2", file2.GetName())
+	assert.Equal(t, int64(2), file2.GetSize())
+
+	// file inside subnested
+	subnested := nested.Files[1].(*Dir)
+	deepFile := subnested.Files[0]
+	assert.Equal(t, "file", deepFile.GetName())
+	assert.Equal(t, int64(5), deepFile.GetSize())
+
+	// three parent hops from the deepest file lead back to the root
+	grandGrandParent := deepFile.GetParent().GetParent().GetParent()
+	assert.Equal(t, "test_dir", grandGrandParent.GetName())
+}
+
+// TestIgnoreDirSeq verifies that when every directory is ignored only the
+// root itself is counted.
+func TestIgnoreDirSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	ignoreEverything := func(_, _ string) bool { return true }
+
+	analyzer := CreateSeqAnalyzer()
+	root := analyzer.AnalyzeDir("test_dir", ignoreEverything, false).(*Dir)
+
+	assert.Equal(t, "test_dir", root.Name)
+	assert.Equal(t, 1, root.ItemCount)
+}
+
+// TestFlagsSeq verifies the flags reported for a symlink ('@') and for an
+// empty directory ('e') when symlinks are not followed.
+func TestFlagsSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	mkdirErr := os.Mkdir("test_dir/empty", 0644)
+	assert.Nil(t, mkdirErr)
+
+	linkErr := os.Symlink("test_dir/nested/file2", "test_dir/nested/file3")
+	assert.Nil(t, linkErr)
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+	analyzer.GetDone().Wait()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	sort.Sort(sort.Reverse(root.Files))
+
+	assert.Equal(t, int64(28+4096*4), root.Size)
+	assert.Equal(t, 7, root.ItemCount)
+
+	// the symlink file3 inside nested
+	assert.Equal(t, "nested", root.Files[0].GetName())
+	symlink := root.Files[0].(*Dir).Files[1]
+	assert.Equal(t, "file3", symlink.GetName())
+	assert.Equal(t, int64(21), symlink.GetSize())
+	assert.Equal(t, '@', symlink.GetFlag())
+
+	// the empty directory
+	assert.Equal(t, 'e', root.Files[1].GetFlag())
+}
+
+// TestHardlinkSeq verifies that a hard-linked file is counted once for size,
+// twice for the item count, and is flagged 'H'.
+func TestHardlinkSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	linkErr := os.Link("test_dir/nested/file2", "test_dir/nested/file3")
+	assert.Nil(t, linkErr)
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+	analyzer.GetDone().Wait()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	// file2 and file3 share their data, so the size is counted only once ...
+	assert.Equal(t, int64(7+4096*3), root.Size)
+	// ... while both names count as items.
+	assert.Equal(t, 6, root.ItemCount)
+
+	// the hard link file3 inside nested
+	hardlink := root.Files[0].(*Dir).Files[1]
+	assert.Equal(t, "file3", hardlink.GetName())
+	assert.Equal(t, int64(2), hardlink.GetSize())
+	assert.Equal(t, 'H', hardlink.GetFlag())
+}
+
+// TestFollowSymlinkSeq verifies that with symlink following enabled a
+// symlink to a file reports the size of its target and a blank flag.
+func TestFollowSymlinkSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	mkdirErr := os.Mkdir("test_dir/empty", 0644)
+	assert.Nil(t, mkdirErr)
+
+	linkErr := os.Symlink("./file2", "test_dir/nested/file3")
+	assert.Nil(t, linkErr)
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	analyzer.SetFollowSymlinks(true)
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+	analyzer.GetDone().Wait()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	sort.Sort(sort.Reverse(root.Files))
+
+	assert.Equal(t, int64(9+4096*4), root.Size)
+	assert.Equal(t, 7, root.ItemCount)
+
+	// the followed symlink file3 inside nested
+	assert.Equal(t, "nested", root.Files[0].GetName())
+	followed := root.Files[0].(*Dir).Files[1]
+	assert.Equal(t, "file3", followed.GetName())
+	assert.Equal(t, int64(2), followed.GetSize())
+	assert.Equal(t, ' ', followed.GetFlag())
+
+	// the empty directory
+	assert.Equal(t, 'e', root.Files[1].GetFlag())
+}
+
+// TestBrokenSymlinkSkippedSeq verifies that a dangling symlink is left out
+// of the totals when symlinks are followed and that the directory holding it
+// is flagged '!'.
+func TestBrokenSymlinkSkippedSeq(t *testing.T) {
+	cleanup := testdir.CreateTestDir()
+	defer cleanup()
+
+	mkdirErr := os.Mkdir("test_dir/empty", 0644)
+	assert.Nil(t, mkdirErr)
+
+	linkErr := os.Symlink("xxx", "test_dir/nested/file3")
+	assert.Nil(t, linkErr)
+
+	ignoreNothing := func(_, _ string) bool { return false }
+
+	analyzer := CreateSeqAnalyzer()
+	analyzer.SetFollowSymlinks(true)
+	root := analyzer.AnalyzeDir("test_dir", ignoreNothing, false).(*Dir)
+	analyzer.GetDone().Wait()
+	root.UpdateStats(make(fs.HardLinkedItems))
+
+	sort.Sort(sort.Reverse(root.Files))
+
+	assert.Equal(t, int64(7+4096*4), root.Size)
+	assert.Equal(t, 6, root.ItemCount)
+
+	assert.Equal(t, '!', root.Files[0].GetFlag())
+}
+
+// BenchmarkAnalyzeDirSeq measures a full sequential scan of the test fixture
+// followed by the stats update. The measured work runs b.N times, as the
+// testing package requires for a meaningful ns/op figure; fixture creation is
+// excluded from the timing by ResetTimer.
+func BenchmarkAnalyzeDirSeq(b *testing.B) {
+	fin := testdir.CreateTestDir()
+	defer fin()
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		// A fresh analyzer per iteration: its channels and done signal are
+		// consumed by a completed AnalyzeDir run.
+		analyzer := CreateSeqAnalyzer()
+		dir := analyzer.AnalyzeDir(
+			"test_dir", func(_, _ string) bool { return false }, false,
+		)
+		analyzer.GetDone().Wait()
+		dir.UpdateStats(make(fs.HardLinkedItems))
+	}
+}