summary | refs | log | tree | commit | diff | stats
path: root/jrnl
diff options
context:
space:
mode:
author: Micah Jerome Ellison <micah.jerome.ellison@gmail.com>, 2023-04-29 15:49:41 -0700
committer: GitHub <noreply@github.com>, 2023-04-29 15:49:41 -0700
commit: 95836a7dd1192150ed8fb51d86c94a911dd5c601 (patch)
tree: 60d330654ec6433744d5756fbbb01466df49a54d /jrnl
parent: 88aa2491b012042eb553648bc8465b54fd389267 (diff)
Only read text files that look like entries when opening folder journal (#1697)
* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
  - move get_files into FolderJournal class and add underscore prefix
  - create iterables to get for year/month folders and day files
  - make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
Diffstat (limited to 'jrnl')
-rw-r--r-- jrnl/journals/FolderJournal.py 68
-rw-r--r-- jrnl/time.py 8
2 files changed, 60 insertions, 16 deletions
diff --git a/jrnl/journals/FolderJournal.py b/jrnl/journals/FolderJournal.py
index 88fa21e1..0d497fb8 100644
--- a/jrnl/journals/FolderJournal.py
+++ b/jrnl/journals/FolderJournal.py
@@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html
import codecs
-import fnmatch
import os
+import pathlib
from typing import TYPE_CHECKING
from jrnl import time
@@ -13,14 +13,11 @@ from .Journal import Journal
if TYPE_CHECKING:
from jrnl.journals import Entry
-
-def get_files(journal_config: str) -> list[str]:
- """Searches through sub directories starting with journal_config and find all text files"""
- filenames = []
- for root, dirnames, f in os.walk(journal_config):
- for filename in fnmatch.filter(f, "*.txt"):
- filenames.append(os.path.join(root, filename))
- return filenames
+# Glob patterns describing the <year>/<month>/<day>.txt layout of a folder journal.
+# They are intentionally coarse (MONTH_PATTERN also matches "00" and "19",
+# DAY_PATTERN also matches "00.txt" and "39.txt"); the month and day values are
+# range-checked afterwards by the Folder._get_*() helpers.
+DIGIT_PATTERN = "[0123456789]"
+YEAR_PATTERN = 4 * DIGIT_PATTERN
+MONTH_PATTERN = f"[01]{DIGIT_PATTERN}"
+DAY_PATTERN = f"[0123]{DIGIT_PATTERN}.txt"
class Folder(Journal):
@@ -35,12 +32,15 @@ class Folder(Journal):
def open(self) -> "Folder":
filenames = []
self.entries = []
- filenames = get_files(self.config["journal"])
- for filename in filenames:
- with codecs.open(filename, "r", "utf-8") as f:
- journal = f.read()
- self.entries.extend(self._parse(journal))
- self.sort()
+
+ if os.path.exists(self.config["journal"]):
+ filenames = Folder._get_files(self.config["journal"])
+ for filename in filenames:
+ with codecs.open(filename, "r", "utf-8") as f:
+ journal = f.read()
+ self.entries.extend(self._parse(journal))
+ self.sort()
+
return self
def write(self) -> None:
@@ -81,7 +81,7 @@ class Folder(Journal):
journal_file.write(journal)
# look for and delete empty files
filenames = []
- filenames = get_files(self.config["journal"])
+ filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
@@ -119,3 +119,39 @@ class Folder(Journal):
self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries
+
+    @staticmethod
+    def _get_files(journal_path: str) -> list[str]:
+        """Return the paths of all files under journal_path that look like entries.
+
+        Only files laid out as <year>/<month>/<day>.txt are included; any other
+        file or folder inside the journal folder is ignored.
+
+        The result is a fully built list, as the return annotation states, so a
+        caller that deletes files while looping over it does not depend on how
+        a directory walk that is still in progress reacts to those deletions.
+        """
+        filenames = []
+        for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
+            for month_folder in Folder._get_month_folders(year_folder):
+                filenames.extend(Folder._get_day_files(month_folder))
+        return filenames
+
+    @staticmethod
+    def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
+        """Yield each directory directly inside path whose name matches YEAR_PATTERN.
+
+        NOTE: this is a generator, so results are produced lazily rather than
+        as the list the return annotation suggests.
+        """
+        yield from (
+            candidate for candidate in path.glob(YEAR_PATTERN) if candidate.is_dir()
+        )
+
+    @staticmethod
+    def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
+        """Yield each month directory (named "01" through "12") directly inside path.
+
+        MONTH_PATTERN is only a coarse filter, so the numeric value of the name
+        is checked as well.
+
+        NOTE: this is a generator; results are produced lazily.
+        """
+        for child in path.glob(MONTH_PATTERN):
+            # Test the candidate itself, not the year folder it was found in:
+            # a plain file named like a month (e.g. "2023/04") is not a month
+            # folder and must be skipped.
+            if 1 <= int(child.name) <= 12 and child.is_dir():
+                yield child
+
+    @staticmethod
+    def _get_day_files(path: pathlib.Path) -> list[str]:
+        """Yield, as strings, the paths of the day files inside a month folder.
+
+        The year and month are read from the names of path's parent and of path
+        itself. A child matching DAY_PATTERN is yielded only if its stem is a
+        day number between 1 and 31 that forms a valid date with that year and
+        month, and the child is a file.
+
+        NOTE: this is a generator; results are produced lazily.
+        """
+        for child in path.glob(DAY_PATTERN):
+            day = int(child.stem)
+            if not 0 < day <= 31:
+                continue
+            if not time.is_valid_date(
+                year=int(path.parent.name), month=int(path.name), day=day
+            ):
+                continue
+            if child.is_file():
+                yield str(child)
diff --git a/jrnl/time.py b/jrnl/time.py
index 514d94f2..dd6fcb0f 100644
--- a/jrnl/time.py
+++ b/jrnl/time.py
@@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date
+
+
+def is_valid_date(year: int, month: int, day: int) -> bool:
+    """Return True if year, month and day form a real calendar date.
+
+    Values that datetime cannot represent at all (for example a year too large
+    to fit the underlying C integer) are reported as invalid instead of raising.
+    """
+    try:
+        datetime.datetime(year, month, day)
+    except (ValueError, OverflowError):
+        # ValueError: a field is out of range (month 13, 30 February, year 0).
+        # OverflowError: a field does not fit the C int used by datetime.
+        return False
+    return True