diff options
author | Micah Jerome Ellison <micah.jerome.ellison@gmail.com> | 2023-04-29 15:49:41 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-29 15:49:41 -0700 |
commit | 95836a7dd1192150ed8fb51d86c94a911dd5c601 (patch) | |
tree | 60d330654ec6433744d5756fbbb01466df49a54d /jrnl | |
parent | 88aa2491b012042eb553648bc8465b54fd389267 (diff) |
Only read text files that look like entries when opening folder journal (#1697)
* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
  - move get_files into FolderJournal class and add underscore prefix
  - create iterables to get for year/month folders and day files
  - make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment
Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
Diffstat (limited to 'jrnl')
-rw-r--r-- | jrnl/journals/FolderJournal.py | 68 | ||||
-rw-r--r-- | jrnl/time.py | 8 |
2 files changed, 60 insertions, 16 deletions
diff --git a/jrnl/journals/FolderJournal.py b/jrnl/journals/FolderJournal.py
index 88fa21e1..0d497fb8 100644
--- a/jrnl/journals/FolderJournal.py
+++ b/jrnl/journals/FolderJournal.py
@@ -2,8 +2,8 @@
 # License: https://www.gnu.org/licenses/gpl-3.0.html
 
 import codecs
-import fnmatch
 import os
+import pathlib
 from typing import TYPE_CHECKING
 
 from jrnl import time
@@ -13,14 +13,11 @@ from .Journal import Journal
 if TYPE_CHECKING:
     from jrnl.journals import Entry
 
-
-def get_files(journal_config: str) -> list[str]:
-    """Searches through sub directories starting with journal_config and find all text files"""
-    filenames = []
-    for root, dirnames, f in os.walk(journal_config):
-        for filename in fnmatch.filter(f, "*.txt"):
-            filenames.append(os.path.join(root, filename))
-    return filenames
+# glob search patterns for folder/file structure
+DIGIT_PATTERN = "[0123456789]"
+YEAR_PATTERN = DIGIT_PATTERN * 4
+MONTH_PATTERN = "[01]" + DIGIT_PATTERN
+DAY_PATTERN = "[0123]" + DIGIT_PATTERN + ".txt"
 
 
 class Folder(Journal):
@@ -35,12 +32,15 @@ class Folder(Journal):
     def open(self) -> "Folder":
         filenames = []
         self.entries = []
-        filenames = get_files(self.config["journal"])
-        for filename in filenames:
-            with codecs.open(filename, "r", "utf-8") as f:
-                journal = f.read()
-                self.entries.extend(self._parse(journal))
-        self.sort()
+
+        if os.path.exists(self.config["journal"]):
+            filenames = Folder._get_files(self.config["journal"])
+            for filename in filenames:
+                with codecs.open(filename, "r", "utf-8") as f:
+                    journal = f.read()
+                    self.entries.extend(self._parse(journal))
+            self.sort()
+
         return self
 
     def write(self) -> None:
@@ -81,7 +81,7 @@ class Folder(Journal):
                 journal_file.write(journal)
         # look for and delete empty files
         filenames = []
-        filenames = get_files(self.config["journal"])
+        filenames = Folder._get_files(self.config["journal"])
         for filename in filenames:
             if os.stat(filename).st_size <= 0:
                 os.remove(filename)
@@ -119,3 +119,39 @@ class Folder(Journal):
         self.increment_change_counts_by_edit(mod_entries)
 
         self.entries = mod_entries
+
+    @staticmethod
+    def _get_files(journal_path: str) -> list[str]:
+        """Searches through sub directories starting with journal_path and find all text files that look like entries"""
+        for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
+            for month_folder in Folder._get_month_folders(year_folder):
+                yield from Folder._get_day_files(month_folder)
+
+    @staticmethod
+    def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
+        for child in path.glob(YEAR_PATTERN):
+            if child.is_dir():
+                yield child
+        return
+
+    @staticmethod
+    def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
+        for child in path.glob(MONTH_PATTERN):
+            if int(child.name) > 0 and int(child.name) <= 12 and path.is_dir():
+                yield child
+        return
+
+    @staticmethod
+    def _get_day_files(path: pathlib.Path) -> list[str]:
+        for child in path.glob(DAY_PATTERN):
+            if (
+                int(child.stem) > 0
+                and int(child.stem) <= 31
+                and time.is_valid_date(
+                    year=int(path.parent.name),
+                    month=int(path.name),
+                    day=int(child.stem),
+                )
+                and child.is_file()
+            ):
+                yield str(child)
diff --git a/jrnl/time.py b/jrnl/time.py
index 514d94f2..dd6fcb0f 100644
--- a/jrnl/time.py
+++ b/jrnl/time.py
@@ -89,3 +89,11 @@ def parse(
     if dt.days < -28 and not year_present:
         date = date.replace(date.year - 1)
     return date
+
+
+def is_valid_date(year: int, month: int, day: int) -> bool:
+    try:
+        datetime.datetime(year, month, day)
+        return True
+    except ValueError:
+        return False