summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Micah Jerome Ellison <micah.jerome.ellison@gmail.com> 2023-04-29 15:49:41 -0700
committer GitHub <noreply@github.com> 2023-04-29 15:49:41 -0700
commit 95836a7dd1192150ed8fb51d86c94a911dd5c601 (patch)
tree 60d330654ec6433744d5756fbbb01466df49a54d
parent 88aa2491b012042eb553648bc8465b54fd389267 (diff)
Only read text files that look like entries when opening folder journal (#1697)
* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
  - move get_files into FolderJournal class and add underscore prefix
  - create iterables to get for year/month folders and day files
  - make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
-rw-r--r-- jrnl/journals/FolderJournal.py | 68
-rw-r--r-- jrnl/time.py | 8
-rw-r--r-- tests/data/journals/basic_folder/2020/09/should-be-ignored.txt | 4
-rw-r--r-- tests/data/journals/basic_folder/2020/should-be-ignored.txt | 4
-rw-r--r-- tests/data/journals/basic_folder/should-be-ignored.txt | 4
-rw-r--r-- tests/unit/test_journals_folder_journal.py | 59
-rw-r--r-- tests/unit/test_time.py | 22
7 files changed, 153 insertions, 16 deletions
diff --git a/jrnl/journals/FolderJournal.py b/jrnl/journals/FolderJournal.py
index 88fa21e1..0d497fb8 100644
--- a/jrnl/journals/FolderJournal.py
+++ b/jrnl/journals/FolderJournal.py
@@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html
import codecs
-import fnmatch
import os
+import pathlib
from typing import TYPE_CHECKING
from jrnl import time
@@ -13,14 +13,11 @@ from .Journal import Journal
if TYPE_CHECKING:
from jrnl.journals import Entry
-
-def get_files(journal_config: str) -> list[str]:
- """Searches through sub directories starting with journal_config and find all text files"""
- filenames = []
- for root, dirnames, f in os.walk(journal_config):
- for filename in fnmatch.filter(f, "*.txt"):
- filenames.append(os.path.join(root, filename))
- return filenames
+# glob search patterns for folder/file structure
+DIGIT_PATTERN = "[0123456789]"
+YEAR_PATTERN = DIGIT_PATTERN * 4
+MONTH_PATTERN = "[01]" + DIGIT_PATTERN
+DAY_PATTERN = "[0123]" + DIGIT_PATTERN + ".txt"
class Folder(Journal):
@@ -35,12 +32,15 @@ class Folder(Journal):
def open(self) -> "Folder":
filenames = []
self.entries = []
- filenames = get_files(self.config["journal"])
- for filename in filenames:
- with codecs.open(filename, "r", "utf-8") as f:
- journal = f.read()
- self.entries.extend(self._parse(journal))
- self.sort()
+
+ if os.path.exists(self.config["journal"]):
+ filenames = Folder._get_files(self.config["journal"])
+ for filename in filenames:
+ with codecs.open(filename, "r", "utf-8") as f:
+ journal = f.read()
+ self.entries.extend(self._parse(journal))
+ self.sort()
+
return self
def write(self) -> None:
@@ -81,7 +81,7 @@ class Folder(Journal):
journal_file.write(journal)
# look for and delete empty files
filenames = []
- filenames = get_files(self.config["journal"])
+ filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
@@ -119,3 +119,39 @@ class Folder(Journal):
self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries
+
+ @staticmethod
+ def _get_files(journal_path: str) -> list[str]:
+ """Searches through sub directories starting with journal_path and find all text files that look like entries"""
+ for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
+ for month_folder in Folder._get_month_folders(year_folder):
+ yield from Folder._get_day_files(month_folder)
+
+ @staticmethod
+ def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
+ for child in path.glob(YEAR_PATTERN):
+ if child.is_dir():
+ yield child
+ return
+
+ @staticmethod
+ def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
+ for child in path.glob(MONTH_PATTERN):
+ if int(child.name) > 0 and int(child.name) <= 12 and path.is_dir():
+ yield child
+ return
+
+ @staticmethod
+ def _get_day_files(path: pathlib.Path) -> list[str]:
+ for child in path.glob(DAY_PATTERN):
+ if (
+ int(child.stem) > 0
+ and int(child.stem) <= 31
+ and time.is_valid_date(
+ year=int(path.parent.name),
+ month=int(path.name),
+ day=int(child.stem),
+ )
+ and child.is_file()
+ ):
+ yield str(child)
diff --git a/jrnl/time.py b/jrnl/time.py
index 514d94f2..dd6fcb0f 100644
--- a/jrnl/time.py
+++ b/jrnl/time.py
@@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date
+
+
+def is_valid_date(year: int, month: int, day: int) -> bool:
+ try:
+ datetime.datetime(year, month, day)
+ return True
+ except ValueError:
+ return False
diff --git a/tests/data/journals/basic_folder/2020/09/should-be-ignored.txt b/tests/data/journals/basic_folder/2020/09/should-be-ignored.txt
new file mode 100644
index 00000000..4807e608
--- /dev/null
+++ b/tests/data/journals/basic_folder/2020/09/should-be-ignored.txt
@@ -0,0 +1,4 @@
+[2022-03-02 9:25:00 AM] This file should be ignored (month)
+This text file is in a folder journal's month directory ("2020/09"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
+
+This file should not ever appear in a test. \ No newline at end of file
diff --git a/tests/data/journals/basic_folder/2020/should-be-ignored.txt b/tests/data/journals/basic_folder/2020/should-be-ignored.txt
new file mode 100644
index 00000000..24f57815
--- /dev/null
+++ b/tests/data/journals/basic_folder/2020/should-be-ignored.txt
@@ -0,0 +1,4 @@
+[2022-03-02 9:25:00 AM] This file should be ignored (year)
+This text file is in a folder journal's year directory ("2020"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
+
+This file should not ever appear in a test. \ No newline at end of file
diff --git a/tests/data/journals/basic_folder/should-be-ignored.txt b/tests/data/journals/basic_folder/should-be-ignored.txt
new file mode 100644
index 00000000..35b7ae2f
--- /dev/null
+++ b/tests/data/journals/basic_folder/should-be-ignored.txt
@@ -0,0 +1,4 @@
+[2022-03-02 9:25:00 AM] This file should be ignored (root)
+This text file is in a folder journal's root directory, but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.
+
+This file should not ever appear in a test. \ No newline at end of file
diff --git a/tests/unit/test_journals_folder_journal.py b/tests/unit/test_journals_folder_journal.py
new file mode 100644
index 00000000..09a3535f
--- /dev/null
+++ b/tests/unit/test_journals_folder_journal.py
@@ -0,0 +1,59 @@
+# Copyright © 2012-2023 jrnl contributors
+# License: https://www.gnu.org/licenses/gpl-3.0.html
+
+import pathlib
+from unittest import mock
+
+import pytest
+
+from jrnl.journals.FolderJournal import Folder
+
+
+@pytest.mark.parametrize(
+ "inputs_and_outputs",
+ [
+ [
+ "/2020/01",
+ ["02.txt", "03.txt", "31.txt"],
+ ["/2020/01/02.txt", "/2020/01/03.txt", "/2020/01/31.txt"],
+ ],
+ [
+ "/2020/02", # leap year
+ ["02.txt", "03.txt", "28.txt", "29.txt", "31.txt", "39.txt"],
+ [
+ "/2020/02/02.txt",
+ "/2020/02/03.txt",
+ "/2020/02/28.txt",
+ "/2020/02/29.txt",
+ ],
+ ],
+ [
+ "/2100/02", # not a leap year
+ ["01.txt", "28.txt", "29.txt", "39.txt"],
+ ["/2100/02/01.txt", "/2100/02/28.txt"],
+ ],
+ [
+ "/2023/04",
+ ["29.txt", "30.txt", "31.txt", "39.txt"],
+ ["/2023/04/29.txt", "/2023/04/30.txt"],
+ ],
+ ],
+)
+def test_get_day_files_expected_filtering(inputs_and_outputs):
+ year_month_path, glob_filenames, expected_output = inputs_and_outputs
+
+ year_month_path = pathlib.Path(year_month_path)
+
+ glob_files = map(lambda x: year_month_path / x, glob_filenames)
+ expected_output = list(map(lambda x: str(pathlib.PurePath(x)), expected_output))
+
+ with (
+ mock.patch("pathlib.Path.glob", return_value=glob_files),
+ mock.patch.object(pathlib.Path, "is_file", return_value=True),
+ ):
+ actual_output = list(Folder._get_day_files(year_month_path))
+ actual_output.sort()
+
+ expected_output.sort()
+
+ assert actual_output == expected_output
diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py
index 8bc5ac8e..1901a4dc 100644
--- a/tests/unit/test_time.py
+++ b/tests/unit/test_time.py
@@ -3,6 +3,8 @@
import datetime
+import pytest
+
from jrnl import time
@@ -20,3 +22,23 @@ def test_default_minute_is_added():
default_minute=30,
bracketed=False,
) == datetime.datetime(2020, 6, 20, 0, 30)
+
+
+@pytest.mark.parametrize(
+ "inputs",
+ [
+ [2000, 2, 29, True],
+ [2023, 1, 0, False],
+ [2023, 1, 1, True],
+ [2023, 4, 31, False],
+ [2023, 12, 31, True],
+ [2023, 12, 32, False],
+ [2023, 13, 1, False],
+ [2100, 2, 27, True],
+ [2100, 2, 28, True],
+ [2100, 2, 29, False],
+ ],
+)
+def test_is_valid_date(inputs):
+ year, month, day, expected_result = inputs
+ assert time.is_valid_date(year, month, day) == expected_result