summaryrefslogtreecommitdiffstats
path: root/gitsrht/git.py
blob: ac00fef6bb85f1f77489fdb658360089d3f79780 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from collections import deque
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from gitsrht.redis import redis
from pygit2 import Repository, Tag
import pygit2
import json

def trim_commit(msg):
    """Return the first line of a commit message.

    Everything from the first newline onwards is dropped; a message that
    contains no newline is returned as-is.
    """
    first_line, _, _ = msg.partition("\n")
    return first_line

def commit_time(commit):
    """Return the time of a commit (or annotated tag) as a naive UTC datetime.

    The signature is taken from ``commit.author`` when that attribute
    exists, otherwise from ``commit.tagger``.

    The result is computed directly from the signature's epoch timestamp,
    so repeated calls for the same object always return the same value.
    """
    author = commit.author if hasattr(commit, 'author') else commit.tagger
    # author.time is an epoch timestamp, i.e. an absolute instant; the
    # signature's UTC offset only affects how that instant is displayed
    # locally, so it is not needed to obtain the UTC wall-clock time.
    utc_time = datetime.fromtimestamp(float(author.time), timezone.utc)
    # Callers get a naive datetime expressed in UTC.
    return utc_time.replace(tzinfo=None)

@lru_cache(maxsize=256)
def CachedRepository(path):
    """Return a ``_CachedRepository`` for ``path``, memoized per path.

    Up to 256 distinct paths are kept by the LRU cache, so repeated calls
    with the same path hand back the same repository object.
    """
    repository = _CachedRepository(path)
    return repository

@lru_cache(maxsize=1024)
def _get_ref(repo, ref):
    """Look up ``ref`` through ``repo._get``, memoizing on ``(repo, ref)``.

    Up to 1024 results are kept by the LRU cache; ``repo`` must therefore
    be hashable.
    """
    uncached_lookup = repo._get
    return uncached_lookup(ref)

class _CachedRepository(Repository):
    """A ``Repository`` whose ``get`` lookups are memoized via ``_get_ref``."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through to the Repository constructor.
        super().__init__(*args, **kwargs)

    def get(self, ref):
        """Return the object for ``ref``, served from the shared LRU cache."""
        return _get_ref(self, ref)

    def _get(self, ref):
        """Perform the real, uncached lookup on the underlying Repository."""
        return super().get(ref)

    def default_branch(self):
        """Return the "master" branch, or else the first local branch.

        Raises IndexError when "master" is missing and the repository has
        no local branches at all.
        """
        master = self.branches.get("master")
        if master:
            return master
        first_local = list(self.branches.local)[0]
        return self.branches.get(first_local)

class AnnotatedTreeEntry:
    """A tree entry together with the commit it has been annotated with.

    Instances can be turned into plain dicts with ``serialize`` and
    rebuilt with ``deserialize``. Equality and hashing are based on the
    entry's ``id`` and ``name``.
    """

    def __init__(self, repo, entry):
        """Wrap ``entry`` (a tree entry from ``repo``).

        When ``entry`` is falsy (as ``deserialize`` does), ``id``, ``name``,
        ``type`` and ``filemode`` are left unset and must be assigned by
        the caller.
        """
        self._entry = entry
        self._repo = repo
        self.commit = None
        if entry:
            self.id = entry.id.hex
            self.name = entry.name
            self.type = entry.type
            self.filemode = entry.filemode

    def fetch_blob(self):
        """Load the referenced object from the repository and return self.

        The object is stored on ``self.tree`` when ``self.type`` is
        ``"tree"`` and on ``self.blob`` otherwise.
        """
        if self.type == "tree":
            self.tree = self._repo.get(self.id)
        else:
            self.blob = self._repo.get(self.id)
        return self

    def serialize(self):
        """Return a JSON-friendly dict describing this entry.

        ``"commit"`` holds the annotated commit's hex id, or ``None`` when
        no commit has been attached.
        """
        commit = getattr(self, "commit", None)
        return {
            "id": self.id,
            "name": self.name,
            "type": self.type,
            "filemode": self.filemode,
            "commit": commit.id.hex if commit else None,
        }

    @staticmethod
    def deserialize(res, repo):
        """Rebuild an entry from a dict produced by ``serialize``.

        The commit is looked up through ``repo.get`` only when ``res``
        carries a non-empty ``"commit"`` value; ``serialize`` always writes
        that key, using ``None`` for entries without a commit, so testing
        for the key alone would trigger a lookup of ``None``.
        """
        entry = AnnotatedTreeEntry(repo, None)
        entry.id = res["id"]
        entry.name = res["name"]
        entry.type = res["type"]
        entry.filemode = res["filemode"]
        commit_id = res.get("commit")
        entry.commit = repo.get(commit_id) if commit_id else None
        return entry

    def __hash__(self):
        return hash(f"{self.id}:{self.name}")

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(other, AnnotatedTreeEntry):
            return NotImplemented
        return self.id == other.id and self.name == other.name

    def __repr__(self):
        return f"<AnnotatedTreeEntry {self.name} {self.id}>"

def annotate_tree(repo, tree, commit):
    """Return the entries of ``tree`` as fetched ``AnnotatedTreeEntry`` objects.

    A previously stored result is looked up in redis under a key derived
    from the tree's id and used when it can be decoded. Otherwise history
    is walked from ``commit`` and entries are annotated with commits based
    on differences between successive trees; the result is then stored in
    redis with a 30 day ``timedelta``.

    When ``commit`` has no parents the entries are returned without any
    annotation and nothing is written to the cache.
    """
    key = f"git.sr.ht:git:tree:{tree.id.hex}"
    cached = redis.get(key)
    if cached:
        try:
            payload = json.loads(cached.decode())
            return [AnnotatedTreeEntry.deserialize(
                e, repo).fetch_blob() for e in payload.values()]
        except Exception:
            # Unusable cache entry: drop it and recompute below. Only
            # ordinary errors are handled so that KeyboardInterrupt and
            # SystemExit still propagate.
            redis.delete(key)

    # NOTE(review): entries are keyed by object id, so two entries of the
    # tree that share the same id collapse into a single item here.
    entries = { entry.id.hex: AnnotatedTreeEntry(
        repo, entry) for entry in tree }

    if not any(commit.parents):
        return [entry.fetch_blob() for entry in entries.values()]

    left_tree = set(entries.values())
    unfinished = set(left_tree)
    parent = commit
    for ancestor in repo.walk(commit.id, pygit2.GIT_SORT_TIME):
        if not any(unfinished):
            break
        # Built via an id-keyed dict first, exactly like `entries` above.
        right_by_id = { entry.id.hex: AnnotatedTreeEntry(repo, entry)
                for entry in parent.tree }
        right_tree = set(right_by_id.values())
        diff = left_tree - right_tree
        for entry in diff:
            if entry.id in entries:
                entries[entry.id].commit = ancestor
        unfinished = unfinished - diff
        left_tree = right_tree
        parent = ancestor

    serialized = {entry.name: entry.serialize() for entry in entries.values()}
    # NOTE(review): the (key, value, ttl) argument order must match the
    # redis client in use -- confirm against the client version.
    redis.setex(key, json.dumps(serialized), timedelta(days=30))

    return [entry.fetch_blob() for entry in entries.values()]