summaryrefslogtreecommitdiffstats
path: root/.github/scripts/summarize_changelog.py
blob: 75648dff1054b9502caa71c5ebdcce9c17db47a4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Changelog v2 summary generator."""

import logging
import re
import sys
from typing import Dict

import requests


def fetch_pr_details(owner: str, repo: str, pr_number: str, github_token: str) -> dict:
    """Fetch details of a specific PR from GitHub.

    Parameters
    ----------
    owner : str
        Owner (user or organisation) of the repository.
    repo : str
        Repository name.
    pr_number : str
        Number of the pull request to look up.
    github_token : str
        Token sent in the ``Authorization`` header.

    Returns
    -------
    dict
        The decoded JSON payload when the API answers with status 200,
        otherwise an empty dict. Failures are logged, never raised, so a
        single unreachable PR does not abort the caller's loop.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    headers = {"Authorization": f"token {github_token}"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        # Timeouts and connection errors are reported the same way as a
        # non-200 answer: log and hand back an empty result.
        logging.error("Failed to fetch PR details for PR #%s: %s", pr_number, e)
        return {}

    if response.status_code == 200:
        return response.json()

    logging.error(
        "Failed to fetch PR details for PR #%s. Status code: %s",
        pr_number,
        response.status_code,
    )
    return {}


def parse_and_fetch_pr_details(
    markdown_text: str, owner: str, repo: str, github_token: str
) -> Dict[str, list]:
    """Parse the markdown text and fetch details of PRs mentioned in the text.

    The text is split into sections at level-2 headings (``## ``). Inside
    each section, list items that end in ``@author (#123)`` are collected and
    the referenced PR is looked up through ``fetch_pr_details``.

    Parameters
    ----------
    markdown_text : str
        Changelog markdown to scan.
    owner : str
        Repository owner passed on to ``fetch_pr_details``.
    repo : str
        Repository name passed on to ``fetch_pr_details``.
    github_token : str
        GitHub token passed on to ``fetch_pr_details``.

    Returns
    -------
    Dict[str, list]
        Maps each category name (heading text) to a list of
        ``{"title": ..., "body": ...}`` dicts, one per PR that could be
        fetched. The body has all whitespace runs collapsed to single spaces.
        Categories without any fetched PR are absent.
    """
    # Also split on a heading that sits on the very first line; splitting on
    # "\n## " alone would leave the "## " prefix in that category's name.
    sections = re.split(r"(?:^|\n)## ", markdown_text)
    categories: Dict[str, list] = {}

    for section in sections:
        split_section = section.split("\n", 1)
        if len(split_section) < 2:
            # A heading with nothing after it (or an empty chunk) has no items.
            continue

        category_name = split_section[0].strip()
        items_text = split_section[1].strip()
        items = re.findall(r"- (?:\[.*?\] - )?(.*?) @.*? \(#(\d+)\)", items_text)

        for _, pr_number in items:
            pr_details = fetch_pr_details(owner, repo, pr_number, github_token)
            if not pr_details:
                continue

            title = pr_details.get("title")
            if title is None:
                # Skip the PR instead of appending stale or undefined data.
                logging.error(
                    "Failed to fetch PR details for PR #%s: %s",
                    pr_number,
                    "missing title",
                )
                continue

            # A PR without a description comes back with a null body.
            body = pr_details.get("body") or ""
            categories.setdefault(category_name, []).append(
                {
                    "title": title,
                    "body": re.sub(r"\s+", " ", body).strip(),
                }
            )

    return categories


def insert_summary_into_markdown(
    markdown_text: str, category_name: str, summary: str
) -> str:
    """Insert a summary into the markdown text directly under the specified category name.

    The category is located by its level-2 heading, which must occupy a whole
    line (``## <category_name>``, trailing spaces or tabs allowed). The
    summary is placed after the heading, separated by one blank line from the
    heading and by one blank line from whatever content follows.

    Parameters
    ----------
    markdown_text : str
        Markdown document to update.
    category_name : str
        Heading text, without the leading ``## ``.
    summary : str
        Text to insert under the heading.

    Returns
    -------
    str
        The updated markdown. When the heading is not found an error is
        logged and ``markdown_text`` is returned unchanged.
    """
    # Anchor on the full heading line so that "### <name>" or a mention of
    # "## <name>" in the middle of a line is not mistaken for the section.
    heading = re.search(
        rf"^## {re.escape(category_name)}[ \t]*$", markdown_text, flags=re.MULTILINE
    )
    if heading is None:
        logging.error("Category '%s' not found in markdown.", category_name)
        return markdown_text

    newline_pos = markdown_text.find("\n", heading.end())
    if newline_pos == -1:
        # The heading is the last line of the file and has no newline:
        # just append the summary.
        return markdown_text + "\n\n" + summary + "\n"

    head = markdown_text[: newline_pos + 1]
    # Drop blank lines that already follow the heading so the spacing around
    # the summary is the same no matter how the section was laid out.
    tail = markdown_text[newline_pos + 1 :].lstrip("\n")
    separator = "\n\n" if tail else "\n"
    return head + "\n" + summary + separator + tail


def summarize_text_with_openai(text: str, openai_api_key: str) -> str:
    """Ask an OpenAI chat model for a short release summary of ``text``.

    Parameters
    ----------
    text : str
        Material to summarize; sent as the user message.
    openai_api_key : str
        API key used to build the OpenAI client.

    Returns
    -------
    str
        Content of the first choice in the chat completion response.
    """
    # Imported here so the module can be loaded without the openai package.
    from openai import OpenAI  # pylint: disable=C0415

    system_prompt = "Summarize the following text in a concise way to describe what happened in the new release. This will be used on top of the changelog to provide a high-level overview of the changes. Make sure it is well-written, concise, structured and that it captures the essence of the text. It should read like a concise story."  # noqa: E501 # pylint: disable=C0301
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    client = OpenAI(api_key=openai_api_key)
    completion = client.chat.completions.create(model="gpt-4", messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content  # type: ignore


def summarize_changelog_v2(
    github_token: str,
    openai_api_key: str,
    owner: str = "OpenBB-finance",
    repo: str = "OpenBBTerminal",
    changelog_v2: str = "CHANGELOG.md",
) -> None:
    """Summarize the Changelog v2 markdown text with PR details.

    Reads ``changelog_v2``, fetches the PRs referenced in it, asks OpenAI for
    a summary of each category of interest that has PRs, inserts the
    summaries under the matching headings and writes the file back in place.

    Parameters
    ----------
    github_token : str
        Token used for the GitHub API requests.
    openai_api_key : str
        Key used for the OpenAI requests.
    owner : str
        Repository owner the PR numbers belong to.
    repo : str
        Repository name the PR numbers belong to.
    changelog_v2 : str
        Path of the markdown file to read and overwrite.

    Returns
    -------
    None
        If the file cannot be read the error is logged and nothing is written.
    """
    try:
        # The category headings contain emoji, so the encoding is pinned
        # instead of relying on the platform default.
        with open(changelog_v2, encoding="utf-8") as file:
            logging.info("Reading file: %s", changelog_v2)
            data = file.read()
    except (OSError, UnicodeError) as e:
        logging.error("Failed to open or read file: %s", e)
        return

    logging.info("Parsing and fetching PR details...")
    categories = parse_and_fetch_pr_details(data, owner, repo, github_token)

    categories_of_interest = [
        "🚨 OpenBB Platform Breaking Changes",
        "🦋 OpenBB Platform Enhancements",
        "🐛 OpenBB Platform Bug Fixes",
        "📚 OpenBB Documentation Changes",
    ]
    updated_markdown = data

    # Markdown links and emphasis/code markers are stripped from PR bodies
    # before they are sent for summarization; compiled once for all categories.
    markup = re.compile(r"\[.*?\]\(.*?\)|[*_`]")

    logging.info("Summarizing text with OpenAI...")
    for category_of_interest in categories_of_interest:
        if category_of_interest not in categories:
            continue

        aggregated_text = "\n".join(
            f"- {pr['title']}: {markup.sub('', pr['body'])}"  # type: ignore
            for pr in categories[category_of_interest]  # type: ignore
        )
        summary = summarize_text_with_openai(aggregated_text, openai_api_key)
        updated_markdown = insert_summary_into_markdown(
            updated_markdown, category_of_interest, summary
        )

    with open(changelog_v2, "w", encoding="utf-8") as file:
        logging.info("Writing updated file: %s", changelog_v2)
        file.write(updated_markdown)


if __name__ == "__main__":
    # The functions above report progress with logging.info; without a
    # configured handler those messages are dropped and only errors show.
    logging.basicConfig(level=logging.INFO)

    # Expected invocation: <github_token> <openai_api_key> as the first two
    # command-line arguments.
    if len(sys.argv) < 3:
        logging.error(
            "Usage: python summarize_changelog.py <github_token> <openai_api_key>"
        )
        sys.exit(1)

    token = sys.argv[1]
    openai_key = sys.argv[2]

    summarize_changelog_v2(github_token=token, openai_api_key=openai_key)