summaryrefslogtreecommitdiffstats
path: root/.github/scripts
diff options
context:
space:
mode:
authormaneamarius <locomotion.itservices@gmail.com>2022-06-28 14:22:21 +0300
committerGitHub <noreply@github.com>2022-06-28 14:22:21 +0300
commit2b5999c39337e1fc0f6c25a22e48f2d23fc6e9f8 (patch)
treedb017cf601c5d89b762b96caecd52d91a0b2f259 /.github/scripts
parentaea87f1f5af940a681addb478f224efe00b9933c (diff)
Implement PackageCloud cleanup (#13236)
* add script for netdata packagecloud cleanup * add script for netdata packagecloud cleanup * remove old package-cloud cleanup code * add workflow for packagecloud cleanup * Debug msg * allow individual matrix jobs to complete independently to each other * perform cleanup without dry-run * remove debug msg * remove cleanup line from the slack notification
Diffstat (limited to '.github/scripts')
-rwxr-xr-x.github/scripts/netdata-pkgcloud-cleanup.py190
-rwxr-xr-x.github/scripts/old_package_purging.sh88
2 files changed, 190 insertions, 88 deletions
diff --git a/.github/scripts/netdata-pkgcloud-cleanup.py b/.github/scripts/netdata-pkgcloud-cleanup.py
new file mode 100755
index 0000000000..f6311e47cd
--- /dev/null
+++ b/.github/scripts/netdata-pkgcloud-cleanup.py
@@ -0,0 +1,190 @@
+#!/bin/env python3
+
+import requests
+from requests.auth import HTTPBasicAuth
+from datetime import date, datetime, timedelta
+import os
+import sys
+import argparse
+from pprint import pprint
+from datetime import datetime
+from dateutil import parser
+
+
class PackageCloud:
    """Apply a retention policy to one of the Netdata packagecloud.io repositories.

    The 'stable' repository keeps the newest ``NUM_PACKAGE_MINOR_TO_KEEP``
    minor versions of every package, per distribution and architecture.
    Every other repository type drops packages created more than
    ``NUM_RETENTION_DAYS`` days ago.
    """

    # Note: with Python slice semantics a value of 0 keeps *every* version.
    NUM_PACKAGE_MINOR_TO_KEEP = 5
    NUM_RETENTION_DAYS = 30
    # number of pages to process. Use '0' to process all
    MAX_PAGES = 0

    # Repository path on packagecloud.io for each accepted repo type.
    _REPOS = {
        "stable": "netdata/netdata",
        "devel": "netdata/netdata-devel",
        "edge": "netdata/netdata-edge",
    }
    # Package names that get_all_packages() leaves out of its result.
    _EXCLUDED_PACKAGES = ("netdata-repo", "netdata-repo-edge")
    # Position of the architecture component in a '/'-split 'package_url'.
    _ARCH_URL_INDEX = 11
    # Timestamp layout of the 'created_at' field; the trailing 'Z' marks UTC.
    _CREATED_AT_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, repo_type, dry_run=True, auth_token=None):
        """Prepare a client for the repository selected by *repo_type*.

        Args:
            repo_type: One of 'stable', 'devel' or 'edge'. Any other value
                prints an error and terminates the process with status 1.
            dry_run: When true, delete_package() only reports what it
                would delete.
            auth_token: API token, sent as the HTTP basic-auth user name
                with an empty password. No authentication is used when it
                is empty or None.
        """
        self.headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }
        self.dry_run = dry_run
        self.repo_type = repo_type
        repo = self._REPOS.get(repo_type)
        if repo is None:
            print(f"ERROR: unknown repo type '{repo_type}'!\nAccepted values are: stable,devel,edge",
                  file=sys.stderr)
            sys.exit(1)
        self.base_url = f"https://packagecloud.io/api/v1/repos/{repo}"
        self.auth = HTTPBasicAuth(username=auth_token, password='') if auth_token else None

    @staticmethod
    def _minor_version(version):
        """Return *version* without its last dot-separated component."""
        return '.'.join(version.split('.')[:-1])

    @staticmethod
    def _version_sort_key(version):
        """Build a sort key that orders numeric components numerically.

        Numeric components sort before non-numeric ones; tagging each
        component keeps integers and strings from being compared directly.
        """
        return [
            (0, int(part)) if part.isdigit() else (1, part)
            for part in version.split('.')
        ]

    @classmethod
    def _arch_of(cls, pkg):
        """Return the architecture component of the package's 'package_url'."""
        return pkg['package_url'].split('/')[cls._ARCH_URL_INDEX]

    def get_all_packages(self):
        """Fetch the repository's package list, one page at a time.

        Paging stops at the first empty page, or after ``MAX_PAGES`` pages
        when that limit is non-zero.

        Returns:
            A list of package dicts, without the packages named in
            ``_EXCLUDED_PACKAGES``.

        Raises:
            requests.HTTPError: If a page request returns an error status.
        """
        page = 1
        all_pkg_list = []
        while self.MAX_PAGES == 0 or page <= self.MAX_PAGES:
            url = f"{self.base_url}/packages.json?page={page}"
            response = requests.get(url, auth=self.auth, headers=self.headers)
            # Fail loudly instead of iterating over an error payload.
            response.raise_for_status()
            pkg_list = response.json()
            if not pkg_list:
                break
            print(f"Processing page: {page}")
            all_pkg_list.extend(
                pkg for pkg in pkg_list
                if pkg['name'] not in self._EXCLUDED_PACKAGES
            )
            page += 1
        return all_pkg_list

    def delete_package(self, destroy_url):
        """Delete the package behind *destroy_url*, unless in dry-run mode.

        Args:
            destroy_url: Path, relative to https://packagecloud.io, that
                destroys the package.

        Returns:
            True when the server reported success; False on failure and
            in dry-run mode.
        """
        if self.dry_run:
            print(f" - DRY_RUN mode. Not deleting package '{destroy_url}'.")
            return False
        print(f" - Deleting package: {destroy_url}")
        url = f"https://packagecloud.io{destroy_url}"
        response = requests.delete(url, auth=self.auth, headers=self.headers)
        # Judge the outcome by the HTTP status; the result used to be
        # overwritten with None, so failures were reported as successes.
        if response.ok:
            print(" Package deleted successfully.")
            return True
        print(" Failed deleting package!")
        return False

    def get_destroy_url(self, pkg_url):
        """Look up the 'destroy_url' of the package described at *pkg_url*.

        Raises:
            requests.HTTPError: If the lookup returns an error status.
        """
        url = f"https://packagecloud.io{pkg_url}"
        response = requests.get(url, auth=self.auth, headers=self.headers)
        response.raise_for_status()
        return response.json()['destroy_url']

    def get_packages_for_distro(self, distro, all_pkg_list):
        """Return the packages whose 'distro_version' equals *distro*."""
        return [pkg for pkg in all_pkg_list if pkg['distro_version'] == distro]

    def get_packages_for_arch(self, arch, all_pkg_list):
        """Return the packages whose 'package_url' names architecture *arch*."""
        return [pkg for pkg in all_pkg_list if self._arch_of(pkg) == arch]

    def get_arches(self, pkg_list):
        """Return the distinct architectures found in *pkg_list*, sorted."""
        return sorted({self._arch_of(pkg) for pkg in pkg_list})

    def get_pkg_list(self, pkg_name, pkg_list):
        """Return the packages in *pkg_list* whose 'name' equals *pkg_name*."""
        return [pkg for pkg in pkg_list if pkg['name'] == pkg_name]

    def get_minor_versions(self, all_versions):
        """Return the distinct minor versions of *all_versions*, oldest first.

        A minor version is the version string without its last
        dot-separated component. Ordering is numeric per component, so
        '1.10' sorts after '1.9'.
        """
        minor_versions = {self._minor_version(version) for version in all_versions}
        return sorted(minor_versions, key=self._version_sort_key)

    def is_pkg_older_than_days(self, pkg, num_days):
        """Tell whether *pkg* was created more than *num_days* whole days ago.

        The 'created_at' timestamp is UTC, so it is compared against the
        current UTC time rather than the local wall clock.

        Raises:
            ValueError: If 'created_at' does not match the expected format.
        """
        from datetime import timezone

        pkg_create_date = datetime.strptime(
            pkg['created_at'], self._CREATED_AT_FORMAT
        ).replace(tzinfo=timezone.utc)
        time_difference = datetime.now(timezone.utc) - pkg_create_date
        return time_difference.days > num_days

    def cleanup_repo(self):
        """Run the cleanup strategy that matches the repository type."""
        if self.repo_type == 'stable':
            self.cleanup_stable_repo()
        else:
            self.cleanup_edge_repo()

    def cleanup_edge_repo(self):
        """Delete every package older than ``NUM_RETENTION_DAYS`` days."""
        all_pkg_list = self.get_all_packages()
        pkgs_to_delete = []
        pkgs_to_keep = []
        for package in all_pkg_list:
            if self.is_pkg_older_than_days(package, self.NUM_RETENTION_DAYS):
                pkgs_to_delete.append(package)
            else:
                pkgs_to_keep.append(package)
        print(f"Keeping the following packages (newer than {self.NUM_RETENTION_DAYS} days):")
        for pkg in pkgs_to_keep:
            print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
        print(f"Deleting the following packages (older than {self.NUM_RETENTION_DAYS} days):")
        for pkg in pkgs_to_delete:
            print(f" > pkg: {pkg['package_html_url']} / created_at: {pkg['created_at']}")
            self.delete_package(pkg['destroy_url'])

    def cleanup_stable_repo(self):
        """Keep only the newest minor versions of each stable package.

        Packages are grouped by distribution, then architecture, then
        name; each group is trimmed independently.
        """
        all_pkg_list = self.get_all_packages()
        all_distros = sorted({pkg['distro_version'] for pkg in all_pkg_list})
        print(f"<> Distributions list: {all_distros}")

        for distro in all_distros:
            print(f">> Processing distro: {distro}")
            pkg_list_distro = self.get_packages_for_distro(distro, all_pkg_list)
            arches = self.get_arches(pkg_list_distro)
            print(f" <> Arch list: {arches}")
            for arch in arches:
                print(f" >> Processing arch: {distro} -> {arch}")
                pkg_list_arch = self.get_packages_for_arch(arch, pkg_list_distro)
                pkg_names = sorted({pkg['name'] for pkg in pkg_list_arch})
                print(f" <> Package names: {pkg_names}")
                for pkg_name in pkg_names:
                    print(f" >> Processing package: {distro} -> {arch} -> {pkg_name}")
                    self._purge_old_minor_versions(self.get_pkg_list(pkg_name, pkg_list_arch))

    def _purge_old_minor_versions(self, pkg_list):
        """Delete the packages in *pkg_list* that fall outside the kept minors."""
        pkg_minor_versions = self.get_minor_versions([pkg['version'] for pkg in pkg_list])
        pkg_minor_to_keep = pkg_minor_versions[-self.NUM_PACKAGE_MINOR_TO_KEEP:]
        print(f" <> Minor Package Versions to Keep: {pkg_minor_to_keep}")
        pkg_minor_to_delete = [
            minor for minor in pkg_minor_versions if minor not in pkg_minor_to_keep
        ]
        print(f" <> Minor Package Versions to Delete: {pkg_minor_to_delete}")
        doomed_minors = set(pkg_minor_to_delete)
        urls_to_delete = [
            pkg['package_url'] for pkg in pkg_list
            if self._minor_version(pkg['version']) in doomed_minors
        ]
        for pkg_url in urls_to_delete:
            # The destroy URL is looked up from the package's detail record.
            destroy_url = self.get_destroy_url(pkg_url)
            self.delete_package(destroy_url)
+
def configure():
    """Collect the run configuration from the command line and environment.

    Reads ``--repo-type``/``-r`` (required) and ``--dry-run``/``-d`` from
    the command line, and the API token from the ``PKGCLOUD_TOKEN``
    environment variable.

    Returns:
        A dict with the keys 'repo_type', 'dry_run' and 'token'.

    Exits with status 1 when ``PKGCLOUD_TOKEN`` is not set.
    """
    # Named 'arg_parser' so it does not shadow the module-level
    # 'parser' imported from dateutil.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--repo-type', '-r', required=True,
                            help='Repository type against to perform cleanup')
    arg_parser.add_argument('--dry-run', '-d', action='store_true',
                            help='Dry-run Mode')
    args = arg_parser.parse_args()
    try:
        token = os.environ['PKGCLOUD_TOKEN']
    except KeyError:
        print("FATAL: 'PKGCLOUD_TOKEN' environment variable is not set!", file=sys.stderr)
        sys.exit(1)
    return {
        'repo_type': args.repo_type,
        'dry_run': args.dry_run,
        'token': token,
    }
+
+
def main():
    """Script entry point: read the configuration and clean the repository."""
    settings = configure()
    cleaner = PackageCloud(
        repo_type=settings['repo_type'],
        dry_run=settings['dry_run'],
        auth_token=settings['token'],
    )
    cleaner.cleanup_repo()


# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
diff --git a/.github/scripts/old_package_purging.sh b/.github/scripts/old_package_purging.sh
deleted file mode 100755
index 727a1c2566..0000000000
--- a/.github/scripts/old_package_purging.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env bash
-#
-# Script to handle package cloud retention policy
-# Our open source subscription is limited,
-# so we use this script to control the number of packages maintained historically
-#
-# Dependencies:
-# - PACKAGE_CLOUD_RETENTION_DAYS
-# This is to indicate for how many days back we want to maintain the various RPM and DEB packages on package cloud
-#
-# Copyright : SPDX-License-Identifier: GPL-3.0-or-later
-#
-# Author : Pavlos Emm. Katsoulakis <paul@netdata.cloud>
-#
-set -e
-
-delete_files_for_version() {
- local v="$1"
-
- # Delete the selected filenames in version
- FILES_IN_VERSION=$(jq --sort-keys --arg v "${v}" '.[] | select ( .version | contains($v))' "${PKG_LIST_FILE}" | grep filename | cut -d':' -f 2)
-
- # Iterate through the files and delete them
- for pkg in ${FILES_IN_VERSION/\\n/}; do
- pkg=${pkg/,/}
- pkg=${pkg/\"/}
- pkg=${pkg/\"/}
- echo "Attempting yank on ${pkg}.."
- .github/scripts/package_cloud_wrapper.sh yank "${REPO}" "${pkg}" || echo "Nothing to yank or error on ${pkg}"
- done
-}
-
-# If we are not in netdata git repo, at the top level directory, fail
-TOP_LEVEL=$(basename "$(git rev-parse --show-toplevel)")
-CWD=$(git rev-parse --show-cdup)
-if [ -n "$CWD" ] || [ ! "${TOP_LEVEL}" == "netdata" ]; then
- echo "Run as .github/scripts/$(basename "$0") from top level directory of netdata git repository"
- echo "Old packages yanking cancelled"
- exit 1
-fi
-
-if [ -z "${REPO}" ]; then
- echo "No REPO variable found"
- exit 1
-fi
-
-if [ -z ${PKG_CLOUD_TOKEN} ]; then
- echo "No PKG_CLOUD_TOKEN variable found"
- exit 1
-fi
-
-if [ -z ${PACKAGE_CLOUD_RETENTION_DAYS} ]; then
- echo "No PACKAGE_CLOUD_RETENTION_DAYS variable found"
- exit 1
-fi
-
-TMP_DIR="$(mktemp -d /tmp/netdata-old-package-yanking-XXXXXX)"
-PKG_LIST_FILE="${TMP_DIR}/complete_package_list.json"
-DATE_EPOCH="1970-01-01"
-DATE_UNTIL_TO_DELETE=$(date --date="${PACKAGE_CLOUD_RETENTION_DAYS} day ago" +%Y-%m-%d)
-
-
-echo "Created temp directory: ${TMP_DIR}"
-echo "We will be purging contents up until ${DATE_UNTIL_TO_DELETE}"
-
-echo "Calling package could to retrieve all available packages on ${REPO}"
-curl -sS "https://${PKG_CLOUD_TOKEN}:@packagecloud.io/api/v1/repos/${REPO}/packages.json" > "${PKG_LIST_FILE}"
-
-# Get versions within the desired duration
-#
-VERSIONS_TO_PURGE=$(jq --arg s "${DATE_EPOCH}" --arg e "${DATE_UNTIL_TO_DELETE}" '
-[($s, $e) | strptime("%Y-%m-%d")[0:3]] as $r
- | map(select(
- (.created_at[:19] | strptime("%Y-%m-%dT%H:%M:%S")[0:3]) as $d
- | $d >= $r[0] and $d <= $r[1]
-))' "${PKG_LIST_FILE}" | grep '"version":' | sort -u | sed -e 's/ //g' | cut -d':' -f2)
-
-echo "We will be deleting the following versions: ${VERSIONS_TO_PURGE}"
-for v in ${VERSIONS_TO_PURGE/\n//}; do
- v=${v/\"/}
- v=${v/\"/}
- v=${v/,/}
- echo "Remove all files for version $v"
- delete_files_for_version "${v}"
-done
-
-# Done, clean up
-[ -d "${TMP_DIR}" ] && rm -rf "${TMP_DIR}"