From 3954af138134a155ee988b6a76108d999f867722 Mon Sep 17 00:00:00 2001 From: Bernhard Posselt Date: Mon, 28 Mar 2016 21:52:33 +0200 Subject: adjust updater to be able to use the console based api --- bin/updater/README.rst | 7 +- bin/updater/example-config.ini | 3 +- bin/updater/owncloud_news_updater/application.py | 55 ++++-- bin/updater/owncloud_news_updater/updater.py | 213 ++++++++++++++++------- 4 files changed, 201 insertions(+), 77 deletions(-) (limited to 'bin') diff --git a/bin/updater/README.rst b/bin/updater/README.rst index 39345ec18..f2f0e7f61 100644 --- a/bin/updater/README.rst +++ b/bin/updater/README.rst @@ -1,7 +1,12 @@ ownCloud News Updater ===================== -ownCloud does not require people to install threading or multiprocessing libraries. Because the feed update process is mainly limited by I/O, parallell fetching of RSS feed updates can speed up the updating process significantly. In addition the cronjob can get `into a deadlock `_ which will cause the updater to get stuck resulting in your feeds not to being updated anymore. This can be solved by using a script that uses the `updater API `_ +ownCloud does not require people to install threading or multiprocessing libraries. +Because the feed update process is mainly limited by I/O, +parallel fetching of RSS feed updates can speed up the updating process significantly.
+ +This can be done by using a script that uses the `updater REST API `_ +or (new in 8.1.0) the console based update API Preinstallation --------------- diff --git a/bin/updater/example-config.ini b/bin/updater/example-config.ini index 35aa12f66..145c7e3a6 100644 --- a/bin/updater/example-config.ini +++ b/bin/updater/example-config.ini @@ -6,5 +6,6 @@ user = admin password = admin threads = 10 interval = 900 +loglevel = error testrun = false -url = http://localhost/owncloud \ No newline at end of file +url = http://localhost/owncloud diff --git a/bin/updater/owncloud_news_updater/application.py b/bin/updater/owncloud_news_updater/application.py index b6d166b03..f0c61284d 100755 --- a/bin/updater/owncloud_news_updater/application.py +++ b/bin/updater/owncloud_news_updater/application.py @@ -10,11 +10,12 @@ __license__ = 'AGPL3+' __maintainer__ = 'Bernhard Posselt' __email__ = 'dev@bernhard-posselt.com' +import os import sys import argparse import configparser -from owncloud_news_updater.updater import Updater +from owncloud_news_updater.updater import WebUpdater, ConsoleUpdater def main(): @@ -37,17 +38,27 @@ def main(): will be subtracted from the interval.', default=15*60, type=int) + parser.add_argument('--loglevel', '-l', + help='Log granularity, info will log all urls and received data, error \ + will only log errors', + default='error', + choices=['info', 'error']) parser.add_argument('--config', '-c', help='Path to config file where all parameters except can be defined \ as key values pair. An example is in bin/example_config.ini') parser.add_argument('--user', '-u', help='Admin username to log into ownCloud.
Must be specified on the \ - command line or in the config file.') + command line or in the config file if the updater should update over \ + HTTP') parser.add_argument('--password', '-p', - help='Admin password to log into ownCloud') + help='Admin password to log into ownCloud if the updater should update \ + over HTTP') parser.add_argument('url', - help='The URL where owncloud is installed. Must be specified on the \ - command line or in the config file.', + help='The URL or absolute path to the directory where owncloud is \ + installed. Must be specified on the command line or in the config \ + file. If the URL starts with http:// or https://, a user and password \ + are required. Otherwise updater tries to use the console based API \ + which was added in 8.1.0', nargs='?') args = parser.parse_args() @@ -74,22 +85,42 @@ def main(): if 'url' in config_values: args.url = config_values['url'] + if not args.url: + _exit(parser, 'No url or directory given') + + # if url starts with a /, the console based API will be used + isWeb = args.url.startswith('http://') or args.url.startswith('https://') + # url and user must be specified either from the command line or in the # config file - if not args.url or not args.user: - parser.print_help() - exit(1) + if isWeb and not args.user: + _exit(parser, 'Web API requires a user') + + if not isWeb and not os.path.isabs(args.url): + _exit(parser, ('Absolute path to ownCloud installation required, given ' + '%s') % args.url) + + if not isWeb and not os.path.isdir(args.url): + _exit(parser, '%s is not a directory' % args.url) # create the updater and run the threads - updater = Updater(args.url, args.threads, args.interval, args.user, - args.password, args.timeout, args.testrun) + if isWeb: + updater = WebUpdater(args.url, args.threads, args.interval, + args.testrun, args.user, args.password, + args.timeout, args.loglevel) + else: + updater = ConsoleUpdater(args.url, args.threads, args.interval, + args.testrun, args.loglevel)
updater.run() +def _exit(parser, message): + print(message, file=sys.stderr) + parser.print_help() + exit(1) + if __name__ == '__main__': if sys.version_info < (3, 0): print('Python 3.0 or higher is required to run this script') else: main() - - diff --git a/bin/updater/owncloud_news_updater/updater.py b/bin/updater/owncloud_news_updater/updater.py index f59c4f05d..1712781e8 100644 --- a/bin/updater/owncloud_news_updater/updater.py +++ b/bin/updater/owncloud_news_updater/updater.py @@ -6,6 +6,7 @@ import requests import time import logging import urllib +from subprocess import check_output def check_status_code(response): if response.status_code != 200: @@ -15,113 +16,199 @@ def check_status_code(response): class Updater: - def __init__(self, base_url, thread_num, interval, user, password, timeout, - run_once): + def __init__(self, thread_num, interval, run_once, log_level): self.thread_num = thread_num - self.interval = interval - self.base_url = base_url - self.user = user - self.password = password - self.timeout = timeout self.run_once = run_once - - if self.base_url[-1] != '/': - self.base_url += '/' - self.base_url += 'index.php/apps/news/api/v1-2' - - self.before_cleanup_url = '%s/cleanup/before-update' % self.base_url - self.after_cleanup_url = '%s/cleanup/after-update' % self.base_url - self.all_feeds_url = '%s/feeds/all' % self.base_url - self.update_url = '%s/feeds/update' % self.base_url - + self.interval = interval # logging format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' logging.basicConfig(format=format) self.logger = logging.getLogger('ownCloud News Updater') - self.logger.setLevel(logging.INFO) - + if log_level == 'info': + self.logger.setLevel(logging.INFO) + else: + self.logger.setLevel(logging.ERROR) def run(self): + if self.run_once: + self.logger.info('Running update once with %d threads' % + self.thread_num) + else: + self.logger.info(('Running update in an interval of %d seconds ' + 'using %d threads') % (self.interval, + 
self.thread_num)) while True: self.start_time = time.time() # reset clock - try: - # run the cleanup request and get all the feeds to update - auth = (self.user, self.password) - - before = requests.get(self.before_cleanup_url, auth=auth) - check_status_code(before) - - feeds_response = requests.get(self.all_feeds_url, auth=auth) - check_status_code(feeds_response) + self.before_update() + feeds = self.all_feeds() - feeds_json = feeds_response.text - feeds = json.loads(feeds_json)['feeds'] - - # start thread_num threads which update the feeds threads = [] for num in range(0, self.thread_num): - thread = UpdateThread(feeds, self.update_url, self.user, - self.password, self.timeout, self.logger) + thread = self.start_update_thread(feeds) thread.start() threads.append(thread) - for thread in threads: thread.join() - after = requests.get(self.after_cleanup_url, auth=auth) - check_status_code(after) + self.after_update() if self.run_once: return - # wait until the interval finished to run again and subtract # the update run time from the interval - timeout = self.interval - int((time.time() - self.start_time)) + update_duration_seconds = int((time.time() - self.start_time)) + timeout = self.interval - update_duration_seconds if timeout > 0: + self.logger.info(('Finished updating in %d seconds, ' + 'next update in %d seconds') % + (update_duration_seconds, timeout)) time.sleep(timeout) - except (Exception) as e: - self.logger.error('%s: %s Trying again in 30 seconds' % - (self.base_url, e)) + self.logger.error('%s: Trying again in 30 seconds' % e) time.sleep(30) + def before_update(self): + raise NotImplementedError + + def start_update_thread(self, feeds): + raise NotImplementedError + + def all_feeds(self): + raise NotImplementedError + + def after_update(self): + raise NotImplementedError + class UpdateThread(threading.Thread): lock = threading.Lock() - def __init__(self, feeds, update_url, user, password, timeout, logger): + def __init__(self, feeds, logger): 
super().__init__() self.feeds = feeds - self.update_url = update_url - self.user = user - self.password = password - self.timeout = timeout self.logger = logger def run(self): while True: with UpdateThread.lock: if len(self.feeds) > 0: feed = self.feeds.pop() else: return + try: + self.logger.info('Updating feed with id %s and user %s' % + (feed['id'], feed['userId'])) + self.update_feed(feed) + except (Exception) as e: + self.logger.error(e) - feed['feedId'] = feed['id'] - del feed['id'] + def update_feed(self, feed): + raise NotImplementedError - # call the update method of one feed - data = urllib.parse.urlencode(feed) - headers = { - 'Content-type': 'application/json', - 'Accept': 'text/plain' - } - url = '%s?%s' % (self.update_url, data) - try: - auth = (self.user, self.password) - request = requests.get(url, auth=auth, timeout=self.timeout) - check_status_code(request) - except (Exception) as e: - self.logger.error('%s: %s' % (url, e)) +class WebUpdater(Updater): + + def __init__(self, base_url, thread_num, interval, run_once, + user, password, timeout, log_level): + super().__init__(thread_num, interval, run_once, log_level) + self.base_url = base_url + self.auth = (user, password) + self.timeout = timeout + + if self.base_url[-1] != '/': + self.base_url += '/' + self.base_url += 'index.php/apps/news/api/v1-2' + + self.before_cleanup_url = '%s/cleanup/before-update' % self.base_url + self.after_cleanup_url = '%s/cleanup/after-update' % self.base_url + self.all_feeds_url = '%s/feeds/all' % self.base_url + self.update_url = '%s/feeds/update' % self.base_url + + def before_update(self): + self.logger.info('Calling before update url: %s' % self.before_cleanup_url) + before = requests.get(self.before_cleanup_url, auth=self.auth) + check_status_code(before) + + def start_update_thread(self, feeds): + return WebUpdateThread(feeds, self.logger, self.update_url, self.auth, + self.timeout) + + def all_feeds(self): + feeds_response =
requests.get(self.all_feeds_url, auth=self.auth) + check_status_code(feeds_response) + feeds_json = feeds_response.text + self.logger.info('Received these feeds to update: %s' % feeds_json) + return json.loads(feeds_json)['feeds'] + + def after_update(self): + self.logger.info('Calling after update url: %s' % self.after_cleanup_url) + after = requests.get(self.after_cleanup_url, auth=self.auth) + check_status_code(after) + + +class WebUpdateThread(UpdateThread): + + def __init__(self, feeds, logger, update_url, auth, timeout): + super().__init__(feeds, logger) + self.update_url = update_url + self.auth = auth + self.timeout = timeout + + def update_feed(self, feed): + # rewrite parameters, a feed's id is mapped to feedId + feed['feedId'] = feed['id'] + del feed['id'] + + # turn the Python dict into url parameters + data = urllib.parse.urlencode(feed) + headers = { + 'Accept': 'text/plain' + } + url = '%s?%s' % (self.update_url, data) + request = requests.get(url, auth=self.auth, timeout=self.timeout) + check_status_code(request) + + +class ConsoleUpdater(Updater): + + def __init__(self, directory, thread_num, interval, run_once, log_level): + super().__init__(thread_num, interval, run_once, log_level) + self.directory = directory.rstrip('/') + base_command = ['php', '-f', self.directory + '/occ'] + self.before_cleanup_command = base_command + ['news:updater:before-update'] + self.all_feeds_command = base_command + ['news:updater:all-feeds'] + self.update_feed_command = base_command + ['news:updater:update-feed'] + self.after_cleanup_command = base_command + ['news:updater:after-update'] + + def before_update(self): + self.logger.info('Running before update command %s' % + ' '.join(self.before_cleanup_command)) + check_output(self.before_cleanup_command) + + def start_update_thread(self, feeds): + return ConsoleUpdateThread(feeds, self.logger, self.update_feed_command) + + def all_feeds(self): + feeds_json = check_output(self.all_feeds_command).strip() + feeds_json =
str(feeds_json, 'utf-8') + self.logger.info('Received these feeds to update: %s' % feeds_json) + return json.loads(feeds_json)['feeds'] + + def after_update(self): + self.logger.info('Running after update command %s' % + ' '.join(self.after_cleanup_command)) + check_output(self.after_cleanup_command) + + +class ConsoleUpdateThread(UpdateThread): + + def __init__(self, feeds, logger, update_base_command): + super().__init__(feeds, logger) + self.update_base_command = update_base_command + + def update_feed(self, feed): + command = self.update_base_command + [str(feed['id']), feed['userId']] + self.logger.info('Running update command %s' % ' '.join(command)) + check_output(command) -- cgit v1.2.3