From 0ba63e99c6c896a55c2ba0b59c3fc32023ea79ab Mon Sep 17 00:00:00 2001 From: Matthias Bilger Date: Thu, 5 Sep 2019 17:43:07 +0200 Subject: [PATCH] repeat every 30 seconds --- epgrab.py | 73 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/epgrab.py b/epgrab.py index 220f6ba..14bcf63 100644 --- a/epgrab.py +++ b/epgrab.py @@ -4,7 +4,9 @@ import logging import tinydb import json import pushover +import time logging.basicConfig(level=logging.DEBUG) +_logger = logging.getLogger(__name__) pushover.init('abz8is31hd3m2e36g62w4msihj87cr') db = tinydb.TinyDB(os.getenv('DB_FILE',default='/data/grabbing.json')) @@ -17,39 +19,42 @@ def thash(inp: str): cfgs = db.table('cfgs') grabs = db.table('grabs') Entry = tinydb.Query() -g = grab.Grab() -for cfg in config['grab']: - cfgid = thash(str(cfg)) - cfgentry = {'id': cfgid} - cfgentry.update(cfg) - if len(cfgs.search(Entry.id == cfgid)) == 0: - cfgs.insert(cfgentry) - g.go(cfg['url']) - - for xpath in cfg['xpaths']: - for elem in g.doc.select(xpath): - txt = elem.text() - try: - url = g.make_url_absolute(elem.attr('href')) - except: - url = None - info = '%s: %s' % (elem.text(), url) - id = thash(info) - existing_grab = grabs.search(Entry.id == id) - if len(existing_grab) > 0: - exists = False - for eg in existing_grab: - if eg['cfgid'] == cfgid: - exists = True - if exists: - continue - grabs.insert({ - 'id': id, - 'cfgid': cfgid, - 'info': info - }) - pushover.Client("u5w9h8gc7hpzvr5a2kh2xh4m9zpidq").send_message(txt, title=txt[:50], url=url) - - +_logger.info('initialization done, start grabbing') +while True: + g = grab.Grab() + for cfg in config['grab']: + cfgid = thash(str(cfg)) + _logger.info('grabbing with config %s', cfgid) + cfgentry = {'id': cfgid} + cfgentry.update(cfg) + if len(cfgs.search(Entry.id == cfgid)) == 0: + cfgs.insert(cfgentry) + g.go(cfg['url']) + for xpath in cfg['xpaths']: + for elem in g.doc.select(xpath): + txt = elem.text() + try: + url = g.make_url_absolute(elem.attr('href')) + except: + url = None + info = '%s: %s' % (elem.text(), url) + id = thash(info) + existing_grab = grabs.search(Entry.id == id) + if len(existing_grab) > 0: + exists = False + for eg in existing_grab: + if eg['cfgid'] == cfgid: + exists = True + if exists: + continue + grabs.insert({ + 'id': id, + 'cfgid': cfgid, + 'info': info + }) + pushover.Client("u5w9h8gc7hpzvr5a2kh2xh4m9zpidq").send_message(txt, title=txt[:50], url=url) + _logger.info('news found %s', info) + _logger.info('sleeping') + time.sleep(30)