repeat every 30 seconds
This commit is contained in:
73
epgrab.py
73
epgrab.py
@@ -4,7 +4,9 @@ import logging
|
||||
import tinydb
|
||||
import json
|
||||
import pushover
|
||||
import time
|
||||
# Log everything from DEBUG upwards and use a module-scoped logger.
logging.basicConfig(level=logging.DEBUG)
_logger = logging.getLogger(__name__)

# NOTE(review): the Pushover application token is hard-coded here; consider
# loading it from the environment instead of keeping it in source control.
pushover.init('abz8is31hd3m2e36g62w4msihj87cr')

# The database location can be overridden through the DB_FILE environment
# variable; otherwise the default path below is used.
db = tinydb.TinyDB(os.environ.get('DB_FILE', '/data/grabbing.json'))
|
||||
@@ -17,39 +19,42 @@ def thash(inp: str):
|
||||
# Two tables live in the database: the grab configurations seen so far and
# the items that have already been reported.
cfgs = db.table('cfgs')
grabs = db.table('grabs')
Entry = tinydb.Query()

# One pass over every configured page: fetch it, select the configured
# elements and send a notification for each item not recorded yet.
g = grab.Grab()
for cfg in config['grab']:
    # A configuration is identified by the hash of its string form.
    cfgid = thash(str(cfg))
    if not cfgs.search(Entry.id == cfgid):
        cfgentry = {'id': cfgid}
        cfgentry.update(cfg)
        cfgs.insert(cfgentry)
    g.go(cfg['url'])

    for xpath in cfg['xpaths']:
        for elem in g.doc.select(xpath):
            txt = elem.text()
            try:
                url = g.make_url_absolute(elem.attr('href'))
            except Exception:
                # If the href cannot be read or resolved, the item is still
                # reported, just without a link. Catching Exception (not a
                # bare except) lets KeyboardInterrupt/SystemExit propagate.
                url = None
            info = '%s: %s' % (txt, url)
            grab_id = thash(info)

            # An item counts as known only if it was stored for this very
            # configuration; the same item under another config is new.
            already_seen = any(
                eg['cfgid'] == cfgid
                for eg in grabs.search(Entry.id == grab_id)
            )
            if already_seen:
                continue

            grabs.insert({
                'id': grab_id,
                'cfgid': cfgid,
                'info': info,
            })
            # NOTE(review): the Pushover user key is hard-coded; consider
            # moving it to configuration or the environment.
            pushover.Client("u5w9h8gc7hpzvr5a2kh2xh4m9zpidq").send_message(txt, title=txt[:50], url=url)
|
||||
|
||||
|
||||
_logger.info('initialization done, start grabbing')

# Poll forever: each round fetches every configured page, notifies about
# items that are not recorded yet, then waits 30 seconds.
while True:
    g = grab.Grab()
    for cfg in config['grab']:
        # A configuration is identified by the hash of its string form.
        cfgid = thash(str(cfg))
        _logger.info('grabbing with config %s', cfgid)
        if not cfgs.search(Entry.id == cfgid):
            cfgentry = {'id': cfgid}
            cfgentry.update(cfg)
            cfgs.insert(cfgentry)
        g.go(cfg['url'])

        for xpath in cfg['xpaths']:
            for elem in g.doc.select(xpath):
                txt = elem.text()
                try:
                    url = g.make_url_absolute(elem.attr('href'))
                except Exception:
                    # If the href cannot be read or resolved, the item is
                    # still reported, just without a link. Catching
                    # Exception (not a bare except) keeps Ctrl-C and
                    # SystemExit able to stop the loop.
                    url = None
                info = '%s: %s' % (txt, url)
                grab_id = thash(info)

                # An item counts as known only if it was stored for this
                # very configuration; the same item under another config
                # is treated as new.
                already_seen = any(
                    eg['cfgid'] == cfgid
                    for eg in grabs.search(Entry.id == grab_id)
                )
                if already_seen:
                    continue

                grabs.insert({
                    'id': grab_id,
                    'cfgid': cfgid,
                    'info': info,
                })
                # NOTE(review): the Pushover user key is hard-coded; consider
                # moving it to configuration or the environment.
                pushover.Client("u5w9h8gc7hpzvr5a2kh2xh4m9zpidq").send_message(txt, title=txt[:50], url=url)
                _logger.info('news found %s', info)

    _logger.info('sleeping')
    time.sleep(30)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user