infomentor/infomentor/connector.py

import requests
import re
import json
import os
import http.cookiejar
import time
import math
import datetime
import contextlib
import logging
import urllib.parse
import uuid
from infomentor import model


class InfomentorFile(object):
    '''Represent a file which is downloaded'''

    def __init__(self, directory, filename):
        if directory is None:
            raise Exception('directory is required')
        self.filename = filename
        self.randomid = str(uuid.uuid4())
        self.directory = directory

    @property
    def targetfile(self):
        return os.path.join(self.directory, self.fullfilename)

    @property
    def targetdir(self):
        return os.path.join(self.directory, self.randomid)

    @property
    def fullfilename(self):
        if self.filename is None:
            raise Exception('no filename set')
        return os.path.join(self.randomid, self.filename)

    def save_file(self, content):
        os.makedirs(self.targetdir, exist_ok=True)
        with open(self.targetfile, 'wb+') as f:
            f.write(content)
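
# A minimal usage sketch for InfomentorFile (illustrative only; the
# directory name and payload below are placeholders):
#
#   f = InfomentorFile('files', 'report.pdf')
#   f.save_file(b'...')       # writes files/<uuid>/report.pdf
#   print(f.fullfilename)     # '<uuid>/report.pdf'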


class Infomentor(object):
    '''Basic object for handling infomentor site login and fetching of data'''

    BASE_IM1 = 'https://im1.infomentor.de/Germany/Germany/Production'
    BASE_MIM = 'https://mein.infomentor.de'

    def __init__(self, user, logger=None):
        '''Create infomentor object for username'''
        self.logger = logger or logging.getLogger(__name__)
        self._last_result = None
        self.user = user
        self._create_session()

    def _create_session(self):
        '''Create the session for handling all further requests'''
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})
        self._load_cookies()

    def _load_cookies(self):
        '''Set up the persistent cookie jar for the session'''
        os.makedirs('cookiejars', exist_ok=True)
        self.session.cookies = http.cookiejar.MozillaCookieJar(
            filename='cookiejars/{}.cookies'.format(self.user)
        )
        with contextlib.suppress(FileNotFoundError):
            self.session.cookies.load(ignore_discard=True, ignore_expires=True)

    def login(self, password):
        '''Login using the given password'''
        if self.logged_in(self.user):
            return True
        self._do_login(self.user, password)
        return self.logged_in(self.user)
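
    # Note: login() first consults the persisted cookie jar via logged_in(),
    # so the full OAuth handshake only runs when the session has expired.
    # A hedged sketch (credentials are placeholders):
    #
    #   im = Infomentor('myuser')
    #   im.login('secret')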

    def logged_in(self, username):
        '''Check if user is logged in (with cookies)'''
        ts = math.floor(time.time())
        # the '_' query parameter is a cache-busting timestamp in
        # milliseconds (seconds + '000')
        auth_check_url = (
            'authentication/authentication/'
            'isauthenticated/?_={}000'.format(ts)
        )
        url = self._mim_url(auth_check_url)
        r = self._do_post(url)
        self.logger.info('%s logged in: %s', username, r.text)
        return r.text.lower() == 'true'

    def _do_login(self, user, password):
        self._do_request_initial_token()
        self._perform_login(password)
        self._finalize_login()

    def _do_request_initial_token(self):
        '''Request initial oauth_token'''
        # Get the initial oauth token
        self._do_get(self._mim_url())
        self._oauth_token = self._get_auth_token()
        # This request is performed by the browser, the reason is unclear
        login_url = self._mim_url(
            'Authentication/Authentication/Login?ReturnUrl=%2F')
        self._do_get(login_url)

    def _get_auth_token(self):
        '''Read the oauth_token from the response text'''
        token_re = r'name="oauth_token" value="([^"]*)"'
        tokens = re.findall(token_re, self._last_result.text)
        if len(tokens) != 1:
            self.logger.error('OAUTH_TOKEN not found')
            raise Exception('Invalid count of tokens')
        return tokens[0]

    def _perform_login(self, password):
        self._do_post(
            self._im1_url('mentor/'),
            data={'oauth_token': self._oauth_token}
        )
        # Extract the hidden fields content
        payload = self._get_hidden_fields()
        # update with the missing and the login parameters
        payload.update({
            'login_ascx$txtNotandanafn': self.user,
            'login_ascx$txtLykilord': password,
            '__EVENTTARGET': 'login_ascx$btnLogin',
            '__EVENTARGUMENT': ''
        })
        # perform the login
        self._do_post(
            self._im1_url('mentor/'),
            data=payload,
            headers={
                'Referer': self._im1_url('mentor/'),
                'Content-Type': 'application/x-www-form-urlencoded'
            }
        )

    def _get_hidden_fields(self):
        hiddenfields = self._extract_hidden_fields()
        field_values = {}
        for f in hiddenfields:
            names = re.findall('name="([^"]*)"', f)
            if len(names) != 1:
                self.logger.error('Could not parse hidden field (fieldname)')
                continue
            values = re.findall('value="([^"]*)"', f)
            if len(values) != 1:
                self.logger.error('Could not parse hidden field (value)')
                continue
            field_values[names[0]] = values[0]
        return field_values

    def _extract_hidden_fields(self):
        hidden_re = '<input type="hidden"(.*?) />'
        hiddenfields = re.findall(hidden_re, self._last_result.text)
        return hiddenfields

    def _finalize_login(self):
        # Read the oauth token which is the final token for the login
        oauth_token = self._get_auth_token()
        # authenticate
        self._do_post(
            self._im1_url('mentor/'),
            data={'oauth_token': oauth_token}
        )
        self._do_get(self._mim_url())

    def _do_post(self, url, **kwargs):
        '''Post request for session'''
        self.logger.info('post to: %s', url)
        if 'data' in kwargs:
            self.logger.info('data: %s', json.dumps(kwargs['data'], indent=2))
        self._last_result = self.session.post(url, **kwargs)
        self.logger.info('result: %d', self._last_result.status_code)
        self._save_cookies()
        return self._last_result

    def _do_get(self, url, **kwargs):
        '''Get request for session'''
        self.logger.info('get: %s', url)
        self._last_result = self.session.get(url, **kwargs)
        self.logger.info('result: %d', self._last_result.status_code)
        self._save_cookies()
        if self._last_result.status_code != 200:
            raise Exception('Got response with code {}'.format(
                self._last_result.status_code
            ))
        return self._last_result

    def _save_cookies(self):
        '''Save cookies'''
        self.session.cookies.save(ignore_discard=True, ignore_expires=True)

    def download_file(self, url, filename=None, directory=None):
        '''Download a file with the given filename or the server-provided one'''
        self.logger.info('fetching download: %s', url)
        if filename is not None or directory is not None:
            return self._download_file(url, directory, filename)
        else:
            self.logger.error('fetching download requires filename or folder')
            raise Exception('Download Failed')

    def _get_filename_from_cd(self):
        '''Determine filename from headers or fall back to a random uuid'''
        cd = self._last_result.headers.get('content-disposition')
        if cd:
            filename_re = r'''
            .*                              # Anything
            (?:
                filename=(?P<native>.+)     # normal filename
                |
                filename\*=(?P<extended>.+) # extended filename
            )                               # The filename
            (?:$|;.*)                       # End or more
            '''
            fname = re.match(filename_re, cd, flags=re.VERBOSE)
            # guard against headers that carry no filename at all
            if fname is not None:
                filename = fname.group('native')
                if filename is not None and len(filename) != 0:
                    return filename
                filename = fname.group('extended')
                if filename is not None and len(filename) != 0:
                    encoding, string = filename.split("''")
                    return urllib.parse.unquote(string, encoding)
        filename = str(uuid.uuid4())
        self.logger.warning(
            'no filename detected in %s: using random filename %s',
            cd, filename)
        return filename
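
    # Illustration of the two Content-Disposition forms handled above
    # (hypothetical header values):
    #
    #   attachment; filename=report.pdf            -> 'report.pdf'
    #   attachment; filename*=UTF-8''b%C3%A4r.pdf  -> 'bär.pdf'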

    def _download_file(self, url, directory, filename=None):
        '''Download a file, determining the filename if not provided'''
        file = InfomentorFile(directory, filename)
        self.logger.info('to (randomized) directory %s', file.targetdir)
        url = self._mim_url(url)
        self._do_get(url)
        if filename is None:
            self.logger.info('determine filename from headers')
            filename = self._get_filename_from_cd()
            self.logger.info('determined filename: %s', filename)
            file.filename = filename
        self.logger.info('full filename: %s', file.fullfilename)
        file.save_file(self._last_result.content)
        return file.fullfilename

    def _build_url(self, path='', base=BASE_IM1):
        return '{}/{}'.format(base, path)

    def _mim_url(self, path=''):
        return self._build_url(path, base=self.BASE_MIM)

    def _im1_url(self, path=''):
        return self._build_url(path, base=self.BASE_IM1)

    def get_news_list(self):
        '''Fetch the list of current news article ids'''
        self.logger.info('fetching news')
        self._do_post(self._mim_url('News/news/GetArticleList'))
        news_json = self.get_json_return()
        return [str(i['id']) for i in news_json['items']]
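
    # Hedged sketch: list the current article ids, then fetch each one.
    # Assumes an authenticated session (see login()):
    #
    #   for news_id in im.get_news_list():
    #       article = im.get_article(news_id)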

    def parse_news(self, news_json):
        '''Fetch the full article for each entry of a news listing'''
        idlist = [str(i['id']) for i in news_json['items']]
        self.logger.info(
            'Parsing %d news (%s)', news_json['totalItems'], ', '.join(idlist))
        return [
            self.get_article(item['id'])
            for item in reversed(news_json['items'])
        ]

    def get_news_article(self, id):
        article_json = self.get_article(id)
        storenewsdata = {
            k: article_json[k] for k in ('title', 'content', 'date')
        }
        storenewsdata['news_id'] = article_json['id']
        storenewsdata['raw'] = json.dumps(article_json)
        storenewsdata['attachments'] = []
        for attachment in article_json['attachments']:
            self.logger.info('found attachment %s', attachment['title'])
            att_id = re.findall('Download/([0-9]+)?', attachment['url'])[0]
            f = self.download_file(attachment['url'], directory='files')
            try:
                storenewsdata['attachments'].append(model.Attachment(
                    attachment_id=att_id,
                    url=attachment['url'],
                    localpath=f,
                    title=attachment['title'],
                ))
            except Exception:
                self.logger.exception('failed to store attachment')
        news = model.News(**storenewsdata)
        with contextlib.suppress(Exception):
            news.imagefile = self.get_newsimage(id)
        return news

    def get_article(self, id):
        self.logger.info('fetching article: %s', id)
        self._do_post(
            self._mim_url('News/news/GetArticle'),
            data={'id': id}
        )
        return self.get_json_return()

    def get_newsimage(self, id):
        self.logger.info('fetching article image: %s', id)
        filename = '{}.image'.format(id)
        url = self._mim_url('News/NewsImage/GetImage?id={}'.format(id))
        return self.download_file(url, directory='images', filename=filename)

    def get_calendar(self, offset=0, weeks=1):
        self.logger.info('fetching calendar')
        data = self._get_week_dates(offset=offset, weeks=weeks)
        self._do_post(
            self._mim_url('Calendar/Calendar/getEntries'),
            data=data
        )
        return self.get_json_return()
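
    # Hedged sketch: fetch two weeks of calendar entries, then resolve one
    # of them (the 'id' field of an entry is an assumption here):
    #
    #   entries = im.get_calendar(offset=0, weeks=2)
    #   detail = im.get_event(entries[0]['id'])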

    def get_event(self, eventid):
        self.logger.info('fetching calendar entry')
        data = {'id': eventid}
        self._do_post(
            self._mim_url('Calendar/Calendar/getEntry'),
            data=data
        )
        return self.get_json_return()

    def get_homework(self, offset=0):
        self.logger.info('fetching homework')
        startofweek = self._get_start_of_week(offset)
        timestamp = startofweek.strftime('%Y-%m-%dT00:00:00.000Z')
        data = {
            'date': timestamp,
            'isWeek': True,
        }
        self._do_post(
            self._mim_url('Homework/homework/GetHomework'),
            data=data
        )
        return self.get_json_return()

    def get_homework_list(self):
        '''Collect homework ids over two weeks, caching the entries by id'''
        self._homework = {}
        homeworklist = []
        homework = []
        homework.extend(self.get_homework())
        homework.extend(self.get_homework(1))
        for dategroup in homework:
            for hw in dategroup['items']:
                if hw['id'] == 0:
                    continue
                self._homework[hw['id']] = hw
                homeworklist.append(hw['id'])
        return homeworklist
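
    # Hedged sketch: get_homework_list() caches the raw entries in
    # self._homework, so the ids it returns can be resolved afterwards:
    #
    #   for hw_id in im.get_homework_list():
    #       hw = im.get_homework_info(hw_id)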

    def get_homework_info(self, id):
        hw = self._homework[id]
        storehw = {
            k: hw[k] for k in ('subject', 'courseElement')
        }
        storehw['homework_id'] = hw['id']
        storehw['text'] = hw['homeworkText']
        storehw['attachments'] = []
        for attachment in hw['attachments']:
            self.logger.info('found attachment %s', attachment['title'])
            att_id = re.findall('Download/([0-9]+)?', attachment['url'])[0]
            f = self.download_file(attachment['url'], directory='files')
            try:
                storehw['attachments'].append(model.Attachment(
                    attachment_id=att_id,
                    url=attachment['url'],
                    localpath=f,
                    title=attachment['title'],
                ))
            except Exception:
                self.logger.exception('failed to store attachment')
        hw = model.Homework(**storehw)
        return hw

    def get_timetable(self, offset=0):
        self.logger.info('fetching timetable')
        data = self._get_week_dates(offset)
        self._do_post(
            self._mim_url('timetable/timetable/gettimetablelist'),
            data=data
        )
        return self.get_json_return()

    def get_json_return(self):
        try:
            return self._last_result.json()
        except json.JSONDecodeError:
            self.logger.exception('JSON could not be decoded')
            self.logger.info('status code: %d', self._last_result.status_code)
            self.logger.info('response was: %s', self._last_result.text)
            raise

    def _get_week_dates(self, offset=0, weeks=1):
        weekoffset = datetime.timedelta(days=7 * offset)
        startofweek = self._get_start_of_week()
        endofweek = startofweek + datetime.timedelta(days=5 + 7 * (weeks - 1))
        startofweek += weekoffset
        endofweek += weekoffset
        now = datetime.datetime.now()
        utctime = datetime.datetime.utcnow()
        # local-to-UTC offset in minutes; computed from the full timestamps
        # so it stays correct when local and UTC fall on different days
        utcoffset = round((now - utctime).total_seconds() / 60)
        data = {
            'UTCOffset': utcoffset,
            'start': startofweek.strftime('%Y-%m-%d'),
            'end': endofweek.strftime('%Y-%m-%d'),
        }
        return data
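
    # Worked example (dates assumed): with today = Wednesday 2020-02-12,
    # _get_week_dates(offset=1) yields start='2020-02-17' (a Monday) and
    # end='2020-02-22' (the following Saturday).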

    def _get_start_of_week(self, offset=0):
        now = datetime.datetime.now()
        dayofweek = now.weekday()
        startofweek = now - datetime.timedelta(days=dayofweek)
        startofweek -= datetime.timedelta(days=offset * 7)
        return startofweek
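

# Hedged end-to-end sketch: log in and print the current news titles.
# 'myuser' and 'secret' are placeholders; a live account and network
# access are required, so this is illustrative rather than a test.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    im = Infomentor('myuser')
    if im.login('secret'):
        for news_id in im.get_news_list():
            article = im.get_article(news_id)
            print(article['title'])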