From 3e3f72926f234431337f8812ec3a5728ce026436 Mon Sep 17 00:00:00 2001 From: Geoffrey Frogeye Date: Tue, 18 Oct 2016 21:53:08 +0200 Subject: [PATCH] IMA 4 support --- Edt.py | 162 ------------------------------------------------------------------------------------------------------------------------------------------------------------------ parse.py | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 186 insertions(+), 168 deletions(-) delete mode 100644 Edt.py diff --git a/Edt.py b/Edt.py deleted file mode 100644 index 129bf79..0000000 --- a/Edt.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -Class and variables needed for Edt manipulation & parsing -""" - -from html.parser import HTMLParser -from icalendar import Event as CalEvent -import datetime - -DAYS_PER_WEEK = 6 -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))] - -TABLE_1_DATE_X = 1 -TABLE_1_FIRST_SLOT_X = 2 - -TABLE_2_DATE_X = 0 -TABLE_2_FIRST_SLOT_X = 1 - -class TableHTMLParser(HTMLParser): - tables = [] # Tables - table = False # Current table content - line = False # Current line content - cell = False # Current cell content - cellx = 1 - celly = 1 - - # Logic - def iscell(self): - """ - Return if we are currently in a cell - """ - return isinstance(self.cell, str) - - def isline(self): - """ - Return if we are currently in a line - """ - return isinstance(self.line, list) - - def istable(self): - """ - Return if we are currently in a table - """ - return isinstance(self.table, list) - - # Actions - def endcell(self): - if self.iscell(): - self.line.append((self.cell.strip(), self.cellx, self.celly)) - self.cell = False - - def endline(self): - self.endcell() - if self.isline(): - self.table.append(self.line.copy()) - self.line = False - - def endtable(self): - self.endline() - if self.istable(): - self.tables.append(self.table.copy()) - self.table = False - - # Inheritance - def handle_starttag(self, tag, attrs): - #print("Encountered a start tag:", tag) - if tag == 'table': - self.table = [] - elif tag == 'tr': - self.endline() - self.line = [] - elif tag == 'td': - self.endcell() - self.cell = '' - self.cellx = 1 - self.celly = 1 - for attr in attrs: - if attr[0] == 'colspan': - self.cellx = int(attr[1]) - elif attr[0] == 'rowspan': - self.celly = int(attr[1]) - - def handle_endtag(self, tag): - #print("Encountered an end tag :", tag) - if tag == 'table': - self.endtable() - elif tag == 'tr': - self.endline() - elif tag == 'td': - self.endcell() - - def handle_data(self, data): - #print("Encountered some data :", data) - if self.iscell(): - self.cell += data - -# TODO Allow class customisation - -class Event: - # Mined data - shortText = '' - longText = '' - date = False - slot = 0 - - # Generated data - shortName = '' - longName = '' - location = '' - startTime = False - endTime = False - active = False - - def feedShortText(self, shortText): - self.shortText = shortText - - def feedLongText(self, longText): - self.longText = longText - - def feedSlot(self, slot): - self.slot = slot - - def feedDate(self, date): - self.date = date - - def endFeed(self): - self.shortName = self.shortText - self.longName = self.longText - - if self.longName: - self.active = True - - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC': - self.active = False - - if self.date and isinstance(self.slot, int): - h, m = SLOTS[self.slot][0] - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) - h, m = SLOTS[self.slot][1] - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) - - if self.longName: - e = self.longName.split('(') - if len(e) >= 2: - f = e[1].split(')') - self.longName = e[0].strip() - self.location = f[0].strip() - - - def __str__(self): - if self.active: - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') - else: - return 'Inactive event' - - def getEvent(self): - e = CalEvent() - e.add('summary', self.shortName) - e.add('description', self.longName) - e.add('dtstart', self.startTime) - e.add('dtend', self.endTime) - e.add('location', self.location) - return e diff --git a/parse.py b/parse.py index 82b5e0f..aef7c34 100755 --- a/parse.py +++ b/parse.py @@ -5,15 +5,189 @@ import argparse import datetime import urllib.request from icalendar import Calendar -from Edt import * +from html.parser import HTMLParser +from icalendar import Calendar, Event as CalEvent # Parse command line arguments -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS') +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS') +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)') parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné') parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout') args = parser.parse_args() -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle: +if args.annee == 3: + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html' + SLOTS = [(( 8, 0), (10, 0)), + ((10, 20), (12, 20)), + ((13, 50), (15, 50)), + ((16, 10), (18, 10))] + DATE_FORMAT = '%d/%m/%Y' +elif args.annee == 4: + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html' + SLOTS = [(( 8, 0), ( 9, 0)), + (( 9, 10), (10, 10)), + ((10, 20), (11, 20)), + ((11, 30), (12, 30)), + ((13, 50), (14, 50)), + ((15, 00), (16, 00)), + ((16, 10), (17, 10)), + ((17, 20), (18, 20))] + DATE_FORMAT = '%d/%m/%y' +else: + raise ValueError('Année inconnue : ' + annee) + +DAYS_PER_WEEK = 6 + +TABLE_1_DATE_X = 1 +TABLE_1_FIRST_SLOT_X = 2 + +TABLE_2_DATE_X = 0 +TABLE_2_FIRST_SLOT_X = 1 + +class TableHTMLParser(HTMLParser): + tables = [] # Tables + table = False # Current table content + line = False # Current line content + cell = False # Current cell content + cellx = 1 + celly = 1 + + # Logic + def iscell(self): + """ + Return if we are currently in a cell + """ + return isinstance(self.cell, str) + + def isline(self): + """ + Return if we are currently in a line + """ + return isinstance(self.line, list) + + def istable(self): + """ + Return if we are currently in a table + """ + return isinstance(self.table, list) + + # Actions + def endcell(self): + if self.iscell(): + self.line.append((self.cell.strip(), self.cellx, self.celly)) + self.cell = False + + def endline(self): + self.endcell() + if self.isline(): + self.table.append(self.line.copy()) + self.line = False + + def endtable(self): + self.endline() + if self.istable(): + self.tables.append(self.table.copy()) + self.table = False + + # Inheritance + def handle_starttag(self, tag, attrs): + #print("Encountered a start tag:", tag) + if tag == 'table': + self.table = [] + elif tag == 'tr': + self.endline() + self.line = [] + elif tag == 'td': + self.endcell() + self.cell = '' + self.cellx = 1 + self.celly = 1 + for attr in attrs: + if attr[0] == 'colspan': + self.cellx = int(attr[1]) + elif attr[0] == 'rowspan': + self.celly = int(attr[1]) + + def handle_endtag(self, tag): + #print("Encountered an end tag :", tag) + if tag == 'table': + self.endtable() + elif tag == 'tr': + self.endline() + elif tag == 'td': + self.endcell() + + def handle_data(self, data): + #print("Encountered some data :", data) + if self.iscell(): + self.cell += data + +# TODO Do something that really is OOP or do not... + +class Event: + # Mined data + shortText = '' + longText = '' + date = False + slot = 0 + + # Generated data + shortName = '' + longName = '' + location = '' + startTime = False + endTime = False + active = False + + def feedShortText(self, shortText): + self.shortText = shortText + + def feedLongText(self, longText): + self.longText = longText + + def feedSlot(self, slot): + self.slot = slot + + def feedDate(self, date): + self.date = date + + def endFeed(self): + self.shortName = self.shortText + self.longName = self.longText + + if self.longName: + self.active = True + + if self.date and isinstance(self.slot, int): + h, m = SLOTS[self.slot][0] + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) + h, m = SLOTS[self.slot][1] + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) + + if self.longName: + e = self.longName.split('(') + if len(e) >= 2: + f = e[1].split(')') + self.longName = e[0].strip() + self.location = f[0].strip() + + + def __str__(self): + if self.active: + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') + else: + return 'Inactive event' + + def getEvent(self): + e = CalEvent() + e.add('summary', self.shortName) + e.add('description', self.longName) + e.add('dtstart', self.startTime) + e.add('dtend', self.endTime) + e.add('location', self.location) + return e + +with urllib.request.urlopen(url) as handle: htmlStr = handle.read().decode('iso-8859-15') # Read HTML tables @@ -62,11 +236,12 @@ days = dict() # Parsing table 1 for line in tables[0]: try: - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y') + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT) except (ValueError, TypeError): # This is not a date, no data to grab here continue + print(line) for day in range(DAYS_PER_WEEK): date = day1date + datetime.timedelta(days=day) @@ -74,13 +249,18 @@ for line in tables[0]: days[date] = [Event() for s in range(len(SLOTS))] for slot in range(len(SLOTS)): - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]) + try: + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X] + except IndexError: + # Out of the table: saturday afternoon + break + days[date][slot].feedShortText(cell) continue # Parsing table 2 for line in tables[1]: try: - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y') + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT) except ValueError: # This is not a date, no data to grab here continue -- libgit2 0.21.2