#!/usr/bin/env python3
"""Convert a Polytech Lille IMA timetable web page into an iCalendar file.

The page is expected to contain two HTML tables: the first one is a weekly
grid holding the short name of each course, the second one has one row per
day holding the long name (and location, between parentheses) of each course.
"""

import sys
import argparse
import datetime
import urllib.request
from html.parser import HTMLParser
from icalendar import vDatetime, Calendar, Event as CalEvent

# Parse command line arguments
parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS')
parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)')
parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné')
parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout')
args = parser.parse_args()

# Per-year configuration: page URL, time slots as ((begin hour, begin minute),
# (end hour, end minute)) pairs, and the date format used in the tables.
if args.annee == 3:
    url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
    SLOTS = [
        ((8, 0), (10, 0)),
        ((10, 20), (12, 20)),
        ((13, 50), (15, 50)),
        ((16, 10), (18, 10)),
    ]
    DATE_FORMAT = '%d/%m/%Y'
elif args.annee == 4:
    url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html'
    SLOTS = [
        ((8, 0), (9, 0)),
        ((9, 10), (10, 10)),
        ((10, 20), (11, 20)),
        ((11, 30), (12, 30)),
        ((13, 50), (14, 50)),
        ((15, 0), (16, 0)),
        ((16, 10), (17, 10)),
        ((17, 20), (18, 20)),
    ]
    DATE_FORMAT = '%d/%m/%y'
else:
    # The year is an int: convert it before building the message.
    raise ValueError('Année inconnue : ' + str(args.annee))

DAYS_PER_WEEK = 6
# Column indexes of the date and of the first slot in each expanded table
TABLE_1_DATE_X = 1
TABLE_1_FIRST_SLOT_X = 2
TABLE_2_DATE_X = 0
TABLE_2_FIRST_SLOT_X = 1


class TableHTMLParser(HTMLParser):
    """Collect the content of every <table> of a document.

    After feeding, ``tables`` is a list of tables; each table is a list of
    lines and each line a list of ``(text, colspan, rowspan)`` tuples.
    ``table``, ``line`` and ``cell`` hold the element being built, or False
    when the parser is not inside such an element.
    """

    table = False  # Current table content
    line = False  # Current line content
    cell = False  # Current cell content
    cellx = 1  # colspan of the current cell
    celly = 1  # rowspan of the current cell

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Per-instance list: a class-level list would be shared by all parsers
        self.tables = []

    # Logic

    def iscell(self):
        """ Return if we are currently in a cell """
        return isinstance(self.cell, str)

    def isline(self):
        """ Return if we are currently in a line """
        return isinstance(self.line, list)

    def istable(self):
        """ Return if we are currently in a table """
        return isinstance(self.table, list)

    # Actions

    def endcell(self):
        """ Store the current cell, if any, in the current line """
        if self.iscell():
            self.line.append((self.cell.strip(), self.cellx, self.celly))
            self.cell = False

    def endline(self):
        """ Store the current line, if any, in the current table """
        self.endcell()
        if self.isline():
            self.table.append(self.line.copy())
            self.line = False

    def endtable(self):
        """ Store the current table, if any, in the list of tables """
        self.endline()
        if self.istable():
            self.tables.append(self.table.copy())
            self.table = False

    # Inheritance

    def handle_starttag(self, tag, attrs):
        if tag == 'table':
            self.table = []
        elif tag == 'tr':
            # Closing tags may be omitted: end the previous line first
            self.endline()
            self.line = []
        elif tag == 'td':
            self.endcell()
            self.cell = ''
            self.cellx = 1
            self.celly = 1
            for name, value in attrs:
                if name == 'colspan':
                    self.cellx = int(value)
                elif name == 'rowspan':
                    self.celly = int(value)

    def handle_endtag(self, tag):
        if tag == 'table':
            self.endtable()
        elif tag == 'tr':
            self.endline()
        elif tag == 'td':
            self.endcell()

    def handle_data(self, data):
        if self.iscell():
            self.cell += data


# TODO Use HTTP header date
UPDATE_TIME = datetime.datetime.now()


# TODO Do something that really is OOP or do not...
class Event:
    """A course, first fed with raw table data, then finalised by endFeed().

    Reads the module-level SLOTS, args and UPDATE_TIME.
    """

    # Mined data
    shortText = ''
    longText = ''
    date = False
    begSlot = 0
    endSlot = 0

    # Generated data
    shortName = ''
    longName = ''
    location = ''
    startTime = False
    endTime = False
    active = False

    def feedShortText(self, shortText):
        self.shortText = shortText

    def feedLongText(self, longText):
        self.longText = longText

    def feedBegSlot(self, slot):
        self.begSlot = slot

    def feedEndSlot(self, slot):
        self.endSlot = slot

    def feedDate(self, date):
        self.date = date

    def endFeed(self):
        """ Compute the generated data from the mined data """
        # Unfilled table cells are False: normalise them to empty strings
        self.shortName = self.shortText or ''
        self.longName = self.longText or ''
        # Only events with a long text end up in the calendar
        if self.longName:
            self.active = True
        if self.date and isinstance(self.begSlot, int) and isinstance(self.endSlot, int):
            h, m = SLOTS[self.begSlot][0]
            self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
            h, m = SLOTS[self.endSlot][1]
            self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
        if self.longName:
            # 'Name (Location)' -> name and location
            pieces = self.longName.split('(')
            if len(pieces) >= 2:
                self.longName = pieces[0].strip()
                self.location = pieces[1].split(')')[0].strip()

    def __str__(self):
        if not self.active:
            return 'Inactive event'
        text = self.shortName + ' [' + self.longName + '] '
        if self.startTime:
            text += str(self.startTime) + ' - ' + str(self.endTime) + ' '
        if self.location:
            text += '@ ' + self.location
        return text

    def getEvent(self):
        """ Return the icalendar event built from this event """
        e = CalEvent()
        e.add('uid', '-'.join([
            'polytech',
            'ima' + str(args.annee),
            args.edt,
            vDatetime(self.startTime).to_ical().decode(),
            vDatetime(self.endTime).to_ical().decode(),
            self.shortName
        ]))
        e.add('summary', self.shortName)
        e.add('description', self.longName)
        e.add('dtstart', self.startTime)
        e.add('dtend', self.endTime)
        e.add('location', self.location)
        e.add('last-modified', UPDATE_TIME)
        e.add('dtstamp', UPDATE_TIME)
        return e


def expandSpans(parserTable):
    """Return parserTable as a rectangular grid of cell texts.

    Cells with a colspan or rowspan are duplicated over every position they
    cover. Positions that no cell covers are left to False. The width is
    taken from the first line and the height from the rowspan of the first
    cell of each line. An empty table gives an empty grid.
    """
    if not parserTable or not parserTable[0]:
        return []
    # Figuring out dimensions
    width = sum(cell[1] for cell in parserTable[0])
    height = sum(line[0][2] if line else 1 for line in parserTable)
    # Constructing table with real dimensions
    grid = [[False] * width for _ in range(height)]
    # Filling table with parsed table
    for y, line in enumerate(parserTable):
        x = 0
        for text, colspan, rowspan in line:
            # Offsetting to the right while the position is already taken
            while isinstance(grid[y][x], str):
                x += 1
            # Copying values
            for y2 in range(y, y + rowspan):
                for x2 in range(x, x + colspan):
                    grid[y2][x2] = text
    return grid


with urllib.request.urlopen(url) as handle:
    htmlStr = handle.read().decode('iso-8859-15')

# Read HTML tables
htmlParser = TableHTMLParser()
htmlParser.feed(htmlStr)

# Duplicate cells with colspan & rowspan
tables = [expandSpans(parserTable) for parserTable in htmlParser.tables]
if len(tables) < 2:
    sys.exit('Impossible de trouver les deux tableaux de l\'emploi du temps dans la page')

# Creating events
days = dict()

# Parsing table 1
for line in tables[0]:
    try:
        day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT)
    except (ValueError, TypeError, IndexError):
        # This is not a date, no data to grab here
        continue
    for day in range(DAYS_PER_WEEK):
        date = day1date + datetime.timedelta(days=day)
        if date not in days:
            days[date] = [Event() for s in range(len(SLOTS))]
        for slot in range(len(SLOTS)):
            try:
                cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]
            except IndexError:
                # Out of the table: saturday afternoon
                break
            days[date][slot].feedShortText(cell)

# Parsing table 2
for line in tables[1]:
    try:
        date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT)
    except (ValueError, TypeError, IndexError):
        # This is not a date (possibly an unfilled cell), no data to grab here
        continue
    if date not in days:
        days[date] = [Event() for s in range(len(SLOTS))]
    for slot in range(len(SLOTS)):
        try:
            cell = line[slot + TABLE_2_FIRST_SLOT_X]
        except IndexError:
            # Out of the table
            break
        days[date][slot].feedLongText(cell)

# Feeding back time and slot to events: consecutive slots of a day sharing
# the same long text are merged into a single event
events = []
for day, dayEvents in days.items():
    current = None
    for slot, event in enumerate(dayEvents):
        if current is not None and current.longText == event.longText:
            current.feedEndSlot(slot)
            continue
        if current is not None:
            current.endFeed()
            events.append(current)
        event.feedDate(day)
        event.feedBegSlot(slot)
        event.feedEndSlot(slot)
        current = event
    if current is not None:
        current.endFeed()
        events.append(current)

# Creating calendar
cal = Calendar()
cal.add('prodid', '-//geoffrey.frogeye.fr//NONSGML Icalendar Calendar//EN')
cal.add('version', '2.0')
cal.add('calscale', 'GREGORIAN')
cal.add('x-wr-calname', 'Polytech IMA ' + str(args.annee) + ' ' + args.edt)

for event in events:
    if event.active:
        print(event, file=sys.stderr)
        cal.add_component(event.getEvent())

# Writing calendar to file
data = cal.to_ical()
if args.file == '-':
    # Write the bytes as they are: no dependency on the terminal encoding
    # and no newline translation
    sys.stdout.buffer.write(data)
    sys.stdout.buffer.flush()
else:
    with open(args.file, 'wb') as f:
        f.write(data)