parse.py 3.23 KB
#!/usr/bin/env python3

import sys
import argparse
import datetime
import urllib.request
from icalendar import Calendar
from Edt import *

# Command-line interface: one positional page name (EDT) and an optional
# output path that defaults to '-' (stdout)
arg_parser = argparse.ArgumentParser(
    description='Convertit l\'emploi du temps IMA3 en ICS',
)
arg_parser.add_argument(
    'edt',
    metavar='EDT',
    type=str,
    help='la page pointant vers l\'emploi du temps concerné',
)
arg_parser.add_argument(
    '-o', '--output',
    dest='file',
    type=str,
    default='-',
    help='fichier de sortie, - pour stdout',
)
args = arg_parser.parse_args()

# Download the timetable page: the name given on the command line is placed
# between the site root and the '.html' suffix
page_url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
with urllib.request.urlopen(page_url) as response:
    raw_page = response.read()
html_text = raw_page.decode('iso-8859-15')

# Run the decoded page through the HTML table parser
parser = TableHTMLParser()
parser.feed(html_text)

# Duplicate cells that use colspan/rowspan so that every parsed table
# becomes a plain rectangular grid with one entry per position
tables = []

for parsed_table in parser.tables:
    # Grid width: sum of the column spans (cell[1]) of the first row
    width = sum(cell[1] for cell in parsed_table[0])
    # Grid height: sum of the row spans (cell[2]) of each row's first cell
    height = sum(row[0][2] for row in parsed_table)

    # Every position starts as False, i.e. "nothing copied here yet"
    grid = [[False] * width for _ in range(height)]

    # Copy each parsed cell into all the positions its spans cover
    for row_index, row in enumerate(parsed_table):
        column = 0
        for cell in row:
            # Move right past positions that already hold text
            while isinstance(grid[row_index][column], str):
                column += 1

            # cell[0] is the value, repeated cell[2] rows down and
            # cell[1] columns across
            for covered_row in range(row_index, row_index + cell[2]):
                for covered_column in range(column, column + cell[1]):
                    grid[covered_row][covered_column] = cell[0]

    tables.append(grid)

# Events are grouped per date: days[date] is a list with one Event per slot
days = {}

# Table 1: each dated row covers DAYS_PER_WEEK consecutive days starting at
# that date, with len(SLOTS) columns per day
for row in tables[0]:
    try:
        first_day = datetime.datetime.strptime(row[TABLE_1_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # The date column does not hold a date: nothing to read on this row
        continue

    slot_count = len(SLOTS)
    for day_offset in range(DAYS_PER_WEEK):
        current_date = first_day + datetime.timedelta(days=day_offset)

        if current_date not in days:
            days[current_date] = [Event() for _ in range(slot_count)]

        # Column of this day's first slot inside the row
        first_column = TABLE_1_FIRST_SLOT_X + day_offset * slot_count
        day_events = days[current_date]
        for slot_index in range(slot_count):
            day_events[slot_index].feedShortText(row[first_column + slot_index])

# Parsing table 2: each dated row carries the long text of the slots of
# that single date
for line in tables[1]:
    try:
        date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # This is not a date, no data to grab here: either some other text
        # (ValueError) or a grid position that was never filled and still
        # holds False (TypeError), exactly as handled for table 1
        continue

    if date not in days:
        days[date] = [Event() for _ in range(len(SLOTS))]

    # Each per-date list holds exactly len(SLOTS) events, so the index given
    # by enumerate is the slot number
    for slot, event in enumerate(days[date]):
        event.feedLongText(line[slot + TABLE_2_FIRST_SLOT_X])

# Give every event its date and slot index, close its feed, and gather all
# of them in one flat list
events = []
for day, day_events in days.items():
    # Each per-date list was built with one Event per slot, so the position
    # in the list is the slot index
    for slot, event in enumerate(day_events):
        event.feedDate(day)
        event.feedSlot(slot)
        event.endFeed()
        events.append(event)

# Creating calendar
cal = Calendar()
# PRODID (product identifier) and VERSION are the two properties iCalendar
# requires on a calendar object; the property name must be spelled 'prodid'
cal.add('prodid', '-//Cours Polytech//mxm.dk//')
cal.add('version', '2.0')

# Only events flagged as active are exported; each one is also echoed on
# stderr so it never mixes with calendar data written to stdout
for event in events:
    if event.active:
        print(event, file=sys.stderr)
        cal.add_component(event.getEvent())

# Writing calendar to file ('-' means standard output)
data = cal.to_ical()
if args.file == '-':
    # data is already encoded bytes: write it through the binary layer of
    # stdout so that neither the locale encoding nor newline translation can
    # alter it or fail on non-ASCII characters
    sys.stdout.buffer.write(data)
    sys.stdout.buffer.flush()
else:
    with open(args.file, 'wb') as f:
        f.write(data)