parse.py
3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
import sys
import argparse
import datetime
import urllib.request
from icalendar import Calendar
from Edt import *
# Command-line interface: which timetable page to fetch and where to write
# the resulting ICS data.
parser = argparse.ArgumentParser(
    description="Convertit l'emploi du temps IMA3 en ICS",
)
parser.add_argument(
    'edt',
    metavar='EDT',
    type=str,
    help="la page pointant vers l'emploi du temps concerné",
)
parser.add_argument(
    '-o',
    '--output',
    dest='file',
    type=str,
    default='-',
    help='fichier de sortie, - pour stdout',
)
args = parser.parse_args()

# Download the requested page and decode its bytes as ISO-8859-15 text.
pageUrl = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
with urllib.request.urlopen(pageUrl) as handle:
    htmlStr = handle.read().decode('iso-8859-15')
# Read HTML tables
parser = TableHTMLParser()
parser.feed(htmlStr)

# Expand each parsed table into a rectangular grid in which a cell covering
# several columns and/or rows is duplicated into every position it covers.
# Each parsed cell is indexed as cell[0] (content), cell[1] (column span) and
# cell[2] (row span) -- presumably the layout produced by TableHTMLParser;
# confirm against Edt.
tables = []
for parserTable in parser.tables:
    # Real dimensions: the spans of the first row add up to the width, the
    # row spans of each row's first cell add up to the height.
    width = sum(cell[1] for cell in parserTable[0])
    height = sum(row[0][2] for row in parserTable)

    # False marks a position that no cell has been copied into yet.
    grid = [[False] * width for _ in range(height)]

    for rowIndex, row in enumerate(parserTable):
        column = 0
        for cell in row:
            # Move right past positions that already hold a string (filled
            # by a previous cell of this row or by a span from a row above).
            while isinstance(grid[rowIndex][column], str):
                column += 1
            # Copy the content over the whole area the cell spans.
            for targetRow in range(rowIndex, rowIndex + cell[2]):
                for targetColumn in range(column, column + cell[1]):
                    grid[targetRow][targetColumn] = cell[0]
    tables.append(grid)
# Events indexed by date; each date maps to a list with one Event per slot.
days = {}

# Table 1: a row whose TABLE_1_DATE_X column holds a date describes
# DAYS_PER_WEEK consecutive days starting at that date. Each day owns
# len(SLOTS) adjacent columns, the first one being TABLE_1_FIRST_SLOT_X.
slotCount = len(SLOTS)
for row in tables[0]:
    try:
        weekStart = datetime.datetime.strptime(row[TABLE_1_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # Not a date (header text or unfilled cell): nothing to read here.
        continue

    for dayOffset in range(DAYS_PER_WEEK):
        date = weekStart + datetime.timedelta(days=dayOffset)
        if date not in days:
            days[date] = [Event() for _ in range(slotCount)]
        # Column of this day's first slot within the row.
        firstColumn = dayOffset * slotCount + TABLE_1_FIRST_SLOT_X
        for slot in range(slotCount):
            days[date][slot].feedShortText(row[firstColumn + slot])
# Table 2: a row whose TABLE_2_DATE_X column holds a date describes a single
# day; its len(SLOTS) slot columns start at TABLE_2_FIRST_SLOT_X.
slotCount = len(SLOTS)
for row in tables[1]:
    try:
        date = datetime.datetime.strptime(row[TABLE_2_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # Not a date, no data to grab here. ValueError covers text that does
        # not match the format; TypeError covers a non-string cell such as
        # the False placeholder left in grid positions no cell was copied to
        # (same handling as for table 1).
        continue

    if date not in days:
        days[date] = [Event() for _ in range(slotCount)]
    for slot in range(slotCount):
        days[date][slot].feedLongText(row[slot + TABLE_2_FIRST_SLOT_X])
# Tell every event which date and slot index it belongs to, close its feed,
# and flatten the per-day lists into a single list of events.
events = []
for day, dayEvents in days.items():
    # Each per-day list holds one Event per slot, in slot order.
    for slot, event in enumerate(dayEvents):
        event.feedDate(day)
        event.feedSlot(slot)
        event.endFeed()
        events.append(event)
# Creating calendar
cal = Calendar()
# PRODID and VERSION are the two calendar properties RFC 5545 requires.
# The property name must be spelled 'prodid'; the former 'proid' produced an
# unknown property and left the calendar without a product identifier.
cal.add('prodid', '-//Cours Polytech//mxm.dk//')
cal.add('version', '2.0')

# Only events flagged as active are exported; each one is also echoed on
# stderr so that stdout stays free for the ICS data.
for event in events:
    if event.active:
        print(event, file=sys.stderr)
        cal.add_component(event.getEvent())
# Writing calendar to the requested destination ('-' means stdout).
# to_ical() returns the serialized calendar as bytes, so both destinations
# are written in binary mode: this keeps the output independent of the
# locale's stdout encoding and of any newline translation done by the text
# layer.
data = cal.to_ical()
if args.file == '-':
    sys.stdout.buffer.write(data)
else:
    with open(args.file, 'wb') as f:
        f.write(data)