Commit 99b1d1357a6205398d3edb41475a88b4b32c9bae
0 parents
Initial commit
Showing
5 changed files
with
375 additions
and
0 deletions
Show diff stats
1 | +++ a/.gitignore | ||
@@ -0,0 +1,94 @@ | @@ -0,0 +1,94 @@ | ||
1 | +# Byte-compiled / optimized / DLL files | ||
2 | +__pycache__/ | ||
3 | +*.py[cod] | ||
4 | +*$py.class | ||
5 | + | ||
6 | +# C extensions | ||
7 | +*.so | ||
8 | + | ||
9 | +# Distribution / packaging | ||
10 | +.Python | ||
11 | +env/ | ||
12 | +build/ | ||
13 | +develop-eggs/ | ||
14 | +dist/ | ||
15 | +downloads/ | ||
16 | +eggs/ | ||
17 | +.eggs/ | ||
18 | +lib/ | ||
19 | +lib64/ | ||
20 | +parts/ | ||
21 | +sdist/ | ||
22 | +var/ | ||
23 | +*.egg-info/ | ||
24 | +.installed.cfg | ||
25 | +*.egg | ||
26 | + | ||
27 | +# PyInstaller | ||
28 | +# Usually these files are written by a python script from a template | ||
29 | +# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
30 | +*.manifest | ||
31 | +*.spec | ||
32 | + | ||
33 | +# Installer logs | ||
34 | +pip-log.txt | ||
35 | +pip-delete-this-directory.txt | ||
36 | + | ||
37 | +# Unit test / coverage reports | ||
38 | +htmlcov/ | ||
39 | +.tox/ | ||
40 | +.coverage | ||
41 | +.coverage.* | ||
42 | +.cache | ||
43 | +nosetests.xml | ||
44 | +coverage.xml | ||
45 | +*,cover | ||
46 | +.hypothesis/ | ||
47 | + | ||
48 | +# Translations | ||
49 | +*.mo | ||
50 | +*.pot | ||
51 | + | ||
52 | +# Django stuff: | ||
53 | +*.log | ||
54 | +local_settings.py | ||
55 | + | ||
56 | +# Flask stuff: | ||
57 | +instance/ | ||
58 | +.webassets-cache | ||
59 | + | ||
60 | +# Scrapy stuff: | ||
61 | +.scrapy | ||
62 | + | ||
63 | +# Sphinx documentation | ||
64 | +docs/_build/ | ||
65 | + | ||
66 | +# PyBuilder | ||
67 | +target/ | ||
68 | + | ||
69 | +# IPython Notebook | ||
70 | +.ipynb_checkpoints | ||
71 | + | ||
72 | +# pyenv | ||
73 | +.python-version | ||
74 | + | ||
75 | +# celery beat schedule file | ||
76 | +celerybeat-schedule | ||
77 | + | ||
78 | +# dotenv | ||
79 | +.env | ||
80 | + | ||
81 | +# virtualenv | ||
82 | +.venv/ | ||
83 | +venv/ | ||
84 | +ENV/ | ||
85 | + | ||
86 | +# Spyder project settings | ||
87 | +.spyderproject | ||
88 | + | ||
89 | +# Rope project settings | ||
90 | +.ropeproject | ||
91 | + | ||
92 | +# Custom | ||
93 | +*.html | ||
94 | +*.ics |
1 | +++ a/Edt.py | ||
@@ -0,0 +1,162 @@ | @@ -0,0 +1,162 @@ | ||
1 | +""" | ||
2 | +Class and variables needed for Edt manipulation & parsing | ||
3 | +""" | ||
4 | + | ||
5 | +from html.parser import HTMLParser | ||
6 | +from icalendar import Event as CalEvent | ||
7 | +import datetime | ||
8 | + | ||
9 | +DAYS_PER_WEEK = 6 | ||
10 | +SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))] | ||
11 | + | ||
12 | +TABLE_1_DATE_X = 1 | ||
13 | +TABLE_1_FIRST_SLOT_X = 2 | ||
14 | + | ||
15 | +TABLE_2_DATE_X = 0 | ||
16 | +TABLE_2_FIRST_SLOT_X = 1 | ||
17 | + | ||
class TableHTMLParser(HTMLParser):
    """HTML parser that records the cells of every ``<table>`` it is fed.

    After ``feed()``, ``tables`` holds one entry per closed table.  A table is
    a list of lines (``<tr>``), a line is a list of cells (``<td>``) and a
    cell is a ``(text, colspan, rowspan)`` tuple whose text has been stripped.
    Only the ``table``, ``tr`` and ``td`` tags are interpreted; everything
    else is ignored.

    ``table``, ``line`` and ``cell`` are ``False`` while the parser is outside
    the corresponding element, and a list / list / str while inside it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # State is per instance: a class-level list would be shared (and
        # appended to) by every parser ever created.
        self.tables = []    # Tables
        self.table = False  # Current table content
        self.line = False   # Current line content
        self.cell = False   # Current cell content
        self.cellx = 1      # colspan of the current cell
        self.celly = 1      # rowspan of the current cell

    # Logic
    def iscell(self):
        """
        Return if we are currently in a cell
        """
        return isinstance(self.cell, str)

    def isline(self):
        """
        Return if we are currently in a line
        """
        return isinstance(self.line, list)

    def istable(self):
        """
        Return if we are currently in a table
        """
        return isinstance(self.table, list)

    @staticmethod
    def _span(value):
        """
        Return a colspan/rowspan attribute value as an int, or 1 when the
        attribute has no value or is not a number
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1

    # Actions
    def endcell(self):
        """
        Close the current cell, if any, and store it in the current line
        """
        if self.iscell():
            # A <td> met outside any <tr> has nowhere to go: drop it
            if self.isline():
                self.line.append((self.cell.strip(), self.cellx, self.celly))
            self.cell = False

    def endline(self):
        """
        Close the current line (and its pending cell), if any, and store it
        in the current table
        """
        self.endcell()
        if self.isline():
            # A <tr> met outside any <table> has nowhere to go: drop it
            if self.istable():
                self.table.append(self.line)
            self.line = False

    def endtable(self):
        """
        Close the current table (and its pending line), if any, and store it
        in ``tables``
        """
        self.endline()
        if self.istable():
            self.tables.append(self.table)
            self.table = False

    # Inheritance
    def handle_starttag(self, tag, attrs):
        """
        Open a table, line or cell; opening a line or cell first closes the
        previous one, so missing end tags are tolerated
        """
        if tag == 'table':
            self.table = []
        elif tag == 'tr':
            self.endline()
            self.line = []
        elif tag == 'td':
            self.endcell()
            self.cell = ''
            self.cellx = 1
            self.celly = 1
            for name, value in attrs:
                if name == 'colspan':
                    self.cellx = self._span(value)
                elif name == 'rowspan':
                    self.celly = self._span(value)

    def handle_endtag(self, tag):
        """
        Close the table, line or cell matching the end tag
        """
        if tag == 'table':
            self.endtable()
        elif tag == 'tr':
            self.endline()
        elif tag == 'td':
            self.endcell()

    def handle_data(self, data):
        """
        Accumulate text into the current cell; text outside cells is ignored
        """
        if self.iscell():
            self.cell += data
95 | + | ||
96 | +# TODO Allow class customisation | ||
97 | + | ||
class Event:
    """One timetable slot, fed with raw texts then turned into calendar data.

    Usage: call the ``feed*`` methods with the mined values, then ``endFeed()``
    to compute the generated attributes, then ``getEvent()`` to obtain the
    icalendar component.
    """

    # Short names of the events that are never exported
    IGNORED_SHORT_NAMES = ('Ediff', 'R. TOEIC')

    # Mined data
    shortText = ''
    longText = ''
    date = False  # False until a date has been fed
    slot = 0      # Index into SLOTS

    # Generated data
    shortName = ''
    longName = ''
    location = ''
    startTime = False
    endTime = False
    active = False

    def feedShortText(self, shortText):
        """Store the short text of the event."""
        self.shortText = shortText

    def feedLongText(self, longText):
        """Store the long text of the event."""
        self.longText = longText

    def feedSlot(self, slot):
        """Store the index of the time slot (position in SLOTS)."""
        self.slot = slot

    def feedDate(self, date):
        """Store the day of the event; slot times are added to it as offsets."""
        self.date = date

    def endFeed(self):
        """Compute the generated attributes from the mined ones.

        Every generated attribute is recomputed from scratch, so calling this
        again after feeding new values never leaves stale data behind.
        """
        self.shortName = self.shortText
        self.longName = self.longText
        self.location = ''
        self.startTime = False
        self.endTime = False

        # An event is kept when it has a long text and is not blacklisted
        self.active = (bool(self.longName)
                       and self.shortName not in self.IGNORED_SHORT_NAMES)

        if self.date and isinstance(self.slot, int):
            h, m = SLOTS[self.slot][0]
            self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
            h, m = SLOTS[self.slot][1]
            self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)

        if self.longName:
            # "Name (Location)" -> longName = "Name", location = "Location"
            e = self.longName.split('(')
            if len(e) >= 2:
                f = e[1].split(')')
                self.longName = e[0].strip()
                self.location = f[0].strip()

    def __str__(self):
        if not self.active:
            return 'Inactive event'
        when = ''
        if self.startTime:
            when = '{} - {} '.format(self.startTime, self.endTime)
        where = '@ {}'.format(self.location) if self.location else ''
        return '{} [{}] {}{}'.format(self.shortName, self.longName, when, where)

    def getEvent(self):
        """Return the event as an icalendar ``Event`` component.

        NOTE(review): startTime/endTime are still False when no date was fed
        before endFeed(); callers should only export events that have a date.
        """
        e = CalEvent()
        e.add('summary', self.shortName)
        e.add('description', self.longName)
        e.add('dtstart', self.startTime)
        e.add('dtend', self.endTime)
        e.add('location', self.location)
        return e
1 | +++ a/README.md | ||
@@ -0,0 +1,5 @@ | @@ -0,0 +1,5 @@ | ||
1 | +# Convertit l'emploi du temps IMA3 en fichier ICS | ||
2 | + | ||
3 | +Convertit le fichier HTML récupéré du site <http://dptima3.polytech-lille.net/> en fichier ICS importable sur différentes applications de Calendrier. | ||
4 | + | ||
5 | +À ce stade, l'application est plus une preuve de concept que quelque chose d'utilisable pour tous puisque mes préférences personnelles sont hardcodées (par exemple j'ai enlevé les Remédiations TOEIC qui ne me concernent pas). On pourrait l'améliorer en permettant la surcharge de Edt.Event pour pouvoir choisir le groupe de TP (et récupérer le fichier HTML directement), choisir d'afficher le petit titre ou le grand comme intitulé d'évènement, rejeter certains intitulés de cours, en modifier d'autres (LV2 → Allemand par exemple)... |
1 | +++ a/parse.py | ||
@@ -0,0 +1,113 @@ | @@ -0,0 +1,113 @@ | ||
1 | +#!/usr/bin/env python3 | ||
2 | + | ||
3 | +import datetime | ||
4 | +from icalendar import Calendar | ||
5 | +from Edt import * | ||
6 | + | ||
# Read the HTML file as bytes and decode it as ISO-8859-15; the handle is
# closed as soon as the content has been read
with open('groupe12.html', 'rb') as htmlHandle:
    htmlStr = htmlHandle.read().decode('iso-8859-15')

# Read HTML tables
parser = TableHTMLParser()
parser.feed(htmlStr)

# Duplicate cells with colspan & rowspan so every table becomes a plain grid
tables = []

for parserTable in parser.tables:
    if not parserTable:
        # Empty <table>: keep an empty grid so table indexes stay aligned
        tables.append([])
        continue

    # Figuring out dimensions: the width is the sum of the colspans of the
    # first line, the height the sum of the rowspans of each line's first cell
    X = sum(cell[1] for cell in parserTable[0])
    Y = sum(line[0][2] for line in parserTable)

    # Constructing table with real dimensions (False = cell not filled)
    table = [[False] * X for _ in range(Y)]

    # Filling table with parsed table
    for y, line in enumerate(parserTable):
        x = 0
        for cell in line:
            # Offsetting to the right while the position is already filled,
            # either by a previous cell of this line or by a rowspan from above
            while isinstance(table[y][x], str):
                x += 1

            # Copying values over the whole span of the cell
            for y2 in range(y, y + cell[2]):
                for x2 in range(x, x + cell[1]):
                    table[y2][x2] = cell[0]

    tables.append(table)

# Creating events
days = dict()

# Parsing table 1: one line per week, holding the short text of every slot
for line in tables[0]:
    try:
        day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # This is not a date, no data to grab here
        continue

    for day in range(DAYS_PER_WEEK):
        date = day1date + datetime.timedelta(days=day)

        if date not in days:
            days[date] = [Event() for s in range(len(SLOTS))]

        for slot in range(len(SLOTS)):
            days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X])

# Parsing table 2: one line per day, holding the long text of every slot
for line in tables[1]:
    try:
        date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # This is not a date (unfilled cells are False, hence TypeError),
        # no data to grab here
        continue

    if date not in days:
        days[date] = [Event() for s in range(len(SLOTS))]

    for slot in range(len(SLOTS)):
        days[date][slot].feedLongText(line[slot + TABLE_2_FIRST_SLOT_X])

# Feeding back time and slot to events
events = []
for day in days:
    for slot in range(len(SLOTS)):
        event = days[day][slot]
        event.feedDate(day)
        event.feedSlot(slot)
        event.endFeed()
        events.append(event)

# Creating calendar
cal = Calendar()
# PRODID is the iCalendar product identifier property ('proid' is not one)
cal.add('prodid', '-//Cours Polytech//mxm.dk//')
cal.add('version', '2.0')

for event in events:
    if event.active:
        print(event)
        cal.add_component(event.getEvent())

# Writing calendar to file
with open('groupe12.ics', 'wb') as f:
    f.write(cal.to_ical())
113 | + |