Commit 0ff6a1bf0e36e1923008361068c00562e1985de2
Merge branch 'master' into davical
Showing
2 changed files
with
200 additions
and
172 deletions
Show diff stats
Edt.py deleted
@@ -1,162 +0,0 @@ | @@ -1,162 +0,0 @@ | ||
1 | -""" | ||
2 | -Class and variables needed for Edt manipulation & parsing | ||
3 | -""" | ||
4 | - | ||
5 | -from html.parser import HTMLParser | ||
6 | -from icalendar import Event as CalEvent | ||
7 | -import datetime | ||
8 | - | ||
9 | -DAYS_PER_WEEK = 6 | ||
10 | -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))] | ||
11 | - | ||
12 | -TABLE_1_DATE_X = 1 | ||
13 | -TABLE_1_FIRST_SLOT_X = 2 | ||
14 | - | ||
15 | -TABLE_2_DATE_X = 0 | ||
16 | -TABLE_2_FIRST_SLOT_X = 1 | ||
17 | - | ||
18 | -class TableHTMLParser(HTMLParser): | ||
19 | - tables = [] # Tables | ||
20 | - table = False # Current table content | ||
21 | - line = False # Current line content | ||
22 | - cell = False # Current cell content | ||
23 | - cellx = 1 | ||
24 | - celly = 1 | ||
25 | - | ||
26 | - # Logic | ||
27 | - def iscell(self): | ||
28 | - """ | ||
29 | - Return if we are currently in a cell | ||
30 | - """ | ||
31 | - return isinstance(self.cell, str) | ||
32 | - | ||
33 | - def isline(self): | ||
34 | - """ | ||
35 | - Return if we are currently in a line | ||
36 | - """ | ||
37 | - return isinstance(self.line, list) | ||
38 | - | ||
39 | - def istable(self): | ||
40 | - """ | ||
41 | - Return if we are currently in a table | ||
42 | - """ | ||
43 | - return isinstance(self.table, list) | ||
44 | - | ||
45 | - # Actions | ||
46 | - def endcell(self): | ||
47 | - if self.iscell(): | ||
48 | - self.line.append((self.cell.strip(), self.cellx, self.celly)) | ||
49 | - self.cell = False | ||
50 | - | ||
51 | - def endline(self): | ||
52 | - self.endcell() | ||
53 | - if self.isline(): | ||
54 | - self.table.append(self.line.copy()) | ||
55 | - self.line = False | ||
56 | - | ||
57 | - def endtable(self): | ||
58 | - self.endline() | ||
59 | - if self.istable(): | ||
60 | - self.tables.append(self.table.copy()) | ||
61 | - self.table = False | ||
62 | - | ||
63 | - # Inheritance | ||
64 | - def handle_starttag(self, tag, attrs): | ||
65 | - #print("Encountered a start tag:", tag) | ||
66 | - if tag == 'table': | ||
67 | - self.table = [] | ||
68 | - elif tag == 'tr': | ||
69 | - self.endline() | ||
70 | - self.line = [] | ||
71 | - elif tag == 'td': | ||
72 | - self.endcell() | ||
73 | - self.cell = '' | ||
74 | - self.cellx = 1 | ||
75 | - self.celly = 1 | ||
76 | - for attr in attrs: | ||
77 | - if attr[0] == 'colspan': | ||
78 | - self.cellx = int(attr[1]) | ||
79 | - elif attr[0] == 'rowspan': | ||
80 | - self.celly = int(attr[1]) | ||
81 | - | ||
82 | - def handle_endtag(self, tag): | ||
83 | - #print("Encountered an end tag :", tag) | ||
84 | - if tag == 'table': | ||
85 | - self.endtable() | ||
86 | - elif tag == 'tr': | ||
87 | - self.endline() | ||
88 | - elif tag == 'td': | ||
89 | - self.endcell() | ||
90 | - | ||
91 | - def handle_data(self, data): | ||
92 | - #print("Encountered some data :", data) | ||
93 | - if self.iscell(): | ||
94 | - self.cell += data | ||
95 | - | ||
96 | -# TODO Allow class customisation | ||
97 | - | ||
98 | -class Event: | ||
99 | - # Mined data | ||
100 | - shortText = '' | ||
101 | - longText = '' | ||
102 | - date = False | ||
103 | - slot = 0 | ||
104 | - | ||
105 | - # Generated data | ||
106 | - shortName = '' | ||
107 | - longName = '' | ||
108 | - location = '' | ||
109 | - startTime = False | ||
110 | - endTime = False | ||
111 | - active = False | ||
112 | - | ||
113 | - def feedShortText(self, shortText): | ||
114 | - self.shortText = shortText | ||
115 | - | ||
116 | - def feedLongText(self, longText): | ||
117 | - self.longText = longText | ||
118 | - | ||
119 | - def feedSlot(self, slot): | ||
120 | - self.slot = slot | ||
121 | - | ||
122 | - def feedDate(self, date): | ||
123 | - self.date = date | ||
124 | - | ||
125 | - def endFeed(self): | ||
126 | - self.shortName = self.shortText | ||
127 | - self.longName = self.longText | ||
128 | - | ||
129 | - if self.longName: | ||
130 | - self.active = True | ||
131 | - | ||
132 | - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC': | ||
133 | - self.active = False | ||
134 | - | ||
135 | - if self.date and isinstance(self.slot, int): | ||
136 | - h, m = SLOTS[self.slot][0] | ||
137 | - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) | ||
138 | - h, m = SLOTS[self.slot][1] | ||
139 | - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) | ||
140 | - | ||
141 | - if self.longName: | ||
142 | - e = self.longName.split('(') | ||
143 | - if len(e) >= 2: | ||
144 | - f = e[1].split(')') | ||
145 | - self.longName = e[0].strip() | ||
146 | - self.location = f[0].strip() | ||
147 | - | ||
148 | - | ||
149 | - def __str__(self): | ||
150 | - if self.active: | ||
151 | - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') | ||
152 | - else: | ||
153 | - return 'Inactive event' | ||
154 | - | ||
155 | - def getEvent(self): | ||
156 | - e = CalEvent() | ||
157 | - e.add('summary', self.shortName) | ||
158 | - e.add('description', self.longName) | ||
159 | - e.add('dtstart', self.startTime) | ||
160 | - e.add('dtend', self.endTime) | ||
161 | - e.add('location', self.location) | ||
162 | - return e |
parse.py
@@ -5,15 +5,190 @@ import argparse | @@ -5,15 +5,190 @@ import argparse | ||
5 | import datetime | 5 | import datetime |
6 | import urllib.request | 6 | import urllib.request |
7 | from icalendar import Calendar | 7 | from icalendar import Calendar |
8 | -from Edt import * | 8 | +from html.parser import HTMLParser |
9 | +from icalendar import Calendar, Event as CalEvent | ||
9 | 10 | ||
10 | # Parse command line arguments | 11 | # Parse command line arguments |
11 | -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS') | 12 | +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS') |
13 | +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)') | ||
12 | parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné') | 14 | parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné') |
13 | parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout') | 15 | parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout') |
14 | args = parser.parse_args() | 16 | args = parser.parse_args() |
15 | 17 | ||
16 | -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle: | 18 | +if args.annee == 3: |
19 | + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html' | ||
20 | + SLOTS = [(( 8, 0), (10, 0)), | ||
21 | + ((10, 20), (12, 20)), | ||
22 | + ((13, 50), (15, 50)), | ||
23 | + ((16, 10), (18, 10))] | ||
24 | + DATE_FORMAT = '%d/%m/%Y' | ||
25 | +elif args.annee == 4: | ||
26 | + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html' | ||
27 | + SLOTS = [(( 8, 0), ( 9, 0)), | ||
28 | + (( 9, 10), (10, 10)), | ||
29 | + ((10, 20), (11, 20)), | ||
30 | + ((11, 30), (12, 30)), | ||
31 | + ((13, 50), (14, 50)), | ||
32 | + ((15, 00), (16, 00)), | ||
33 | + ((16, 10), (17, 10)), | ||
34 | + ((17, 20), (18, 20))] | ||
35 | + DATE_FORMAT = '%d/%m/%y' | ||
36 | +else: | ||
37 | + raise ValueError('Année inconnue : ' + annee) | ||
38 | + | ||
39 | +DAYS_PER_WEEK = 6 | ||
40 | + | ||
41 | +TABLE_1_DATE_X = 1 | ||
42 | +TABLE_1_FIRST_SLOT_X = 2 | ||
43 | + | ||
44 | +TABLE_2_DATE_X = 0 | ||
45 | +TABLE_2_FIRST_SLOT_X = 1 | ||
46 | + | ||
47 | +class TableHTMLParser(HTMLParser): | ||
48 | + tables = [] # Tables | ||
49 | + table = False # Current table content | ||
50 | + line = False # Current line content | ||
51 | + cell = False # Current cell content | ||
52 | + cellx = 1 | ||
53 | + celly = 1 | ||
54 | + | ||
55 | + # Logic | ||
56 | + def iscell(self): | ||
57 | + """ | ||
58 | + Return if we are currently in a cell | ||
59 | + """ | ||
60 | + return isinstance(self.cell, str) | ||
61 | + | ||
62 | + def isline(self): | ||
63 | + """ | ||
64 | + Return if we are currently in a line | ||
65 | + """ | ||
66 | + return isinstance(self.line, list) | ||
67 | + | ||
68 | + def istable(self): | ||
69 | + """ | ||
70 | + Return if we are currently in a table | ||
71 | + """ | ||
72 | + return isinstance(self.table, list) | ||
73 | + | ||
74 | + # Actions | ||
75 | + def endcell(self): | ||
76 | + if self.iscell(): | ||
77 | + self.line.append((self.cell.strip(), self.cellx, self.celly)) | ||
78 | + self.cell = False | ||
79 | + | ||
80 | + def endline(self): | ||
81 | + self.endcell() | ||
82 | + if self.isline(): | ||
83 | + self.table.append(self.line.copy()) | ||
84 | + self.line = False | ||
85 | + | ||
86 | + def endtable(self): | ||
87 | + self.endline() | ||
88 | + if self.istable(): | ||
89 | + self.tables.append(self.table.copy()) | ||
90 | + self.table = False | ||
91 | + | ||
92 | + # Inheritance | ||
93 | + def handle_starttag(self, tag, attrs): | ||
94 | + if tag == 'table': | ||
95 | + self.table = [] | ||
96 | + elif tag == 'tr': | ||
97 | + self.endline() | ||
98 | + self.line = [] | ||
99 | + elif tag == 'td': | ||
100 | + self.endcell() | ||
101 | + self.cell = '' | ||
102 | + self.cellx = 1 | ||
103 | + self.celly = 1 | ||
104 | + for attr in attrs: | ||
105 | + if attr[0] == 'colspan': | ||
106 | + self.cellx = int(attr[1]) | ||
107 | + elif attr[0] == 'rowspan': | ||
108 | + self.celly = int(attr[1]) | ||
109 | + | ||
110 | + def handle_endtag(self, tag): | ||
111 | + if tag == 'table': | ||
112 | + self.endtable() | ||
113 | + elif tag == 'tr': | ||
114 | + self.endline() | ||
115 | + elif tag == 'td': | ||
116 | + self.endcell() | ||
117 | + | ||
118 | + def handle_data(self, data): | ||
119 | + if self.iscell(): | ||
120 | + self.cell += data | ||
121 | + | ||
122 | +# TODO Do something that really is OOP or do not... | ||
123 | + | ||
124 | +class Event: | ||
125 | + # Mined data | ||
126 | + shortText = '' | ||
127 | + longText = '' | ||
128 | + date = False | ||
129 | + begSlot = 0 | ||
130 | + endSlot = 0 | ||
131 | + | ||
132 | + # Generated data | ||
133 | + shortName = '' | ||
134 | + longName = '' | ||
135 | + location = '' | ||
136 | + startTime = False | ||
137 | + endTime = False | ||
138 | + active = False | ||
139 | + | ||
140 | + def feedShortText(self, shortText): | ||
141 | + self.shortText = shortText | ||
142 | + | ||
143 | + def feedLongText(self, longText): | ||
144 | + self.longText = longText | ||
145 | + | ||
146 | + def feedBegSlot(self, slot): | ||
147 | + self.begSlot = slot | ||
148 | + | ||
149 | + def feedEndSlot(self, slot): | ||
150 | + self.endSlot = slot | ||
151 | + | ||
152 | + def feedDate(self, date): | ||
153 | + self.date = date | ||
154 | + | ||
155 | + def endFeed(self): | ||
156 | + self.shortName = self.shortText | ||
157 | + self.longName = self.longText | ||
158 | + | ||
159 | + if self.longName: | ||
160 | + self.active = True | ||
161 | + | ||
162 | + if self.date and isinstance(self.begSlot, int) and isinstance(self.endSlot, int): | ||
163 | + h, m = SLOTS[self.begSlot][0] | ||
164 | + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) | ||
165 | + h, m = SLOTS[self.endSlot][1] | ||
166 | + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) | ||
167 | + | ||
168 | + if self.longName: | ||
169 | + e = self.longName.split('(') | ||
170 | + if len(e) >= 2: | ||
171 | + f = e[1].split(')') | ||
172 | + self.longName = e[0].strip() | ||
173 | + self.location = f[0].strip() | ||
174 | + | ||
175 | + | ||
176 | + def __str__(self): | ||
177 | + if self.active: | ||
178 | + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') | ||
179 | + else: | ||
180 | + return 'Inactive event' | ||
181 | + | ||
182 | + def getEvent(self): | ||
183 | + e = CalEvent() | ||
184 | + e.add('summary', self.shortName) | ||
185 | + e.add('description', self.longName) | ||
186 | + e.add('dtstart', self.startTime) | ||
187 | + e.add('dtend', self.endTime) | ||
188 | + e.add('location', self.location) | ||
189 | + return e | ||
190 | + | ||
191 | +with urllib.request.urlopen(url) as handle: | ||
17 | htmlStr = handle.read().decode('iso-8859-15') | 192 | htmlStr = handle.read().decode('iso-8859-15') |
18 | 193 | ||
19 | # Read HTML tables | 194 | # Read HTML tables |
@@ -62,7 +237,7 @@ days = dict() | @@ -62,7 +237,7 @@ days = dict() | ||
62 | # Parsing table 1 | 237 | # Parsing table 1 |
63 | for line in tables[0]: | 238 | for line in tables[0]: |
64 | try: | 239 | try: |
65 | - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y') | 240 | + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT) |
66 | except (ValueError, TypeError): | 241 | except (ValueError, TypeError): |
67 | # This is not a date, no data to grab here | 242 | # This is not a date, no data to grab here |
68 | continue | 243 | continue |
@@ -74,13 +249,18 @@ for line in tables[0]: | @@ -74,13 +249,18 @@ for line in tables[0]: | ||
74 | days[date] = [Event() for s in range(len(SLOTS))] | 249 | days[date] = [Event() for s in range(len(SLOTS))] |
75 | 250 | ||
76 | for slot in range(len(SLOTS)): | 251 | for slot in range(len(SLOTS)): |
77 | - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]) | 252 | + try: |
253 | + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X] | ||
254 | + except IndexError: | ||
255 | + # Out of the table: saturday afternoon | ||
256 | + break | ||
257 | + days[date][slot].feedShortText(cell) | ||
78 | continue | 258 | continue |
79 | 259 | ||
80 | # Parsing table 2 | 260 | # Parsing table 2 |
81 | for line in tables[1]: | 261 | for line in tables[1]: |
82 | try: | 262 | try: |
83 | - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y') | 263 | + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT) |
84 | except ValueError: | 264 | except ValueError: |
85 | # This is not a date, no data to grab here | 265 | # This is not a date, no data to grab here |
86 | continue | 266 | continue |
@@ -94,12 +274,22 @@ for line in tables[1]: | @@ -94,12 +274,22 @@ for line in tables[1]: | ||
94 | # Feeding back time and slot to events | 274 | # Feeding back time and slot to events |
95 | events = [] | 275 | events = [] |
96 | for day in days: | 276 | for day in days: |
277 | + prevEvent = False | ||
97 | for slot in range(len(SLOTS)): | 278 | for slot in range(len(SLOTS)): |
98 | event = days[day][slot] | 279 | event = days[day][slot] |
99 | - event.feedDate(day) | ||
100 | - event.feedSlot(slot) | ||
101 | - event.endFeed() | ||
102 | - events.append(event) | 280 | + if prevEvent: |
281 | + if prevEvent.longText == event.longText: | ||
282 | + prevEvent.feedEndSlot(slot) | ||
283 | + else: | ||
284 | + prevEvent.endFeed() | ||
285 | + events.append(prevEvent) | ||
286 | + if not prevEvent or (prevEvent and prevEvent.longText != event.longText): | ||
287 | + event.feedDate(day) | ||
288 | + event.feedBegSlot(slot) | ||
289 | + event.feedEndSlot(slot) | ||
290 | + prevEvent = event | ||
291 | + prevEvent.endFeed() | ||
292 | + events.append(prevEvent) | ||
103 | 293 | ||
104 | # Creating calendar | 294 | # Creating calendar |
105 | cal = Calendar() | 295 | cal = Calendar() |