Commit 0ff6a1bf0e36e1923008361068c00562e1985de2
Merge branch 'master' into davical
Showing
2 changed files
with
200 additions
and
172 deletions
Show diff stats
Edt.py deleted
... | ... | @@ -1,162 +0,0 @@ |
1 | -""" | |
2 | -Class and variables needed for Edt manipulation & parsing | |
3 | -""" | |
4 | - | |
5 | -from html.parser import HTMLParser | |
6 | -from icalendar import Event as CalEvent | |
7 | -import datetime | |
8 | - | |
9 | -DAYS_PER_WEEK = 6 | |
10 | -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))] | |
11 | - | |
12 | -TABLE_1_DATE_X = 1 | |
13 | -TABLE_1_FIRST_SLOT_X = 2 | |
14 | - | |
15 | -TABLE_2_DATE_X = 0 | |
16 | -TABLE_2_FIRST_SLOT_X = 1 | |
17 | - | |
18 | -class TableHTMLParser(HTMLParser): | |
19 | - tables = [] # Tables | |
20 | - table = False # Current table content | |
21 | - line = False # Current line content | |
22 | - cell = False # Current cell content | |
23 | - cellx = 1 | |
24 | - celly = 1 | |
25 | - | |
26 | - # Logic | |
27 | - def iscell(self): | |
28 | - """ | |
29 | - Return if we are currently in a cell | |
30 | - """ | |
31 | - return isinstance(self.cell, str) | |
32 | - | |
33 | - def isline(self): | |
34 | - """ | |
35 | - Return if we are currently in a line | |
36 | - """ | |
37 | - return isinstance(self.line, list) | |
38 | - | |
39 | - def istable(self): | |
40 | - """ | |
41 | - Return if we are currently in a table | |
42 | - """ | |
43 | - return isinstance(self.table, list) | |
44 | - | |
45 | - # Actions | |
46 | - def endcell(self): | |
47 | - if self.iscell(): | |
48 | - self.line.append((self.cell.strip(), self.cellx, self.celly)) | |
49 | - self.cell = False | |
50 | - | |
51 | - def endline(self): | |
52 | - self.endcell() | |
53 | - if self.isline(): | |
54 | - self.table.append(self.line.copy()) | |
55 | - self.line = False | |
56 | - | |
57 | - def endtable(self): | |
58 | - self.endline() | |
59 | - if self.istable(): | |
60 | - self.tables.append(self.table.copy()) | |
61 | - self.table = False | |
62 | - | |
63 | - # Inheritance | |
64 | - def handle_starttag(self, tag, attrs): | |
65 | - #print("Encountered a start tag:", tag) | |
66 | - if tag == 'table': | |
67 | - self.table = [] | |
68 | - elif tag == 'tr': | |
69 | - self.endline() | |
70 | - self.line = [] | |
71 | - elif tag == 'td': | |
72 | - self.endcell() | |
73 | - self.cell = '' | |
74 | - self.cellx = 1 | |
75 | - self.celly = 1 | |
76 | - for attr in attrs: | |
77 | - if attr[0] == 'colspan': | |
78 | - self.cellx = int(attr[1]) | |
79 | - elif attr[0] == 'rowspan': | |
80 | - self.celly = int(attr[1]) | |
81 | - | |
82 | - def handle_endtag(self, tag): | |
83 | - #print("Encountered an end tag :", tag) | |
84 | - if tag == 'table': | |
85 | - self.endtable() | |
86 | - elif tag == 'tr': | |
87 | - self.endline() | |
88 | - elif tag == 'td': | |
89 | - self.endcell() | |
90 | - | |
91 | - def handle_data(self, data): | |
92 | - #print("Encountered some data :", data) | |
93 | - if self.iscell(): | |
94 | - self.cell += data | |
95 | - | |
96 | -# TODO Allow class customisation | |
97 | - | |
98 | -class Event: | |
99 | - # Mined data | |
100 | - shortText = '' | |
101 | - longText = '' | |
102 | - date = False | |
103 | - slot = 0 | |
104 | - | |
105 | - # Generated data | |
106 | - shortName = '' | |
107 | - longName = '' | |
108 | - location = '' | |
109 | - startTime = False | |
110 | - endTime = False | |
111 | - active = False | |
112 | - | |
113 | - def feedShortText(self, shortText): | |
114 | - self.shortText = shortText | |
115 | - | |
116 | - def feedLongText(self, longText): | |
117 | - self.longText = longText | |
118 | - | |
119 | - def feedSlot(self, slot): | |
120 | - self.slot = slot | |
121 | - | |
122 | - def feedDate(self, date): | |
123 | - self.date = date | |
124 | - | |
125 | - def endFeed(self): | |
126 | - self.shortName = self.shortText | |
127 | - self.longName = self.longText | |
128 | - | |
129 | - if self.longName: | |
130 | - self.active = True | |
131 | - | |
132 | - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC': | |
133 | - self.active = False | |
134 | - | |
135 | - if self.date and isinstance(self.slot, int): | |
136 | - h, m = SLOTS[self.slot][0] | |
137 | - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) | |
138 | - h, m = SLOTS[self.slot][1] | |
139 | - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) | |
140 | - | |
141 | - if self.longName: | |
142 | - e = self.longName.split('(') | |
143 | - if len(e) >= 2: | |
144 | - f = e[1].split(')') | |
145 | - self.longName = e[0].strip() | |
146 | - self.location = f[0].strip() | |
147 | - | |
148 | - | |
149 | - def __str__(self): | |
150 | - if self.active: | |
151 | - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') | |
152 | - else: | |
153 | - return 'Inactive event' | |
154 | - | |
155 | - def getEvent(self): | |
156 | - e = CalEvent() | |
157 | - e.add('summary', self.shortName) | |
158 | - e.add('description', self.longName) | |
159 | - e.add('dtstart', self.startTime) | |
160 | - e.add('dtend', self.endTime) | |
161 | - e.add('location', self.location) | |
162 | - return e |
parse.py
... | ... | @@ -5,15 +5,190 @@ import argparse |
5 | 5 | import datetime |
6 | 6 | import urllib.request |
7 | 7 | from icalendar import Calendar |
8 | -from Edt import * | |
8 | +from html.parser import HTMLParser | |
9 | +from icalendar import Calendar, Event as CalEvent | |
9 | 10 | |
10 | 11 | # Parse command line arguments |
11 | -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS') | |
12 | +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS') | |
13 | +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)') | |
12 | 14 | parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné') |
13 | 15 | parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout') |
14 | 16 | args = parser.parse_args() |
15 | 17 | |
16 | -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle: | |
18 | +if args.annee == 3: | |
19 | + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html' | |
20 | + SLOTS = [(( 8, 0), (10, 0)), | |
21 | + ((10, 20), (12, 20)), | |
22 | + ((13, 50), (15, 50)), | |
23 | + ((16, 10), (18, 10))] | |
24 | + DATE_FORMAT = '%d/%m/%Y' | |
25 | +elif args.annee == 4: | |
26 | + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html' | |
27 | + SLOTS = [(( 8, 0), ( 9, 0)), | |
28 | + (( 9, 10), (10, 10)), | |
29 | + ((10, 20), (11, 20)), | |
30 | + ((11, 30), (12, 30)), | |
31 | + ((13, 50), (14, 50)), | |
32 | + ((15, 00), (16, 00)), | |
33 | + ((16, 10), (17, 10)), | |
34 | + ((17, 20), (18, 20))] | |
35 | + DATE_FORMAT = '%d/%m/%y' | |
36 | +else: | |
37 | + raise ValueError('Année inconnue : ' + annee) | |
38 | + | |
39 | +DAYS_PER_WEEK = 6 | |
40 | + | |
41 | +TABLE_1_DATE_X = 1 | |
42 | +TABLE_1_FIRST_SLOT_X = 2 | |
43 | + | |
44 | +TABLE_2_DATE_X = 0 | |
45 | +TABLE_2_FIRST_SLOT_X = 1 | |
46 | + | |
47 | +class TableHTMLParser(HTMLParser): | |
48 | + tables = [] # Tables | |
49 | + table = False # Current table content | |
50 | + line = False # Current line content | |
51 | + cell = False # Current cell content | |
52 | + cellx = 1 | |
53 | + celly = 1 | |
54 | + | |
55 | + # Logic | |
56 | + def iscell(self): | |
57 | + """ | |
58 | + Return if we are currently in a cell | |
59 | + """ | |
60 | + return isinstance(self.cell, str) | |
61 | + | |
62 | + def isline(self): | |
63 | + """ | |
64 | + Return if we are currently in a line | |
65 | + """ | |
66 | + return isinstance(self.line, list) | |
67 | + | |
68 | + def istable(self): | |
69 | + """ | |
70 | + Return if we are currently in a table | |
71 | + """ | |
72 | + return isinstance(self.table, list) | |
73 | + | |
74 | + # Actions | |
75 | + def endcell(self): | |
76 | + if self.iscell(): | |
77 | + self.line.append((self.cell.strip(), self.cellx, self.celly)) | |
78 | + self.cell = False | |
79 | + | |
80 | + def endline(self): | |
81 | + self.endcell() | |
82 | + if self.isline(): | |
83 | + self.table.append(self.line.copy()) | |
84 | + self.line = False | |
85 | + | |
86 | + def endtable(self): | |
87 | + self.endline() | |
88 | + if self.istable(): | |
89 | + self.tables.append(self.table.copy()) | |
90 | + self.table = False | |
91 | + | |
92 | + # Inheritance | |
93 | + def handle_starttag(self, tag, attrs): | |
94 | + if tag == 'table': | |
95 | + self.table = [] | |
96 | + elif tag == 'tr': | |
97 | + self.endline() | |
98 | + self.line = [] | |
99 | + elif tag == 'td': | |
100 | + self.endcell() | |
101 | + self.cell = '' | |
102 | + self.cellx = 1 | |
103 | + self.celly = 1 | |
104 | + for attr in attrs: | |
105 | + if attr[0] == 'colspan': | |
106 | + self.cellx = int(attr[1]) | |
107 | + elif attr[0] == 'rowspan': | |
108 | + self.celly = int(attr[1]) | |
109 | + | |
110 | + def handle_endtag(self, tag): | |
111 | + if tag == 'table': | |
112 | + self.endtable() | |
113 | + elif tag == 'tr': | |
114 | + self.endline() | |
115 | + elif tag == 'td': | |
116 | + self.endcell() | |
117 | + | |
118 | + def handle_data(self, data): | |
119 | + if self.iscell(): | |
120 | + self.cell += data | |
121 | + | |
122 | +# TODO Do something that really is OOP or do not... | |
123 | + | |
124 | +class Event: | |
125 | + # Mined data | |
126 | + shortText = '' | |
127 | + longText = '' | |
128 | + date = False | |
129 | + begSlot = 0 | |
130 | + endSlot = 0 | |
131 | + | |
132 | + # Generated data | |
133 | + shortName = '' | |
134 | + longName = '' | |
135 | + location = '' | |
136 | + startTime = False | |
137 | + endTime = False | |
138 | + active = False | |
139 | + | |
140 | + def feedShortText(self, shortText): | |
141 | + self.shortText = shortText | |
142 | + | |
143 | + def feedLongText(self, longText): | |
144 | + self.longText = longText | |
145 | + | |
146 | + def feedBegSlot(self, slot): | |
147 | + self.begSlot = slot | |
148 | + | |
149 | + def feedEndSlot(self, slot): | |
150 | + self.endSlot = slot | |
151 | + | |
152 | + def feedDate(self, date): | |
153 | + self.date = date | |
154 | + | |
155 | + def endFeed(self): | |
156 | + self.shortName = self.shortText | |
157 | + self.longName = self.longText | |
158 | + | |
159 | + if self.longName: | |
160 | + self.active = True | |
161 | + | |
162 | + if self.date and isinstance(self.begSlot, int) and isinstance(self.endSlot, int): | |
163 | + h, m = SLOTS[self.begSlot][0] | |
164 | + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) | |
165 | + h, m = SLOTS[self.endSlot][1] | |
166 | + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) | |
167 | + | |
168 | + if self.longName: | |
169 | + e = self.longName.split('(') | |
170 | + if len(e) >= 2: | |
171 | + f = e[1].split(')') | |
172 | + self.longName = e[0].strip() | |
173 | + self.location = f[0].strip() | |
174 | + | |
175 | + | |
176 | + def __str__(self): | |
177 | + if self.active: | |
178 | + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') | |
179 | + else: | |
180 | + return 'Inactive event' | |
181 | + | |
182 | + def getEvent(self): | |
183 | + e = CalEvent() | |
184 | + e.add('summary', self.shortName) | |
185 | + e.add('description', self.longName) | |
186 | + e.add('dtstart', self.startTime) | |
187 | + e.add('dtend', self.endTime) | |
188 | + e.add('location', self.location) | |
189 | + return e | |
190 | + | |
191 | +with urllib.request.urlopen(url) as handle: | |
17 | 192 | htmlStr = handle.read().decode('iso-8859-15') |
18 | 193 | |
19 | 194 | # Read HTML tables |
... | ... | @@ -62,7 +237,7 @@ days = dict() |
62 | 237 | # Parsing table 1 |
63 | 238 | for line in tables[0]: |
64 | 239 | try: |
65 | - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y') | |
240 | + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT) | |
66 | 241 | except (ValueError, TypeError): |
67 | 242 | # This is not a date, no data to grab here |
68 | 243 | continue |
... | ... | @@ -74,13 +249,18 @@ for line in tables[0]: |
74 | 249 | days[date] = [Event() for s in range(len(SLOTS))] |
75 | 250 | |
76 | 251 | for slot in range(len(SLOTS)): |
77 | - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]) | |
252 | + try: | |
253 | + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X] | |
254 | + except IndexError: | |
255 | + # Out of the table: saturday afternoon | |
256 | + break | |
257 | + days[date][slot].feedShortText(cell) | |
78 | 258 | continue |
79 | 259 | |
80 | 260 | # Parsing table 2 |
81 | 261 | for line in tables[1]: |
82 | 262 | try: |
83 | - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y') | |
263 | + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT) | |
84 | 264 | except ValueError: |
85 | 265 | # This is not a date, no data to grab here |
86 | 266 | continue |
... | ... | @@ -94,12 +274,22 @@ for line in tables[1]: |
94 | 274 | # Feeding back time and slot to events |
95 | 275 | events = [] |
96 | 276 | for day in days: |
277 | + prevEvent = False | |
97 | 278 | for slot in range(len(SLOTS)): |
98 | 279 | event = days[day][slot] |
99 | - event.feedDate(day) | |
100 | - event.feedSlot(slot) | |
101 | - event.endFeed() | |
102 | - events.append(event) | |
280 | + if prevEvent: | |
281 | + if prevEvent.longText == event.longText: | |
282 | + prevEvent.feedEndSlot(slot) | |
283 | + else: | |
284 | + prevEvent.endFeed() | |
285 | + events.append(prevEvent) | |
286 | + if not prevEvent or (prevEvent and prevEvent.longText != event.longText): | |
287 | + event.feedDate(day) | |
288 | + event.feedBegSlot(slot) | |
289 | + event.feedEndSlot(slot) | |
290 | + prevEvent = event | |
291 | + prevEvent.endFeed() | |
292 | + events.append(prevEvent) | |
103 | 293 | |
104 | 294 | # Creating calendar |
105 | 295 | cal = Calendar() | ... | ... |