Commit 0ff6a1bf0e36e1923008361068c00562e1985de2

Authored by Geoffrey PREUD'HOMME
2 parents c7b78710 7a72c101

Merge branch 'master' into davical

Showing 2 changed files with 200 additions and 172 deletions   Show diff stats
Edt.py deleted
@@ -1,162 +0,0 @@ @@ -1,162 +0,0 @@
1 -"""  
2 -Class and variables needed for Edt manipulation & parsing  
3 -"""  
4 -  
5 -from html.parser import HTMLParser  
6 -from icalendar import Event as CalEvent  
7 -import datetime  
8 -  
9 -DAYS_PER_WEEK = 6  
10 -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))]  
11 -  
12 -TABLE_1_DATE_X = 1  
13 -TABLE_1_FIRST_SLOT_X = 2  
14 -  
15 -TABLE_2_DATE_X = 0  
16 -TABLE_2_FIRST_SLOT_X = 1  
17 -  
class TableHTMLParser(HTMLParser):
    """
    HTML parser collecting the content of every table it is fed.

    Each finished table is appended to ``tables`` as a list of lines, each
    line being a list of ``(text, colspan, rowspan)`` tuples, one per
    ``<td>`` element. ``table``, ``line`` and ``cell`` hold the element
    currently being built and are ``False`` while the parser is outside of
    such an element.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # State is per instance: a class-level list would be shared by (and
        # keep growing across) every parser instance.
        self.tables = []    # Tables
        self.table = False  # Current table content
        self.line = False   # Current line content
        self.cell = False   # Current cell content
        self.cellx = 1      # colspan of the current cell
        self.celly = 1      # rowspan of the current cell

    # Logic
    def iscell(self):
        """
        Return if we are currently in a cell
        """
        return isinstance(self.cell, str)

    def isline(self):
        """
        Return if we are currently in a line
        """
        return isinstance(self.line, list)

    def istable(self):
        """
        Return if we are currently in a table
        """
        return isinstance(self.table, list)

    @staticmethod
    def _span(value):
        """
        Convert a colspan/rowspan attribute value to an int, using 1 when
        the value is missing or is not a number
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1

    # Actions
    def endcell(self):
        """
        Close the current cell, if any, and store it in the current line
        """
        if self.iscell():
            # A cell met outside of any line has nowhere to go: drop it
            if self.isline():
                self.line.append((self.cell.strip(), self.cellx, self.celly))
            self.cell = False

    def endline(self):
        """
        Close the current line, if any, and store it in the current table
        """
        self.endcell()
        if self.isline():
            # A line met outside of any table has nowhere to go: drop it
            if self.istable():
                self.table.append(self.line.copy())
            self.line = False

    def endtable(self):
        """
        Close the current table, if any, and store it in ``tables``
        """
        self.endline()
        if self.istable():
            self.tables.append(self.table.copy())
            self.table = False

    # Inheritance
    def handle_starttag(self, tag, attrs):
        """
        Open a table, a line or a cell depending on the tag encountered
        """
        if tag == 'table':
            self.table = []
        elif tag == 'tr':
            # Closing tags may be omitted: end the previous line first
            self.endline()
            self.line = []
        elif tag == 'td':
            # Closing tags may be omitted: end the previous cell first
            self.endcell()
            self.cell = ''
            self.cellx = 1
            self.celly = 1
            for name, value in attrs:
                if name == 'colspan':
                    self.cellx = self._span(value)
                elif name == 'rowspan':
                    self.celly = self._span(value)

    def handle_endtag(self, tag):
        """
        Close the table, line or cell matching the tag encountered
        """
        if tag == 'table':
            self.endtable()
        elif tag == 'tr':
            self.endline()
        elif tag == 'td':
            self.endcell()

    def handle_data(self, data):
        """
        Append text to the current cell, ignore it outside of a cell
        """
        if self.iscell():
            self.cell += data
95 -  
96 -# TODO Allow class customisation  
97 -  
class Event:
    """
    One timetable entry, built in two steps: raw values are stored with the
    ``feed*`` methods, then ``endFeed`` derives the displayable fields.
    """

    # Mined data
    shortText = ''
    longText = ''
    date = False
    slot = 0

    # Generated data
    shortName = ''
    longName = ''
    location = ''
    startTime = False
    endTime = False
    active = False

    def feedShortText(self, shortText):
        """Store the raw short text of the event."""
        self.shortText = shortText

    def feedLongText(self, longText):
        """Store the raw long text of the event."""
        self.longText = longText

    def feedSlot(self, slot):
        """Store the index in SLOTS of the slot the event occupies."""
        self.slot = slot

    def feedDate(self, date):
        """Store the day of the event."""
        self.date = date

    def _slotTime(self, edge):
        """Return the date shifted to the start (0) or end (1) of the slot."""
        hours, minutes = SLOTS[self.slot][edge]
        return self.date + datetime.timedelta(hours=hours, minutes=minutes)

    def endFeed(self):
        """
        Compute the generated fields (names, activity, times, location)
        from the values fed so far.
        """
        self.shortName = self.shortText
        self.longName = self.longText

        # An event is active when it has a long text, unless its short
        # name is one of the excluded entries
        if self.longName:
            self.active = True
        if self.shortName in ('Ediff', 'R. TOEIC'):
            self.active = False

        if self.date and isinstance(self.slot, int):
            self.startTime = self._slotTime(0)
            self.endTime = self._slotTime(1)

        # The location is the text inside the first pair of parentheses
        # of the long name, the name itself is what comes before
        if self.longName:
            name, paren, rest = self.longName.partition('(')
            if paren:
                inside = rest.partition('(')[0].partition(')')[0]
                self.longName = name.strip()
                self.location = inside.strip()

    def __str__(self):
        if not self.active:
            return 'Inactive event'
        when = ''
        if self.startTime:
            when = str(self.startTime) + ' - ' + str(self.endTime) + ' '
        where = ''
        if self.location:
            where = '@ ' + self.location
        return self.shortName + ' [' + self.longName + '] ' + when + where

    def getEvent(self):
        """Return the event as an icalendar event (CalEvent)."""
        calEvent = CalEvent()
        properties = (
            ('summary', self.shortName),
            ('description', self.longName),
            ('dtstart', self.startTime),
            ('dtend', self.endTime),
            ('location', self.location),
        )
        for key, value in properties:
            calEvent.add(key, value)
        return calEvent
@@ -5,15 +5,190 @@ import argparse @@ -5,15 +5,190 @@ import argparse
5 import datetime 5 import datetime
6 import urllib.request 6 import urllib.request
7 from icalendar import Calendar 7 from icalendar import Calendar
8 -from Edt import * 8 +from html.parser import HTMLParser
  9 +from icalendar import Calendar, Event as CalEvent
9 10
# Parse command line arguments
parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS')
parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)')
parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné')
parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout')
args = parser.parse_args()

# Per-year settings: page address, time slots as
# ((start hour, start minute), (end hour, end minute)) and date format
if args.annee == 3:
    url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
    SLOTS = [(( 8,  0), (10,  0)),
             ((10, 20), (12, 20)),
             ((13, 50), (15, 50)),
             ((16, 10), (18, 10))]
    DATE_FORMAT = '%d/%m/%Y'
elif args.annee == 4:
    url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html'
    SLOTS = [(( 8,  0), ( 9,  0)),
             (( 9, 10), (10, 10)),
             ((10, 20), (11, 20)),
             ((11, 30), (12, 30)),
             ((13, 50), (14, 50)),
             ((15,  0), (16,  0)),
             ((16, 10), (17, 10)),
             ((17, 20), (18, 20))]
    DATE_FORMAT = '%d/%m/%y'
else:
    # args.annee is an int: convert it before building the message
    raise ValueError('Année inconnue : ' + str(args.annee))

DAYS_PER_WEEK = 6

# Index, in a line of the first table, of the date cell and of the cell
# of the first slot
TABLE_1_DATE_X = 1
TABLE_1_FIRST_SLOT_X = 2

# Same indexes for the second table
TABLE_2_DATE_X = 0
TABLE_2_FIRST_SLOT_X = 1
  46 +
class TableHTMLParser(HTMLParser):
    """
    HTML parser collecting the content of every table it is fed.

    Each finished table is appended to ``tables`` as a list of lines, each
    line being a list of ``(text, colspan, rowspan)`` tuples, one per
    ``<td>`` element. ``table``, ``line`` and ``cell`` hold the element
    currently being built and are ``False`` while the parser is outside of
    such an element.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # State is per instance: a class-level list would be shared by (and
        # keep growing across) every parser instance.
        self.tables = []    # Tables
        self.table = False  # Current table content
        self.line = False   # Current line content
        self.cell = False   # Current cell content
        self.cellx = 1      # colspan of the current cell
        self.celly = 1      # rowspan of the current cell

    # Logic
    def iscell(self):
        """
        Return if we are currently in a cell
        """
        return isinstance(self.cell, str)

    def isline(self):
        """
        Return if we are currently in a line
        """
        return isinstance(self.line, list)

    def istable(self):
        """
        Return if we are currently in a table
        """
        return isinstance(self.table, list)

    @staticmethod
    def _span(value):
        """
        Convert a colspan/rowspan attribute value to an int, using 1 when
        the value is missing or is not a number
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1

    # Actions
    def endcell(self):
        """
        Close the current cell, if any, and store it in the current line
        """
        if self.iscell():
            # A cell met outside of any line has nowhere to go: drop it
            if self.isline():
                self.line.append((self.cell.strip(), self.cellx, self.celly))
            self.cell = False

    def endline(self):
        """
        Close the current line, if any, and store it in the current table
        """
        self.endcell()
        if self.isline():
            # A line met outside of any table has nowhere to go: drop it
            if self.istable():
                self.table.append(self.line.copy())
            self.line = False

    def endtable(self):
        """
        Close the current table, if any, and store it in ``tables``
        """
        self.endline()
        if self.istable():
            self.tables.append(self.table.copy())
            self.table = False

    # Inheritance
    def handle_starttag(self, tag, attrs):
        """
        Open a table, a line or a cell depending on the tag encountered
        """
        if tag == 'table':
            self.table = []
        elif tag == 'tr':
            # Closing tags may be omitted: end the previous line first
            self.endline()
            self.line = []
        elif tag == 'td':
            # Closing tags may be omitted: end the previous cell first
            self.endcell()
            self.cell = ''
            self.cellx = 1
            self.celly = 1
            for name, value in attrs:
                if name == 'colspan':
                    self.cellx = self._span(value)
                elif name == 'rowspan':
                    self.celly = self._span(value)

    def handle_endtag(self, tag):
        """
        Close the table, line or cell matching the tag encountered
        """
        if tag == 'table':
            self.endtable()
        elif tag == 'tr':
            self.endline()
        elif tag == 'td':
            self.endcell()

    def handle_data(self, data):
        """
        Append text to the current cell, ignore it outside of a cell
        """
        if self.iscell():
            self.cell += data
  121 +
  122 +# TODO Do something that really is OOP or do not...
  123 +
class Event:
    """
    One timetable entry, possibly spanning several consecutive slots. Raw
    values are stored with the ``feed*`` methods, then ``endFeed`` derives
    the displayable fields.
    """

    # Mined data
    shortText = ''
    longText = ''
    date = False
    begSlot = 0
    endSlot = 0

    # Generated data
    shortName = ''
    longName = ''
    location = ''
    startTime = False
    endTime = False
    active = False

    def feedShortText(self, shortText):
        """Store the raw short text of the event."""
        self.shortText = shortText

    def feedLongText(self, longText):
        """Store the raw long text of the event."""
        self.longText = longText

    def feedBegSlot(self, slot):
        """Store the index in SLOTS of the first slot of the event."""
        self.begSlot = slot

    def feedEndSlot(self, slot):
        """Store the index in SLOTS of the last slot of the event."""
        self.endSlot = slot

    def feedDate(self, date):
        """Store the day of the event."""
        self.date = date

    def _slotTime(self, slot, edge):
        """Return the date shifted to the start (0) or end (1) of a slot."""
        hours, minutes = SLOTS[slot][edge]
        return self.date + datetime.timedelta(hours=hours, minutes=minutes)

    def endFeed(self):
        """
        Compute the generated fields (names, activity, times, location)
        from the values fed so far.
        """
        self.shortName = self.shortText
        self.longName = self.longText

        # An event is active as soon as it has a long text
        if self.longName:
            self.active = True

        slotsAreInts = isinstance(self.begSlot, int) and isinstance(self.endSlot, int)
        if self.date and slotsAreInts:
            # From the beginning of the first slot to the end of the last
            self.startTime = self._slotTime(self.begSlot, 0)
            self.endTime = self._slotTime(self.endSlot, 1)

        # The location is the text inside the first pair of parentheses
        # of the long name, the name itself is what comes before
        if self.longName:
            name, paren, rest = self.longName.partition('(')
            if paren:
                inside = rest.partition('(')[0].partition(')')[0]
                self.longName = name.strip()
                self.location = inside.strip()

    def __str__(self):
        if not self.active:
            return 'Inactive event'
        when = ''
        if self.startTime:
            when = str(self.startTime) + ' - ' + str(self.endTime) + ' '
        where = ''
        if self.location:
            where = '@ ' + self.location
        return self.shortName + ' [' + self.longName + '] ' + when + where

    def getEvent(self):
        """Return the event as an icalendar event (CalEvent)."""
        calEvent = CalEvent()
        properties = (
            ('summary', self.shortName),
            ('description', self.longName),
            ('dtstart', self.startTime),
            ('dtend', self.endTime),
            ('location', self.location),
        )
        for key, value in properties:
            calEvent.add(key, value)
        return calEvent
  190 +
  191 +with urllib.request.urlopen(url) as handle:
17 htmlStr = handle.read().decode('iso-8859-15') 192 htmlStr = handle.read().decode('iso-8859-15')
18 193
19 # Read HTML tables 194 # Read HTML tables
@@ -62,7 +237,7 @@ days = dict() @@ -62,7 +237,7 @@ days = dict()
62 # Parsing table 1 237 # Parsing table 1
63 for line in tables[0]: 238 for line in tables[0]:
64 try: 239 try:
65 - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y') 240 + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT)
66 except (ValueError, TypeError): 241 except (ValueError, TypeError):
67 # This is not a date, no data to grab here 242 # This is not a date, no data to grab here
68 continue 243 continue
@@ -74,13 +249,18 @@ for line in tables[0]: @@ -74,13 +249,18 @@ for line in tables[0]:
74 days[date] = [Event() for s in range(len(SLOTS))] 249 days[date] = [Event() for s in range(len(SLOTS))]
75 250
76 for slot in range(len(SLOTS)): 251 for slot in range(len(SLOTS)):
77 - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]) 252 + try:
  253 + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]
  254 + except IndexError:
  255 + # Out of the table: saturday afternoon
  256 + break
  257 + days[date][slot].feedShortText(cell)
78 continue 258 continue
79 259
80 # Parsing table 2 260 # Parsing table 2
81 for line in tables[1]: 261 for line in tables[1]:
82 try: 262 try:
83 - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y') 263 + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT)
84 except ValueError: 264 except ValueError:
85 # This is not a date, no data to grab here 265 # This is not a date, no data to grab here
86 continue 266 continue
@@ -94,12 +274,22 @@ for line in tables[1]: @@ -94,12 +274,22 @@ for line in tables[1]:
94 # Feeding back time and slot to events 274 # Feeding back time and slot to events
95 events = [] 275 events = []
96 for day in days: 276 for day in days:
  277 + prevEvent = False
97 for slot in range(len(SLOTS)): 278 for slot in range(len(SLOTS)):
98 event = days[day][slot] 279 event = days[day][slot]
99 - event.feedDate(day)  
100 - event.feedSlot(slot)  
101 - event.endFeed()  
102 - events.append(event) 280 + if prevEvent:
  281 + if prevEvent.longText == event.longText:
  282 + prevEvent.feedEndSlot(slot)
  283 + else:
  284 + prevEvent.endFeed()
  285 + events.append(prevEvent)
  286 + if not prevEvent or (prevEvent and prevEvent.longText != event.longText):
  287 + event.feedDate(day)
  288 + event.feedBegSlot(slot)
  289 + event.feedEndSlot(slot)
  290 + prevEvent = event
  291 + prevEvent.endFeed()
  292 + events.append(prevEvent)
103 293
104 # Creating calendar 294 # Creating calendar
105 cal = Calendar() 295 cal = Calendar()