Commit 3e3f72926f234431337f8812ec3a5728ce026436

Authored by Geoffrey PREUD'HOMME
1 parent 82e73c44

IMA 4 support

Showing 2 changed files with 186 additions and 168 deletions   Show diff stats
Edt.py deleted
@@ -1,162 +0,0 @@ @@ -1,162 +0,0 @@
1 -"""  
2 -Class and variables needed for Edt manipulation & parsing  
3 -"""  
4 -  
5 -from html.parser import HTMLParser  
6 -from icalendar import Event as CalEvent  
7 -import datetime  
8 -  
9 -DAYS_PER_WEEK = 6  
10 -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))]  
11 -  
12 -TABLE_1_DATE_X = 1  
13 -TABLE_1_FIRST_SLOT_X = 2  
14 -  
15 -TABLE_2_DATE_X = 0  
16 -TABLE_2_FIRST_SLOT_X = 1  
17 -  
18 -class TableHTMLParser(HTMLParser):  
19 - tables = [] # Tables  
20 - table = False # Current table content  
21 - line = False # Current line content  
22 - cell = False # Current cell content  
23 - cellx = 1  
24 - celly = 1  
25 -  
26 - # Logic  
27 - def iscell(self):  
28 - """  
29 - Return if we are currently in a cell  
30 - """  
31 - return isinstance(self.cell, str)  
32 -  
33 - def isline(self):  
34 - """  
35 - Return if we are currently in a line  
36 - """  
37 - return isinstance(self.line, list)  
38 -  
39 - def istable(self):  
40 - """  
41 - Return if we are currently in a table  
42 - """  
43 - return isinstance(self.table, list)  
44 -  
45 - # Actions  
46 - def endcell(self):  
47 - if self.iscell():  
48 - self.line.append((self.cell.strip(), self.cellx, self.celly))  
49 - self.cell = False  
50 -  
51 - def endline(self):  
52 - self.endcell()  
53 - if self.isline():  
54 - self.table.append(self.line.copy())  
55 - self.line = False  
56 -  
57 - def endtable(self):  
58 - self.endline()  
59 - if self.istable():  
60 - self.tables.append(self.table.copy())  
61 - self.table = False  
62 -  
63 - # Inheritance  
64 - def handle_starttag(self, tag, attrs):  
65 - #print("Encountered a start tag:", tag)  
66 - if tag == 'table':  
67 - self.table = []  
68 - elif tag == 'tr':  
69 - self.endline()  
70 - self.line = []  
71 - elif tag == 'td':  
72 - self.endcell()  
73 - self.cell = ''  
74 - self.cellx = 1  
75 - self.celly = 1  
76 - for attr in attrs:  
77 - if attr[0] == 'colspan':  
78 - self.cellx = int(attr[1])  
79 - elif attr[0] == 'rowspan':  
80 - self.celly = int(attr[1])  
81 -  
82 - def handle_endtag(self, tag):  
83 - #print("Encountered an end tag :", tag)  
84 - if tag == 'table':  
85 - self.endtable()  
86 - elif tag == 'tr':  
87 - self.endline()  
88 - elif tag == 'td':  
89 - self.endcell()  
90 -  
91 - def handle_data(self, data):  
92 - #print("Encountered some data :", data)  
93 - if self.iscell():  
94 - self.cell += data  
95 -  
96 -# TODO Allow class customisation  
97 -  
98 -class Event:  
99 - # Mined data  
100 - shortText = ''  
101 - longText = ''  
102 - date = False  
103 - slot = 0  
104 -  
105 - # Generated data  
106 - shortName = ''  
107 - longName = ''  
108 - location = ''  
109 - startTime = False  
110 - endTime = False  
111 - active = False  
112 -  
113 - def feedShortText(self, shortText):  
114 - self.shortText = shortText  
115 -  
116 - def feedLongText(self, longText):  
117 - self.longText = longText  
118 -  
119 - def feedSlot(self, slot):  
120 - self.slot = slot  
121 -  
122 - def feedDate(self, date):  
123 - self.date = date  
124 -  
125 - def endFeed(self):  
126 - self.shortName = self.shortText  
127 - self.longName = self.longText  
128 -  
129 - if self.longName:  
130 - self.active = True  
131 -  
132 - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC':  
133 - self.active = False  
134 -  
135 - if self.date and isinstance(self.slot, int):  
136 - h, m = SLOTS[self.slot][0]  
137 - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)  
138 - h, m = SLOTS[self.slot][1]  
139 - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)  
140 -  
141 - if self.longName:  
142 - e = self.longName.split('(')  
143 - if len(e) >= 2:  
144 - f = e[1].split(')')  
145 - self.longName = e[0].strip()  
146 - self.location = f[0].strip()  
147 -  
148 -  
149 - def __str__(self):  
150 - if self.active:  
151 - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')  
152 - else:  
153 - return 'Inactive event'  
154 -  
155 - def getEvent(self):  
156 - e = CalEvent()  
157 - e.add('summary', self.shortName)  
158 - e.add('description', self.longName)  
159 - e.add('dtstart', self.startTime)  
160 - e.add('dtend', self.endTime)  
161 - e.add('location', self.location)  
162 - return e  
@@ -5,15 +5,189 @@ import argparse @@ -5,15 +5,189 @@ import argparse
5 import datetime 5 import datetime
6 import urllib.request 6 import urllib.request
7 from icalendar import Calendar 7 from icalendar import Calendar
8 -from Edt import * 8 +from html.parser import HTMLParser
  9 +from icalendar import Calendar, Event as CalEvent
9 10
10 # Parse command line arguments 11 # Parse command line arguments
11 -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS') 12 +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS')
  13 +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)')
12 parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné') 14 parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné')
13 parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout') 15 parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout')
14 args = parser.parse_args() 16 args = parser.parse_args()
15 17
16 -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle: 18 +if args.annee == 3:
  19 + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
  20 + SLOTS = [(( 8, 0), (10, 0)),
  21 + ((10, 20), (12, 20)),
  22 + ((13, 50), (15, 50)),
  23 + ((16, 10), (18, 10))]
  24 + DATE_FORMAT = '%d/%m/%Y'
  25 +elif args.annee == 4:
  26 + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html'
  27 + SLOTS = [(( 8, 0), ( 9, 0)),
  28 + (( 9, 10), (10, 10)),
  29 + ((10, 20), (11, 20)),
  30 + ((11, 30), (12, 30)),
  31 + ((13, 50), (14, 50)),
  32 + ((15, 00), (16, 00)),
  33 + ((16, 10), (17, 10)),
  34 + ((17, 20), (18, 20))]
  35 + DATE_FORMAT = '%d/%m/%y'
  36 +else:
  37 + raise ValueError('Année inconnue : ' + annee)
  38 +
  39 +DAYS_PER_WEEK = 6
  40 +
  41 +TABLE_1_DATE_X = 1
  42 +TABLE_1_FIRST_SLOT_X = 2
  43 +
  44 +TABLE_2_DATE_X = 0
  45 +TABLE_2_FIRST_SLOT_X = 1
  46 +
  47 +class TableHTMLParser(HTMLParser):
  48 + tables = [] # Tables
  49 + table = False # Current table content
  50 + line = False # Current line content
  51 + cell = False # Current cell content
  52 + cellx = 1
  53 + celly = 1
  54 +
  55 + # Logic
  56 + def iscell(self):
  57 + """
  58 + Return if we are currently in a cell
  59 + """
  60 + return isinstance(self.cell, str)
  61 +
  62 + def isline(self):
  63 + """
  64 + Return if we are currently in a line
  65 + """
  66 + return isinstance(self.line, list)
  67 +
  68 + def istable(self):
  69 + """
  70 + Return if we are currently in a table
  71 + """
  72 + return isinstance(self.table, list)
  73 +
  74 + # Actions
  75 + def endcell(self):
  76 + if self.iscell():
  77 + self.line.append((self.cell.strip(), self.cellx, self.celly))
  78 + self.cell = False
  79 +
  80 + def endline(self):
  81 + self.endcell()
  82 + if self.isline():
  83 + self.table.append(self.line.copy())
  84 + self.line = False
  85 +
  86 + def endtable(self):
  87 + self.endline()
  88 + if self.istable():
  89 + self.tables.append(self.table.copy())
  90 + self.table = False
  91 +
  92 + # Inheritance
  93 + def handle_starttag(self, tag, attrs):
  94 + #print("Encountered a start tag:", tag)
  95 + if tag == 'table':
  96 + self.table = []
  97 + elif tag == 'tr':
  98 + self.endline()
  99 + self.line = []
  100 + elif tag == 'td':
  101 + self.endcell()
  102 + self.cell = ''
  103 + self.cellx = 1
  104 + self.celly = 1
  105 + for attr in attrs:
  106 + if attr[0] == 'colspan':
  107 + self.cellx = int(attr[1])
  108 + elif attr[0] == 'rowspan':
  109 + self.celly = int(attr[1])
  110 +
  111 + def handle_endtag(self, tag):
  112 + #print("Encountered an end tag :", tag)
  113 + if tag == 'table':
  114 + self.endtable()
  115 + elif tag == 'tr':
  116 + self.endline()
  117 + elif tag == 'td':
  118 + self.endcell()
  119 +
  120 + def handle_data(self, data):
  121 + #print("Encountered some data :", data)
  122 + if self.iscell():
  123 + self.cell += data
  124 +
  125 +# TODO Do something that really is OOP or do not...
  126 +
  127 +class Event:
  128 + # Mined data
  129 + shortText = ''
  130 + longText = ''
  131 + date = False
  132 + slot = 0
  133 +
  134 + # Generated data
  135 + shortName = ''
  136 + longName = ''
  137 + location = ''
  138 + startTime = False
  139 + endTime = False
  140 + active = False
  141 +
  142 + def feedShortText(self, shortText):
  143 + self.shortText = shortText
  144 +
  145 + def feedLongText(self, longText):
  146 + self.longText = longText
  147 +
  148 + def feedSlot(self, slot):
  149 + self.slot = slot
  150 +
  151 + def feedDate(self, date):
  152 + self.date = date
  153 +
  154 + def endFeed(self):
  155 + self.shortName = self.shortText
  156 + self.longName = self.longText
  157 +
  158 + if self.longName:
  159 + self.active = True
  160 +
  161 + if self.date and isinstance(self.slot, int):
  162 + h, m = SLOTS[self.slot][0]
  163 + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
  164 + h, m = SLOTS[self.slot][1]
  165 + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
  166 +
  167 + if self.longName:
  168 + e = self.longName.split('(')
  169 + if len(e) >= 2:
  170 + f = e[1].split(')')
  171 + self.longName = e[0].strip()
  172 + self.location = f[0].strip()
  173 +
  174 +
  175 + def __str__(self):
  176 + if self.active:
  177 + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')
  178 + else:
  179 + return 'Inactive event'
  180 +
  181 + def getEvent(self):
  182 + e = CalEvent()
  183 + e.add('summary', self.shortName)
  184 + e.add('description', self.longName)
  185 + e.add('dtstart', self.startTime)
  186 + e.add('dtend', self.endTime)
  187 + e.add('location', self.location)
  188 + return e
  189 +
  190 +with urllib.request.urlopen(url) as handle:
17 htmlStr = handle.read().decode('iso-8859-15') 191 htmlStr = handle.read().decode('iso-8859-15')
18 192
19 # Read HTML tables 193 # Read HTML tables
@@ -62,11 +236,12 @@ days = dict() @@ -62,11 +236,12 @@ days = dict()
62 # Parsing table 1 236 # Parsing table 1
63 for line in tables[0]: 237 for line in tables[0]:
64 try: 238 try:
65 - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y') 239 + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT)
66 except (ValueError, TypeError): 240 except (ValueError, TypeError):
67 # This is not a date, no data to grab here 241 # This is not a date, no data to grab here
68 continue 242 continue
69 243
  244 + print(line)
70 for day in range(DAYS_PER_WEEK): 245 for day in range(DAYS_PER_WEEK):
71 date = day1date + datetime.timedelta(days=day) 246 date = day1date + datetime.timedelta(days=day)
72 247
@@ -74,13 +249,18 @@ for line in tables[0]: @@ -74,13 +249,18 @@ for line in tables[0]:
74 days[date] = [Event() for s in range(len(SLOTS))] 249 days[date] = [Event() for s in range(len(SLOTS))]
75 250
76 for slot in range(len(SLOTS)): 251 for slot in range(len(SLOTS)):
77 - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]) 252 + try:
  253 + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]
  254 + except IndexError:
  255 + # Out of the table: saturday afternoon
  256 + break
  257 + days[date][slot].feedShortText(cell)
78 continue 258 continue
79 259
80 # Parsing table 2 260 # Parsing table 2
81 for line in tables[1]: 261 for line in tables[1]:
82 try: 262 try:
83 - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y') 263 + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT)
84 except ValueError: 264 except ValueError:
85 # This is not a date, no data to grab here 265 # This is not a date, no data to grab here
86 continue 266 continue