Commit 3e3f72926f234431337f8812ec3a5728ce026436

Authored by Geoffrey PREUD'HOMME
1 parent 82e73c44

IMA 4 support

Showing 2 changed files with 186 additions and 168 deletions   Show diff stats
Edt.py deleted
... ... @@ -1,162 +0,0 @@
1   -"""
2   -Class and variables needed for Edt manipulation & parsing
3   -"""
4   -
5   -from html.parser import HTMLParser
6   -from icalendar import Event as CalEvent
7   -import datetime
8   -
9   -DAYS_PER_WEEK = 6
10   -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))]
11   -
12   -TABLE_1_DATE_X = 1
13   -TABLE_1_FIRST_SLOT_X = 2
14   -
15   -TABLE_2_DATE_X = 0
16   -TABLE_2_FIRST_SLOT_X = 1
17   -
18   -class TableHTMLParser(HTMLParser):
19   - tables = [] # Tables
20   - table = False # Current table content
21   - line = False # Current line content
22   - cell = False # Current cell content
23   - cellx = 1
24   - celly = 1
25   -
26   - # Logic
27   - def iscell(self):
28   - """
29   - Return if we are currently in a cell
30   - """
31   - return isinstance(self.cell, str)
32   -
33   - def isline(self):
34   - """
35   - Return if we are currently in a line
36   - """
37   - return isinstance(self.line, list)
38   -
39   - def istable(self):
40   - """
41   - Return if we are currently in a table
42   - """
43   - return isinstance(self.table, list)
44   -
45   - # Actions
46   - def endcell(self):
47   - if self.iscell():
48   - self.line.append((self.cell.strip(), self.cellx, self.celly))
49   - self.cell = False
50   -
51   - def endline(self):
52   - self.endcell()
53   - if self.isline():
54   - self.table.append(self.line.copy())
55   - self.line = False
56   -
57   - def endtable(self):
58   - self.endline()
59   - if self.istable():
60   - self.tables.append(self.table.copy())
61   - self.table = False
62   -
63   - # Inheritance
64   - def handle_starttag(self, tag, attrs):
65   - #print("Encountered a start tag:", tag)
66   - if tag == 'table':
67   - self.table = []
68   - elif tag == 'tr':
69   - self.endline()
70   - self.line = []
71   - elif tag == 'td':
72   - self.endcell()
73   - self.cell = ''
74   - self.cellx = 1
75   - self.celly = 1
76   - for attr in attrs:
77   - if attr[0] == 'colspan':
78   - self.cellx = int(attr[1])
79   - elif attr[0] == 'rowspan':
80   - self.celly = int(attr[1])
81   -
82   - def handle_endtag(self, tag):
83   - #print("Encountered an end tag :", tag)
84   - if tag == 'table':
85   - self.endtable()
86   - elif tag == 'tr':
87   - self.endline()
88   - elif tag == 'td':
89   - self.endcell()
90   -
91   - def handle_data(self, data):
92   - #print("Encountered some data :", data)
93   - if self.iscell():
94   - self.cell += data
95   -
96   -# TODO Allow class customisation
97   -
98   -class Event:
99   - # Mined data
100   - shortText = ''
101   - longText = ''
102   - date = False
103   - slot = 0
104   -
105   - # Generated data
106   - shortName = ''
107   - longName = ''
108   - location = ''
109   - startTime = False
110   - endTime = False
111   - active = False
112   -
113   - def feedShortText(self, shortText):
114   - self.shortText = shortText
115   -
116   - def feedLongText(self, longText):
117   - self.longText = longText
118   -
119   - def feedSlot(self, slot):
120   - self.slot = slot
121   -
122   - def feedDate(self, date):
123   - self.date = date
124   -
125   - def endFeed(self):
126   - self.shortName = self.shortText
127   - self.longName = self.longText
128   -
129   - if self.longName:
130   - self.active = True
131   -
132   - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC':
133   - self.active = False
134   -
135   - if self.date and isinstance(self.slot, int):
136   - h, m = SLOTS[self.slot][0]
137   - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
138   - h, m = SLOTS[self.slot][1]
139   - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
140   -
141   - if self.longName:
142   - e = self.longName.split('(')
143   - if len(e) >= 2:
144   - f = e[1].split(')')
145   - self.longName = e[0].strip()
146   - self.location = f[0].strip()
147   -
148   -
149   - def __str__(self):
150   - if self.active:
151   - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')
152   - else:
153   - return 'Inactive event'
154   -
155   - def getEvent(self):
156   - e = CalEvent()
157   - e.add('summary', self.shortName)
158   - e.add('description', self.longName)
159   - e.add('dtstart', self.startTime)
160   - e.add('dtend', self.endTime)
161   - e.add('location', self.location)
162   - return e
parse.py
... ... @@ -5,15 +5,189 @@ import argparse
5 5 import datetime
6 6 import urllib.request
7 7 from icalendar import Calendar
8   -from Edt import *
  8 +from html.parser import HTMLParser
  9 +from icalendar import Calendar, Event as CalEvent
9 10  
10 11 # Parse command line arguments
11   -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS')
  12 +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS')
  12 +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)')
12 14 parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné')
13 15 parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout')
14 16 args = parser.parse_args()
15 17  
16   -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle:
  18 +if args.annee == 3:
  19 + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
  20 + SLOTS = [(( 8, 0), (10, 0)),
  21 + ((10, 20), (12, 20)),
  22 + ((13, 50), (15, 50)),
  23 + ((16, 10), (18, 10))]
  24 + DATE_FORMAT = '%d/%m/%Y'
  25 +elif args.annee == 4:
  26 + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html'
  27 + SLOTS = [(( 8, 0), ( 9, 0)),
  28 + (( 9, 10), (10, 10)),
  29 + ((10, 20), (11, 20)),
  30 + ((11, 30), (12, 30)),
  31 + ((13, 50), (14, 50)),
  32 + ((15, 00), (16, 00)),
  33 + ((16, 10), (17, 10)),
  34 + ((17, 20), (18, 20))]
  35 + DATE_FORMAT = '%d/%m/%y'
  36 +else:
  37 + raise ValueError('Année inconnue : ' + annee)
  38 +
  39 +DAYS_PER_WEEK = 6
  40 +
  41 +TABLE_1_DATE_X = 1
  42 +TABLE_1_FIRST_SLOT_X = 2
  43 +
  44 +TABLE_2_DATE_X = 0
  45 +TABLE_2_FIRST_SLOT_X = 1
  46 +
  47 +class TableHTMLParser(HTMLParser):
  48 + tables = [] # Tables
  49 + table = False # Current table content
  50 + line = False # Current line content
  51 + cell = False # Current cell content
  52 + cellx = 1
  53 + celly = 1
  54 +
  55 + # Logic
  56 + def iscell(self):
  57 + """
  58 + Return if we are currently in a cell
  59 + """
  60 + return isinstance(self.cell, str)
  61 +
  62 + def isline(self):
  63 + """
  64 + Return if we are currently in a line
  65 + """
  66 + return isinstance(self.line, list)
  67 +
  68 + def istable(self):
  69 + """
  70 + Return if we are currently in a table
  71 + """
  72 + return isinstance(self.table, list)
  73 +
  74 + # Actions
  75 + def endcell(self):
  76 + if self.iscell():
  77 + self.line.append((self.cell.strip(), self.cellx, self.celly))
  78 + self.cell = False
  79 +
  80 + def endline(self):
  81 + self.endcell()
  82 + if self.isline():
  83 + self.table.append(self.line.copy())
  84 + self.line = False
  85 +
  86 + def endtable(self):
  87 + self.endline()
  88 + if self.istable():
  89 + self.tables.append(self.table.copy())
  90 + self.table = False
  91 +
  92 + # Inheritance
  93 + def handle_starttag(self, tag, attrs):
  94 + #print("Encountered a start tag:", tag)
  95 + if tag == 'table':
  96 + self.table = []
  97 + elif tag == 'tr':
  98 + self.endline()
  99 + self.line = []
  100 + elif tag == 'td':
  101 + self.endcell()
  102 + self.cell = ''
  103 + self.cellx = 1
  104 + self.celly = 1
  105 + for attr in attrs:
  106 + if attr[0] == 'colspan':
  107 + self.cellx = int(attr[1])
  108 + elif attr[0] == 'rowspan':
  109 + self.celly = int(attr[1])
  110 +
  111 + def handle_endtag(self, tag):
  112 + #print("Encountered an end tag :", tag)
  113 + if tag == 'table':
  114 + self.endtable()
  115 + elif tag == 'tr':
  116 + self.endline()
  117 + elif tag == 'td':
  118 + self.endcell()
  119 +
  120 + def handle_data(self, data):
  121 + #print("Encountered some data :", data)
  122 + if self.iscell():
  123 + self.cell += data
  124 +
  125 +# TODO Do something that really is OOP or do not...
  126 +
  127 +class Event:
  128 + # Mined data
  129 + shortText = ''
  130 + longText = ''
  131 + date = False
  132 + slot = 0
  133 +
  134 + # Generated data
  135 + shortName = ''
  136 + longName = ''
  137 + location = ''
  138 + startTime = False
  139 + endTime = False
  140 + active = False
  141 +
  142 + def feedShortText(self, shortText):
  143 + self.shortText = shortText
  144 +
  145 + def feedLongText(self, longText):
  146 + self.longText = longText
  147 +
  148 + def feedSlot(self, slot):
  149 + self.slot = slot
  150 +
  151 + def feedDate(self, date):
  152 + self.date = date
  153 +
  154 + def endFeed(self):
  155 + self.shortName = self.shortText
  156 + self.longName = self.longText
  157 +
  158 + if self.longName:
  159 + self.active = True
  160 +
  161 + if self.date and isinstance(self.slot, int):
  162 + h, m = SLOTS[self.slot][0]
  163 + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
  164 + h, m = SLOTS[self.slot][1]
  165 + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
  166 +
  167 + if self.longName:
  168 + e = self.longName.split('(')
  169 + if len(e) >= 2:
  170 + f = e[1].split(')')
  171 + self.longName = e[0].strip()
  172 + self.location = f[0].strip()
  173 +
  174 +
  175 + def __str__(self):
  176 + if self.active:
  177 + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')
  178 + else:
  179 + return 'Inactive event'
  180 +
  181 + def getEvent(self):
  182 + e = CalEvent()
  183 + e.add('summary', self.shortName)
  184 + e.add('description', self.longName)
  185 + e.add('dtstart', self.startTime)
  186 + e.add('dtend', self.endTime)
  187 + e.add('location', self.location)
  188 + return e
  189 +
  190 +with urllib.request.urlopen(url) as handle:
17 191 htmlStr = handle.read().decode('iso-8859-15')
18 192  
19 193 # Read HTML tables
... ... @@ -62,11 +236,12 @@ days = dict()
62 236 # Parsing table 1
63 237 for line in tables[0]:
64 238 try:
65   - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y')
  239 + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT)
66 240 except (ValueError, TypeError):
67 241 # This is not a date, no data to grab here
68 242 continue
69 243  
  244 + print(line)
70 245 for day in range(DAYS_PER_WEEK):
71 246 date = day1date + datetime.timedelta(days=day)
72 247  
... ... @@ -74,13 +249,18 @@ for line in tables[0]:
74 249 days[date] = [Event() for s in range(len(SLOTS))]
75 250  
76 251 for slot in range(len(SLOTS)):
77   - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X])
  252 + try:
  253 + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]
  254 + except IndexError:
  255 + # Out of the table: saturday afternoon
  256 + break
  257 + days[date][slot].feedShortText(cell)
78 258 continue
79 259  
80 260 # Parsing table 2
81 261 for line in tables[1]:
82 262 try:
83   - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y')
  263 + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT)
84 264 except ValueError:
85 265 # This is not a date, no data to grab here
86 266 continue
... ...