Commit 0ff6a1bf0e36e1923008361068c00562e1985de2

Authored by Geoffrey PREUD'HOMME
2 parents c7b78710 7a72c101

Merge branch 'master' into davical

Showing 2 changed files with 200 additions and 172 deletions   Show diff stats
Edt.py deleted
... ... @@ -1,162 +0,0 @@
1   -"""
2   -Class and variables needed for Edt manipulation & parsing
3   -"""
4   -
5   -from html.parser import HTMLParser
6   -from icalendar import Event as CalEvent
7   -import datetime
8   -
9   -DAYS_PER_WEEK = 6
10   -SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))]
11   -
12   -TABLE_1_DATE_X = 1
13   -TABLE_1_FIRST_SLOT_X = 2
14   -
15   -TABLE_2_DATE_X = 0
16   -TABLE_2_FIRST_SLOT_X = 1
17   -
18   -class TableHTMLParser(HTMLParser):
19   - tables = [] # Tables
20   - table = False # Current table content
21   - line = False # Current line content
22   - cell = False # Current cell content
23   - cellx = 1
24   - celly = 1
25   -
26   - # Logic
27   - def iscell(self):
28   - """
29   - Return if we are currently in a cell
30   - """
31   - return isinstance(self.cell, str)
32   -
33   - def isline(self):
34   - """
35   - Return if we are currently in a line
36   - """
37   - return isinstance(self.line, list)
38   -
39   - def istable(self):
40   - """
41   - Return if we are currently in a table
42   - """
43   - return isinstance(self.table, list)
44   -
45   - # Actions
46   - def endcell(self):
47   - if self.iscell():
48   - self.line.append((self.cell.strip(), self.cellx, self.celly))
49   - self.cell = False
50   -
51   - def endline(self):
52   - self.endcell()
53   - if self.isline():
54   - self.table.append(self.line.copy())
55   - self.line = False
56   -
57   - def endtable(self):
58   - self.endline()
59   - if self.istable():
60   - self.tables.append(self.table.copy())
61   - self.table = False
62   -
63   - # Inheritance
64   - def handle_starttag(self, tag, attrs):
65   - #print("Encountered a start tag:", tag)
66   - if tag == 'table':
67   - self.table = []
68   - elif tag == 'tr':
69   - self.endline()
70   - self.line = []
71   - elif tag == 'td':
72   - self.endcell()
73   - self.cell = ''
74   - self.cellx = 1
75   - self.celly = 1
76   - for attr in attrs:
77   - if attr[0] == 'colspan':
78   - self.cellx = int(attr[1])
79   - elif attr[0] == 'rowspan':
80   - self.celly = int(attr[1])
81   -
82   - def handle_endtag(self, tag):
83   - #print("Encountered an end tag :", tag)
84   - if tag == 'table':
85   - self.endtable()
86   - elif tag == 'tr':
87   - self.endline()
88   - elif tag == 'td':
89   - self.endcell()
90   -
91   - def handle_data(self, data):
92   - #print("Encountered some data :", data)
93   - if self.iscell():
94   - self.cell += data
95   -
96   -# TODO Allow class customisation
97   -
98   -class Event:
99   - # Mined data
100   - shortText = ''
101   - longText = ''
102   - date = False
103   - slot = 0
104   -
105   - # Generated data
106   - shortName = ''
107   - longName = ''
108   - location = ''
109   - startTime = False
110   - endTime = False
111   - active = False
112   -
113   - def feedShortText(self, shortText):
114   - self.shortText = shortText
115   -
116   - def feedLongText(self, longText):
117   - self.longText = longText
118   -
119   - def feedSlot(self, slot):
120   - self.slot = slot
121   -
122   - def feedDate(self, date):
123   - self.date = date
124   -
125   - def endFeed(self):
126   - self.shortName = self.shortText
127   - self.longName = self.longText
128   -
129   - if self.longName:
130   - self.active = True
131   -
132   - if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC':
133   - self.active = False
134   -
135   - if self.date and isinstance(self.slot, int):
136   - h, m = SLOTS[self.slot][0]
137   - self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
138   - h, m = SLOTS[self.slot][1]
139   - self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
140   -
141   - if self.longName:
142   - e = self.longName.split('(')
143   - if len(e) >= 2:
144   - f = e[1].split(')')
145   - self.longName = e[0].strip()
146   - self.location = f[0].strip()
147   -
148   -
149   - def __str__(self):
150   - if self.active:
151   - return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')
152   - else:
153   - return 'Inactive event'
154   -
155   - def getEvent(self):
156   - e = CalEvent()
157   - e.add('summary', self.shortName)
158   - e.add('description', self.longName)
159   - e.add('dtstart', self.startTime)
160   - e.add('dtend', self.endTime)
161   - e.add('location', self.location)
162   - return e
parse.py
... ... @@ -5,15 +5,190 @@ import argparse
5 5 import datetime
6 6 import urllib.request
7 7 from icalendar import Calendar
8   -from Edt import *
  8 +from html.parser import HTMLParser
  9 +from icalendar import Calendar, Event as CalEvent
9 10  
10 11 # Parse command line arguments
11   -parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA3 en ICS')
  12 +parser = argparse.ArgumentParser(description='Convertit l\'emploi du temps IMA en ICS')
  13 +parser.add_argument('annee', metavar='ANNEE', type=int, help='année (3 ou 4)')
12 14 parser.add_argument('edt', metavar='EDT', type=str, help='la page pointant vers l\'emploi du temps concerné')
13 15 parser.add_argument('-o', '--output', dest='file', type=str, default='-', help='fichier de sortie, - pour stdout')
14 16 args = parser.parse_args()
15 17  
16   -with urllib.request.urlopen('http://dptima3.polytech-lille.net/' + args.edt + '.html') as handle:
  18 +if args.annee == 3:
  19 + url = 'http://dptima3.polytech-lille.net/' + args.edt + '.html'
  20 + SLOTS = [(( 8, 0), (10, 0)),
  21 + ((10, 20), (12, 20)),
  22 + ((13, 50), (15, 50)),
  23 + ((16, 10), (18, 10))]
  24 + DATE_FORMAT = '%d/%m/%Y'
  25 +elif args.annee == 4:
  26 + url = 'http://www.lifl.fr/~forget/EDT/' + args.edt + '.html'
  27 + SLOTS = [(( 8, 0), ( 9, 0)),
  28 + (( 9, 10), (10, 10)),
  29 + ((10, 20), (11, 20)),
  30 + ((11, 30), (12, 30)),
  31 + ((13, 50), (14, 50)),
  32 + ((15, 00), (16, 00)),
  33 + ((16, 10), (17, 10)),
  34 + ((17, 20), (18, 20))]
  35 + DATE_FORMAT = '%d/%m/%y'
  36 +else:
  37 + raise ValueError('Année inconnue : ' + annee)
  38 +
  39 +DAYS_PER_WEEK = 6
  40 +
  41 +TABLE_1_DATE_X = 1
  42 +TABLE_1_FIRST_SLOT_X = 2
  43 +
  44 +TABLE_2_DATE_X = 0
  45 +TABLE_2_FIRST_SLOT_X = 1
  46 +
  47 +class TableHTMLParser(HTMLParser):
  48 + tables = [] # Tables
  49 + table = False # Current table content
  50 + line = False # Current line content
  51 + cell = False # Current cell content
  52 + cellx = 1
  53 + celly = 1
  54 +
  55 + # Logic
  56 + def iscell(self):
  57 + """
  58 + Return if we are currently in a cell
  59 + """
  60 + return isinstance(self.cell, str)
  61 +
  62 + def isline(self):
  63 + """
  64 + Return if we are currently in a line
  65 + """
  66 + return isinstance(self.line, list)
  67 +
  68 + def istable(self):
  69 + """
  70 + Return if we are currently in a table
  71 + """
  72 + return isinstance(self.table, list)
  73 +
  74 + # Actions
  75 + def endcell(self):
  76 + if self.iscell():
  77 + self.line.append((self.cell.strip(), self.cellx, self.celly))
  78 + self.cell = False
  79 +
  80 + def endline(self):
  81 + self.endcell()
  82 + if self.isline():
  83 + self.table.append(self.line.copy())
  84 + self.line = False
  85 +
  86 + def endtable(self):
  87 + self.endline()
  88 + if self.istable():
  89 + self.tables.append(self.table.copy())
  90 + self.table = False
  91 +
  92 + # Inheritance
  93 + def handle_starttag(self, tag, attrs):
  94 + if tag == 'table':
  95 + self.table = []
  96 + elif tag == 'tr':
  97 + self.endline()
  98 + self.line = []
  99 + elif tag == 'td':
  100 + self.endcell()
  101 + self.cell = ''
  102 + self.cellx = 1
  103 + self.celly = 1
  104 + for attr in attrs:
  105 + if attr[0] == 'colspan':
  106 + self.cellx = int(attr[1])
  107 + elif attr[0] == 'rowspan':
  108 + self.celly = int(attr[1])
  109 +
  110 + def handle_endtag(self, tag):
  111 + if tag == 'table':
  112 + self.endtable()
  113 + elif tag == 'tr':
  114 + self.endline()
  115 + elif tag == 'td':
  116 + self.endcell()
  117 +
  118 + def handle_data(self, data):
  119 + if self.iscell():
  120 + self.cell += data
  121 +
  122 +# TODO Do something that really is OOP or do not...
  123 +
  124 +class Event:
  125 + # Mined data
  126 + shortText = ''
  127 + longText = ''
  128 + date = False
  129 + begSlot = 0
  130 + endSlot = 0
  131 +
  132 + # Generated data
  133 + shortName = ''
  134 + longName = ''
  135 + location = ''
  136 + startTime = False
  137 + endTime = False
  138 + active = False
  139 +
  140 + def feedShortText(self, shortText):
  141 + self.shortText = shortText
  142 +
  143 + def feedLongText(self, longText):
  144 + self.longText = longText
  145 +
  146 + def feedBegSlot(self, slot):
  147 + self.begSlot = slot
  148 +
  149 + def feedEndSlot(self, slot):
  150 + self.endSlot = slot
  151 +
  152 + def feedDate(self, date):
  153 + self.date = date
  154 +
  155 + def endFeed(self):
  156 + self.shortName = self.shortText
  157 + self.longName = self.longText
  158 +
  159 + if self.longName:
  160 + self.active = True
  161 +
  162 + if self.date and isinstance(self.begSlot, int) and isinstance(self.endSlot, int):
  163 + h, m = SLOTS[self.begSlot][0]
  164 + self.startTime = self.date + datetime.timedelta(hours=h, minutes=m)
  165 + h, m = SLOTS[self.endSlot][1]
  166 + self.endTime = self.date + datetime.timedelta(hours=h, minutes=m)
  167 +
  168 + if self.longName:
  169 + e = self.longName.split('(')
  170 + if len(e) >= 2:
  171 + f = e[1].split(')')
  172 + self.longName = e[0].strip()
  173 + self.location = f[0].strip()
  174 +
  175 +
  176 + def __str__(self):
  177 + if self.active:
  178 + return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '')
  179 + else:
  180 + return 'Inactive event'
  181 +
  182 + def getEvent(self):
  183 + e = CalEvent()
  184 + e.add('summary', self.shortName)
  185 + e.add('description', self.longName)
  186 + e.add('dtstart', self.startTime)
  187 + e.add('dtend', self.endTime)
  188 + e.add('location', self.location)
  189 + return e
  190 +
  191 +with urllib.request.urlopen(url) as handle:
17 192 htmlStr = handle.read().decode('iso-8859-15')
18 193  
19 194 # Read HTML tables
... ... @@ -62,7 +237,7 @@ days = dict()
62 237 # Parsing table 1
63 238 for line in tables[0]:
64 239 try:
65   - day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y')
  240 + day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], DATE_FORMAT)
66 241 except (ValueError, TypeError):
67 242 # This is not a date, no data to grab here
68 243 continue
... ... @@ -74,13 +249,18 @@ for line in tables[0]:
74 249 days[date] = [Event() for s in range(len(SLOTS))]
75 250  
76 251 for slot in range(len(SLOTS)):
77   - days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X])
  252 + try:
  253 + cell = line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X]
  254 + except IndexError:
  255 + # Out of the table: saturday afternoon
  256 + break
  257 + days[date][slot].feedShortText(cell)
78 258 continue
79 259  
80 260 # Parsing table 2
81 261 for line in tables[1]:
82 262 try:
83   - date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y')
  263 + date = datetime.datetime.strptime(line[TABLE_2_DATE_X], DATE_FORMAT)
84 264 except ValueError:
85 265 # This is not a date, no data to grab here
86 266 continue
... ... @@ -94,12 +274,22 @@ for line in tables[1]:
94 274 # Feeding back time and slot to events
95 275 events = []
96 276 for day in days:
  277 + prevEvent = False
97 278 for slot in range(len(SLOTS)):
98 279 event = days[day][slot]
99   - event.feedDate(day)
100   - event.feedSlot(slot)
101   - event.endFeed()
102   - events.append(event)
  280 + if prevEvent:
  281 + if prevEvent.longText == event.longText:
  282 + prevEvent.feedEndSlot(slot)
  283 + else:
  284 + prevEvent.endFeed()
  285 + events.append(prevEvent)
  286 + if not prevEvent or (prevEvent and prevEvent.longText != event.longText):
  287 + event.feedDate(day)
  288 + event.feedBegSlot(slot)
  289 + event.feedEndSlot(slot)
  290 + prevEvent = event
  291 + prevEvent.endFeed()
  292 + events.append(prevEvent)
103 293  
104 294 # Creating calendar
105 295 cal = Calendar()
... ...