""" Class and variables needed for Edt manipulation & parsing """ from html.parser import HTMLParser from icalendar import Event as CalEvent import datetime DAYS_PER_WEEK = 6 SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))] TABLE_1_DATE_X = 1 TABLE_1_FIRST_SLOT_X = 2 TABLE_2_DATE_X = 0 TABLE_2_FIRST_SLOT_X = 1 class TableHTMLParser(HTMLParser): tables = [] # Tables table = False # Current table content line = False # Current line content cell = False # Current cell content cellx = 1 celly = 1 # Logic def iscell(self): """ Return if we are currently in a cell """ return isinstance(self.cell, str) def isline(self): """ Return if we are currently in a line """ return isinstance(self.line, list) def istable(self): """ Return if we are currently in a table """ return isinstance(self.table, list) # Actions def endcell(self): if self.iscell(): self.line.append((self.cell.strip(), self.cellx, self.celly)) self.cell = False def endline(self): self.endcell() if self.isline(): self.table.append(self.line.copy()) self.line = False def endtable(self): self.endline() if self.istable(): self.tables.append(self.table.copy()) self.table = False # Inheritance def handle_starttag(self, tag, attrs): #print("Encountered a start tag:", tag) if tag == 'table': self.table = [] elif tag == 'tr': self.endline() self.line = [] elif tag == 'td': self.endcell() self.cell = '' self.cellx = 1 self.celly = 1 for attr in attrs: if attr[0] == 'colspan': self.cellx = int(attr[1]) elif attr[0] == 'rowspan': self.celly = int(attr[1]) def handle_endtag(self, tag): #print("Encountered an end tag :", tag) if tag == 'table': self.endtable() elif tag == 'tr': self.endline() elif tag == 'td': self.endcell() def handle_data(self, data): #print("Encountered some data :", data) if self.iscell(): self.cell += data # TODO Allow class customisation class Event: # Mined data shortText = '' longText = '' date = False slot = 0 # Generated data shortName = '' longName = '' location = '' startTime = False endTime = False active = False def feedShortText(self, shortText): self.shortText = shortText def feedLongText(self, longText): self.longText = longText def feedSlot(self, slot): self.slot = slot def feedDate(self, date): self.date = date def endFeed(self): self.shortName = self.shortText self.longName = self.longText if self.longName: self.active = True if self.shortName == 'Ediff' or self.shortName == 'R. TOEIC': self.active = False if self.date and isinstance(self.slot, int): h, m = SLOTS[self.slot][0] self.startTime = self.date + datetime.timedelta(hours=h, minutes=m) h, m = SLOTS[self.slot][1] self.endTime = self.date + datetime.timedelta(hours=h, minutes=m) if self.longName: e = self.longName.split('(') if len(e) >= 2: f = e[1].split(')') self.longName = e[0].strip() self.location = f[0].strip() def __str__(self): if self.active: return self.shortName+ ' [' + self.longName + '] ' + (str(self.startTime) + ' - ' + (str(self.endTime) + ' ') if self.startTime else '') + (('@ ' + self.location) if self.location else '') else: return 'Inactive event' def getEvent(self): e = CalEvent() e.add('summary', self.shortName) e.add('description', self.longName) e.add('dtstart', self.startTime) e.add('dtend', self.endTime) e.add('location', self.location) return e