Commit 99b1d1357a6205398d3edb41475a88b4b32c9bae

Authored by Geoffrey PREUD'HOMME
0 parents

Initial commit

.gitignore 0 → 100644
  1 +++ a/.gitignore
... ... @@ -0,0 +1,94 @@
  1 +# Byte-compiled / optimized / DLL files
  2 +__pycache__/
  3 +*.py[cod]
  4 +*$py.class
  5 +
  6 +# C extensions
  7 +*.so
  8 +
  9 +# Distribution / packaging
  10 +.Python
  11 +env/
  12 +build/
  13 +develop-eggs/
  14 +dist/
  15 +downloads/
  16 +eggs/
  17 +.eggs/
  18 +lib/
  19 +lib64/
  20 +parts/
  21 +sdist/
  22 +var/
  23 +*.egg-info/
  24 +.installed.cfg
  25 +*.egg
  26 +
  27 +# PyInstaller
  28 +# Usually these files are written by a python script from a template
  29 +# before PyInstaller builds the exe, so as to inject date/other infos into it.
  30 +*.manifest
  31 +*.spec
  32 +
  33 +# Installer logs
  34 +pip-log.txt
  35 +pip-delete-this-directory.txt
  36 +
  37 +# Unit test / coverage reports
  38 +htmlcov/
  39 +.tox/
  40 +.coverage
  41 +.coverage.*
  42 +.cache
  43 +nosetests.xml
  44 +coverage.xml
  45 +*,cover
  46 +.hypothesis/
  47 +
  48 +# Translations
  49 +*.mo
  50 +*.pot
  51 +
  52 +# Django stuff:
  53 +*.log
  54 +local_settings.py
  55 +
  56 +# Flask stuff:
  57 +instance/
  58 +.webassets-cache
  59 +
  60 +# Scrapy stuff:
  61 +.scrapy
  62 +
  63 +# Sphinx documentation
  64 +docs/_build/
  65 +
  66 +# PyBuilder
  67 +target/
  68 +
  69 +# IPython Notebook
  70 +.ipynb_checkpoints
  71 +
  72 +# pyenv
  73 +.python-version
  74 +
  75 +# celery beat schedule file
  76 +celerybeat-schedule
  77 +
  78 +# dotenv
  79 +.env
  80 +
  81 +# virtualenv
  82 +.venv/
  83 +venv/
  84 +ENV/
  85 +
  86 +# Spyder project settings
  87 +.spyderproject
  88 +
  89 +# Rope project settings
  90 +.ropeproject
  91 +
  92 +# Custom
  93 +*.html
  94 +*.ics
... ...
Edt.py 0 → 100644
  1 +++ a/Edt.py
... ... @@ -0,0 +1,162 @@
  1 +"""
  2 +Class and variables needed for Edt manipulation & parsing
  3 +"""
  4 +
  5 +from html.parser import HTMLParser
  6 +from icalendar import Event as CalEvent
  7 +import datetime
  8 +
  9 +DAYS_PER_WEEK = 6  # Number of consecutive days read from one dated line of table 1
  10 +SLOTS = [((8, 0), (10, 0)), ((10, 15), (12, 15)), ((13, 45), (15, 45)), ((16, 0), (18, 0))]  # ((start hour, minute), (end hour, minute)) of each slot of a day
  11 +
  12 +TABLE_1_DATE_X = 1  # Column of table 1 holding the date of the first day of a line
  13 +TABLE_1_FIRST_SLOT_X = 2  # Column of table 1 holding the first slot of the first day
  14 +
  15 +TABLE_2_DATE_X = 0  # Column of table 2 holding the date of a line
  16 +TABLE_2_FIRST_SLOT_X = 1  # Column of table 2 holding the first slot of that date
  17 +
class TableHTMLParser(HTMLParser):
    """
    HTML parser collecting the content of every <table> it is fed

    Once a table is closed it is appended to `tables` as a list of lines,
    each line being a list of (text, colspan, rowspan) tuples, one per <td>.
    Only <table>, <tr> and <td> tags are interpreted.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # State is per instance: a class-level list would be shared by
        # every parser and accumulate tables across documents
        self.tables = []  # Tables
        self.table = False  # Current table content (False when outside a table)
        self.line = False  # Current line content (False when outside a line)
        self.cell = False  # Current cell content (False when outside a cell)
        self.cellx = 1  # colspan of the current cell
        self.celly = 1  # rowspan of the current cell

    # Logic
    def iscell(self):
        """
        Return if we are currently in a cell
        """
        return isinstance(self.cell, str)

    def isline(self):
        """
        Return if we are currently in a line
        """
        return isinstance(self.line, list)

    def istable(self):
        """
        Return if we are currently in a table
        """
        return isinstance(self.table, list)

    @staticmethod
    def _span(value):
        """
        Return a colspan/rowspan attribute value as an int, 1 if it is
        missing or not a number
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1

    # Actions
    def endcell(self):
        """
        Close the current cell, if any, and store it in the current line
        """
        if self.iscell():
            # A cell met outside of a line has nowhere to go: drop it
            # instead of crashing on malformed HTML
            if self.isline():
                self.line.append((self.cell.strip(), self.cellx, self.celly))
            self.cell = False

    def endline(self):
        """
        Close the current line, if any, and store it in the current table
        """
        self.endcell()
        if self.isline():
            # Same as for cells: a line outside of a table is dropped
            if self.istable():
                self.table.append(self.line.copy())
            self.line = False

    def endtable(self):
        """
        Close the current table, if any, and store it in `tables`
        """
        self.endline()
        if self.istable():
            self.tables.append(self.table.copy())
            self.table = False

    # Inheritance
    def handle_starttag(self, tag, attrs):
        """
        Open a table, line or cell, implicitly closing the previous line
        or cell when its end tag was omitted
        """
        if tag == 'table':
            self.table = []
        elif tag == 'tr':
            self.endline()
            self.line = []
        elif tag == 'td':
            self.endcell()
            self.cell = ''
            # As with the original loop, the last occurrence of a
            # duplicated attribute wins
            spans = dict(attrs)
            self.cellx = self._span(spans.get('colspan'))
            self.celly = self._span(spans.get('rowspan'))

    def handle_endtag(self, tag):
        """
        Close the table, line or cell matching the end tag
        """
        if tag == 'table':
            self.endtable()
        elif tag == 'tr':
            self.endline()
        elif tag == 'td':
            self.endcell()

    def handle_data(self, data):
        """
        Append text to the current cell; text outside of a cell is ignored
        """
        if self.iscell():
            self.cell += data
  95 +
  96 +# TODO Allow class customisation
  97 +
class Event:
    """
    One timetable slot, built from the texts mined in the HTML tables

    Feed the mined data with the feed*() methods, then call endFeed() to
    compute the generated data. Behaviour can be customised by subclassing,
    e.g. by overriding EXCLUDED_SHORT_NAMES.
    """

    # Short names of the events that are never exported
    EXCLUDED_SHORT_NAMES = ('Ediff', 'R. TOEIC')

    # Mined data
    shortText = ''
    longText = ''
    date = False
    slot = 0

    # Generated data
    shortName = ''
    longName = ''
    location = ''
    startTime = False
    endTime = False
    active = False

    def feedShortText(self, shortText):
        """
        Store the short text of the event
        """
        self.shortText = shortText

    def feedLongText(self, longText):
        """
        Store the long text of the event, in the form 'name (location)'
        or just 'name'
        """
        self.longText = longText

    def feedSlot(self, slot):
        """
        Store the index in SLOTS of the slot the event takes place in
        """
        self.slot = slot

    def feedDate(self, date):
        """
        Store the day of the event (a datetime at midnight)
        """
        self.date = date

    def endFeed(self):
        """
        Compute the generated data from the mined data
        """
        self.shortName = self.shortText
        self.longName = self.longText

        # An event is exported when it has a long name and is not
        # excluded; recomputed on every call so a second endFeed()
        # cannot keep a stale value
        self.active = bool(self.longName) and self.shortName not in self.EXCLUDED_SHORT_NAMES

        if self.date and isinstance(self.slot, int):
            (startHour, startMinute), (endHour, endMinute) = SLOTS[self.slot]
            self.startTime = self.date + datetime.timedelta(hours=startHour, minutes=startMinute)
            self.endTime = self.date + datetime.timedelta(hours=endHour, minutes=endMinute)

        if self.longName:
            # 'name (location)': the location is what lies between the
            # first '(' and the next ')'
            pieces = self.longName.split('(')
            if len(pieces) >= 2:
                self.longName = pieces[0].strip()
                self.location = pieces[1].split(')')[0].strip()

    def __str__(self):
        if not self.active:
            return 'Inactive event'

        text = '{} [{}] '.format(self.shortName, self.longName)
        if self.startTime:
            text += '{} - {} '.format(self.startTime, self.endTime)
        if self.location:
            text += '@ ' + self.location
        return text

    def getEvent(self):
        """
        Return the event as an icalendar event

        Raises ValueError if the start and end times have not been
        computed, i.e. endFeed() was not called after feeding a date.
        """
        if not (self.startTime and self.endTime):
            raise ValueError('Event has no start/end time: feed a date and call endFeed() first')

        e = CalEvent()
        e.add('summary', self.shortName)
        e.add('description', self.longName)
        e.add('dtstart', self.startTime)
        e.add('dtend', self.endTime)
        e.add('location', self.location)
        return e
... ...
README.md 0 → 100644
  1 +++ a/README.md
... ... @@ -0,0 +1,5 @@
  1 +# Convertit l'emploi du temps IMA3 en fichier ICS
  2 +
  3 +Convertit le fichier HTML récupéré du site <http://dptima3.polytech-lille.net/> en fichier ICS importable sur différentes applications de Calendrier.
  4 +
  5 +À ce stade, l'application est plus une preuve de concept que quelque chose d'utilisable pour tous, puisque mes préférences personnelles sont hardcodées (par exemple j'ai enlevé les Remédiations TOEIC qui ne me concernent pas). On pourrait l'améliorer en permettant la surcharge de Edt.Event pour pouvoir choisir le groupe de TP (et récupérer le fichier HTML directement), choisir d'afficher le petit titre ou le grand comme intitulé d'évènement, rejeter certains intitulés de cours, en modifier d'autres (LV2 → Allemand par exemple)...
... ...
parse.py 0 → 100755
  1 +++ a/parse.py
... ... @@ -0,0 +1,113 @@
  1 +#!/usr/bin/env python3
  2 +
  3 +import datetime
  4 +from icalendar import Calendar
  5 +from Edt import *
  6 +
# Read the HTML file; the context manager closes it even if decoding fails
with open('groupe12.html', 'rb') as htmlHandle:
    htmlStr = htmlHandle.read().decode('iso-8859-15')

# Read HTML tables
parser = TableHTMLParser()
parser.feed(htmlStr)

# Duplicate cells with colspan & rowspan, so that each table becomes a plain
# grid where table[y][x] is the text displayed at that position
tables = []

for parserTable in parser.tables:
    # Figuring out dimensions: width from the colspans of the first line,
    # height from the rowspans of the first cell of each line
    # NOTE(review): the height looks like an upper bound when first cells
    # span several lines; extra lines stay filled with False
    X = sum(cell[1] for cell in parserTable[0])
    Y = sum(line[0][2] for line in parserTable)

    # Constructing table with real dimensions, False meaning "not filled"
    table = [[False] * X for _ in range(Y)]

    # Filling table with parsed table
    for y, line in enumerate(parserTable):
        x = 0
        for cell in line:
            # Offsetting to the right while the position is already taken
            # (by a previous cell of this line or a rowspan from above)
            while isinstance(table[y][x], str):
                x += 1

            # Copying the value over the whole span of the cell
            for y2 in range(y, y + cell[2]):
                for x2 in range(x, x + cell[1]):
                    table[y2][x2] = cell[0]

    tables.append(table)

# Creating events: one list of len(SLOTS) events per date
days = dict()

# Parsing table 1: each dated line holds the short texts of
# DAYS_PER_WEEK consecutive days
for line in tables[0]:
    try:
        day1date = datetime.datetime.strptime(line[TABLE_1_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # This is not a date, no data to grab here
        continue

    for day in range(DAYS_PER_WEEK):
        date = day1date + datetime.timedelta(days=day)

        if date not in days:
            days[date] = [Event() for s in range(len(SLOTS))]

        for slot in range(len(SLOTS)):
            days[date][slot].feedShortText(line[day * len(SLOTS) + slot + TABLE_1_FIRST_SLOT_X])

# Parsing table 2: each dated line holds the long texts of a single day
for line in tables[1]:
    try:
        date = datetime.datetime.strptime(line[TABLE_2_DATE_X], '%d/%m/%Y')
    except (ValueError, TypeError):
        # This is not a date (TypeError: the cell was never filled and is
        # still False), no data to grab here
        continue

    if date not in days:
        days[date] = [Event() for s in range(len(SLOTS))]

    for slot in range(len(SLOTS)):
        days[date][slot].feedLongText(line[slot + TABLE_2_FIRST_SLOT_X])

# Feeding back time and slot to events, in chronological order so that the
# output does not depend on the order the dates were met in the tables
events = []
for day in sorted(days):
    for slot in range(len(SLOTS)):
        event = days[day][slot]
        event.feedDate(day)
        event.feedSlot(slot)
        event.endFeed()
        events.append(event)

# Creating calendar
cal = Calendar()
# PRODID and VERSION are the two properties required on a calendar
cal.add('prodid', '-//Cours Polytech//mxm.dk//')
cal.add('version', '2.0')

for event in events:
    if event.active:
        print(event)
        cal.add_component(event.getEvent())

# Writing calendar to file
with open('groupe12.ics', 'wb') as f:
    f.write(cal.to_ical())
  113 +
... ...
requirements.txt 0 → 100644
  1 +++ a/requirements.txt
... ... @@ -0,0 +1 @@
  1 +icalendar>=3.10
... ...