2019-03-11 20:12:49 +01:00
/**
 * TheCitadelofRicks
 *
 * Copyright 2019-2020 <seil0@mosad.xyz>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 *
 */
package org.mosad.thecitadelofricks.hsoparser
import org.jsoup.Jsoup
2019-09-06 23:10:35 +02:00
import org.jsoup.nodes.Document
2019-03-11 20:12:49 +01:00
import org.mosad.thecitadelofricks.Lesson
2019-03-12 22:06:04 +01:00
import org.mosad.thecitadelofricks.TimetableWeek
2019-09-14 00:43:59 +02:00
import org.slf4j.LoggerFactory
2019-03-11 20:12:49 +01:00
2021-10-13 15:57:26 +02:00
/**
 * Parses a timetable page into a [TimetableWeek] and reads the week number from its caption.
 *
 * The document is resolved once, while the instance is constructed: a passed [htmlDoc] wins,
 * otherwise [timetableURL] is fetched with Jsoup. If neither is given, or the fetch throws
 * (the exception is logged), no document is held and both parse functions return `null`.
 *
 * NOTE(review): every string literal in this class is reproduced exactly as it appears in the
 * reviewed text, including the blanks just inside the quotes. Confirm against the repository
 * whether that padding is intended before relying on the selectors, ids and separators.
 *
 * @param timetableURL the URL of the timetable you want to get
 * @param htmlDoc the html document to use (the timetableURL will be ignored if this value is present)
 */
class TimetableParser(timetableURL: String? = null, htmlDoc: Document? = null) {

    // Declared before htmlDoc on purpose: the htmlDoc initializer below logs through it.
    private val logger: org.slf4j.Logger = LoggerFactory.getLogger(TimetableParser::class.java)

    // Column headers used by printTimetableWeek.
    private val days = arrayOf(" Monday ", " Tuesday ", " Wednesday ", " Thursday ", " Friday ", " Saturday ")

    // The document all parse functions work on; null when nothing was given or the download failed.
    private val htmlDoc: Document? = htmlDoc ?: timetableURL?.let { url ->
        try {
            Jsoup.connect(url).get()
        } catch (gex: Exception) {
            // best effort: a failed download is logged and leaves the parser without a document
            logger.error(" general TimetableParser error ", gex)
            null
        }
    }

    /**
     * parse the timetable from the previously given url
     * the timetable is organised per row not per column;
     * Mon 1, Tue 1, Wed 1, Thur 1, Fri 1, Sat 1, Mon 2 and so on
     *
     * @return the parsed week, or `null` if no document is available
     */
    fun parseTimeTable(): TimetableWeek? {
        val doc = htmlDoc ?: return null

        val timetableWeek = TimetableWeek()
        val rows = doc.select(" table.timetable ").select(" tr[scope= \" row \" ] ")

        // day, row and lesson of the most recently seen rowspan cell (-1 = none seen yet)
        var sDay = -1
        var sRow = -1
        var sLesson = Lesson(" ", " ", " ", " ", " ")

        // get each row with index, reflects 1 timeslot per day
        for ((rowIndex, row) in rows.withIndex()) {
            var day = 0
            var lessonIndexDay = 0 // the index of the lesson per timeslot

            // elements are now all lessons, including empty ones
            for (element in row.select(" td.lastcol, td[style] ")) {
                // building the lesson only reads from the element, so it is done once up front
                val lesson = lessonFrom(element, " $day . $rowIndex . $lessonIndexDay ")
                val slot = timetableWeek.days[day].timeslots[rowIndex]

                // if there is a lecture with rowspan="2", we need to shift everything by one to the left
                val continuesSavedCell = sDay > -1 && sRow > -1 && sDay == day && sRow + 1 == rowIndex
                if (continuesSavedCell) {
                    // we found a lecture that is longer than 1 lesson
                    slot.add(sLesson) // this just works if there is one lecture per slot

                    // adjust the following slot: the current cell becomes the saved lesson
                    sDay++
                    sLesson = lesson

                    // adjust the slot directly as we don't get there anymore
                    if (sDay == 5) {
                        timetableWeek.days[day + 1].timeslots[rowIndex].add(sLesson)
                    }
                } else {
                    slot.add(lesson)
                }

                // we found a lecture with rowspan="2", save day, row and lesson for later adjustment
                if (element.toString().contains(" rowspan= \" 2 \" ")) {
                    sDay = day
                    sRow = rowIndex
                    sLesson = slot[0]
                }

                lessonIndexDay++
                // a "lastcol" cell closes the current day; the next cell belongs to the next day
                if (element.hasClass(" lastcol ")) {
                    day++
                    lessonIndexDay = 0
                }
            }
        }

        return timetableWeek
    }

    /**
     * parse the week number of the year for the timetable
     *
     * @return the number found in the caption, or `null` if no document is available or the
     * caption does not contain a parsable integer at the expected position
     */
    fun parseWeekNumberYear(): Int? {
        val doc = htmlDoc ?: return null

        return doc.select(" h1.timetable-caption ").text()
            .substringAfter(" - ")
            .substringBefore(" . ")
            // drop all blanks, then parse defensively: an unexpected caption yields null, not a crash
            .filterNot { it.isWhitespace() }
            .toIntOrNull()
    }

    /**
     * print a timetable
     * @param timetable the timetable to print
     */
    @Suppress(" unused ")
    fun printTimetableWeek(timetable: TimetableWeek) {
        for (j in 0..5) print(days[j].padEnd(75, ' ') + " | ")
        println()
        printSeparatorLine()

        // the timeslot
        for (i in 0..5) {
            printLessonLine(timetable, i) { it.lessonSubject }
            printLessonLine(timetable, i) { it.lessonTeacher }
            printLessonLine(timetable, i) { it.lessonRoom }
            printSeparatorLine()
        }
        println(" \n ")
    }

    /** Builds a [Lesson] with the given id from the subject, teacher, room and remark divs of a cell. */
    private fun lessonFrom(element: org.jsoup.nodes.Element, lessonId: String): Lesson =
        Lesson(
            lessonId,
            element.select(" div.lesson-subject ").text(),
            element.select(" div.lesson-teacher ").text(),
            element.select(" div.lesson-room ").text(),
            element.select(" div.lesson-remark ").text()
        )

    /** Prints one horizontal rule: six dash segments, the first one character shorter than the rest. */
    private fun printSeparatorLine() {
        for (j in 0..5) {
            val width = if (j == 0) 76 else 77
            print(" - ".padEnd(width, '-') + " + ")
        }
        println()
    }

    /** Prints one text line of timeslot [slot] across all six days, using [field] of every lesson. */
    private fun printLessonLine(timetable: TimetableWeek, slot: Int, field: (Lesson) -> String) {
        for (j in 0..5) {
            val lessons = timetable.days[j].timeslots[slot]
            // the column width is shared between the lessons of a slot; an empty slot counts as one
            val ldiff = if (lessons.size == 0) 1 else lessons.size
            for (lesson in lessons) print(field(lesson).padEnd(75 / ldiff, ' '))
            if (ldiff == 2) print(" ")
            print(" | ")
        }
        println()
    }

}