Otherwise, the requests may fail (in my experience). Also, we don't want to look suspicious by flooding their server with a large number of simultaneous requests.
201 lines
7.3 KiB
Kotlin
201 lines
7.3 KiB
Kotlin
/**
|
|
* TheCitadelofRicks
|
|
*
|
|
* Copyright 2019-2020 <seil0@mosad.xyz>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*
|
|
*/
|
|
|
|
package org.mosad.thecitadelofricks.hsoparser
|
|
|
|
import kotlinx.coroutines.runBlocking
|
|
import kotlinx.coroutines.sync.Semaphore
|
|
import org.jsoup.Jsoup
|
|
import org.jsoup.nodes.Document
|
|
import org.mosad.thecitadelofricks.Lesson
|
|
import org.mosad.thecitadelofricks.TimetableWeek
|
|
import org.slf4j.LoggerFactory
|
|
|
|
/**
 * Parses a weekly timetable out of an HTML page.
 *
 * The document is either handed in directly or downloaded from [timetableURL] while the
 * instance is constructed. The download runs inside `runBlocking`, so constructing a parser
 * with a URL blocks the calling thread until the request has finished or failed.
 *
 * @param timetableURL the URL of the timetable you want to get
 * @param htmlDoc the html document to use (the timetableURL will be ignored if this value is present)
 */
class TimetableParser(timetableURL: String? = null, htmlDoc: Document? = null) {
    private val logger: org.slf4j.Logger = LoggerFactory.getLogger(TimetableParser::class.java)
    private val days = arrayOf("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")

    /**
     * The document all parse functions work on.
     * It is `null` if neither a document nor a URL was given, or if the download failed.
     */
    private val htmlDoc: Document? = htmlDoc ?: timetableURL?.let { url ->
        runBlocking {
            // Only allow sending a limited amount of requests at the same time.
            // The permit is acquired *before* entering the try block, so the finally block
            // can never release a permit that was not obtained in the first place.
            semaphore.acquire()
            try {
                Jsoup.connect(url).get()
            } catch (gex: Exception) {
                logger.error("general TimetableParser error", gex)
                null
            } finally {
                semaphore.release()
            }
        }
    }

    /**
     * parse the timetable from the previously given url
     * the timetable is organised per row not per column;
     * Mon 1, Tue 1, Wed 1, Thur 1, Fri 1, Sat 1, Mon 2 and so on
     *
     * @return the parsed week, or `null` if no html document is available
     */
    fun parseTimeTable(): TimetableWeek? {
        val doc = htmlDoc ?: return null

        val timetableWeek = TimetableWeek()
        val rows = doc.select("table.timetable").select("tr[scope=\"row\"]")

        // state of the last lecture found with rowspan="2": its day, its row and the lesson itself
        var sDay = -1
        var sRow = -1
        var sLesson = Lesson("", "", "", "", "")

        // get each row with index, reflects 1 timeslot per day
        for ((rowIndex, row) in rows.withIndex()) {
            var day = 0
            var lessonIndexDay = 0 // the index of the lesson per timeslot

            // elements are now all lessons, including empty ones
            row.select("td.lastcol, td[style]").forEach { element ->
                val lessonID = "$day.$rowIndex.$lessonIndexDay"

                // if there is a lecture with rowspan="2", the cell is missing in the following row,
                // so everything in that row is shifted by one to the left
                val continuesSavedLecture = sDay > -1 && sRow > -1 && sDay == day && sRow + 1 == rowIndex

                if (continuesSavedLecture) {
                    // we found a lecture that is longer than 1 lesson
                    timetableWeek.days[day].timeslots[rowIndex].add(sLesson) // this just works if there is one lecture per slot

                    // adjust the following slot: the current element belongs to the next day,
                    // it is stored when the loop reaches that day
                    sDay++
                    sLesson = lessonOf(element, lessonID)

                    // adjust the slot directly as we don't get there anymore
                    if (sDay == 5) {
                        timetableWeek.days[day + 1].timeslots[rowIndex].add(sLesson)
                    }
                } else {
                    timetableWeek.days[day].timeslots[rowIndex].add(lessonOf(element, lessonID))
                }

                // we found a lecture with rowspan="2", save day, row and lesson for later adjustment
                if (element.toString().contains("rowspan=\"2\"")) {
                    sDay = day
                    sRow = rowIndex
                    sLesson = timetableWeek.days[day].timeslots[rowIndex][0]
                }

                lessonIndexDay++

                // a cell with the class "lastcol" is the last cell of a day
                if (element.hasClass("lastcol")) {
                    day++
                    lessonIndexDay = 0
                }
            }
        }

        return timetableWeek
    }

    /**
     * parse the week number of the year for the timetable
     *
     * The number is taken from the text of `h1.timetable-caption`: the part after the first "- "
     * and before the following ".", with all spaces removed.
     *
     * @return the week number, or `null` if no html document is available or the caption
     * does not contain a number at the expected position
     */
    fun parseWeekNumberYear(): Int? {
        val doc = htmlDoc ?: return null

        return doc.select("h1.timetable-caption").text()
            .substringAfter("- ")
            .substringBefore(".")
            .replace(" ", "")
            .toIntOrNull() // a missing or malformed caption yields null instead of a NumberFormatException
    }

    /**
     * print a timetable
     *
     * Prints a header with the day names, followed by subject, teacher and room lines
     * for each of the timeslots 0 to 5 of the days 0 to 5.
     *
     * @param timetable the timetable to print
     */
    @Suppress("unused")
    fun printTimetableWeek(timetable: TimetableWeek) {
        for (j in 0..5) print(days[j].padEnd(75, ' ') + " | ")
        println()
        printSeparatorLine()

        // the timeslot
        for (i in 0..5) {
            printLessonLine(timetable, i) { it.lessonSubject }
            printLessonLine(timetable, i) { it.lessonTeacher }
            printLessonLine(timetable, i) { it.lessonRoom }
            printSeparatorLine()
        }

        println(" \n")
    }

    /**
     * Builds a [Lesson] with the given id from the subject, teacher, room and remark divs of a table cell.
     */
    private fun lessonOf(element: org.jsoup.nodes.Element, lessonID: String): Lesson = Lesson(
        lessonID,
        element.select("div.lesson-subject").text(),
        element.select("div.lesson-teacher").text(),
        element.select("div.lesson-room").text(),
        element.select("div.lesson-remark").text()
    )

    /**
     * Prints one horizontal separator line for all six day columns.
     */
    private fun printSeparatorLine() {
        for (column in 0..5) {
            // every cell is 75 characters plus " | ": the first column has no leading space
            // from a previous " | ", so it needs one dash less to line the "+" up with the "|"
            val dashes = if (column == 0) 76 else 77
            print("-".repeat(dashes) + "+")
        }
        println()
    }

    /**
     * Prints one text line of a timeslot: for each of the six days the selected field
     * of every lesson in that slot, sharing the 75 characters of the column.
     * A slot without any lesson prints only the " | " delimiter.
     */
    private fun printLessonLine(timetable: TimetableWeek, slot: Int, field: (Lesson) -> String) {
        for (dayIndex in 0..5) {
            val lessons = timetable.days[dayIndex].timeslots[slot]
            val ldiff = lessons.size.coerceAtLeast(1)

            for (lesson in lessons) print(field(lesson).padEnd(75 / ldiff, ' '))
            if (ldiff == 2) print(" ") // 2 * (75 / 2) is only 74, fill the missing character
            print(" | ")
        }
        println()
    }

    companion object {
        /** Shared by all parser instances: at most 3 timetable downloads run at the same time. */
        val semaphore = Semaphore(3, 0)
    }
}