This commit is contained in:
Brage 2023-07-28 23:15:31 +02:00
parent 93ec06089d
commit 58f9b6dc61
3 changed files with 213 additions and 88 deletions

View File

@ -23,9 +23,9 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp
private fun determineMovieFileName(): MovieInfo? {
val movieEx = MovieEx(title, sanitizedName)
val stripped = when {
movieEx.isDefinedWithYear() != null -> sanitizedName.replace(movieEx.isDefinedWithYear()!!, "").trim()
movieEx.isDefinedWithYear() -> sanitizedName.replace(movieEx.yearRegex(), "").trim()
movieEx.doesContainMovieKeywords() -> sanitizedName.replace(Regex("(?i)\\s*\\(\\s*movie\\s*\\)\\s*"), "").trim()
else -> title
else -> sanitizedName
}
val nonResolutioned = movieEx.removeResolutionAndBeyond(stripped) ?: stripped
return MovieInfo(cleanup(nonResolutioned), cleanup(nonResolutioned))
@ -85,14 +85,18 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp
val removalValue = Regex("(i?)([0-9].*[pk]|[ ._-]+[UHD]+[ ._-])").find(input)?.value ?: return null
return input.substring(0, input.indexOf(removalValue))
}
/**
 * Builds the pattern used to locate a four-digit year token.
 *
 * A match is one leading delimiter (space, dot or opening parenthesis),
 * exactly four digits, and one trailing delimiter (space, dot or closing
 * parenthesis). Both delimiters are part of the matched text.
 *
 * @return a newly constructed [Regex] on every call
 */
fun yearRegex(): Regex = Regex("[ .(][0-9]{4}[ .)]")
}
internal class MovieEx(title: String, sanitizedName: String) : Base(title, sanitizedName) {
/**
 * Reports whether [getMatch] finds a year token for the [yearRegex] pattern,
 * e.g. " 2020 ", ".2020." or "(2020)".
 *
 * NOTE(review): two signatures follow. The `String?` one with the inline
 * pattern is presumably the removed side of the commit's diff and the
 * `Boolean` one its replacement — confirm before relying on either.
 *
 * @return for the `Boolean` variant: `true` when [getMatch] yields a non-blank
 * value, `false` when it yields `null` or a blank value
 */
fun isDefinedWithYear(): String? {
return getMatch("[ .][0-9]{4}[ .]")
fun isDefinedWithYear(): Boolean {
return getMatch(yearRegex().pattern)?.isNotBlank() ?: false
}
/**

View File

@ -1,5 +1,8 @@
package no.iktdev.streamit.content.reader.analyzer.contentDeterminator
import no.iktdev.streamit.content.common.dto.reader.EpisodeInfo
import no.iktdev.streamit.content.common.dto.reader.MovieInfo
import no.iktdev.streamit.content.common.dto.reader.VideoInfo
import org.assertj.core.api.AssertionsForInterfaceTypes.assertThat
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Named
@ -7,125 +10,239 @@ import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
/**
 * Input fixture for a [FileNameDeterminate] test case.
 *
 * @property title value passed to [FileNameDeterminate] as its title
 * @property sanitizedName value passed to [FileNameDeterminate] as its sanitized name
 * @property ctype content type attached to the case, [FileNameDeterminate.ContentType.UNDEFINED]
 * when omitted. The test methods visible in this file supply their own content type and do not read it.
 */
data class DataHolder(
    val title: String,
    val sanitizedName: String,
    val ctype: FileNameDeterminate.ContentType = FileNameDeterminate.ContentType.UNDEFINED,
)
class FileNameDeterminateTest {
/**
 * One parameterized-test case: the expected outcome paired with the input it is checked against.
 *
 * NOTE(review): two field pairs are listed — `String`/`String` and
 * `VideoInfo`/`DataHolder`. They look like the removed and added sides of the
 * commit's diff rather than four real fields; the provider entries that use
 * named arguments pass `EpisodeInfo`/`MovieInfo` and `DataHolder` — confirm.
 */
data class TestData(
val expected: String,
val input: String
val expected: VideoInfo,
val input: DataHolder
)
/**
 * Runs each case from [serieTestCases] through [FileNameDeterminate] with
 * [FileNameDeterminate.ContentType.SERIE] and compares the resulting `fullName`
 * with the expected one.
 *
 * NOTE(review): this span appears to contain both sides of the commit's diff.
 * The call with the hard-coded "Iseleve" title plus `assertEquals` looks like
 * the removed version; the call built from `input.title` / `input.sanitizedName`
 * plus `assertThat` looks like the added one — confirm.
 */
@ParameterizedTest
@MethodSource("serieTestCases")
fun testDetermineFileNameForSerie(namedTestData: TestData) {
val fileNameDeterminate =
FileNameDeterminate("Iseleve", namedTestData.input, FileNameDeterminate.ContentType.SERIE)
assertEquals(
namedTestData.expected,
fileNameDeterminate.getDeterminedVideoInfo()?.fullName,
"Test case: ${namedTestData.input}"
)
FileNameDeterminate(
namedTestData.input.title,
namedTestData.input.sanitizedName,
FileNameDeterminate.ContentType.SERIE
)
val result = fileNameDeterminate.getDeterminedVideoInfo()
// A null result fails the case outright.
assertThat(result).isNotNull()
// Only fullName is compared; the other fields of the expected value are not checked.
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
}
/**
 * Runs each case from [movieTestCases] through [FileNameDeterminate] with
 * [FileNameDeterminate.ContentType.MOVIE] and compares the resulting `fullName`
 * with the expected one.
 *
 * NOTE(review): this span appears to contain both sides of the commit's diff.
 * The construction from `namedTestData.input` used twice plus `assertEquals`
 * looks like the removed version; the construction from `input.title` /
 * `input.sanitizedName` plus `assertThat` looks like the added one — confirm.
 */
@ParameterizedTest(name = "{0}")
@MethodSource("movieTestCases")
fun testDetermineFileNameForMovie(namedTestData: TestData) {
val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.MOVIE
)
assertEquals(
namedTestData.expected,
fileNameDeterminate.getDeterminedVideoInfo()?.fullName,
"Test case: ${namedTestData.input}"
)
val fileNameDeterminate =
FileNameDeterminate(
namedTestData.input.title,
namedTestData.input.sanitizedName,
// The content type is fixed here; the case's own DataHolder.ctype is not consulted.
FileNameDeterminate.ContentType.MOVIE
)
val result = fileNameDeterminate.getDeterminedVideoInfo()
assertThat(result).isNotNull()
// Only fullName is compared; the other fields of the expected value are not checked.
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
}
/**
 * Runs each case from [undefinedTestCases] through [FileNameDeterminate] with
 * [FileNameDeterminate.ContentType.UNDEFINED] and compares the resulting
 * `fullName` with the expected one.
 *
 * NOTE(review): this span appears to contain both sides of the commit's diff.
 * The construction from `namedTestData.input` used twice with a single
 * `assertThat` looks like the removed version; the construction from
 * `input.title` / `input.sanitizedName` looks like the added one — confirm.
 */
@ParameterizedTest()
@MethodSource("undefinedTestCases")
fun testDetermineFileNameForUndefined(namedTestData: TestData) {
val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED
)
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected)
val fileNameDeterminate =
FileNameDeterminate(
namedTestData.input.title,
namedTestData.input.sanitizedName,
FileNameDeterminate.ContentType.UNDEFINED
)
val result = fileNameDeterminate.getDeterminedVideoInfo()
assertThat(result).isNotNull()
// Only fullName is compared, so the expected value's concrete type (EpisodeInfo or MovieInfo) is not checked.
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
}
/**
 * Checks that title "Game of Thrones" with name "Game of Thrones - 01" and
 * [FileNameDeterminate.ContentType.UNDEFINED] yields the full name
 * "Game of Thrones - S01E01".
 *
 * NOTE(review): this span appears to contain both sides of the commit's diff.
 * The lines that read `namedTestData` look like the removed version — confirm.
 * The remaining lines check the same input twice: once with literals and once
 * through a [TestData] value.
 */
@Test
fun test() {
val namedTestData = TestData("Game of Thrones - S01E01", "Game of Thrones - 01")
val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED
"Game of Thrones", "Game of Thrones - 01", FileNameDeterminate.ContentType.UNDEFINED
)
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected)
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("Game of Thrones - S01E01")
// Same scenario again, this time expressed as a TestData fixture.
val td = TestData(
expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones - 01")
)
val fileNameDeterminate2 = FileNameDeterminate(
td.input.title, td.input.sanitizedName, FileNameDeterminate.ContentType.UNDEFINED
)
assertThat(fileNameDeterminate2.getDeterminedVideoInfo()?.fullName).isEqualTo(td.expected.fullName)
}
/**
 * Checks that the release-style name "The.Potato.man.2023.1080p.L950XL.x265-WIN10"
 * with [FileNameDeterminate.ContentType.UNDEFINED] yields the full name "The Potato man".
 *
 * NOTE(review): this span appears to contain both sides of the commit's diff.
 * The lines that read `namedTestData` look like the removed version and the
 * literal-based lines like the added one — confirm.
 */
@Test
fun testWildStuff() {
val namedTestData = TestData("The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10")
val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED
"The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10", FileNameDeterminate.ContentType.UNDEFINED
)
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected)
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("The Potato man")
}
companion object {
/**
 * Supplies the named cases for [testDetermineFileNameForSerie].
 *
 * NOTE(review): the list appears to contain both sides of the commit's diff.
 * The entries built as `TestData(String, String)` look like the removed
 * version; the entries built from [EpisodeInfo] and [DataHolder] look like the
 * added one — confirm.
 *
 * @return the cases, each wrapped in [Named] so it carries a display name
 */
@JvmStatic
fun serieTestCases(): List<Named<TestData>> {
return listOf(
Named.of("Is defined", TestData("Iseleve - S01E13", "Iseleve - 13")),
Named.of("Contains episode title", TestData("Iseleve - S01E13 - potetmos", "Iseleve - 13 potetmos")),
Named.of("Season and Episode in S01E01 format", TestData("Iseleve - S01E13", "Iseleve - S1E13")),
Named.of(
"Season and Episode with episode title",
TestData("Iseleve - S01E13 - potetmos", "Iseleve - S1E13 potetmos")
),
Named.of("Season and Episode with space separator", TestData("Iseleve - S01E13", "Iseleve - S1 13")),
Named.of(
"Season and Episode with space separator and episode title",
TestData("Iseleve - S01E13 - potetos", "Iseleve - S1 13 potetos")
),
Named.of("Lowercase season and episode", TestData("Iseleve - S01E13", "Iseleve - s1e13")),
Named.of(
"Episode title with Season and Episode in text",
TestData("Iseleve - S01E13", "Iseleve - Season 1 Episode 13")
),
Named.of(
"Episode title with Season and Episode in text and episode title",
TestData("Iseleve - S01E13 - Potetmos", "Iseleve - Season 1 Episode 13 Potetmos")
)
Named.of("Is defined", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
DataHolder("Iseleve","Iseleve - 13")
)),
// NOTE(review): the display name repeats "Is defined"; the string-based entry for this same input is named "Contains episode title".
Named.of("Is defined", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ),
input = DataHolder("Iseleve","Iseleve - 13 potetmos")
)),
Named.of("Season and Episode in S01E01 format", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - S1E13")
)),
Named.of("Season and Episode with episode title", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ),
input = DataHolder("Iseleve","Iseleve - S1E13 potetmos")
)),
Named.of("Season and Episode with space separator", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - S1 13")
)),
// NOTE(review): fullName ends in "potetos" while episodeTitle is empty; the test method compares fullName only, so the mismatch goes unnoticed.
Named.of("Season and Episode with space separator and episode title", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetos", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - S1 13 potetos")
)),
Named.of("Lowercase season and episode", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - s1e13")
)),
Named.of("Episode title with Season and Episode in text", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13")
)),
Named.of("Episode title with Season and Episode in text and episode title", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - Potetmos", episode = 13, season = 1, episodeTitle = "Potetmos" ),
input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13 Potetmos")
)),
)
}
/**
 * Supplies the named cases for [testDetermineFileNameForMovie].
 *
 * NOTE(review): the list appears to contain both sides of the commit's diff.
 * The entries built as `TestData(String, String)`, including the commented-out
 * ones, look like the removed version; the entries built from [MovieInfo] and
 * [DataHolder] look like the added one — confirm.
 *
 * @return the cases, each wrapped in [Named] so it carries a display name
 */
@JvmStatic
fun movieTestCases(): List<Named<TestData>> {
return listOf(
Named.of("Movie with year", TestData("Some Movie (2012)", "Some Movie (2012)")),
Named.of("Movie without year", TestData("Another Movie", "Another Movie")),
Named.of("Movie with year and additional info", TestData("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1")),
//Named.of("Movie with year and spaces", TestData("Space Odyssey (2010)", "Space Odyssey (2010)")),
//Named.of("Movie with year and parentheses", TestData("Sci-Fi Movie (2015)", "Sci-Fi Movie (((2015)))")),
//Named.of("Movie with year and hyphen", TestData("Action Flick (2008)", "Action Flick - 2008")),
//Named.of("Movie with year and brackets", TestData("Blockbuster (2011)", "Blockbuster [2011]")),
//Named.of("Movie with year and period", TestData("Time Travelers. (2022)", "Time Travelers. .2022.")),
//Named.of("Movie with year and underscores", TestData("Hidden Gem (1999)", "Hidden Gem _1999_")),
Named.of("Movie with title as '2012'", TestData("2012", "2012")),
Named.of("Movie with title as '2020'", TestData("2020 (2012)", "2020 (2012)")),
Named.of("Movie with title as '2049'", TestData("2049 (2017)", "2049 (2017)")),
Named.of("Movie with title as '3000'", TestData("3000 (2000)", "3000 (2000)"))
// Only the first two MovieInfo-based cases set DataHolder.ctype; the movie test method passes ContentType.MOVIE itself.
Named.of(
"Movie with year", TestData(
MovieInfo("Some Movie", "Some Movie"),
DataHolder("Some Movie (2012)", "Some Movie (2012)", FileNameDeterminate.ContentType.MOVIE)
)
),
Named.of(
"Movie without year", TestData(
MovieInfo("Another Movie", "Another Movie"),
DataHolder("Another Movie", "Another Movie", FileNameDeterminate.ContentType.MOVIE)
)
),
Named.of(
"Movie with year and additional info", TestData(
expected = MovieInfo("Awesome Movie", "Awesome Movie - Part 1"),
DataHolder("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1")
)
),
// The next four cases use a title that is itself a four-digit number.
Named.of("Movie with title as '2012'", TestData(
expected = MovieInfo("2012", "2012"),
DataHolder("2012", "2012")
)),
Named.of("Movie with title as '2020'", TestData(
expected = MovieInfo("2020", "2020"),
DataHolder("2020 (2012)", "2020 (2012)")
)),
Named.of("Movie with title as '2049'", TestData(
expected = MovieInfo("2049", "2049"),
DataHolder("2049 (2017)", "2049 (2017)")
)),
Named.of("Movie with title as '3000'", TestData(
expected = MovieInfo("3000", "3000"),
DataHolder("3000 (2000)", "3000 (2000)")
)),
Named.of("Avengers - Endgame", TestData(
expected = MovieInfo("Avengers", "Avengers - Endgame"),
DataHolder("Avengers - Endgame", "Avengers - Endgame")
)),
Named.of(
"Ghost in the Shell (S.A.C) - Solid State Society", TestData(
expected = MovieInfo("Ghost in the Shell", "Ghost in the Shell - Solid State Society"),
DataHolder(
"Ghost in the Shell - Solid State Society",
"Ghost in the Shell (S.A.C) - Solid State Society"
)
)
),
)
}
/**
 * Supplies the named cases for [testDetermineFileNameForUndefined].
 *
 * NOTE(review): the list appears to contain both sides of the commit's diff.
 * The entries built as `TestData(String, String)` look like the removed
 * version; the entries built from [MovieInfo] / [EpisodeInfo] and [DataHolder]
 * look like the added one — confirm.
 *
 * @return the cases, each wrapped in [Named] so it carries a display name
 */
@JvmStatic
fun undefinedTestCases(): List<Named<TestData>> {
return listOf(
Named.of("Undefined - Movie", TestData("Avengers - Endgame", "Avengers - Endgame")),
Named.of("Undefined - Series", TestData("Stranger Things", "Stranger Things")),
Named.of("Undefined - Movie with Year", TestData("Inception (2010)", "Inception (2010)")),
Named.of("Undefined - Series with Year", TestData("Friends (1994)", "Friends (1994)")),
Named.of("Undefined - Movie with Genre", TestData("The Dark Knight", "The Dark Knight")),
Named.of("Undefined - Series with Genre", TestData("Breaking Bad", "Breaking Bad")),
Named.of("Undefined - Movie with Keywords", TestData("The Lord of the Rings", "The Lord of the Rings (Movie)")),
Named.of("Undefined - Series with Keywords", TestData("Game of Thrones 01", "Game of Thrones 01")),
Named.of("Undefined - Series with number", TestData("Game of Thrones - S01E01", "Game of Thrones - 01")),
Named.of("Undefined - Movie", TestData(
expected = MovieInfo("Avengers", "Avengers - Endgame"),
input = DataHolder("Avengers", "Avengers - Endgame")
)),
Named.of("Undefined - Series", TestData(
expected = MovieInfo("Stranger Things", "Stranger Things"),
input = DataHolder("Stranger Things", "Stranger Things")
)),
Named.of("Undefined - Movie with Year", TestData(
expected = MovieInfo("Inception", "Inception"),
input = DataHolder("Inception", "Inception (2010)")
)),
Named.of("Undefined - Series with Year", TestData(
expected = MovieInfo("Friends", "Friends"),
input = DataHolder("Friends", "Friends (1994)")
)),
Named.of("Undefined - Movie with Genre", TestData(
expected = MovieInfo("The Dark Knight", "The Dark Knight"),
input = DataHolder("The Dark Knight", "The Dark Knight")
)),
Named.of("Undefined - Series with Genre", TestData(
expected = MovieInfo("Breaking Bad", "Breaking Bad"),
input = DataHolder("Breaking Bad", "Breaking Bad")
)),
Named.of(
"Undefined - Movie with Keywords",
TestData(
expected = MovieInfo("The Lord of the Rings", "The Lord of the Rings"),
input = DataHolder("The Lord of the Rings", "The Lord of the Rings (Movie)")
)
),
Named.of("Undefined - Series with Keywords", TestData(
expected = EpisodeInfo("Game of Thrones", fullName = "Game of Thrones 01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones 01")
)),
// NOTE(review): same display name and input as the previous case; only the expected type differs (MovieInfo vs EpisodeInfo), which the test method does not compare.
Named.of("Undefined - Series with Keywords", TestData(
expected = MovieInfo("Game of Thrones", fullName = "Game of Thrones 01"),
input = DataHolder("Game of Thrones", "Game of Thrones 01")
)),
Named.of(
"Undefined - Series with number",
TestData(
expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones - 01")
)
),
)
}
}

View File

@ -2,6 +2,7 @@ import logging
import signal
import sys
import os
from typing import Optional
import uuid
import threading
import json
@ -125,22 +126,12 @@ class MessageHandlerThread(threading.Thread):
# Sjekk om statusen er SUCCESS
if status_type == 'SUCCESS':
data_value = self.message.value['data']["title"]
result = None # Will be assigned by either cache_result or sel.perform_action
logger.info("Checking cache for offloading")
cache_result = ResultCache.get(data_value)
if cache_result:
logger.info("Cache hit for %s", data_value)
result = cache_result
else:
logger.info("Not in cache: %s", data_value)
logger.info("Searching in sources for information about %s", data_value)
result = self.perform_action(title=data_value)
if (result.statusType == "SUCCESS"):
logger.info("Storing response for %s in in-memory cache", data_value)
ResultCache.add(data_value, result)
baseName = self.message.value["data"]["sanitizedName"]
title = self.message.value['data']["title"]
result = self.get_metadata(baseName)
if (result is None):
result = self.get_metadata(title)
producerMessage = self.compose_message(referenceId=self.message.value["referenceId"], result=result)
@ -156,6 +147,19 @@ class MessageHandlerThread(threading.Thread):
producer.send(kafka_topic, key="event:metadata:obtained", value=result_json)
producer.close()
def get_metadata(self, name: str) -> Optional[DataResult]:
    """Look up metadata for ``name``, preferring the in-memory cache.

    On a cache miss the lookup is delegated to ``self.perform_action``; a
    result whose ``statusType`` is ``"SUCCESS"`` is stored in ``ResultCache``
    under ``name`` before being returned.

    Args:
        name: Value to look up. It is used both as the cache key and as the
            ``title`` argument of ``perform_action``.

    Returns:
        The cached entry when ``ResultCache.get`` yields a truthy value,
        otherwise whatever ``perform_action`` returned (cached or not).
    """
    logger.info("Checking cache for offloading")
    cache_result = ResultCache.get(name)
    if cache_result:
        logger.info("Cache hit for %s", name)
        return cache_result

    logger.info("Not in cache: %s", name)
    logger.info("Searching in sources for information about %s", name)
    result = self.perform_action(title=name)
    if result.statusType == "SUCCESS":
        logger.info("Storing response for %s in in-memory cache", name)
        ResultCache.add(name, result)

    # The function used to fall off the end here and implicitly return None,
    # so the caller never received the cached or freshly fetched result.
    # NOTE(review): a non-SUCCESS result is returned as-is. If the caller's
    # `result is None` fallback is meant to fire on failed lookups, return
    # None for that case instead — confirm against perform_action.
    return result
def perform_action(self, title) -> DataResult: