This commit is contained in:
Brage 2023-07-28 23:15:31 +02:00
parent 93ec06089d
commit 58f9b6dc61
3 changed files with 213 additions and 88 deletions

View File

@ -23,9 +23,9 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp
private fun determineMovieFileName(): MovieInfo? { private fun determineMovieFileName(): MovieInfo? {
val movieEx = MovieEx(title, sanitizedName) val movieEx = MovieEx(title, sanitizedName)
val stripped = when { val stripped = when {
movieEx.isDefinedWithYear() != null -> sanitizedName.replace(movieEx.isDefinedWithYear()!!, "").trim() movieEx.isDefinedWithYear() -> sanitizedName.replace(movieEx.yearRegex(), "").trim()
movieEx.doesContainMovieKeywords() -> sanitizedName.replace(Regex("(?i)\\s*\\(\\s*movie\\s*\\)\\s*"), "").trim() movieEx.doesContainMovieKeywords() -> sanitizedName.replace(Regex("(?i)\\s*\\(\\s*movie\\s*\\)\\s*"), "").trim()
else -> title else -> sanitizedName
} }
val nonResolutioned = movieEx.removeResolutionAndBeyond(stripped) ?: stripped val nonResolutioned = movieEx.removeResolutionAndBeyond(stripped) ?: stripped
return MovieInfo(cleanup(nonResolutioned), cleanup(nonResolutioned)) return MovieInfo(cleanup(nonResolutioned), cleanup(nonResolutioned))
@ -85,14 +85,18 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp
val removalValue = Regex("(i?)([0-9].*[pk]|[ ._-]+[UHD]+[ ._-])").find(input)?.value ?: return null val removalValue = Regex("(i?)([0-9].*[pk]|[ ._-]+[UHD]+[ ._-])").find(input)?.value ?: return null
return input.substring(0, input.indexOf(removalValue)) return input.substring(0, input.indexOf(removalValue))
} }
/**
 * Builds the pattern used to locate a release year inside a file name.
 *
 * The pattern matches exactly four digits that are preceded by a space, a dot
 * or an opening parenthesis and followed by a space, a dot or a closing
 * parenthesis, e.g. " 2020 ", ".2020." or "(2020)". The delimiters are part of
 * the match.
 *
 * @return a new [Regex] instance for the year pattern.
 */
fun yearRegex(): Regex = "[ .(][0-9]{4}[ .)]".toRegex()
} }
internal class MovieEx(title: String, sanitizedName: String) : Base(title, sanitizedName) { internal class MovieEx(title: String, sanitizedName: String) : Base(title, sanitizedName) {
/** /**
* @return not null if matches " 2020 " or ".2020." * @return not null if matches " 2020 " or ".2020."
*/ */
fun isDefinedWithYear(): String? { fun isDefinedWithYear(): Boolean {
return getMatch("[ .][0-9]{4}[ .]") return getMatch(yearRegex().pattern)?.isNotBlank() ?: false
} }
/** /**

View File

@ -1,5 +1,8 @@
package no.iktdev.streamit.content.reader.analyzer.contentDeterminator package no.iktdev.streamit.content.reader.analyzer.contentDeterminator
import no.iktdev.streamit.content.common.dto.reader.EpisodeInfo
import no.iktdev.streamit.content.common.dto.reader.MovieInfo
import no.iktdev.streamit.content.common.dto.reader.VideoInfo
import org.assertj.core.api.AssertionsForInterfaceTypes.assertThat import org.assertj.core.api.AssertionsForInterfaceTypes.assertThat
import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Named import org.junit.jupiter.api.Named
@ -7,125 +10,239 @@ import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource import org.junit.jupiter.params.provider.MethodSource
/**
 * Input side of a [FileNameDeterminate] test case.
 *
 * @property title value handed to [FileNameDeterminate] as its `title` argument.
 * @property sanitizedName value handed to [FileNameDeterminate] as its `sanitizedName` argument.
 * @property ctype content type associated with the input; defaults to
 * [FileNameDeterminate.ContentType.UNDEFINED].
 * NOTE(review): the parameterized tests in this file pass an explicit
 * ContentType to the constructor instead of reading this field; confirm
 * whether it is still needed.
 */
data class DataHolder(
val title: String,
val sanitizedName: String,
val ctype: FileNameDeterminate.ContentType = FileNameDeterminate.ContentType.UNDEFINED
)
class FileNameDeterminateTest { class FileNameDeterminateTest {
data class TestData( data class TestData(
val expected: String, val expected: VideoInfo,
val input: String val input: DataHolder
) )
@ParameterizedTest @ParameterizedTest
@MethodSource("serieTestCases") @MethodSource("serieTestCases")
fun testDetermineFileNameForSerie(namedTestData: TestData) { fun testDetermineFileNameForSerie(namedTestData: TestData) {
val fileNameDeterminate = val fileNameDeterminate =
FileNameDeterminate("Iseleve", namedTestData.input, FileNameDeterminate.ContentType.SERIE) FileNameDeterminate(
assertEquals( namedTestData.input.title,
namedTestData.expected, namedTestData.input.sanitizedName,
fileNameDeterminate.getDeterminedVideoInfo()?.fullName, FileNameDeterminate.ContentType.SERIE
"Test case: ${namedTestData.input}" )
) val result = fileNameDeterminate.getDeterminedVideoInfo()
assertThat(result).isNotNull()
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
} }
@ParameterizedTest(name = "{0}") @ParameterizedTest(name = "{0}")
@MethodSource("movieTestCases") @MethodSource("movieTestCases")
fun testDetermineFileNameForMovie(namedTestData: TestData) { fun testDetermineFileNameForMovie(namedTestData: TestData) {
val fileNameDeterminate = FileNameDeterminate( val fileNameDeterminate =
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.MOVIE FileNameDeterminate(
) namedTestData.input.title,
assertEquals( namedTestData.input.sanitizedName,
namedTestData.expected, FileNameDeterminate.ContentType.MOVIE
fileNameDeterminate.getDeterminedVideoInfo()?.fullName, )
"Test case: ${namedTestData.input}" val result = fileNameDeterminate.getDeterminedVideoInfo()
) assertThat(result).isNotNull()
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
} }
@ParameterizedTest() @ParameterizedTest()
@MethodSource("undefinedTestCases") @MethodSource("undefinedTestCases")
fun testDetermineFileNameForUndefined(namedTestData: TestData) { fun testDetermineFileNameForUndefined(namedTestData: TestData) {
val fileNameDeterminate = FileNameDeterminate( val fileNameDeterminate =
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED FileNameDeterminate(
) namedTestData.input.title,
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) namedTestData.input.sanitizedName,
FileNameDeterminate.ContentType.UNDEFINED
)
val result = fileNameDeterminate.getDeterminedVideoInfo()
assertThat(result).isNotNull()
assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName)
} }
@Test @Test
fun test() { fun test() {
val namedTestData = TestData("Game of Thrones - S01E01", "Game of Thrones - 01")
val fileNameDeterminate = FileNameDeterminate( val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED "Game of Thrones", "Game of Thrones - 01", FileNameDeterminate.ContentType.UNDEFINED
) )
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("Game of Thrones - S01E01")
val td = TestData(
expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones - 01")
)
val fileNameDeterminate2 = FileNameDeterminate(
td.input.title, td.input.sanitizedName, FileNameDeterminate.ContentType.UNDEFINED
)
assertThat(fileNameDeterminate2.getDeterminedVideoInfo()?.fullName).isEqualTo(td.expected.fullName)
} }
@Test @Test
fun testWildStuff() { fun testWildStuff() {
val namedTestData = TestData("The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10")
val fileNameDeterminate = FileNameDeterminate( val fileNameDeterminate = FileNameDeterminate(
namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED "The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10", FileNameDeterminate.ContentType.UNDEFINED
) )
assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("The Potato man")
} }
companion object { companion object {
@JvmStatic @JvmStatic
fun serieTestCases(): List<Named<TestData>> { fun serieTestCases(): List<Named<TestData>> {
return listOf( return listOf(
Named.of("Is defined", TestData("Iseleve - S01E13", "Iseleve - 13")), Named.of("Is defined", TestData(
Named.of("Contains episode title", TestData("Iseleve - S01E13 - potetmos", "Iseleve - 13 potetmos")), expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
Named.of("Season and Episode in S01E01 format", TestData("Iseleve - S01E13", "Iseleve - S1E13")), DataHolder("Iseleve","Iseleve - 13")
Named.of( )),
"Season and Episode with episode title", Named.of("Is defined", TestData(
TestData("Iseleve - S01E13 - potetmos", "Iseleve - S1E13 potetmos") expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ),
), input = DataHolder("Iseleve","Iseleve - 13 potetmos")
Named.of("Season and Episode with space separator", TestData("Iseleve - S01E13", "Iseleve - S1 13")), )),
Named.of( Named.of("Season and Episode in S01E01 format", TestData(
"Season and Episode with space separator and episode title", expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
TestData("Iseleve - S01E13 - potetos", "Iseleve - S1 13 potetos") input = DataHolder("Iseleve","Iseleve - S1E13")
), )),
Named.of("Lowercase season and episode", TestData("Iseleve - S01E13", "Iseleve - s1e13")),
Named.of(
"Episode title with Season and Episode in text", Named.of("Season and Episode with episode title", TestData(
TestData("Iseleve - S01E13", "Iseleve - Season 1 Episode 13") expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ),
), input = DataHolder("Iseleve","Iseleve - S1E13 potetmos")
Named.of( )),
"Episode title with Season and Episode in text and episode title", Named.of("Season and Episode with space separator", TestData(
TestData("Iseleve - S01E13 - Potetmos", "Iseleve - Season 1 Episode 13 Potetmos") expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
) input = DataHolder("Iseleve","Iseleve - S1 13")
)),
Named.of("Season and Episode with space separator and episode title", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetos", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - S1 13 potetos")
)),
Named.of("Lowercase season and episode", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - s1e13")
)),
Named.of("Episode title with Season and Episode in text", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ),
input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13")
)),
Named.of("Episode title with Season and Episode in text and episode title", TestData(
expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - Potetmos", episode = 13, season = 1, episodeTitle = "Potetmos" ),
input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13 Potetmos")
)),
) )
} }
@JvmStatic @JvmStatic
fun movieTestCases(): List<Named<TestData>> { fun movieTestCases(): List<Named<TestData>> {
return listOf( return listOf(
Named.of("Movie with year", TestData("Some Movie (2012)", "Some Movie (2012)")), Named.of(
Named.of("Movie without year", TestData("Another Movie", "Another Movie")), "Movie with year", TestData(
Named.of("Movie with year and additional info", TestData("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1")), MovieInfo("Some Movie", "Some Movie"),
//Named.of("Movie with year and spaces", TestData("Space Odyssey (2010)", "Space Odyssey (2010)")), DataHolder("Some Movie (2012)", "Some Movie (2012)", FileNameDeterminate.ContentType.MOVIE)
//Named.of("Movie with year and parentheses", TestData("Sci-Fi Movie (2015)", "Sci-Fi Movie (((2015)))")), )
//Named.of("Movie with year and hyphen", TestData("Action Flick (2008)", "Action Flick - 2008")), ),
//Named.of("Movie with year and brackets", TestData("Blockbuster (2011)", "Blockbuster [2011]")), Named.of(
//Named.of("Movie with year and period", TestData("Time Travelers. (2022)", "Time Travelers. .2022.")), "Movie without year", TestData(
//Named.of("Movie with year and underscores", TestData("Hidden Gem (1999)", "Hidden Gem _1999_")), MovieInfo("Another Movie", "Another Movie"),
Named.of("Movie with title as '2012'", TestData("2012", "2012")), DataHolder("Another Movie", "Another Movie", FileNameDeterminate.ContentType.MOVIE)
Named.of("Movie with title as '2020'", TestData("2020 (2012)", "2020 (2012)")), )
Named.of("Movie with title as '2049'", TestData("2049 (2017)", "2049 (2017)")),
Named.of("Movie with title as '3000'", TestData("3000 (2000)", "3000 (2000)")) ),
Named.of(
"Movie with year and additional info", TestData(
expected = MovieInfo("Awesome Movie", "Awesome Movie - Part 1"),
DataHolder("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1")
)
),
Named.of("Movie with title as '2012'", TestData(
expected = MovieInfo("2012", "2012"),
DataHolder("2012", "2012")
)),
Named.of("Movie with title as '2020'", TestData(
expected = MovieInfo("2020", "2020"),
DataHolder("2020 (2012)", "2020 (2012)")
)),
Named.of("Movie with title as '2049'", TestData(
expected = MovieInfo("2049", "2049"),
DataHolder("2049 (2017)", "2049 (2017)")
)),
Named.of("Movie with title as '3000'", TestData(
expected = MovieInfo("3000", "3000"),
DataHolder("3000 (2000)", "3000 (2000)")
)),
Named.of("Avengers - Endgame", TestData(
expected = MovieInfo("Avengers", "Avengers - Endgame"),
DataHolder("Avengers - Endgame", "Avengers - Endgame")
)),
Named.of(
"Ghost in the Shell (S.A.C) - Solid State Society", TestData(
expected = MovieInfo("Ghost in the Shell", "Ghost in the Shell - Solid State Society"),
DataHolder(
"Ghost in the Shell - Solid State Society",
"Ghost in the Shell (S.A.C) - Solid State Society"
)
)
),
) )
} }
@JvmStatic @JvmStatic
fun undefinedTestCases(): List<Named<TestData>> { fun undefinedTestCases(): List<Named<TestData>> {
return listOf( return listOf(
Named.of("Undefined - Movie", TestData("Avengers - Endgame", "Avengers - Endgame")), Named.of("Undefined - Movie", TestData(
Named.of("Undefined - Series", TestData("Stranger Things", "Stranger Things")), expected = MovieInfo("Avengers", "Avengers - Endgame"),
Named.of("Undefined - Movie with Year", TestData("Inception (2010)", "Inception (2010)")), input = DataHolder("Avengers", "Avengers - Endgame")
Named.of("Undefined - Series with Year", TestData("Friends (1994)", "Friends (1994)")), )),
Named.of("Undefined - Movie with Genre", TestData("The Dark Knight", "The Dark Knight")), Named.of("Undefined - Series", TestData(
Named.of("Undefined - Series with Genre", TestData("Breaking Bad", "Breaking Bad")), expected = MovieInfo("Stranger Things", "Stranger Things"),
Named.of("Undefined - Movie with Keywords", TestData("The Lord of the Rings", "The Lord of the Rings (Movie)")), input = DataHolder("Stranger Things", "Stranger Things")
Named.of("Undefined - Series with Keywords", TestData("Game of Thrones 01", "Game of Thrones 01")), )),
Named.of("Undefined - Series with number", TestData("Game of Thrones - S01E01", "Game of Thrones - 01")), Named.of("Undefined - Movie with Year", TestData(
expected = MovieInfo("Inception", "Inception"),
input = DataHolder("Inception", "Inception (2010)")
)),
Named.of("Undefined - Series with Year", TestData(
expected = MovieInfo("Friends", "Friends"),
input = DataHolder("Friends", "Friends (1994)")
)),
Named.of("Undefined - Movie with Genre", TestData(
expected = MovieInfo("The Dark Knight", "The Dark Knight"),
input = DataHolder("The Dark Knight", "The Dark Knight")
)),
Named.of("Undefined - Series with Genre", TestData(
expected = MovieInfo("Breaking Bad", "Breaking Bad"),
input = DataHolder("Breaking Bad", "Breaking Bad")
)),
Named.of(
"Undefined - Movie with Keywords",
TestData(
expected = MovieInfo("The Lord of the Rings", "The Lord of the Rings"),
input = DataHolder("The Lord of the Rings", "The Lord of the Rings (Movie)")
)
),
Named.of("Undefined - Series with Keywords", TestData(
expected = EpisodeInfo("Game of Thrones", fullName = "Game of Thrones 01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones 01")
)),
Named.of("Undefined - Series with Keywords", TestData(
expected = MovieInfo("Game of Thrones", fullName = "Game of Thrones 01"),
input = DataHolder("Game of Thrones", "Game of Thrones 01")
)),
Named.of(
"Undefined - Series with number",
TestData(
expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""),
input = DataHolder("Game of Thrones", "Game of Thrones - 01")
)
),
) )
} }
} }

View File

@ -2,6 +2,7 @@ import logging
import signal import signal
import sys import sys
import os import os
from typing import Optional
import uuid import uuid
import threading import threading
import json import json
@ -125,22 +126,12 @@ class MessageHandlerThread(threading.Thread):
# Sjekk om statusen er SUCCESS # Sjekk om statusen er SUCCESS
if status_type == 'SUCCESS': if status_type == 'SUCCESS':
data_value = self.message.value['data']["title"] baseName = self.message.value["data"]["sanitizedName"]
title = self.message.value['data']["title"]
result = None # Will be assigned by either cache_result or sel.perform_action
logger.info("Checking cache for offloading")
cache_result = ResultCache.get(data_value)
if cache_result:
logger.info("Cache hit for %s", data_value)
result = cache_result
else:
logger.info("Not in cache: %s", data_value)
logger.info("Searching in sources for information about %s", data_value)
result = self.perform_action(title=data_value)
if (result.statusType == "SUCCESS"):
logger.info("Storing response for %s in in-memory cache", data_value)
ResultCache.add(data_value, result)
result = self.get_metadata(baseName)
if (result is None):
result = self.get_metadata(title)
producerMessage = self.compose_message(referenceId=self.message.value["referenceId"], result=result) producerMessage = self.compose_message(referenceId=self.message.value["referenceId"], result=result)
@ -156,6 +147,19 @@ class MessageHandlerThread(threading.Thread):
producer.send(kafka_topic, key="event:metadata:obtained", value=result_json) producer.send(kafka_topic, key="event:metadata:obtained", value=result_json)
producer.close() producer.close()
def get_metadata(self, name: str) -> Optional[DataResult]:
    """Look up metadata for ``name``, consulting the in-memory cache first.

    On a cache hit the cached result is returned directly. Otherwise
    ``perform_action`` is invoked with ``name`` as the title, and a result
    whose ``statusType`` is ``"SUCCESS"`` is stored in ``ResultCache``
    before being returned.

    Args:
        name: The name to search for; also used as the cache key.

    Returns:
        The cached or freshly obtained result. Previously this function fell
        off the end and always returned ``None``, so the caller never
        received any metadata.

    NOTE(review): a non-SUCCESS result is returned as-is rather than as
    ``None``, so a caller that retries only on ``None`` will not fall back
    for failed lookups. Confirm the intended contract with ``perform_action``.
    """
    logger.info("Checking cache for offloading")
    cache_result = ResultCache.get(name)
    if cache_result:
        logger.info("Cache hit for %s", name)
        return cache_result

    logger.info("Not in cache: %s", name)
    logger.info("Searching in sources for information about %s", name)
    result = self.perform_action(title=name)
    # Only successful lookups are cached so failed searches can be retried later.
    if result.statusType == "SUCCESS":
        logger.info("Storing response for %s in in-memory cache", name)
        ResultCache.add(name, result)
    return result
def perform_action(self, title) -> DataResult: def perform_action(self, title) -> DataResult: