diff --git a/Reader/src/main/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminate.kt b/Reader/src/main/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminate.kt index c963c61c..5f530021 100644 --- a/Reader/src/main/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminate.kt +++ b/Reader/src/main/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminate.kt @@ -23,9 +23,9 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp private fun determineMovieFileName(): MovieInfo? { val movieEx = MovieEx(title, sanitizedName) val stripped = when { - movieEx.isDefinedWithYear() != null -> sanitizedName.replace(movieEx.isDefinedWithYear()!!, "").trim() + movieEx.isDefinedWithYear() -> sanitizedName.replace(movieEx.yearRegex(), "").trim() movieEx.doesContainMovieKeywords() -> sanitizedName.replace(Regex("(?i)\\s*\\(\\s*movie\\s*\\)\\s*"), "").trim() - else -> title + else -> sanitizedName } val nonResolutioned = movieEx.removeResolutionAndBeyond(stripped) ?: stripped return MovieInfo(cleanup(nonResolutioned), cleanup(nonResolutioned)) @@ -85,14 +85,18 @@ class FileNameDeterminate(val title: String, val sanitizedName: String, val ctyp val removalValue = Regex("(i?)([0-9].*[pk]|[ ._-]+[UHD]+[ ._-])").find(input)?.value ?: return null return input.substring(0, input.indexOf(removalValue)) } + + fun yearRegex(): Regex { + return Regex("[ .(][0-9]{4}[ .)]") + } } internal class MovieEx(title: String, sanitizedName: String) : Base(title, sanitizedName) { /** * @return not null if matches " 2020 " or ".2020." */ - fun isDefinedWithYear(): String? 
{ - return getMatch("[ .][0-9]{4}[ .]") + fun isDefinedWithYear(): Boolean { + return getMatch(yearRegex().pattern)?.isNotBlank() ?: false } /** diff --git a/Reader/src/test/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminateTest.kt b/Reader/src/test/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminateTest.kt index 353a8a80..0666de8b 100644 --- a/Reader/src/test/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminateTest.kt +++ b/Reader/src/test/kotlin/no/iktdev/streamit/content/reader/analyzer/contentDeterminator/FileNameDeterminateTest.kt @@ -1,5 +1,8 @@ package no.iktdev.streamit.content.reader.analyzer.contentDeterminator +import no.iktdev.streamit.content.common.dto.reader.EpisodeInfo +import no.iktdev.streamit.content.common.dto.reader.MovieInfo +import no.iktdev.streamit.content.common.dto.reader.VideoInfo import org.assertj.core.api.AssertionsForInterfaceTypes.assertThat import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.Named @@ -7,125 +10,239 @@ import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource +data class DataHolder( + val title: String, + val sanitizedName: String, + val ctype: FileNameDeterminate.ContentType = FileNameDeterminate.ContentType.UNDEFINED +) class FileNameDeterminateTest { data class TestData( - val expected: String, - val input: String + val expected: VideoInfo, + val input: DataHolder ) @ParameterizedTest @MethodSource("serieTestCases") fun testDetermineFileNameForSerie(namedTestData: TestData) { val fileNameDeterminate = - FileNameDeterminate("Iseleve", namedTestData.input, FileNameDeterminate.ContentType.SERIE) - assertEquals( - namedTestData.expected, - fileNameDeterminate.getDeterminedVideoInfo()?.fullName, - "Test case: ${namedTestData.input}" - ) + FileNameDeterminate( + namedTestData.input.title, + 
namedTestData.input.sanitizedName, + FileNameDeterminate.ContentType.SERIE + ) + val result = fileNameDeterminate.getDeterminedVideoInfo() + assertThat(result).isNotNull() + assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName) } @ParameterizedTest(name = "{0}") @MethodSource("movieTestCases") fun testDetermineFileNameForMovie(namedTestData: TestData) { - val fileNameDeterminate = FileNameDeterminate( - namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.MOVIE - ) - assertEquals( - namedTestData.expected, - fileNameDeterminate.getDeterminedVideoInfo()?.fullName, - "Test case: ${namedTestData.input}" - ) + val fileNameDeterminate = + FileNameDeterminate( + namedTestData.input.title, + namedTestData.input.sanitizedName, + FileNameDeterminate.ContentType.MOVIE + ) + val result = fileNameDeterminate.getDeterminedVideoInfo() + assertThat(result).isNotNull() + assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName) } @ParameterizedTest() @MethodSource("undefinedTestCases") fun testDetermineFileNameForUndefined(namedTestData: TestData) { - val fileNameDeterminate = FileNameDeterminate( - namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED - ) - assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) + val fileNameDeterminate = + FileNameDeterminate( + namedTestData.input.title, + namedTestData.input.sanitizedName, + FileNameDeterminate.ContentType.UNDEFINED + ) + val result = fileNameDeterminate.getDeterminedVideoInfo() + assertThat(result).isNotNull() + assertThat(result?.fullName).isEqualTo(namedTestData.expected.fullName) } @Test fun test() { - val namedTestData = TestData("Game of Thrones - S01E01", "Game of Thrones - 01") val fileNameDeterminate = FileNameDeterminate( - namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED + "Game of Thrones", "Game of Thrones - 01", 
FileNameDeterminate.ContentType.UNDEFINED ) - assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) + assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("Game of Thrones - S01E01") + + + val td = TestData( + expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""), + input = DataHolder("Game of Thrones", "Game of Thrones - 01") + ) + + val fileNameDeterminate2 = FileNameDeterminate( + td.input.title, td.input.sanitizedName, FileNameDeterminate.ContentType.UNDEFINED + ) + assertThat(fileNameDeterminate2.getDeterminedVideoInfo()?.fullName).isEqualTo(td.expected.fullName) + } @Test fun testWildStuff() { - val namedTestData = TestData("The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10") val fileNameDeterminate = FileNameDeterminate( - namedTestData.input, namedTestData.input, FileNameDeterminate.ContentType.UNDEFINED + "The Potato man", "The.Potato.man.2023.1080p.L950XL.x265-WIN10", FileNameDeterminate.ContentType.UNDEFINED ) - assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo(namedTestData.expected) + assertThat(fileNameDeterminate.getDeterminedVideoInfo()?.fullName).isEqualTo("The Potato man") } companion object { @JvmStatic fun serieTestCases(): List> { return listOf( - Named.of("Is defined", TestData("Iseleve - S01E13", "Iseleve - 13")), - Named.of("Contains episode title", TestData("Iseleve - S01E13 - potetmos", "Iseleve - 13 potetmos")), - Named.of("Season and Episode in S01E01 format", TestData("Iseleve - S01E13", "Iseleve - S1E13")), - Named.of( - "Season and Episode with episode title", - TestData("Iseleve - S01E13 - potetmos", "Iseleve - S1E13 potetmos") - ), - Named.of("Season and Episode with space separator", TestData("Iseleve - S01E13", "Iseleve - S1 13")), - Named.of( - "Season and Episode with space separator and episode title", - TestData("Iseleve - S01E13 - potetos", 
"Iseleve - S1 13 potetos") - ), - Named.of("Lowercase season and episode", TestData("Iseleve - S01E13", "Iseleve - s1e13")), - Named.of( - "Episode title with Season and Episode in text", - TestData("Iseleve - S01E13", "Iseleve - Season 1 Episode 13") - ), - Named.of( - "Episode title with Season and Episode in text and episode title", - TestData("Iseleve - S01E13 - Potetmos", "Iseleve - Season 1 Episode 13 Potetmos") - ) + Named.of("Is defined", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ), + DataHolder("Iseleve","Iseleve - 13") + )), + Named.of("Is defined", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ), + input = DataHolder("Iseleve","Iseleve - 13 potetmos") + )), + Named.of("Season and Episode in S01E01 format", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ), + input = DataHolder("Iseleve","Iseleve - S1E13") + )), + + + Named.of("Season and Episode with episode title", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetmos", episode = 13, season = 1, episodeTitle = "potetmos" ), + input = DataHolder("Iseleve","Iseleve - S1E13 potetmos") + )), + Named.of("Season and Episode with space separator", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ), + input = DataHolder("Iseleve","Iseleve - S1 13") + )), + Named.of("Season and Episode with space separator and episode title", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - potetos", episode = 13, season = 1, episodeTitle = "" ), + input = DataHolder("Iseleve","Iseleve - S1 13 potetos") + )), + Named.of("Lowercase season and episode", TestData( + expected = EpisodeInfo(title = "Iseleve", 
fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ), + input = DataHolder("Iseleve","Iseleve - s1e13") + )), + Named.of("Episode title with Season and Episode in text", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13", episode = 13, season = 1, episodeTitle = "" ), + input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13") + )), + Named.of("Episode title with Season and Episode in text and episode title", TestData( + expected = EpisodeInfo(title = "Iseleve", fullName = "Iseleve - S01E13 - Potetmos", episode = 13, season = 1, episodeTitle = "Potetmos" ), + input = DataHolder("Iseleve","Iseleve - Season 1 Episode 13 Potetmos") + )), ) } @JvmStatic fun movieTestCases(): List> { return listOf( - Named.of("Movie with year", TestData("Some Movie (2012)", "Some Movie (2012)")), - Named.of("Movie without year", TestData("Another Movie", "Another Movie")), - Named.of("Movie with year and additional info", TestData("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1")), - //Named.of("Movie with year and spaces", TestData("Space Odyssey (2010)", "Space Odyssey (2010)")), - //Named.of("Movie with year and parentheses", TestData("Sci-Fi Movie (2015)", "Sci-Fi Movie (((2015)))")), - //Named.of("Movie with year and hyphen", TestData("Action Flick (2008)", "Action Flick - 2008")), - //Named.of("Movie with year and brackets", TestData("Blockbuster (2011)", "Blockbuster [2011]")), - //Named.of("Movie with year and period", TestData("Time Travelers. (2022)", "Time Travelers. 
.2022.")), - //Named.of("Movie with year and underscores", TestData("Hidden Gem (1999)", "Hidden Gem _1999_")), - Named.of("Movie with title as '2012'", TestData("2012", "2012")), - Named.of("Movie with title as '2020'", TestData("2020 (2012)", "2020 (2012)")), - Named.of("Movie with title as '2049'", TestData("2049 (2017)", "2049 (2017)")), - Named.of("Movie with title as '3000'", TestData("3000 (2000)", "3000 (2000)")) + Named.of( + "Movie with year", TestData( + MovieInfo("Some Movie", "Some Movie"), + DataHolder("Some Movie (2012)", "Some Movie (2012)", FileNameDeterminate.ContentType.MOVIE) + ) + ), + Named.of( + "Movie without year", TestData( + MovieInfo("Another Movie", "Another Movie"), + DataHolder("Another Movie", "Another Movie", FileNameDeterminate.ContentType.MOVIE) + ) + + ), + Named.of( + "Movie with year and additional info", TestData( + expected = MovieInfo("Awesome Movie", "Awesome Movie - Part 1"), + DataHolder("Awesome Movie (2012) - Part 1", "Awesome Movie (2012) - Part 1") + ) + + ), + Named.of("Movie with title as '2012'", TestData( + expected = MovieInfo("2012", "2012"), + DataHolder("2012", "2012") + )), + Named.of("Movie with title as '2020'", TestData( + expected = MovieInfo("2020", "2020"), + DataHolder("2020 (2012)", "2020 (2012)") + )), + Named.of("Movie with title as '2049'", TestData( + expected = MovieInfo("2049", "2049"), + DataHolder("2049 (2017)", "2049 (2017)") + )), + Named.of("Movie with title as '3000'", TestData( + expected = MovieInfo("3000", "3000"), + DataHolder("3000 (2000)", "3000 (2000)") + )), + Named.of("Avengers - Endgame", TestData( + expected = MovieInfo("Avengers", "Avengers - Endgame"), + + DataHolder("Avengers - Endgame", "Avengers - Endgame") + )), + Named.of( + "Ghost in the Shell (S.A.C) - Solid State Society", TestData( + expected = MovieInfo("Ghost in the Shell", "Ghost in the Shell - Solid State Society"), + DataHolder( + "Ghost in the Shell - Solid State Society", + "Ghost in the Shell (S.A.C) - Solid 
State Society" + ) + ) + ), ) } @JvmStatic fun undefinedTestCases(): List> { return listOf( - Named.of("Undefined - Movie", TestData("Avengers - Endgame", "Avengers - Endgame")), - Named.of("Undefined - Series", TestData("Stranger Things", "Stranger Things")), - Named.of("Undefined - Movie with Year", TestData("Inception (2010)", "Inception (2010)")), - Named.of("Undefined - Series with Year", TestData("Friends (1994)", "Friends (1994)")), - Named.of("Undefined - Movie with Genre", TestData("The Dark Knight", "The Dark Knight")), - Named.of("Undefined - Series with Genre", TestData("Breaking Bad", "Breaking Bad")), - Named.of("Undefined - Movie with Keywords", TestData("The Lord of the Rings", "The Lord of the Rings (Movie)")), - Named.of("Undefined - Series with Keywords", TestData("Game of Thrones 01", "Game of Thrones 01")), - Named.of("Undefined - Series with number", TestData("Game of Thrones - S01E01", "Game of Thrones - 01")), + Named.of("Undefined - Movie", TestData( + expected = MovieInfo("Avengers", "Avengers - Endgame"), + input = DataHolder("Avengers", "Avengers - Endgame") + )), + Named.of("Undefined - Series", TestData( + expected = MovieInfo("Stranger Things", "Stranger Things"), + input = DataHolder("Stranger Things", "Stranger Things") + )), + Named.of("Undefined - Movie with Year", TestData( + expected = MovieInfo("Inception", "Inception"), + input = DataHolder("Inception", "Inception (2010)") + )), + Named.of("Undefined - Series with Year", TestData( + expected = MovieInfo("Friends", "Friends"), + input = DataHolder("Friends", "Friends (1994)") + )), + Named.of("Undefined - Movie with Genre", TestData( + expected = MovieInfo("The Dark Knight", "The Dark Knight"), + input = DataHolder("The Dark Knight", "The Dark Knight") + )), + Named.of("Undefined - Series with Genre", TestData( + expected = MovieInfo("Breaking Bad", "Breaking Bad"), + input = DataHolder("Breaking Bad", "Breaking Bad") + )), + Named.of( + "Undefined - Movie with Keywords", + 
TestData( + expected = MovieInfo("The Lord of the Rings", "The Lord of the Rings"), + input = DataHolder("The Lord of the Rings", "The Lord of the Rings (Movie)") + ) + ), + Named.of("Undefined - Series with Keywords", TestData( + expected = EpisodeInfo("Game of Thrones", fullName = "Game of Thrones 01", episode = 1, season = 1, episodeTitle = ""), + input = DataHolder("Game of Thrones", "Game of Thrones 01") + )), + Named.of("Undefined - Series with Keywords", TestData( + expected = MovieInfo("Game of Thrones", fullName = "Game of Thrones 01"), + input = DataHolder("Game of Thrones", "Game of Thrones 01") + )), + Named.of( + "Undefined - Series with number", + TestData( + expected = EpisodeInfo(title = "Game of Thrones", fullName = "Game of Thrones - S01E01", episode = 1, season = 1, episodeTitle = ""), + input = DataHolder("Game of Thrones", "Game of Thrones - 01") + ) + ), ) } } diff --git a/pyMetadata/app.py b/pyMetadata/app.py index aef479d9..f78ad25e 100644 --- a/pyMetadata/app.py +++ b/pyMetadata/app.py @@ -2,6 +2,7 @@ import logging import signal import sys import os +from typing import Optional import uuid import threading import json @@ -125,22 +126,12 @@ class MessageHandlerThread(threading.Thread): # Sjekk om statusen er SUCCESS if status_type == 'SUCCESS': - data_value = self.message.value['data']["title"] - - result = None # Will be assigned by either cache_result or sel.perform_action - logger.info("Checking cache for offloading") - cache_result = ResultCache.get(data_value) - if cache_result: - logger.info("Cache hit for %s", data_value) - result = cache_result - else: - logger.info("Not in cache: %s", data_value) - logger.info("Searching in sources for information about %s", data_value) - result = self.perform_action(title=data_value) - if (result.statusType == "SUCCESS"): - logger.info("Storing response for %s in in-memory cache", data_value) - ResultCache.add(data_value, result) + baseName = self.message.value["data"]["sanitizedName"] + title = 
self.message.value['data']["title"] + result = self.get_metadata(baseName) + if (result is None): + result = self.get_metadata(title) producerMessage = self.compose_message(referenceId=self.message.value["referenceId"], result=result) @@ -156,6 +147,21 @@ class MessageHandlerThread(threading.Thread): producer.send(kafka_topic, key="event:metadata:obtained", value=result_json) producer.close() + def get_metadata(self, name: str) -> Optional[DataResult]: + """Return the ResultCache entry for name, else the perform_action result (cached when its statusType is SUCCESS).""" + logger.info("Checking cache for offloading") + cache_result = ResultCache.get(name) + if cache_result: + logger.info("Cache hit for %s", name) + result = cache_result + else: + logger.info("Not in cache: %s", name) + logger.info("Searching in sources for information about %s", name) + result = self.perform_action(title=name) + if (result.statusType == "SUCCESS"): + logger.info("Storing response for %s in in-memory cache", name) + ResultCache.add(name, result) + return result def perform_action(self, title) -> DataResult: