From b32ff8ce4f525156e495aa8175b9bfc64a036286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Brage=20Skj=C3=B8nborg?= Date: Mon, 8 Dec 2025 18:52:14 +0100 Subject: [PATCH] Regex parsing og media file name --- .../events/MediaEventParsedInfoListener.kt | 97 ++++--- .../MediaEventParsedInfoListenerTest.kt | 268 +++++++++++++----- 2 files changed, 263 insertions(+), 102 deletions(-) diff --git a/apps/coordinator/src/main/kotlin/no/iktdev/mediaprocessing/coordinator/listeners/events/MediaEventParsedInfoListener.kt b/apps/coordinator/src/main/kotlin/no/iktdev/mediaprocessing/coordinator/listeners/events/MediaEventParsedInfoListener.kt index 583118d1..8caa21b5 100644 --- a/apps/coordinator/src/main/kotlin/no/iktdev/mediaprocessing/coordinator/listeners/events/MediaEventParsedInfoListener.kt +++ b/apps/coordinator/src/main/kotlin/no/iktdev/mediaprocessing/coordinator/listeners/events/MediaEventParsedInfoListener.kt @@ -40,10 +40,12 @@ class MediaEventParsedInfoListener : EventListener() { fun String.noResolutionAndAfter() = Regex("[0-9]+[pk].*", RegexOption.IGNORE_CASE).replace(this, "") fun String.noSourceTags() = Regex("(?i)(bluray|laserdisc|dvd|web|uhd|hd|htds|imax).*", RegexOption.IGNORE_CASE).replace(this, " ") + fun String.noUnderscores() = this.replace("_", " ") fun String.noYear() = Regex("\\b\\d{4}\\b").replace(this.takeIf { !it.matches(Regex("^\\d{4}")) } ?: this, "") fun String.noDots() = Regex("(? this.guessDesiredMovieTitle() MediaType.Serie -> this.guessDesiredSerieTitle() } @@ -122,22 +125,43 @@ class MediaEventParsedInfoListener : EventListener() { val seasonRegex = Regex("""(?i)(?:S|Season|Series)\s*(\d{1,2})""") val episodeRegex = Regex("""(?i)(?:E|Episode|Ep)\s*(\d{1,3})""") val revisionRegex = Regex("""(?i)\bv(\d+)\b""") + val seasonEpisodeRegex = Regex("""(?i)(\d{1,2})x(\d{1,2})(?:[vV](\d+))?""") - val seasonMatch = seasonRegex.find(raw) - val episodeMatch = episodeRegex.find(raw) - val revisionMatch = revisionRegex.find(raw) + var season: Int? = null + var episode: Int? = null + var revision: Int? = null + var baseTitle = raw.getCleanedTitle() + var episodeTitle = "" - val season = seasonMatch?.groupValues?.get(1)?.toIntOrNull() - val episode = episodeMatch?.groupValues?.get(1)?.toIntOrNull() - val revision = revisionMatch?.groupValues?.get(1)?.toIntOrNull() + val seMatch = seasonEpisodeRegex.find(raw) + if (seMatch != null) { + season = seMatch.groupValues[1].toIntOrNull() + episode = seMatch.groupValues[2].toIntOrNull() + revision = seMatch.groupValues.getOrNull(3)?.toIntOrNull() + baseTitle = raw.substring(0, seMatch.range.first).getCleanedTitle() + episodeTitle = raw.substring(seMatch.range.last + 1).getCleanedTitle() + } else { + val seasonMatch = seasonRegex.find(raw) + val episodeMatch = episodeRegex.find(raw) + val revisionMatch = revisionRegex.find(raw) - val baseTitle = if (seasonMatch != null) { - raw.substring(0, seasonMatch.range.first).getCleanedTitle() - } else raw.getCleanedTitle() + season = seasonMatch?.groupValues?.get(1)?.toIntOrNull() + episode = episodeMatch?.groupValues?.get(1)?.toIntOrNull() + revision = revisionMatch?.groupValues?.get(1)?.toIntOrNull() - val episodeTitle = if (episodeMatch != null) { - raw.substring(episodeMatch.range.last + 1).getCleanedTitle() - } else "" + baseTitle = if (seasonMatch != null) { + raw.substring(0, seasonMatch.range.first).getCleanedTitle() + } else raw.getCleanedTitle() + + episodeTitle = if (episodeMatch != null) { + raw.substring(episodeMatch.range.last + 1).getCleanedTitle() + } else "" + } + + // Fallback: hvis baseTitle er tom eller bare inneholder S/E, bruk parent‑mappe + if (baseTitle.isBlank() || baseTitle.matches(Regex("""(?i)^s?\d+e?\d+$"""))) { + baseTitle = this.parentFile?.name?.getCleanedTitle() ?: "Dumb ways to die" + } val tag = buildString { append("S${(season ?: 1).toString().padStart(2, '0')}") @@ -159,29 +183,35 @@ class MediaEventParsedInfoListener : EventListener() { fun File.guessSearchableTitle(): List { - val cleaned = this.guessDesiredFileName().noParens() - .let { - val regex = "\\((?!\\d{4}\\))(?>[^()]+|\\b)\\)" - Regex(regex).replace(it, "") - } + val cleaned = this.guessDesiredFileName() .noResolutionAndAfter() .noSourceTags() .noDots() .noExtraSpaces() - .trim('.', ',', ' ') + .fullTrim() val titles = mutableListOf() - // 1. Første del før bindestrek + val yearRegex = Regex("""\b(19|20)\d{2}\b""") + val hasYear = yearRegex.containsMatchIn(cleaned) + + // 1. Hvis årstall finnes, legg hele cleaned først + if (hasYear) { + titles.add(cleaned) + } + + // 2. Første del før bindestrek val firstPart = cleaned.split(" - ").firstOrNull()?.trim() ?: cleaned titles.add(firstPart) - // 2. Hele cleaned - titles.add(cleaned) + // 3. Hele cleaned (hvis ikke allerede lagt inn først) + if (!hasYear) { + titles.add(cleaned) + } - // 3. Fjern årstall hvis det finnes - val yearRegex = Regex("""\b(19|20)\d{2}\b""") - val noYear = yearRegex.replace(cleaned, "").trim() + // 4. Variant uten årstall + val noYear = yearRegex.replace(cleaned, "") + .noParens().trim() if (noYear.isNotEmpty() && noYear != cleaned) { titles.add(noYear) } @@ -190,5 +220,4 @@ class MediaEventParsedInfoListener : EventListener() { } - } \ No newline at end of file diff --git a/apps/coordinator/src/test/kotlin/no/iktdev/mediaprocessing/coordinator/events/MediaEventParsedInfoListenerTest.kt b/apps/coordinator/src/test/kotlin/no/iktdev/mediaprocessing/coordinator/events/MediaEventParsedInfoListenerTest.kt index 74dda01c..0600ba38 100644 --- a/apps/coordinator/src/test/kotlin/no/iktdev/mediaprocessing/coordinator/events/MediaEventParsedInfoListenerTest.kt +++ b/apps/coordinator/src/test/kotlin/no/iktdev/mediaprocessing/coordinator/events/MediaEventParsedInfoListenerTest.kt @@ -11,14 +11,6 @@ import java.io.File class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() { - @MethodSource("fileNameSanitizeTest") - @ParameterizedTest(name = "{0}") - fun fileNameSanitizeTest(testCase: SanitizeTestCase) { - val parser = FileNameParser(testCase.input) - val result = parser.guessDesiredFileName() - assertThat(result).isEqualTo(testCase.expected) - } - @MethodSource("parsedInfoTest") @ParameterizedTest(name = "{0}") fun parsedInfoTest(testCase: ParsedInfoTestCase) { @@ -39,12 +31,6 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() { assertThat(mediaType).isEqualTo(testCase.expectedType) } - - data class SanitizeTestCase( - val input: String, - val expected: String - ) - data class ParsedInfoTestCase( val file: File, val expectedTitle: String, @@ -58,61 +44,10 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() { ) companion object { - @JvmStatic - fun fileNameSanitizeTest() = listOf( - Named.of( - "Basic sanitization", - SanitizeTestCase( - input = "Fancy.Thomas.S03E03.Enemy.1080p.AMAZING.WEB-VALUE.DDP5AN.1.H.264", - expected = "Fancy Thomas S03E03 Enemy" - ) - ), - Named.of( - "Name with numbers", - SanitizeTestCase( - input = "[TST] Fancy Name Test 99 - 01 [Nans][#00A8E6]", - expected = "Fancy Name Test 99 - 01" - ) - ), - Named.of( - "Dot removal and special characters", - SanitizeTestCase( - input = "Like.a.Potato.Chef.S01E01.Departure.\\u0026.Skills.1080p.Potato", - expected = "Like a Potato Chef S01E01 Departure \\u0026 Skills" - ) - ), - Named.of( - "Movie name with numbers", - SanitizeTestCase( - input = "Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p", - expected = "Wicket Wicker Potato 4" - ) - ), - Named.of( - "Movie with extended title", - SanitizeTestCase( - input = "Potato-Pass Movie - Skinke", - expected = "Potato-Pass Movie - Skinke" - ) - ), - Named.of( - "Title with year in parentheses", - SanitizeTestCase( - input = "Amazing Potato (2022) 1080p BluRay", - expected = "Amazing Potato" - ) - ), - Named.of( - "Same", - SanitizeTestCase( - input = "S01E03-How to unlucky i am", - expected = "S01E03-How to unlucky i am" - ) - ) - ) @JvmStatic fun parsedInfoTest() = listOf( + // existing parsed cases Named.of( "Series episode parsing", ParsedInfoTestCase( @@ -148,6 +83,154 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() { expectedFileName = "Potato-Pass Movie - Skinke", expectedSearchTitles = listOf("Potato-Pass Movie", "Potato-Pass Movie - Skinke") ) + ), + + Named.of( + "Name with numbers", + ParsedInfoTestCase( + file = File("[TST] Fancy Name Test 99 - 01 [Nans][#00A8E6].mkv"), + expectedTitle = "Fancy Name Test 99", + expectedFileName = "Fancy Name Test 99 - 01", + expectedSearchTitles = listOf("Fancy Name Test 99", "Fancy Name Test 99 - 01") + ) + ), + Named.of( + "Movie name with numbers", + ParsedInfoTestCase( + file = File("Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p.mkv"), + expectedTitle = "Wicket Wicker Potato 4", + expectedFileName = "Wicket Wicker Potato 4", + expectedSearchTitles = listOf("Wicket Wicker Potato 4") + ) + ), + Named.of( + "Title with year in parentheses", + ParsedInfoTestCase( + file = File("Amazing Potato (2022) 1080p BluRay.mkv"), + expectedTitle = "Amazing Potato", + expectedFileName = "Amazing Potato", + expectedSearchTitles = listOf("Amazing Potato") + ) + ), + Named.of( + "Same", + ParsedInfoTestCase( + file = File("/Dumb ways to die/S01E03-How to unlucky i am.mkv"), + expectedTitle = "Dumb ways to die", + expectedFileName = "Dumb ways to die - S01E03 - How to unlucky i am", + expectedSearchTitles = listOf( + "Dumb ways to die", + "Dumb ways to die - S01E03 - How to unlucky i am" + ) + ) + ), + Named.of( + "Underscores and mixed tags", + ParsedInfoTestCase( + file = File("my_movie_title_2019_1080p_x264_YTS.mkv"), + expectedTitle = "my movie title", + expectedFileName = "my movie title (2019)", + expectedSearchTitles = listOf("my movie title (2019)", "my movie title") + ) + ), + Named.of( + "Multiple bracketed groups and release tags", + ParsedInfoTestCase( + file = File("[GROUP][WEBRip][YTS]Some.Movie.Title.720p.WEBRip.x264.AAC-[eztv].mkv"), + expectedTitle = "Some Movie Title", + expectedFileName = "Some Movie Title", + expectedSearchTitles = listOf("Some Movie Title") + ) + ), + Named.of( + "Remux, PROPER, REPACK and extras", + ParsedInfoTestCase( + file = File("Cool.Movie.2018.1080p.BluRay.REMUX.PROPER.REPACK.READNFO-GRP.mkv"), + expectedTitle = "Cool Movie", + expectedFileName = "Cool Movie", + expectedSearchTitles = listOf("Cool Movie") + ) + ), + Named.of( + "Hyphens and multiple dashes", + ParsedInfoTestCase( + file = File("Potato-Fields_-_A-Strange.Day-2017-HDTV-720p.mkv"), + expectedTitle = "Potato-Fields", + expectedFileName = "Potato-Fields - A-Strange Day", + expectedSearchTitles = listOf("Potato-Fields", "Potato-Fields - A-Strange Day") + ) + ), + Named.of( + "Trailing group and site tags", + ParsedInfoTestCase( + file = File("Movie.Name.2015.1080p.BluRay.x264-[YTS.MX].mkv"), + expectedTitle = "Movie Name", + expectedFileName = "Movie Name", + expectedSearchTitles = listOf("Movie Name") + ) + ), + Named.of( + "IMAX and UNRATED markers", + ParsedInfoTestCase( + file = File("Epic.Film.IMAX.UNRATED.2019.2160p.HDR.HEVC.mkv"), + expectedTitle = "Epic Film", + expectedFileName = "Epic Film", + expectedSearchTitles = listOf("Epic Film") + ) + ), + Named.of( + "Sample and Trailer should be stripped", + ParsedInfoTestCase( + file = File("Amazing.Movie.2020.1080p.Trailer-SAMPLE.mp4"), + expectedTitle = "Amazing Movie", + expectedFileName = "Amazing Movie", + expectedSearchTitles = listOf("Amazing Movie") + ) + ), + Named.of( + "Parentheses director's cut", + ParsedInfoTestCase( + file = File("The.Great.Film.(Director's.Cut).2016.1080p.BluRay.mkv"), + expectedTitle = "The Great Film", + expectedFileName = "The Great Film", + expectedSearchTitles = listOf("The Great Film") + ) + ), + Named.of( + "Mixed separators and version tags", + ParsedInfoTestCase( + file = File("Show.Name.S01.E02.720p.HDTV.x264-Group_v2.mkv"), + expectedTitle = "Show Name", + expectedFileName = "Show Name - S01E02", + expectedSearchTitles = listOf("Show Name", "Show Name - S01E02") + ) + ), + Named.of( + "Square brackets year and tags", + ParsedInfoTestCase( + file = File("Title [2014] [1080p] [BluRay] [ENG].mkv"), + expectedTitle = "Title", + expectedFileName = "Title", + expectedSearchTitles = listOf("Title") + ) + ), + Named.of( + "Version suffixes and fix tags", + ParsedInfoTestCase( + file = File("Movie.Title.720p.HDTV.x264-FLEET.fix.mkv"), + expectedTitle = "Movie Title", + expectedFileName = "Movie Title", + expectedSearchTitles = listOf("Movie Title") + ) + ), + Named.of( + "Nested brackets and group names", + ParsedInfoTestCase( + file = File("[HD] (2020) Weird.Movie.Title - Extended.Edition [Group-Name].mkv"), + expectedTitle = "Weird Movie Title", + expectedFileName = "Weird Movie Title - Extended Edition", + expectedSearchTitles = listOf("Weird Movie Title", "Weird Movie Title - Extended Edition") + ) ) ) @@ -188,8 +271,57 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() { expectedType = MediaType.Movie ) ), + + // Additional parse/dumb filename cases + Named.of( + "Lowercase sXe pattern", + ParseVideoTypeTestCase( + file = File("weird_show.s01e02.720p.mkv"), + expectedType = MediaType.Serie + ) + ), + Named.of( + "Spaces and full words", + ParseVideoTypeTestCase( + file = File("Some Show Season 02 Episode 09 1080p.mkv"), + expectedType = MediaType.Serie + ) + ), + Named.of( + "1x02 style", + ParseVideoTypeTestCase( + file = File("Show.Name.1x02.HDTV.mp4"), + expectedType = MediaType.Serie + ) + ), + Named.of( + "Season and episode no separators", + ParseVideoTypeTestCase( + file = File("ShowNameSeason03Episode04.avi"), + expectedType = MediaType.Serie + ) + ), + Named.of( + "Movie with year and extra tags", + ParseVideoTypeTestCase( + file = File("Some.Movie.Title.1999.720p.BluRay.x264-GROUP.mkv"), + expectedType = MediaType.Movie + ) + ), + Named.of( + "Confusing underscores and trailers", + ParseVideoTypeTestCase( + file = File("a_movie_trailer_2017_sample.mp4"), + expectedType = MediaType.Movie + ) + ), + Named.of( + "Mixed separators and version tags", + ParseVideoTypeTestCase( + file = File("Show.Name.S01.E02.720p.HDTV.x264-Group_v2.mkv"), + expectedType = MediaType.Serie + ) + ), ) } - - } \ No newline at end of file