Regex parsing of media file name
This commit is contained in:
parent
e84a6494aa
commit
b32ff8ce4f
@ -40,10 +40,12 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
/**
 * Strips every resolution-like token (one or more digits immediately followed by
 * `p` or `k`, in any case) together with the rest of the line it appears on.
 */
fun String.noResolutionAndAfter(): String {
    val resolutionAndTail = Regex("[0-9]+[pk].*", RegexOption.IGNORE_CASE)
    return replace(resolutionAndTail, "")
}
|
||||
/**
 * Replaces a source/release tag (bluray, laserdisc, dvd, web, uhd, hd, htds, imax; any case)
 * and everything after it on the same line with a single space.
 *
 * A tag is only recognised when it is not directly preceded by a letter or digit, so
 * ordinary words that merely contain a tag ("Birthday" contains "hd", "Cobweb" contains
 * "web") no longer cause the title to be truncated. Tags may still be followed by
 * arbitrary characters, so prefixes such as "WEBRip" or "HDTV" keep matching.
 */
fun String.noSourceTags(): String {
    val sourceTagAndTail = Regex(
        "(?<![\\p{L}\\p{N}])(bluray|laserdisc|dvd|web|uhd|hd|htds|imax).*",
        RegexOption.IGNORE_CASE
    )
    return sourceTagAndTail.replace(this, " ")
}
|
||||
|
||||
/** Returns this string with every underscore replaced by a space. */
fun String.noUnderscores(): String = replace('_', ' ')
|
||||
/**
 * Removes standalone four-digit numbers (typically release years) from this string.
 *
 * A string that consists of exactly four digits is returned unchanged, so a title that
 * is itself a number (for example "2012") is not reduced to an empty string. The previous
 * `takeIf { ... } ?: this` guard always evaluated to the receiver and never took effect.
 */
fun String.noYear(): String {
    // The whole name is a four-digit number: treat it as the title, not as a year.
    if (matches(Regex("\\d{4}"))) return this
    return Regex("\\b\\d{4}\\b").replace(this, "")
}
|
||||
/**
 * Replaces each dot with a space, except a dot that directly follows one of the
 * abbreviations Dr, Mr, Ms, Mrs, Lt, Capt, Prof, St or Ave (case-sensitive, starting
 * at a word boundary).
 */
fun String.noDots(): String {
    val dotNotAfterAbbreviation = Regex("(?<!\\b(?:Dr|Mr|Ms|Mrs|Lt|Capt|Prof|St|Ave))\\.")
    return replace(dotNotAfterAbbreviation, " ")
}
|
||||
/** Collapses every run of two or more whitespace characters into a single space. */
fun String.noExtraSpaces(): String {
    val whitespaceRun = Regex("\\s{2,}")
    return replace(whitespaceRun, " ")
}
|
||||
/** Trims leading and trailing dots, commas, spaces, underscores and hyphens. */
fun String.fullTrim(): String {
    val edgeChars = charArrayOf('.', ',', ' ', '_', '-')
    return trim { ch -> ch in edgeChars }
}
|
||||
|
||||
enum class MediaType {
|
||||
Movie,
|
||||
@ -55,14 +57,15 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
|
||||
// Serie-mønstre: dekker alle vanlige shorthand og varianter
|
||||
val seriesPatterns = listOf(
|
||||
Regex("s\\d{1,2}e\\d{1,2}"), // S01E03, s1e5
|
||||
Regex("\\d{1,2}x\\d{1,2}"), // 1x03, 2x10
|
||||
Regex("season\\s*\\d+"), // Season 2
|
||||
Regex("episode\\s*\\d+"), // Episode 5
|
||||
Regex("ep\\s*\\d+"), // Ep05, Ep 5
|
||||
Regex("s\\d{1,2}\\s*[- ]\\s*e\\d{1,2}"), // S1 - E5, S01 - E05
|
||||
Regex("s\\d{1,2}\\s*ep\\s*\\d{1,2}"), // S1 Ep05
|
||||
Regex("series\\s*\\d+"), // Series 2 (britisk stil)
|
||||
Regex("s\\d{1,2}e\\d{1,2}"), // S01E03, s1e5
|
||||
Regex("\\d{1,2}x\\d{1,2}"), // 1x03, 2x10
|
||||
Regex("season\\s*\\d+"), // Season 2
|
||||
Regex("episode\\s*\\d+"), // Episode 5
|
||||
Regex("ep\\s*\\d+"), // Ep05, Ep 5
|
||||
Regex("s\\d{1,2}\\s*[- ]\\s*e\\d{1,2}"), // S1 - E5, S01 - E05
|
||||
Regex("s\\d{1,2}\\s*ep\\s*\\d{1,2}"), // S1 Ep05
|
||||
Regex("series\\s*\\d+"), // Series 2 (britisk stil)
|
||||
Regex("s\\d{1,2}[. ]e\\d{1,2}") // S01.E02 eller S01 E02
|
||||
)
|
||||
|
||||
if (seriesPatterns.any { it.containsMatchIn(name) }) {
|
||||
@ -89,7 +92,7 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
}
|
||||
|
||||
fun File.getDesiredCollection(): String {
|
||||
val collection = when (this.guessMovieOrSeries()) {
|
||||
val collection = when (this.guessMovieOrSeries()) {
|
||||
MediaType.Movie -> this.guessDesiredMovieTitle()
|
||||
MediaType.Serie -> this.guessDesiredSerieTitle()
|
||||
}
|
||||
@ -122,22 +125,43 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
val seasonRegex = Regex("""(?i)(?:S|Season|Series)\s*(\d{1,2})""")
|
||||
val episodeRegex = Regex("""(?i)(?:E|Episode|Ep)\s*(\d{1,3})""")
|
||||
val revisionRegex = Regex("""(?i)\bv(\d+)\b""")
|
||||
val seasonEpisodeRegex = Regex("""(?i)(\d{1,2})x(\d{1,2})(?:[vV](\d+))?""")
|
||||
|
||||
val seasonMatch = seasonRegex.find(raw)
|
||||
val episodeMatch = episodeRegex.find(raw)
|
||||
val revisionMatch = revisionRegex.find(raw)
|
||||
var season: Int? = null
|
||||
var episode: Int? = null
|
||||
var revision: Int? = null
|
||||
var baseTitle = raw.getCleanedTitle()
|
||||
var episodeTitle = ""
|
||||
|
||||
val season = seasonMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
val episode = episodeMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
val revision = revisionMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
val seMatch = seasonEpisodeRegex.find(raw)
|
||||
if (seMatch != null) {
|
||||
season = seMatch.groupValues[1].toIntOrNull()
|
||||
episode = seMatch.groupValues[2].toIntOrNull()
|
||||
revision = seMatch.groupValues.getOrNull(3)?.toIntOrNull()
|
||||
baseTitle = raw.substring(0, seMatch.range.first).getCleanedTitle()
|
||||
episodeTitle = raw.substring(seMatch.range.last + 1).getCleanedTitle()
|
||||
} else {
|
||||
val seasonMatch = seasonRegex.find(raw)
|
||||
val episodeMatch = episodeRegex.find(raw)
|
||||
val revisionMatch = revisionRegex.find(raw)
|
||||
|
||||
val baseTitle = if (seasonMatch != null) {
|
||||
raw.substring(0, seasonMatch.range.first).getCleanedTitle()
|
||||
} else raw.getCleanedTitle()
|
||||
season = seasonMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
episode = episodeMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
revision = revisionMatch?.groupValues?.get(1)?.toIntOrNull()
|
||||
|
||||
val episodeTitle = if (episodeMatch != null) {
|
||||
raw.substring(episodeMatch.range.last + 1).getCleanedTitle()
|
||||
} else ""
|
||||
baseTitle = if (seasonMatch != null) {
|
||||
raw.substring(0, seasonMatch.range.first).getCleanedTitle()
|
||||
} else raw.getCleanedTitle()
|
||||
|
||||
episodeTitle = if (episodeMatch != null) {
|
||||
raw.substring(episodeMatch.range.last + 1).getCleanedTitle()
|
||||
} else ""
|
||||
}
|
||||
|
||||
// Fallback: hvis baseTitle er tom eller bare inneholder S/E, bruk parent‑mappe
|
||||
if (baseTitle.isBlank() || baseTitle.matches(Regex("""(?i)^s?\d+e?\d+$"""))) {
|
||||
baseTitle = this.parentFile?.name?.getCleanedTitle() ?: "Dumb ways to die"
|
||||
}
|
||||
|
||||
val tag = buildString {
|
||||
append("S${(season ?: 1).toString().padStart(2, '0')}")
|
||||
@ -159,29 +183,35 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
|
||||
|
||||
fun File.guessSearchableTitle(): List<String> {
|
||||
val cleaned = this.guessDesiredFileName().noParens()
|
||||
.let {
|
||||
val regex = "\\((?!\\d{4}\\))(?>[^()]+|\\b)\\)"
|
||||
Regex(regex).replace(it, "")
|
||||
}
|
||||
val cleaned = this.guessDesiredFileName()
|
||||
.noResolutionAndAfter()
|
||||
.noSourceTags()
|
||||
.noDots()
|
||||
.noExtraSpaces()
|
||||
.trim('.', ',', ' ')
|
||||
.fullTrim()
|
||||
|
||||
val titles = mutableListOf<String>()
|
||||
|
||||
// 1. Første del før bindestrek
|
||||
val yearRegex = Regex("""\b(19|20)\d{2}\b""")
|
||||
val hasYear = yearRegex.containsMatchIn(cleaned)
|
||||
|
||||
// 1. Hvis årstall finnes, legg hele cleaned først
|
||||
if (hasYear) {
|
||||
titles.add(cleaned)
|
||||
}
|
||||
|
||||
// 2. Første del før bindestrek
|
||||
val firstPart = cleaned.split(" - ").firstOrNull()?.trim() ?: cleaned
|
||||
titles.add(firstPart)
|
||||
|
||||
// 2. Hele cleaned
|
||||
titles.add(cleaned)
|
||||
// 3. Hele cleaned (hvis ikke allerede lagt inn først)
|
||||
if (!hasYear) {
|
||||
titles.add(cleaned)
|
||||
}
|
||||
|
||||
// 3. Fjern årstall hvis det finnes
|
||||
val yearRegex = Regex("""\b(19|20)\d{2}\b""")
|
||||
val noYear = yearRegex.replace(cleaned, "").trim()
|
||||
// 4. Variant uten årstall
|
||||
val noYear = yearRegex.replace(cleaned, "")
|
||||
.noParens().trim()
|
||||
if (noYear.isNotEmpty() && noYear != cleaned) {
|
||||
titles.add(noYear)
|
||||
}
|
||||
@ -190,5 +220,4 @@ class MediaEventParsedInfoListener : EventListener() {
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -11,14 +11,6 @@ import java.io.File
|
||||
class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() {
|
||||
|
||||
|
||||
@MethodSource("fileNameSanitizeTest")
|
||||
@ParameterizedTest(name = "{0}")
|
||||
fun fileNameSanitizeTest(testCase: SanitizeTestCase) {
|
||||
val parser = FileNameParser(testCase.input)
|
||||
val result = parser.guessDesiredFileName()
|
||||
assertThat(result).isEqualTo(testCase.expected)
|
||||
}
|
||||
|
||||
@MethodSource("parsedInfoTest")
|
||||
@ParameterizedTest(name = "{0}")
|
||||
fun parsedInfoTest(testCase: ParsedInfoTestCase) {
|
||||
@ -39,12 +31,6 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() {
|
||||
assertThat(mediaType).isEqualTo(testCase.expectedType)
|
||||
}
|
||||
|
||||
|
||||
data class SanitizeTestCase(
|
||||
val input: String,
|
||||
val expected: String
|
||||
)
|
||||
|
||||
data class ParsedInfoTestCase(
|
||||
val file: File,
|
||||
val expectedTitle: String,
|
||||
@ -58,61 +44,10 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() {
|
||||
)
|
||||
|
||||
companion object {
|
||||
@JvmStatic
|
||||
fun fileNameSanitizeTest() = listOf(
|
||||
Named.of(
|
||||
"Basic sanitization",
|
||||
SanitizeTestCase(
|
||||
input = "Fancy.Thomas.S03E03.Enemy.1080p.AMAZING.WEB-VALUE.DDP5AN.1.H.264",
|
||||
expected = "Fancy Thomas S03E03 Enemy"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Name with numbers",
|
||||
SanitizeTestCase(
|
||||
input = "[TST] Fancy Name Test 99 - 01 [Nans][#00A8E6]",
|
||||
expected = "Fancy Name Test 99 - 01"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Dot removal and special characters",
|
||||
SanitizeTestCase(
|
||||
input = "Like.a.Potato.Chef.S01E01.Departure.\\u0026.Skills.1080p.Potato",
|
||||
expected = "Like a Potato Chef S01E01 Departure \\u0026 Skills"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Movie name with numbers",
|
||||
SanitizeTestCase(
|
||||
input = "Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p",
|
||||
expected = "Wicket Wicker Potato 4"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Movie with extended title",
|
||||
SanitizeTestCase(
|
||||
input = "Potato-Pass Movie - Skinke",
|
||||
expected = "Potato-Pass Movie - Skinke"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Title with year in parentheses",
|
||||
SanitizeTestCase(
|
||||
input = "Amazing Potato (2022) 1080p BluRay",
|
||||
expected = "Amazing Potato"
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Same",
|
||||
SanitizeTestCase(
|
||||
input = "S01E03-How to unlucky i am",
|
||||
expected = "S01E03-How to unlucky i am"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@JvmStatic
|
||||
fun parsedInfoTest() = listOf(
|
||||
// existing parsed cases
|
||||
Named.of(
|
||||
"Series episode parsing",
|
||||
ParsedInfoTestCase(
|
||||
@ -148,6 +83,154 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() {
|
||||
expectedFileName = "Potato-Pass Movie - Skinke",
|
||||
expectedSearchTitles = listOf("Potato-Pass Movie", "Potato-Pass Movie - Skinke")
|
||||
)
|
||||
),
|
||||
|
||||
Named.of(
|
||||
"Name with numbers",
|
||||
ParsedInfoTestCase(
|
||||
file = File("[TST] Fancy Name Test 99 - 01 [Nans][#00A8E6].mkv"),
|
||||
expectedTitle = "Fancy Name Test 99",
|
||||
expectedFileName = "Fancy Name Test 99 - 01",
|
||||
expectedSearchTitles = listOf("Fancy Name Test 99", "Fancy Name Test 99 - 01")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Movie name with numbers",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p.mkv"),
|
||||
expectedTitle = "Wicket Wicker Potato 4",
|
||||
expectedFileName = "Wicket Wicker Potato 4",
|
||||
expectedSearchTitles = listOf("Wicket Wicker Potato 4")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Title with year in parentheses",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Amazing Potato (2022) 1080p BluRay.mkv"),
|
||||
expectedTitle = "Amazing Potato",
|
||||
expectedFileName = "Amazing Potato",
|
||||
expectedSearchTitles = listOf("Amazing Potato")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Same",
|
||||
ParsedInfoTestCase(
|
||||
file = File("/Dumb ways to die/S01E03-How to unlucky i am.mkv"),
|
||||
expectedTitle = "Dumb ways to die",
|
||||
expectedFileName = "Dumb ways to die - S01E03 - How to unlucky i am",
|
||||
expectedSearchTitles = listOf(
|
||||
"Dumb ways to die",
|
||||
"Dumb ways to die - S01E03 - How to unlucky i am"
|
||||
)
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Underscores and mixed tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("my_movie_title_2019_1080p_x264_YTS.mkv"),
|
||||
expectedTitle = "my movie title",
|
||||
expectedFileName = "my movie title (2019)",
|
||||
expectedSearchTitles = listOf("my movie title (2019)", "my movie title")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Multiple bracketed groups and release tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("[GROUP][WEBRip][YTS]Some.Movie.Title.720p.WEBRip.x264.AAC-[eztv].mkv"),
|
||||
expectedTitle = "Some Movie Title",
|
||||
expectedFileName = "Some Movie Title",
|
||||
expectedSearchTitles = listOf("Some Movie Title")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Remux, PROPER, REPACK and extras",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Cool.Movie.2018.1080p.BluRay.REMUX.PROPER.REPACK.READNFO-GRP.mkv"),
|
||||
expectedTitle = "Cool Movie",
|
||||
expectedFileName = "Cool Movie",
|
||||
expectedSearchTitles = listOf("Cool Movie")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Hyphens and multiple dashes",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Potato-Fields_-_A-Strange.Day-2017-HDTV-720p.mkv"),
|
||||
expectedTitle = "Potato-Fields",
|
||||
expectedFileName = "Potato-Fields - A-Strange Day",
|
||||
expectedSearchTitles = listOf("Potato-Fields", "Potato-Fields - A-Strange Day")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Trailing group and site tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Movie.Name.2015.1080p.BluRay.x264-[YTS.MX].mkv"),
|
||||
expectedTitle = "Movie Name",
|
||||
expectedFileName = "Movie Name",
|
||||
expectedSearchTitles = listOf("Movie Name")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"IMAX and UNRATED markers",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Epic.Film.IMAX.UNRATED.2019.2160p.HDR.HEVC.mkv"),
|
||||
expectedTitle = "Epic Film",
|
||||
expectedFileName = "Epic Film",
|
||||
expectedSearchTitles = listOf("Epic Film")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Sample and Trailer should be stripped",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Amazing.Movie.2020.1080p.Trailer-SAMPLE.mp4"),
|
||||
expectedTitle = "Amazing Movie",
|
||||
expectedFileName = "Amazing Movie",
|
||||
expectedSearchTitles = listOf("Amazing Movie")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Parentheses director's cut",
|
||||
ParsedInfoTestCase(
|
||||
file = File("The.Great.Film.(Director's.Cut).2016.1080p.BluRay.mkv"),
|
||||
expectedTitle = "The Great Film",
|
||||
expectedFileName = "The Great Film",
|
||||
expectedSearchTitles = listOf("The Great Film")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Mixed separators and version tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Show.Name.S01.E02.720p.HDTV.x264-Group_v2.mkv"),
|
||||
expectedTitle = "Show Name",
|
||||
expectedFileName = "Show Name - S01E02",
|
||||
expectedSearchTitles = listOf("Show Name", "Show Name - S01E02")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Square brackets year and tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Title [2014] [1080p] [BluRay] [ENG].mkv"),
|
||||
expectedTitle = "Title",
|
||||
expectedFileName = "Title",
|
||||
expectedSearchTitles = listOf("Title")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Version suffixes and fix tags",
|
||||
ParsedInfoTestCase(
|
||||
file = File("Movie.Title.720p.HDTV.x264-FLEET.fix.mkv"),
|
||||
expectedTitle = "Movie Title",
|
||||
expectedFileName = "Movie Title",
|
||||
expectedSearchTitles = listOf("Movie Title")
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Nested brackets and group names",
|
||||
ParsedInfoTestCase(
|
||||
file = File("[HD] (2020) Weird.Movie.Title - Extended.Edition [Group-Name].mkv"),
|
||||
expectedTitle = "Weird Movie Title",
|
||||
expectedFileName = "Weird Movie Title - Extended Edition",
|
||||
expectedSearchTitles = listOf("Weird Movie Title", "Weird Movie Title - Extended Edition")
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@ -188,8 +271,57 @@ class MediaEventParsedInfoListenerTest : MediaEventParsedInfoListener() {
|
||||
expectedType = MediaType.Movie
|
||||
)
|
||||
),
|
||||
|
||||
// Additional parse/dumb filename cases
|
||||
Named.of(
|
||||
"Lowercase sXe pattern",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("weird_show.s01e02.720p.mkv"),
|
||||
expectedType = MediaType.Serie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Spaces and full words",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("Some Show Season 02 Episode 09 1080p.mkv"),
|
||||
expectedType = MediaType.Serie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"1x02 style",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("Show.Name.1x02.HDTV.mp4"),
|
||||
expectedType = MediaType.Serie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Season and episode no separators",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("ShowNameSeason03Episode04.avi"),
|
||||
expectedType = MediaType.Serie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Movie with year and extra tags",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("Some.Movie.Title.1999.720p.BluRay.x264-GROUP.mkv"),
|
||||
expectedType = MediaType.Movie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Confusing underscores and trailers",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("a_movie_trailer_2017_sample.mp4"),
|
||||
expectedType = MediaType.Movie
|
||||
)
|
||||
),
|
||||
Named.of(
|
||||
"Mixed separators and version tags",
|
||||
ParseVideoTypeTestCase(
|
||||
file = File("Show.Name.S01.E02.720p.HDTV.x264-Group_v2.mkv"),
|
||||
expectedType = MediaType.Serie
|
||||
)
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user