Transliterating

This commit is contained in:
Brage Skjønborg 2026-02-01 17:59:16 +01:00
parent 21f6f8c83b
commit 701c939e8d
5 changed files with 154 additions and 16 deletions

View File

@ -48,6 +48,8 @@ dependencies {
implementation(project(":shared:ffmpeg")) implementation(project(":shared:ffmpeg"))
implementation(libs.eventi) implementation(libs.eventi)
implementation("com.ibm.icu:icu4j:75.1")
testImplementation(kotlin("test")) testImplementation(kotlin("test"))
testImplementation(platform("org.junit:junit-bom:5.10.0")) testImplementation(platform("org.junit:junit-bom:5.10.0"))
testImplementation("org.junit.jupiter:junit-jupiter") testImplementation("org.junit.jupiter:junit-jupiter")

View File

@ -1,5 +1,6 @@
package no.iktdev.mediaprocessing.shared.common package no.iktdev.mediaprocessing.shared.common
import com.ibm.icu.text.Transliterator
import kotlinx.coroutines.delay import kotlinx.coroutines.delay
import mu.KotlinLogging import mu.KotlinLogging
import no.iktdev.eventi.ZDS.toEvent import no.iktdev.eventi.ZDS.toEvent
@ -254,4 +255,16 @@ fun <T : Any> KClass<T>.getName(): String =
this.simpleName ?: this.java.simpleName this.simpleName ?: this.java.simpleName
/**
 * Shared ICU transliterator built from the compound ID "Any-Latin; Latin-ASCII".
 * Created once at file level so it is not rebuilt on every call.
 */
private val transliterator = Transliterator.getInstance("Any-Latin; Latin-ASCII")

/**
 * Matches every character that is NOT a letter, a digit, whitespace, a hyphen,
 * a parenthesis, a comma or a period. Compiled once instead of on every call.
 */
private val fileSystemDisallowedChars = Regex("[^\\p{L}\\p{N}\\s\\-(),.]")

/** Matches any run of one or more whitespace characters (spaces, tabs, line breaks). */
private val fileSystemWhitespaceRun = Regex("\\s+")

/**
 * Returns a version of this string intended for use as a file or folder name.
 *
 * Steps:
 * 1. Transliterate to ASCII (e.g. "Señor" -> "Senor", "Łódź" -> "Lodz", "ÆØÅ" -> "AEOA").
 * 2. Replace every character that is not a letter, digit, whitespace, hyphen,
 *    parenthesis, comma or period with a space (e.g. ':', '@', '!').
 * 3. Collapse every whitespace run into a single space and trim both ends.
 *
 * Unlike a "two or more" collapse, step 3 also turns a lone tab or line break into
 * a plain space, so no control whitespace survives in the resulting name.
 *
 * NOTE(review): periods are kept as-is, so inputs such as "." or ".." pass through
 * unchanged — confirm callers never build paths from such values.
 *
 * @return the sanitized name; may be empty if nothing allowed remains.
 */
fun String.cleanForFileSystem(): String {
    // 1. Full transliteration to ASCII.
    val ascii = transliterator.transliterate(this)

    // 2. Replace disallowed characters with a space rather than deleting them,
    //    so "Hello@World" becomes "Hello World" and not "HelloWorld".
    val cleaned = ascii.replace(fileSystemDisallowedChars, " ")

    // 3. Normalize whitespace: every run (including a single tab/newline) -> one space.
    return cleaned.replace(fileSystemWhitespaceRun, " ").trim()
}

View File

@ -2,6 +2,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
import no.iktdev.eventi.models.Event import no.iktdev.eventi.models.Event
import no.iktdev.exfl.using import no.iktdev.exfl.using
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.* import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
import no.iktdev.mediaprocessing.shared.common.resolveConflict import no.iktdev.mediaprocessing.shared.common.resolveConflict
import java.io.File import java.io.File
@ -22,11 +23,11 @@ open class MigrateContentProject(
internal fun getFileName(): String? { internal fun getFileName(): String? {
val parsedInfo = events.filterIsInstance<MediaParsedInfoEvent>().lastOrNull() ?: return null val parsedInfo = events.filterIsInstance<MediaParsedInfoEvent>().lastOrNull() ?: return null
return parsedInfo.data.parsedFileName return parsedInfo.data.parsedFileName.cleanForFileSystem()
} }
internal fun getDesiredStoreFolder(): File? { internal fun getDesiredStoreFolder(): File? {
val desiredCollection = getDesiredCollection() ?: return null val desiredCollection = getDesiredCollection()?.cleanForFileSystem() ?: return null
val assuredStore = storageArea.using(desiredCollection) val assuredStore = storageArea.using(desiredCollection)
val existingCollectionNames = getFoldersInStore() val existingCollectionNames = getFoldersInStore()
@ -34,7 +35,7 @@ open class MigrateContentProject(
return assuredStore return assuredStore
} }
val titles = getMetadataTitles() val titles = getMetadataTitles().map { it.cleanForFileSystem() }
val matchedExisting = titles val matchedExisting = titles
.firstOrNull { it in existingCollectionNames } .firstOrNull { it in existingCollectionNames }
@ -111,7 +112,7 @@ open class MigrateContentProject(
e to file e to file
} }
val baseName = getDesiredCollection() ?: return null val baseName = getDesiredCollection()?.cleanForFileSystem() ?: return null
val store = useStore ?: return null val store = useStore ?: return null
val multiple = downloaded.size > 1 val multiple = downloaded.size > 1

View File

@ -2,10 +2,9 @@ package no.iktdev.mediaprocessing.shared.common.projection
import no.iktdev.eventi.models.Event import no.iktdev.eventi.models.Event
import no.iktdev.eventi.models.store.TaskStatus import no.iktdev.eventi.models.store.TaskStatus
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent.ParsedData import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent.ParsedData
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MetadataSearchResultEvent
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.ProcesserEncodeResultEvent
import no.iktdev.mediaprocessing.shared.common.model.MediaType import no.iktdev.mediaprocessing.shared.common.model.MediaType
import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.DisplayName import org.junit.jupiter.api.DisplayName
@ -116,4 +115,113 @@ class MigrateContentProjectPathTest {
project.useStore!!.absolutePath project.useStore!!.absolutePath
) )
} }
@Test
fun cleanForFileSystem_transliteration() {
    // Each pair is (expected sanitized output) to (raw input containing diacritics).
    val cases = listOf(
        "Senor de los Cielos" to "Señor de los Cielos",
        "Amelie (2001)" to "Amélie (2001)",
        "Ubermensch" to "Übermensch",
        "Lodz, Polska" to "Łódź, Polska"
    )

    // Checked in declaration order; the first mismatch fails the test.
    for ((expected, raw) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
@Test
fun cleanForFileSystem_removesSpecialCharacters() {
    // Symbols such as '@', '!' and ':' are expected to become separators that
    // collapse to a single space, with nothing left over at the ends.
    val cases = listOf(
        "Hello World" to "Hello@World!",
        "Spider-Man No Way Home" to "Spider-Man: No Way Home!"
    )

    for ((expected, raw) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
@Test
fun videoStoreFile_usesSanitizedName() {
    // Arrange: a parsed-info event with accented names plus a completed encode result.
    val storageRoot = File("build/test-folder/file")

    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Amélie (2001)",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    val encodeResult = ProcesserEncodeResultEvent.EncodeResult(
        cachedOutputFile = "/tmp/cache/video.mp4"
    )
    val encodeEvent = ProcesserEncodeResultEvent(
        data = encodeResult,
        status = TaskStatus.Completed
    )

    // Act
    val project = MigrateContentProject(listOf(parsedEvent, encodeEvent), storageRoot)
    val videoFile = project.getVideoStoreFile()

    // Assert: both the file name and its parent folder use the ASCII-folded names.
    assertNotNull(videoFile)
    assertEquals("Amelie (2001).mp4", videoFile!!.storeFile.name)
    assertEquals("Senor de los Cielos", videoFile.storeFile.parentFile.name)
}
@Test
fun subtitleStoreFiles_useSanitizedNames() {
    // Arrange: a series with accented collection/file names and one extracted subtitle.
    val storageRoot = File("build/test-folder/file")

    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Niña Épica",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Serie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    val extractResult = ProcesserExtractResultEvent.ExtractResult(
        language = "spa",
        cachedOutputFile = "/tmp/cache/sub1.srt"
    )
    val extractEvent = ProcesserExtractResultEvent(
        status = TaskStatus.Completed,
        data = extractResult
    )

    // Act
    val project = MigrateContentProject(listOf(parsedEvent, extractEvent), storageRoot)
    val subtitles = project.getSubtitleStoreFiles()

    // Assert: expected layout is <collection>/sub/<language>/<file name>,
    // checked from the file upwards one directory at a time.
    assertNotNull(subtitles)
    val subtitleFile = subtitles!!.first().cts.storeFile
    assertEquals("Nina Epica.srt", subtitleFile.name)

    val languageDir = subtitleFile.parentFile
    assertEquals("spa", languageDir.name)

    val subDir = languageDir.parentFile
    assertEquals("sub", subDir.name)

    val collectionDir = subDir.parentFile
    assertEquals("Senor de los Cielos", collectionDir.name)
}
@Test
fun coverStoreFiles_useSanitizedNames() {
    // Arrange: a movie whose collection name contains a diacritic, plus one downloaded cover.
    val storageRoot = File("build/test-folder/file")

    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "João e Maria",
        parsedFileName = "ignored",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    val coverData = CoverDownloadResultEvent.CoverDownloadedData(
        source = "tmdb",
        outputFile = "/tmp/cache/cover.jpg"
    )
    val coverEvent = CoverDownloadResultEvent(
        data = coverData,
        status = TaskStatus.Completed
    )

    // Act
    val project = MigrateContentProject(listOf(parsedEvent, coverEvent), storageRoot)
    val covers = project.getCoverStoreFiles()

    // Assert: the cover file is named after the ASCII-folded collection name.
    assertNotNull(covers)
    val coverFile = covers!!.first().storeFile
    assertEquals("Joao e Maria.jpg", coverFile.name)
}
} }

View File

@ -3,6 +3,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
import no.iktdev.eventi.models.Event import no.iktdev.eventi.models.Event
import no.iktdev.eventi.models.store.TaskStatus import no.iktdev.eventi.models.store.TaskStatus
import no.iktdev.exfl.using import no.iktdev.exfl.using
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.* import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
import no.iktdev.mediaprocessing.shared.common.model.MediaType import no.iktdev.mediaprocessing.shared.common.model.MediaType
import org.assertj.core.util.Files import org.assertj.core.util.Files
@ -76,7 +77,7 @@ class ProjectContentStoreTest {
Hvis extract- og convert-events inneholder undertekstfiler Hvis extract- og convert-events inneholder undertekstfiler
Når getSubtitleStoreFiles kalles Når getSubtitleStoreFiles kalles
: :
skal filer lagres under <storage>/<collection>/<language>/<filnavn> skal filer lagres under <storage>/<collection>/sub/<language>/<filnavn>
""" """
) )
@Test @Test
@ -87,7 +88,7 @@ class ProjectContentStoreTest {
val parsed = MediaParsedInfoEvent( val parsed = MediaParsedInfoEvent(
data = MediaParsedInfoEvent.ParsedData( data = MediaParsedInfoEvent.ParsedData(
parsedCollection = "MyShow", parsedCollection = "MyShow",
parsedFileName = "episode1.mkv", parsedFileName = "episode1",
parsedSearchTitles = emptyList(), parsedSearchTitles = emptyList(),
mediaType = MediaType.Serie mediaType = MediaType.Serie
) )
@ -104,7 +105,7 @@ class ProjectContentStoreTest {
val convert = ConvertTaskResultEvent( val convert = ConvertTaskResultEvent(
data = ConvertTaskResultEvent.ConvertedData( data = ConvertTaskResultEvent.ConvertedData(
language = "eng", language = "eng",
"sub1", baseName = "sub1",
outputFiles = listOf("/tmp/cache/sub1.vtt") outputFiles = listOf("/tmp/cache/sub1.vtt")
), ),
status = TaskStatus.Completed status = TaskStatus.Completed
@ -120,13 +121,26 @@ class ProjectContentStoreTest {
assertEquals(2, results?.size) assertEquals(2, results?.size)
results?.forEach { entry -> results?.forEach { entry ->
assertEquals("eng", (entry.cts.storeFile.parentFile).name) val file = entry.cts.storeFile
assertEquals("sub", entry.cts.storeFile.parentFile.parentFile.name)
assertEquals("MyShow", entry.cts.storeFile.parentFile.parentFile.parentFile.name) // Filnavn
assertEquals(temp, entry.cts.storeFile.parentFile.parentFile.parentFile.parentFile) assertTrue(file.name == "episode1.srt" || file.name == "episode1.vtt")
// <language>
assertEquals("eng", file.parentFile.name)
// sub/
assertEquals("sub", file.parentFile.parentFile.name)
// <collection>
assertEquals("MyShow", file.parentFile.parentFile.parentFile.name)
// <storage>
assertEquals(temp, file.parentFile.parentFile.parentFile.parentFile)
} }
} }
@DisplayName( @DisplayName(
""" """
Hvis cover-download-event inneholder en coverfil Hvis cover-download-event inneholder en coverfil
@ -638,8 +652,8 @@ class ProjectContentStoreTest {
name = "Weird folder names (spaces, unicode)", name = "Weird folder names (spaces, unicode)",
parsedCollection = "Fallback", parsedCollection = "Fallback",
metadataTitles = listOf("ÆØÅ Show"), metadataTitles = listOf("ÆØÅ Show"),
existingFolders = listOf("ÆØÅ Show"), existingFolders = listOf("ÆØÅ Show".cleanForFileSystem()),
expectedFolder = "ÆØÅ Show" expectedFolder = "AEOA Show"
), ),
DesiredStoreCase( DesiredStoreCase(
name = "Case-insensitive mismatch → fallback", name = "Case-insensitive mismatch → fallback",