Transliterating
This commit is contained in:
parent
21f6f8c83b
commit
701c939e8d
@ -48,6 +48,8 @@ dependencies {
|
||||
implementation(project(":shared:ffmpeg"))
|
||||
implementation(libs.eventi)
|
||||
|
||||
implementation("com.ibm.icu:icu4j:75.1")
|
||||
|
||||
testImplementation(kotlin("test"))
|
||||
testImplementation(platform("org.junit:junit-bom:5.10.0"))
|
||||
testImplementation("org.junit.jupiter:junit-jupiter")
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package no.iktdev.mediaprocessing.shared.common
|
||||
|
||||
import com.ibm.icu.text.Transliterator
|
||||
import kotlinx.coroutines.delay
|
||||
import mu.KotlinLogging
|
||||
import no.iktdev.eventi.ZDS.toEvent
|
||||
@ -254,4 +255,16 @@ fun <T : Any> KClass<T>.getName(): String =
|
||||
this.simpleName ?: this.java.simpleName
|
||||
|
||||
|
||||
/** ICU transliterator built from the compound rule "Any-Latin; Latin-ASCII". */
private val transliterator = Transliterator.getInstance("Any-Latin; Latin-ASCII")

/** Matches every character that is NOT a letter, digit, whitespace, hyphen, parenthesis, comma or period. */
private val disallowedFileNameChars = Regex("[^\\p{L}\\p{N}\\s\\-(),.]")

/** Matches any run of whitespace, including a single tab or line break. */
private val whitespaceRun = Regex("\\s+")

/**
 * Returns a variant of this string intended for use as a file or folder name.
 *
 * Steps:
 * 1. Transliterate the text with [transliterator] (for example ñ→n, é→e, Ł→L, Æ→AE, Ø→O, Å→A).
 * 2. Replace every character that is not a letter, digit, whitespace, hyphen,
 *    parenthesis, comma or period with a space.
 * 3. Collapse every whitespace run into a single space and trim both ends.
 *
 * Unlike a "two or more" collapse, step 3 also turns a lone tab or line break into a
 * plain space, so no control whitespace can end up in the resulting name.
 *
 * @return the cleaned string; it may be empty when nothing usable remains.
 */
fun String.cleanForFileSystem(): String {
    // 1. Full transliteration to ASCII-friendly Latin text.
    val ascii = transliterator.transliterate(this)

    // 2. Blank out everything outside the allowed character set.
    val cleaned = ascii.replace(disallowedFileNameChars, " ")

    // 3. Normalise whitespace (tabs and line breaks included) and trim.
    return cleaned.replace(whitespaceRun, " ").trim()
}
|
||||
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
||||
|
||||
import no.iktdev.eventi.models.Event
|
||||
import no.iktdev.exfl.using
|
||||
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||
import no.iktdev.mediaprocessing.shared.common.resolveConflict
|
||||
import java.io.File
|
||||
@ -22,11 +23,11 @@ open class MigrateContentProject(
|
||||
|
||||
internal fun getFileName(): String? {
|
||||
val parsedInfo = events.filterIsInstance<MediaParsedInfoEvent>().lastOrNull() ?: return null
|
||||
return parsedInfo.data.parsedFileName
|
||||
return parsedInfo.data.parsedFileName.cleanForFileSystem()
|
||||
}
|
||||
|
||||
internal fun getDesiredStoreFolder(): File? {
|
||||
val desiredCollection = getDesiredCollection() ?: return null
|
||||
val desiredCollection = getDesiredCollection()?.cleanForFileSystem() ?: return null
|
||||
val assuredStore = storageArea.using(desiredCollection)
|
||||
|
||||
val existingCollectionNames = getFoldersInStore()
|
||||
@ -34,7 +35,7 @@ open class MigrateContentProject(
|
||||
return assuredStore
|
||||
}
|
||||
|
||||
val titles = getMetadataTitles()
|
||||
val titles = getMetadataTitles().map { it.cleanForFileSystem() }
|
||||
|
||||
val matchedExisting = titles
|
||||
.firstOrNull { it in existingCollectionNames }
|
||||
@ -111,7 +112,7 @@ open class MigrateContentProject(
|
||||
e to file
|
||||
}
|
||||
|
||||
val baseName = getDesiredCollection() ?: return null
|
||||
val baseName = getDesiredCollection()?.cleanForFileSystem() ?: return null
|
||||
val store = useStore ?: return null
|
||||
|
||||
val multiple = downloaded.size > 1
|
||||
|
||||
@ -2,10 +2,9 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
||||
|
||||
import no.iktdev.eventi.models.Event
|
||||
import no.iktdev.eventi.models.store.TaskStatus
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent
|
||||
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent.ParsedData
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MetadataSearchResultEvent
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.ProcesserEncodeResultEvent
|
||||
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
import org.junit.jupiter.api.DisplayName
|
||||
@ -116,4 +115,113 @@ class MigrateContentProjectPathTest {
|
||||
project.useStore!!.absolutePath
|
||||
)
|
||||
}
|
||||
|
||||
@Test
fun cleanForFileSystem_transliteration() {
    // Raw title paired with the ASCII form expected after cleaning.
    val cases = listOf(
        "Señor de los Cielos" to "Senor de los Cielos",
        "Amélie (2001)" to "Amelie (2001)",
        "Übermensch" to "Ubermensch",
        "Łódź, Polska" to "Lodz, Polska"
    )

    for ((raw, expected) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
|
||||
|
||||
@Test
fun cleanForFileSystem_removesSpecialCharacters() {
    // Symbols outside the allowed set become spaces, which are then collapsed and trimmed.
    val cases = listOf(
        "Hello@World!" to "Hello World",
        "Spider-Man: No Way Home!" to "Spider-Man No Way Home"
    )

    for ((raw, expected) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
|
||||
@Test
fun videoStoreFile_usesSanitizedName() {
    val storageRoot = File("build/test-folder/file")

    // Parsed info carrying accented characters in both collection and file name.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Amélie (2001)",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // A completed encode result pointing at a cached mp4 file.
    val encodeData = ProcesserEncodeResultEvent.EncodeResult(
        cachedOutputFile = "/tmp/cache/video.mp4"
    )
    val encodeEvent = ProcesserEncodeResultEvent(
        data = encodeData,
        status = TaskStatus.Completed
    )

    val project = MigrateContentProject(listOf(parsedEvent, encodeEvent), storageRoot)
    val videoStore = project.getVideoStoreFile()

    // Both the file name and its parent folder are expected in cleaned ASCII form.
    assertNotNull(videoStore)
    assertEquals("Amelie (2001).mp4", videoStore!!.storeFile.name)
    assertEquals("Senor de los Cielos", videoStore.storeFile.parentFile.name)
}
|
||||
|
||||
@Test
fun subtitleStoreFiles_useSanitizedNames() {
    val storageRoot = File("build/test-folder/file")

    // Parsed info carrying accented characters in both collection and file name.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Niña Épica",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Serie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // A completed subtitle extraction for Spanish.
    val extractEvent = ProcesserExtractResultEvent(
        status = TaskStatus.Completed,
        data = ProcesserExtractResultEvent.ExtractResult(
            language = "spa",
            cachedOutputFile = "/tmp/cache/sub1.srt"
        )
    )

    val project = MigrateContentProject(listOf(parsedEvent, extractEvent), storageRoot)
    val subtitleStores = project.getSubtitleStoreFiles()

    assertNotNull(subtitleStores)
    val subtitleFile = subtitleStores!!.first().cts.storeFile

    // Walk upwards one directory at a time: <collection>/sub/<language>/<file>.
    assertEquals("Nina Epica.srt", subtitleFile.name)
    val languageDir = subtitleFile.parentFile
    assertEquals("spa", languageDir.name)
    val subDir = languageDir.parentFile
    assertEquals("sub", subDir.name)
    val collectionDir = subDir.parentFile
    assertEquals("Senor de los Cielos", collectionDir.name)
}
|
||||
|
||||
@Test
fun coverStoreFiles_useSanitizedNames() {
    val storageRoot = File("build/test-folder/file")

    // Only the collection name carries an accented character in this case.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "João e Maria",
        parsedFileName = "ignored",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // A completed cover download pointing at a cached jpg file.
    val coverData = CoverDownloadResultEvent.CoverDownloadedData(
        source = "tmdb",
        outputFile = "/tmp/cache/cover.jpg"
    )
    val coverEvent = CoverDownloadResultEvent(
        data = coverData,
        status = TaskStatus.Completed
    )

    val project = MigrateContentProject(listOf(parsedEvent, coverEvent), storageRoot)
    val coverStores = project.getCoverStoreFiles()

    assertNotNull(coverStores)
    val coverFile = coverStores!!.first().storeFile

    // The cover file name is expected to be the cleaned collection name.
    assertEquals("Joao e Maria.jpg", coverFile.name)
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
||||
import no.iktdev.eventi.models.Event
|
||||
import no.iktdev.eventi.models.store.TaskStatus
|
||||
import no.iktdev.exfl.using
|
||||
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
||||
import org.assertj.core.util.Files
|
||||
@ -76,7 +77,7 @@ class ProjectContentStoreTest {
|
||||
Hvis extract- og convert-events inneholder undertekstfiler
|
||||
Når getSubtitleStoreFiles kalles
|
||||
Så:
|
||||
skal filer lagres under <storage>/<collection>/<language>/<filnavn>
|
||||
skal filer lagres under <storage>/<collection>/sub/<language>/<filnavn>
|
||||
"""
|
||||
)
|
||||
@Test
|
||||
@ -87,7 +88,7 @@ class ProjectContentStoreTest {
|
||||
val parsed = MediaParsedInfoEvent(
|
||||
data = MediaParsedInfoEvent.ParsedData(
|
||||
parsedCollection = "MyShow",
|
||||
parsedFileName = "episode1.mkv",
|
||||
parsedFileName = "episode1",
|
||||
parsedSearchTitles = emptyList(),
|
||||
mediaType = MediaType.Serie
|
||||
)
|
||||
@ -104,7 +105,7 @@ class ProjectContentStoreTest {
|
||||
val convert = ConvertTaskResultEvent(
|
||||
data = ConvertTaskResultEvent.ConvertedData(
|
||||
language = "eng",
|
||||
"sub1",
|
||||
baseName = "sub1",
|
||||
outputFiles = listOf("/tmp/cache/sub1.vtt")
|
||||
),
|
||||
status = TaskStatus.Completed
|
||||
@ -120,13 +121,26 @@ class ProjectContentStoreTest {
|
||||
assertEquals(2, results?.size)
|
||||
|
||||
results?.forEach { entry ->
|
||||
assertEquals("eng", (entry.cts.storeFile.parentFile).name)
|
||||
assertEquals("sub", entry.cts.storeFile.parentFile.parentFile.name)
|
||||
assertEquals("MyShow", entry.cts.storeFile.parentFile.parentFile.parentFile.name)
|
||||
assertEquals(temp, entry.cts.storeFile.parentFile.parentFile.parentFile.parentFile)
|
||||
val file = entry.cts.storeFile
|
||||
|
||||
// Filnavn
|
||||
assertTrue(file.name == "episode1.srt" || file.name == "episode1.vtt")
|
||||
|
||||
// <language>
|
||||
assertEquals("eng", file.parentFile.name)
|
||||
|
||||
// sub/
|
||||
assertEquals("sub", file.parentFile.parentFile.name)
|
||||
|
||||
// <collection>
|
||||
assertEquals("MyShow", file.parentFile.parentFile.parentFile.name)
|
||||
|
||||
// <storage>
|
||||
assertEquals(temp, file.parentFile.parentFile.parentFile.parentFile)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@DisplayName(
|
||||
"""
|
||||
Hvis cover-download-event inneholder en coverfil
|
||||
@ -638,8 +652,8 @@ class ProjectContentStoreTest {
|
||||
name = "Weird folder names (spaces, unicode)",
|
||||
parsedCollection = "Fallback",
|
||||
metadataTitles = listOf("ÆØÅ Show"),
|
||||
existingFolders = listOf("ÆØÅ Show"),
|
||||
expectedFolder = "ÆØÅ Show"
|
||||
existingFolders = listOf("ÆØÅ Show".cleanForFileSystem()),
|
||||
expectedFolder = "AEOA Show"
|
||||
),
|
||||
DesiredStoreCase(
|
||||
name = "Case-insensitive mismatch → fallback",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user