Transliterating
This commit is contained in:
parent
21f6f8c83b
commit
701c939e8d
@ -48,6 +48,8 @@ dependencies {
|
|||||||
implementation(project(":shared:ffmpeg"))
|
implementation(project(":shared:ffmpeg"))
|
||||||
implementation(libs.eventi)
|
implementation(libs.eventi)
|
||||||
|
|
||||||
|
implementation("com.ibm.icu:icu4j:75.1")
|
||||||
|
|
||||||
testImplementation(kotlin("test"))
|
testImplementation(kotlin("test"))
|
||||||
testImplementation(platform("org.junit:junit-bom:5.10.0"))
|
testImplementation(platform("org.junit:junit-bom:5.10.0"))
|
||||||
testImplementation("org.junit.jupiter:junit-jupiter")
|
testImplementation("org.junit.jupiter:junit-jupiter")
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
package no.iktdev.mediaprocessing.shared.common
|
package no.iktdev.mediaprocessing.shared.common
|
||||||
|
|
||||||
|
import com.ibm.icu.text.Transliterator
|
||||||
import kotlinx.coroutines.delay
|
import kotlinx.coroutines.delay
|
||||||
import mu.KotlinLogging
|
import mu.KotlinLogging
|
||||||
import no.iktdev.eventi.ZDS.toEvent
|
import no.iktdev.eventi.ZDS.toEvent
|
||||||
@ -254,4 +255,16 @@ fun <T : Any> KClass<T>.getName(): String =
|
|||||||
this.simpleName ?: this.java.simpleName
|
this.simpleName ?: this.java.simpleName
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Shared ICU transform used by [cleanForFileSystem].
 *
 * The compound ID chains two transforms: `Any-Latin` followed by `Latin-ASCII`.
 * NOTE(review): per the ICU documentation the first romanises non-Latin scripts and the
 * second folds Latin letters to ASCII — confirm against the ICU version in use.
 */
private val transliterator = Transliterator.getInstance("Any-Latin; Latin-ASCII")

/**
 * Matches every character that is NOT a letter, digit, whitespace, hyphen,
 * parenthesis, comma or period. Compiled once instead of on every call.
 */
private val fileSystemDisallowedChars = Regex("[^\\p{L}\\p{N}\\s\\-(),.]")

/** Matches any run of one or more whitespace characters (space, tab, newline, ...). */
private val fileSystemWhitespaceRun = Regex("\\s+")

/**
 * Produces a simplified variant of this string intended for use as a file or folder name.
 *
 * Steps:
 * 1. Transliterate the text (e.g. `Æ→AE`, `Ø→O`, `Å→A`, `Ł→L`, `ñ→n`, `é→e`).
 * 2. Replace every character that is not a letter, digit, whitespace, `-`, `(`, `)`,
 *    `,` or `.` with a space.
 * 3. Collapse every whitespace run into a single space and trim both ends.
 *
 * Examples: `"Łódź, Polska"` becomes `"Lodz, Polska"` and
 * `"Spider-Man: No Way Home!"` becomes `"Spider-Man No Way Home"`.
 *
 * @return the cleaned name; this is an empty string when the receiver contains no
 *   permitted characters, so callers that need a non-empty name must check for that.
 */
fun String.cleanForFileSystem(): String {
    // 1. Full transliteration to plain Latin/ASCII letters.
    val ascii = transliterator.transliterate(this)

    // 2. Anything outside the allow-list becomes a space, so neighbouring words stay separated.
    val cleaned = ascii.replace(fileSystemDisallowedChars, " ")

    // 3. Normalise whitespace. `\s+` (rather than `\s{2,}`) also turns a lone tab or
    //    newline into a regular space, which would otherwise end up in the file name.
    return cleaned.replace(fileSystemWhitespaceRun, " ").trim()
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
|||||||
|
|
||||||
import no.iktdev.eventi.models.Event
|
import no.iktdev.eventi.models.Event
|
||||||
import no.iktdev.exfl.using
|
import no.iktdev.exfl.using
|
||||||
|
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||||
import no.iktdev.mediaprocessing.shared.common.resolveConflict
|
import no.iktdev.mediaprocessing.shared.common.resolveConflict
|
||||||
import java.io.File
|
import java.io.File
|
||||||
@ -22,11 +23,11 @@ open class MigrateContentProject(
|
|||||||
|
|
||||||
internal fun getFileName(): String? {
|
internal fun getFileName(): String? {
|
||||||
val parsedInfo = events.filterIsInstance<MediaParsedInfoEvent>().lastOrNull() ?: return null
|
val parsedInfo = events.filterIsInstance<MediaParsedInfoEvent>().lastOrNull() ?: return null
|
||||||
return parsedInfo.data.parsedFileName
|
return parsedInfo.data.parsedFileName.cleanForFileSystem()
|
||||||
}
|
}
|
||||||
|
|
||||||
internal fun getDesiredStoreFolder(): File? {
|
internal fun getDesiredStoreFolder(): File? {
|
||||||
val desiredCollection = getDesiredCollection() ?: return null
|
val desiredCollection = getDesiredCollection()?.cleanForFileSystem() ?: return null
|
||||||
val assuredStore = storageArea.using(desiredCollection)
|
val assuredStore = storageArea.using(desiredCollection)
|
||||||
|
|
||||||
val existingCollectionNames = getFoldersInStore()
|
val existingCollectionNames = getFoldersInStore()
|
||||||
@ -34,7 +35,7 @@ open class MigrateContentProject(
|
|||||||
return assuredStore
|
return assuredStore
|
||||||
}
|
}
|
||||||
|
|
||||||
val titles = getMetadataTitles()
|
val titles = getMetadataTitles().map { it.cleanForFileSystem() }
|
||||||
|
|
||||||
val matchedExisting = titles
|
val matchedExisting = titles
|
||||||
.firstOrNull { it in existingCollectionNames }
|
.firstOrNull { it in existingCollectionNames }
|
||||||
@ -111,7 +112,7 @@ open class MigrateContentProject(
|
|||||||
e to file
|
e to file
|
||||||
}
|
}
|
||||||
|
|
||||||
val baseName = getDesiredCollection() ?: return null
|
val baseName = getDesiredCollection()?.cleanForFileSystem() ?: return null
|
||||||
val store = useStore ?: return null
|
val store = useStore ?: return null
|
||||||
|
|
||||||
val multiple = downloaded.size > 1
|
val multiple = downloaded.size > 1
|
||||||
|
|||||||
@ -2,10 +2,9 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
|||||||
|
|
||||||
import no.iktdev.eventi.models.Event
|
import no.iktdev.eventi.models.Event
|
||||||
import no.iktdev.eventi.models.store.TaskStatus
|
import no.iktdev.eventi.models.store.TaskStatus
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent
|
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||||
|
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent.ParsedData
|
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MediaParsedInfoEvent.ParsedData
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.MetadataSearchResultEvent
|
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.ProcesserEncodeResultEvent
|
|
||||||
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
||||||
import org.junit.jupiter.api.Assertions.*
|
import org.junit.jupiter.api.Assertions.*
|
||||||
import org.junit.jupiter.api.DisplayName
|
import org.junit.jupiter.api.DisplayName
|
||||||
@ -116,4 +115,113 @@ class MigrateContentProjectPathTest {
|
|||||||
project.useStore!!.absolutePath
|
project.useStore!!.absolutePath
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
fun cleanForFileSystem_transliteration() {
    // Each pair is (raw title, expected result after transliteration).
    val cases = listOf(
        "Señor de los Cielos" to "Senor de los Cielos",
        "Amélie (2001)" to "Amelie (2001)",
        "Übermensch" to "Ubermensch",
        "Łódź, Polska" to "Lodz, Polska"
    )

    // Checked in declaration order; the first mismatch fails the test.
    for ((raw, expected) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
|
||||||
|
|
||||||
|
@Test
fun cleanForFileSystem_removesSpecialCharacters() {
    // Each pair is (raw title, expected result once disallowed characters are replaced).
    val cases = listOf(
        "Hello@World!" to "Hello World",
        "Spider-Man: No Way Home!" to "Spider-Man No Way Home"
    )

    // Checked in declaration order; the first mismatch fails the test.
    for ((raw, expected) in cases) {
        assertEquals(expected, raw.cleanForFileSystem())
    }
}
|
||||||
|
@Test
fun videoStoreFile_usesSanitizedName() {
    // Storage root handed to the project under test.
    val storageRoot = File("build/test-folder/file")

    // Parsed info whose collection and file name both contain accented characters.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Amélie (2001)",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // Completed encode result pointing at a cached .mp4 file.
    val encodeData = ProcesserEncodeResultEvent.EncodeResult(
        cachedOutputFile = "/tmp/cache/video.mp4"
    )
    val encodeEvent = ProcesserEncodeResultEvent(
        data = encodeData,
        status = TaskStatus.Completed
    )

    val project = MigrateContentProject(listOf(parsedEvent, encodeEvent), storageRoot)
    val videoEntry = project.getVideoStoreFile()

    assertNotNull(videoEntry)
    val storeFile = videoEntry!!.storeFile

    // Both the file name and its parent folder are expected in transliterated form.
    assertEquals("Amelie (2001).mp4", storeFile.name)
    assertEquals("Senor de los Cielos", storeFile.parentFile.name)
}
|
||||||
|
|
||||||
|
@Test
fun subtitleStoreFiles_useSanitizedNames() {
    // Storage root handed to the project under test.
    val storageRoot = File("build/test-folder/file")

    // Parsed info whose collection and file name both contain accented characters.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "Señor de los Cielos",
        parsedFileName = "Niña Épica",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Serie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // Completed subtitle extraction for Spanish ("spa") pointing at a cached .srt file.
    val extractData = ProcesserExtractResultEvent.ExtractResult(
        language = "spa",
        cachedOutputFile = "/tmp/cache/sub1.srt"
    )
    val extractEvent = ProcesserExtractResultEvent(
        status = TaskStatus.Completed,
        data = extractData
    )

    val project = MigrateContentProject(listOf(parsedEvent, extractEvent), storageRoot)
    val subtitleEntries = project.getSubtitleStoreFiles()

    assertNotNull(subtitleEntries)
    val subtitleFile = subtitleEntries!!.first().cts.storeFile

    // Expected layout, checked from the file upwards: <collection>/sub/<language>/<file name>.
    assertEquals("Nina Epica.srt", subtitleFile.name)
    val languageDir = subtitleFile.parentFile
    assertEquals("spa", languageDir.name)
    val subDir = languageDir.parentFile
    assertEquals("sub", subDir.name)
    assertEquals("Senor de los Cielos", subDir.parentFile.name)
}
|
||||||
|
|
||||||
|
@Test
fun coverStoreFiles_useSanitizedNames() {
    // Storage root handed to the project under test.
    val storageRoot = File("build/test-folder/file")

    // Only the collection name carries an accented character in this scenario.
    val parsedData = MediaParsedInfoEvent.ParsedData(
        parsedCollection = "João e Maria",
        parsedFileName = "ignored",
        parsedSearchTitles = emptyList(),
        mediaType = MediaType.Movie
    )
    val parsedEvent = MediaParsedInfoEvent(data = parsedData)

    // Completed cover download pointing at a cached .jpg file.
    val coverData = CoverDownloadResultEvent.CoverDownloadedData(
        source = "tmdb",
        outputFile = "/tmp/cache/cover.jpg"
    )
    val coverEvent = CoverDownloadResultEvent(
        data = coverData,
        status = TaskStatus.Completed
    )

    val project = MigrateContentProject(listOf(parsedEvent, coverEvent), storageRoot)
    val coverEntries = project.getCoverStoreFiles()

    assertNotNull(coverEntries)
    val coverFile = coverEntries!!.first().storeFile

    // The asserted cover name is the transliterated collection name plus the .jpg extension.
    assertEquals("Joao e Maria.jpg", coverFile.name)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package no.iktdev.mediaprocessing.shared.common.projection
|
|||||||
import no.iktdev.eventi.models.Event
|
import no.iktdev.eventi.models.Event
|
||||||
import no.iktdev.eventi.models.store.TaskStatus
|
import no.iktdev.eventi.models.store.TaskStatus
|
||||||
import no.iktdev.exfl.using
|
import no.iktdev.exfl.using
|
||||||
|
import no.iktdev.mediaprocessing.shared.common.cleanForFileSystem
|
||||||
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
import no.iktdev.mediaprocessing.shared.common.event_task_contract.events.*
|
||||||
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
import no.iktdev.mediaprocessing.shared.common.model.MediaType
|
||||||
import org.assertj.core.util.Files
|
import org.assertj.core.util.Files
|
||||||
@ -76,7 +77,7 @@ class ProjectContentStoreTest {
|
|||||||
Hvis extract- og convert-events inneholder undertekstfiler
|
Hvis extract- og convert-events inneholder undertekstfiler
|
||||||
Når getSubtitleStoreFiles kalles
|
Når getSubtitleStoreFiles kalles
|
||||||
Så:
|
Så:
|
||||||
skal filer lagres under <storage>/<collection>/<language>/<filnavn>
|
skal filer lagres under <storage>/<collection>/sub/<language>/<filnavn>
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
@Test
|
@Test
|
||||||
@ -87,7 +88,7 @@ class ProjectContentStoreTest {
|
|||||||
val parsed = MediaParsedInfoEvent(
|
val parsed = MediaParsedInfoEvent(
|
||||||
data = MediaParsedInfoEvent.ParsedData(
|
data = MediaParsedInfoEvent.ParsedData(
|
||||||
parsedCollection = "MyShow",
|
parsedCollection = "MyShow",
|
||||||
parsedFileName = "episode1.mkv",
|
parsedFileName = "episode1",
|
||||||
parsedSearchTitles = emptyList(),
|
parsedSearchTitles = emptyList(),
|
||||||
mediaType = MediaType.Serie
|
mediaType = MediaType.Serie
|
||||||
)
|
)
|
||||||
@ -104,7 +105,7 @@ class ProjectContentStoreTest {
|
|||||||
val convert = ConvertTaskResultEvent(
|
val convert = ConvertTaskResultEvent(
|
||||||
data = ConvertTaskResultEvent.ConvertedData(
|
data = ConvertTaskResultEvent.ConvertedData(
|
||||||
language = "eng",
|
language = "eng",
|
||||||
"sub1",
|
baseName = "sub1",
|
||||||
outputFiles = listOf("/tmp/cache/sub1.vtt")
|
outputFiles = listOf("/tmp/cache/sub1.vtt")
|
||||||
),
|
),
|
||||||
status = TaskStatus.Completed
|
status = TaskStatus.Completed
|
||||||
@ -120,13 +121,26 @@ class ProjectContentStoreTest {
|
|||||||
assertEquals(2, results?.size)
|
assertEquals(2, results?.size)
|
||||||
|
|
||||||
results?.forEach { entry ->
|
results?.forEach { entry ->
|
||||||
assertEquals("eng", (entry.cts.storeFile.parentFile).name)
|
val file = entry.cts.storeFile
|
||||||
assertEquals("sub", entry.cts.storeFile.parentFile.parentFile.name)
|
|
||||||
assertEquals("MyShow", entry.cts.storeFile.parentFile.parentFile.parentFile.name)
|
// Filnavn
|
||||||
assertEquals(temp, entry.cts.storeFile.parentFile.parentFile.parentFile.parentFile)
|
assertTrue(file.name == "episode1.srt" || file.name == "episode1.vtt")
|
||||||
|
|
||||||
|
// <language>
|
||||||
|
assertEquals("eng", file.parentFile.name)
|
||||||
|
|
||||||
|
// sub/
|
||||||
|
assertEquals("sub", file.parentFile.parentFile.name)
|
||||||
|
|
||||||
|
// <collection>
|
||||||
|
assertEquals("MyShow", file.parentFile.parentFile.parentFile.name)
|
||||||
|
|
||||||
|
// <storage>
|
||||||
|
assertEquals(temp, file.parentFile.parentFile.parentFile.parentFile)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@DisplayName(
|
@DisplayName(
|
||||||
"""
|
"""
|
||||||
Hvis cover-download-event inneholder en coverfil
|
Hvis cover-download-event inneholder en coverfil
|
||||||
@ -638,8 +652,8 @@ class ProjectContentStoreTest {
|
|||||||
name = "Weird folder names (spaces, unicode)",
|
name = "Weird folder names (spaces, unicode)",
|
||||||
parsedCollection = "Fallback",
|
parsedCollection = "Fallback",
|
||||||
metadataTitles = listOf("ÆØÅ Show"),
|
metadataTitles = listOf("ÆØÅ Show"),
|
||||||
existingFolders = listOf("ÆØÅ Show"),
|
existingFolders = listOf("ÆØÅ Show".cleanForFileSystem()),
|
||||||
expectedFolder = "ÆØÅ Show"
|
expectedFolder = "AEOA Show"
|
||||||
),
|
),
|
||||||
DesiredStoreCase(
|
DesiredStoreCase(
|
||||||
name = "Case-insensitive mismatch → fallback",
|
name = "Case-insensitive mismatch → fallback",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user