Dot stripping

This commit is contained in:
bskjon 2024-11-19 22:09:30 +01:00
parent 9ecdcd88fe
commit 01df7ef239
4 changed files with 12 additions and 37 deletions

View File

@ -1,32 +0,0 @@
package no.iktdev.mediaprocessing.coordinator.reader
/*
import no.iktdev.mediaprocessing.shared.kafka.core.DefaultConsumer
import no.iktdev.mediaprocessing.shared.kafka.core.DefaultMessageListener
import no.iktdev.mediaprocessing.shared.kafka.core.DefaultProducer
import org.apache.kafka.clients.admin.AdminClient
import org.junit.jupiter.api.extension.ExtendWith
import org.mockito.InjectMocks
import org.mockito.Mock
import org.mockito.junit.jupiter.MockitoExtension
import org.springframework.kafka.core.KafkaTemplate
import org.springframework.kafka.core.ProducerFactory
@ExtendWith(MockitoExtension::class)
class KafkaTestBase {
@Mock
lateinit var kafkaTemplate: KafkaTemplate<String, String>
@Mock
lateinit var adminClient: AdminClient
@InjectMocks
lateinit var defaultProducer: DefaultProducer
@InjectMocks
lateinit var defaultConsumer: DefaultConsumer
@InjectMocks
lateinit var defaultListener: DefaultMessageListener
}*/

View File

@ -1,6 +1,5 @@
package no.iktdev.mediaprocessing.shared.common.parsing package no.iktdev.mediaprocessing.shared.common.parsing
import org.apache.kafka.common.protocol.types.Field.Str
class FileNameParser(val fileName: String) { class FileNameParser(val fileName: String) {
var cleanedFileName: String var cleanedFileName: String
@ -122,7 +121,7 @@ class FileNameParser(val fileName: String) {
fun removeDot(input: String): String { fun removeDot(input: String): String {
//var text = Regex("(?<=\\s)\\.|\\.(?=\\s)").replace(input, "") //var text = Regex("(?<=\\s)\\.|\\.(?=\\s)").replace(input, "")
//return Regex("\\.(?<!(Dr|Mr|Ms|Mrs|Lt|Capt|Prof|St|Ave)\\.)\\b").replace(text, " ") //return Regex("\\.(?<!(Dr|Mr|Ms|Mrs|Lt|Capt|Prof|St|Ave)\\.)\\b").replace(text, " ")
return Regex("(?<!\\b(?:Dr|Mr|Ms|Mrs|Lt|Capt|Prof|St|Ave))\\.+(?=\\s|\\w)").replace(input, " ") return Regex("(?<!\\b(?:Dr|Mr|Ms|Mrs|Lt|Capt|Prof|St|Ave))\\.").replace(input, " ")
} }
fun removeInBetweenCharacters(text: String): String { fun removeInBetweenCharacters(text: String): String {

View File

@ -8,14 +8,14 @@ object NameHelper {
val normalized = Normalizer.normalize(text, Normalizer.Form.NFC) val normalized = Normalizer.normalize(text, Normalizer.Form.NFC)
val result = normalized.replace("\\p{M}".toRegex(), "") val result = normalized.replace("\\p{M}".toRegex(), "")
val cleaned = "[^A-Za-z0-9 -]".toRegex().replace(result, "") val cleaned = "[^A-Za-z0-9 -]".toRegex().replace(result, "")
return StringUtils.stripAccents(cleaned) return StringUtils.stripAccents(cleaned).trim()
} }
fun cleanup(input: String): String { fun cleanup(input: String): String {
var cleaned = Regex("(?<=\\w)[_.](?=\\w)").replace(input, " ") var cleaned = Regex("(?<=\\w)[_.](?=\\w)").replace(input, " ")
cleaned = Regexes.illegalCharacters.replace(cleaned, " - ") cleaned = Regexes.illegalCharacters.replace(cleaned, " - ")
cleaned = Regexes.trimWhiteSpaces.replace(cleaned, " ") cleaned = Regexes.trimWhiteSpaces.replace(cleaned, " ")
return NameHelper.normalize(cleaned) return NameHelper.normalize(cleaned).trim()
} }
} }

View File

@ -36,6 +36,15 @@ class FileNameParserTest {
} }
/**
 * Feeds a dot-separated release name to [FileNameParser] and expects the guessed
 * title and the guessed file name to come back with spaces instead of dots, and
 * without the trailing resolution/release tokens.
 *
 * NOTE(review): the backslash-u0026 sequence in both literals is an escaped
 * backslash followed by plain text, not the '&' character — confirm that this is
 * the intended fixture and not an encoding artifact.
 */
@Test
fun assertDotRemoval() {
    val releaseName = "Like.a.Potato.Chef.S01E01.Departure.\\u0026.Skills.1080p.Potato"
    val expectedTitle = "Like a Potato Chef"
    val expectedFileName = "Like a Potato Chef S01E01 Departure \\u0026 Skills"

    val nameParser = FileNameParser(releaseName)

    // Title is queried before the file name, same order as the assertions.
    assertThat(nameParser.guessDesiredTitle()).isEqualTo(expectedTitle)
    assertThat(nameParser.guessDesiredFileName()).isEqualTo(expectedFileName)
}
@Test @Test
fun movieName() { fun movieName() {
val inName = "Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p" val inName = "Wicket.Wicker.Potato.4.2023.UHD.BluRay.2160p"
@ -89,5 +98,4 @@ class FileNameParserTest {
val result = FileNameParser(input).guessDesiredTitle() val result = FileNameParser(input).guessDesiredTitle()
assertThat(result).isEmpty() assertThat(result).isEmpty()
} }
} }