Updated subtitle stream picker
This commit is contained in:
parent
5cf7dff3f2
commit
ff21cb9595
@ -2,6 +2,7 @@ package no.iktdev.mediaprocessing.coordinator.tasksV2.mapping.streams
|
|||||||
|
|
||||||
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleArgumentsDto
|
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleArgumentsDto
|
||||||
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleStream
|
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleStream
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
||||||
/**
|
/**
|
||||||
@ -18,18 +19,27 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Reports whether this subtitle stream carries closed captions.
 *
 * A positive `captions` disposition flag is sufficient on its own. Otherwise the
 * lowercased title is searched for a closed-caption marker; a stream without a
 * title is not treated as CC.
 */
private fun SubtitleStream.isCC(): Boolean {
    val captionFlag = disposition?.captions ?: 0
    if (captionFlag > 0) return true

    val normalizedTitle = tags.title?.lowercase()
    if (normalizedTitle == null) return false

    // Plain substring search over the lowercased title.
    for (marker in listOf("cc", "closed caption")) {
        if (marker in normalizedTitle) return true
    }
    return false
}
|
||||||
|
|
||||||
/**
 * Reports whether this subtitle stream is intended for the hard of hearing.
 *
 * A positive `hearing_impaired` disposition flag is sufficient on its own. Otherwise
 * the lowercased title is searched for one of the known markers; a stream without a
 * title is not treated as SHD.
 */
private fun SubtitleStream.isSHD(): Boolean {
    if ((this.disposition?.hearing_impaired ?: 0) > 0) {
        return true
    }
    val title = this.tags.title?.lowercase() ?: return false
    // The title is lowercased above, so every keyword has to be lowercase too;
    // a mixed-case keyword such as "Hard-of-Hearing" could never match.
    // NOTE(review): "sdh" is the more common spelling of this abbreviation — consider adding it.
    val keywords = listOf("shd", "hh", "hard-of-hearing", "hard of hearing")
    return keywords.any { title.contains(it) }
}
|
||||||
|
|
||||||
private fun SubtitleStream.isSignOrSong(): Boolean {
|
private fun SubtitleStream.isSignOrSong(): Boolean {
|
||||||
|
if ((this.disposition?.lyrics ?: 0) > 0) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
val title = this.tags.title?.lowercase() ?: return false
|
val title = this.tags.title?.lowercase() ?: return false
|
||||||
val keywords = listOf("song", "songs", "sign", "signs")
|
val keywords = listOf("song", "songs", "sign", "signs")
|
||||||
return keywords.any { title.contains(it) }
|
return keywords.any { title.contains(it) }
|
||||||
@ -48,16 +58,19 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
|||||||
fun getSubtitleArguments(): List<SubtitleArgumentsDto> {
|
fun getSubtitleArguments(): List<SubtitleArgumentsDto> {
|
||||||
val acceptable = subtitleStreams.filter { !it.isSignOrSong() }
|
val acceptable = subtitleStreams.filter { !it.isSignOrSong() }
|
||||||
val codecFiltered = acceptable.filter { getFormatToCodec(it.codec_name) != null }
|
val codecFiltered = acceptable.filter { getFormatToCodec(it.codec_name) != null }
|
||||||
val mappedToType =
|
|
||||||
codecFiltered.map { getSubtitleType(it) to it }.filter { it.first in SubtitleType.entries }
|
|
||||||
.groupBy { it.second.tags.language ?: "eng" }
|
|
||||||
.mapValues { entry ->
|
|
||||||
val languageStreams = entry.value
|
|
||||||
val sortedStreams = languageStreams.sortedBy { SubtitleType.entries.indexOf(it.first) }
|
|
||||||
sortedStreams.firstOrNull()?.second
|
|
||||||
}.mapNotNull { it.value }
|
|
||||||
|
|
||||||
return mappedToType.mapNotNull { stream ->
|
val languageGrouped = codecFiltered.groupBy { it.tags.language ?: "eng" }
|
||||||
|
|
||||||
|
val streamsToExtract = languageGrouped.mapNotNull { item ->
|
||||||
|
val types = item.value.map { getSubtitleType(it) }
|
||||||
|
if (types.none { t -> t == SubtitleType.DEFAULT } || types.count { t -> t == SubtitleType.DEFAULT} > 1) {
|
||||||
|
excludeLowFrameCount(item.value).sortedBy { it.tags.NUMBER_OF_FRAMES }.firstOrNull()
|
||||||
|
} else {
|
||||||
|
item.value.minByOrNull { s -> getSubtitleType(s) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return streamsToExtract.mapNotNull { stream ->
|
||||||
getFormatToCodec(stream.codec_name)?.let { format ->
|
getFormatToCodec(stream.codec_name)?.let { format ->
|
||||||
SubtitleArgumentsDto(
|
SubtitleArgumentsDto(
|
||||||
index = subtitleStreams.indexOf(stream),
|
index = subtitleStreams.indexOf(stream),
|
||||||
@ -69,6 +82,22 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Drops streams whose frame count is missing, non-positive, or outside the accepted range.
 *
 * Only streams with a positive `NUMBER_OF_FRAMES` tag are considered. The mean and the
 * population standard deviation of those frame counts are computed, and a stream is kept
 * when its frame count lies in `standardDeviation..(mean + 2 * standardDeviation)`.
 *
 * NOTE(review): the lower bound is the standard deviation itself, not
 * `mean - 2 * standardDeviation`. Presumably this is deliberate so that very short tracks
 * are dropped even when only a handful of streams exist — confirm.
 *
 * @param streams candidate subtitle streams
 * @return the streams whose frame count is inside the accepted range, in their original order
 */
fun excludeLowFrameCount(streams: List<SubtitleStream>): List<SubtitleStream> {
    // Pair every stream with its frame count once, so no `!!` is needed further down.
    val usable = streams.mapNotNull { stream ->
        stream.tags.NUMBER_OF_FRAMES?.takeIf { it > 0 }?.let { frames -> stream to frames }
    }

    val mean = usable.map { it.second }.average()
    val variance = usable.map { (_, frames) -> (frames - mean) * (frames - mean) }.average()
    val standardDeviation = sqrt(variance)
    val upperBound = mean + 2 * standardDeviation

    return usable
        .filter { (_, frames) -> frames.toDouble() in standardDeviation..upperBound }
        .map { it.first }
}
|
||||||
|
|
||||||
fun getFormatToCodec(codecName: String): String? {
|
fun getFormatToCodec(codecName: String): String? {
|
||||||
return when (codecName) {
|
return when (codecName) {
|
||||||
"ass" -> "ass"
|
"ass" -> "ass"
|
||||||
|
|||||||
@ -0,0 +1,265 @@
|
|||||||
|
package no.iktdev.mediaprocessing.coordinator.tasksV2.mapping.streams
|
||||||
|
|
||||||
|
import com.google.gson.Gson
|
||||||
|
import com.google.gson.reflect.TypeToken
|
||||||
|
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleStream
|
||||||
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
class SubtitleArgumentsTest {
|
||||||
|
val type = object : TypeToken<List<SubtitleStream>>() {}.type
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Verifies that the fixture JSON deserializes into a list of SubtitleStream.
fun validate1() {
    val data = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    // Assert non-null before touching the list; otherwise a null result would surface
    // as a NullPointerException instead of a readable assertion failure.
    assertThat(data).isNotNull()
    assertThat(data.all { it is SubtitleStream }).isTrue()
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Verifies that excludeLowFrameCount keeps only the streams with index 4 and 5 from the fixture.
fun validate2() {
    val data = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    val args = SubtitleArguments(data)
    val selectable = args.excludeLowFrameCount(data)
    assertThat(selectable).hasSize(2)
    // A bare assertThat(...) checks nothing; each lookup must end in a real assertion.
    assertThat(selectable.find { it.index == 4 }).isNotNull()
    assertThat(selectable.find { it.index == 5 }).isNotNull()
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Expects exactly one selected subtitle argument, at list position 1, for the fixture without dispositions.
fun validate3() {
    val streams = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    val selected = SubtitleArguments(streams).getSubtitleArguments()

    assertThat(selected).hasSize(1)

    val selectedIndex = selected.firstOrNull()?.index
    assertThat(selectedIndex).isEqualTo(1)
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Expects exactly one selected subtitle argument, at list position 1, for the fixture with dispositions.
fun validate3_2() {
    val streams = Gson().fromJson<List<SubtitleStream>>(
        multipleSubtitleStreamsWithSameLanguageWithDisposition,
        type
    )
    val selected = SubtitleArguments(streams).getSubtitleArguments()

    assertThat(selected).hasSize(1)

    val selectedIndex = selected.firstOrNull()?.index
    assertThat(selectedIndex).isEqualTo(1)
}
|
||||||
|
|
||||||
|
|
||||||
|
val multipleSubtitleStreamsWithSameLanguage = """
|
||||||
|
[{
|
||||||
|
"index": 3,
|
||||||
|
"codec_name": "ass",
|
||||||
|
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 0,
|
||||||
|
"start_time": "0.000000",
|
||||||
|
"duration_ts": 1437083,
|
||||||
|
"duration": "1437.083000",
|
||||||
|
"extradata_size": 1967,
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"title": "Forced",
|
||||||
|
"BPS": "5",
|
||||||
|
"DURATION": "00:21:42.640000000",
|
||||||
|
"NUMBER_OF_FRAMES": "14",
|
||||||
|
"NUMBER_OF_BYTES": "835",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 4,
|
||||||
|
"codec_name": "ass",
|
||||||
|
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 0,
|
||||||
|
"start_time": "0.000000",
|
||||||
|
"duration_ts": 1437083,
|
||||||
|
"duration": "1437.083000",
|
||||||
|
"extradata_size": 1791,
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"BPS": "129",
|
||||||
|
"DURATION": "00:22:26.550000000",
|
||||||
|
"NUMBER_OF_FRAMES": "356",
|
||||||
|
"NUMBER_OF_BYTES": "21787",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 5,
|
||||||
|
"codec_name": "subrip",
|
||||||
|
"codec_long_name": "SubRip subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 790,
|
||||||
|
"start_time": "0.790000",
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"title": "CC",
|
||||||
|
"BPS": "83",
|
||||||
|
"DURATION": "00:23:56.060000000",
|
||||||
|
"NUMBER_OF_FRAMES": "495",
|
||||||
|
"NUMBER_OF_BYTES": "14954",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
val multipleSubtitleStreamsWithSameLanguageWithDisposition = """
|
||||||
|
[{
|
||||||
|
"index": 3,
|
||||||
|
"codec_name": "ass",
|
||||||
|
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 0,
|
||||||
|
"start_time": "0.000000",
|
||||||
|
"duration_ts": 1437083,
|
||||||
|
"duration": "1437.083000",
|
||||||
|
"extradata_size": 1967,
|
||||||
|
"disposition": {
|
||||||
|
"default": 1,
|
||||||
|
"dub": 0,
|
||||||
|
"original": 0,
|
||||||
|
"comment": 0,
|
||||||
|
"lyrics": 0,
|
||||||
|
"karaoke": 0,
|
||||||
|
"forced": 1,
|
||||||
|
"hearing_impaired": 0,
|
||||||
|
"visual_impaired": 0,
|
||||||
|
"clean_effects": 0,
|
||||||
|
"attached_pic": 0,
|
||||||
|
"timed_thumbnails": 0,
|
||||||
|
"non_diegetic": 0,
|
||||||
|
"captions": 0,
|
||||||
|
"descriptions": 0,
|
||||||
|
"metadata": 0,
|
||||||
|
"dependent": 0,
|
||||||
|
"still_image": 0
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"title": "Forced",
|
||||||
|
"BPS": "5",
|
||||||
|
"DURATION": "00:21:42.640000000",
|
||||||
|
"NUMBER_OF_FRAMES": "14",
|
||||||
|
"NUMBER_OF_BYTES": "835",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 4,
|
||||||
|
"codec_name": "ass",
|
||||||
|
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 0,
|
||||||
|
"start_time": "0.000000",
|
||||||
|
"duration_ts": 1437083,
|
||||||
|
"duration": "1437.083000",
|
||||||
|
"extradata_size": 1791,
|
||||||
|
"disposition": {
|
||||||
|
"default": 0,
|
||||||
|
"dub": 0,
|
||||||
|
"original": 0,
|
||||||
|
"comment": 0,
|
||||||
|
"lyrics": 0,
|
||||||
|
"karaoke": 0,
|
||||||
|
"forced": 0,
|
||||||
|
"hearing_impaired": 0,
|
||||||
|
"visual_impaired": 0,
|
||||||
|
"clean_effects": 0,
|
||||||
|
"attached_pic": 0,
|
||||||
|
"timed_thumbnails": 0,
|
||||||
|
"non_diegetic": 0,
|
||||||
|
"captions": 0,
|
||||||
|
"descriptions": 0,
|
||||||
|
"metadata": 0,
|
||||||
|
"dependent": 0,
|
||||||
|
"still_image": 0
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"BPS": "129",
|
||||||
|
"DURATION": "00:22:26.550000000",
|
||||||
|
"NUMBER_OF_FRAMES": "356",
|
||||||
|
"NUMBER_OF_BYTES": "21787",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"index": 5,
|
||||||
|
"codec_name": "subrip",
|
||||||
|
"codec_long_name": "SubRip subtitle",
|
||||||
|
"codec_type": "subtitle",
|
||||||
|
"codec_tag_string": "[0][0][0][0]",
|
||||||
|
"codec_tag": "0x0000",
|
||||||
|
"r_frame_rate": "0/0",
|
||||||
|
"avg_frame_rate": "0/0",
|
||||||
|
"time_base": "1/1000",
|
||||||
|
"start_pts": 790,
|
||||||
|
"start_time": "0.790000",
|
||||||
|
"disposition": {
|
||||||
|
"default": 0,
|
||||||
|
"dub": 0,
|
||||||
|
"original": 0,
|
||||||
|
"comment": 0,
|
||||||
|
"lyrics": 0,
|
||||||
|
"karaoke": 0,
|
||||||
|
"forced": 0,
|
||||||
|
"hearing_impaired": 0,
|
||||||
|
"visual_impaired": 0,
|
||||||
|
"clean_effects": 0,
|
||||||
|
"attached_pic": 0,
|
||||||
|
"timed_thumbnails": 0,
|
||||||
|
"non_diegetic": 0,
|
||||||
|
"captions": 0,
|
||||||
|
"descriptions": 0,
|
||||||
|
"metadata": 0,
|
||||||
|
"dependent": 0,
|
||||||
|
"still_image": 0
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"language": "eng",
|
||||||
|
"title": "CC",
|
||||||
|
"BPS": "83",
|
||||||
|
"DURATION": "00:23:56.060000000",
|
||||||
|
"NUMBER_OF_FRAMES": "495",
|
||||||
|
"NUMBER_OF_BYTES": "14954",
|
||||||
|
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||||
|
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||||
|
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
""".trimIndent()
|
||||||
|
}
|
||||||
@ -24,7 +24,7 @@ sealed class Stream(
|
|||||||
@Transient open val start_time: String,
|
@Transient open val start_time: String,
|
||||||
@Transient open val duration_ts: Long? = null,
|
@Transient open val duration_ts: Long? = null,
|
||||||
@Transient open val duration: String? = null,
|
@Transient open val duration: String? = null,
|
||||||
@Transient open val disposition: Disposition,
|
@Transient open val disposition: Disposition? = null,
|
||||||
@Transient open val tags: Tags
|
@Transient open val tags: Tags
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -133,7 +133,7 @@ data class SubtitleStream(
|
|||||||
override val start_time: String,
|
override val start_time: String,
|
||||||
override val duration: String?,
|
override val duration: String?,
|
||||||
override val duration_ts: Long?,
|
override val duration_ts: Long?,
|
||||||
override val disposition: Disposition,
|
override val disposition: Disposition? = null,
|
||||||
override val tags: Tags,
|
override val tags: Tags,
|
||||||
val subtitle_tags: SubtitleTags
|
val subtitle_tags: SubtitleTags
|
||||||
) : Stream(
|
) : Stream(
|
||||||
@ -163,6 +163,7 @@ data class Disposition(
|
|||||||
val karaoke: Int,
|
val karaoke: Int,
|
||||||
val forced: Int,
|
val forced: Int,
|
||||||
val hearing_impaired: Int,
|
val hearing_impaired: Int,
|
||||||
|
val captions: Int,
|
||||||
val visual_impaired: Int,
|
val visual_impaired: Int,
|
||||||
val clean_effects: Int,
|
val clean_effects: Int,
|
||||||
val attached_pic: Int,
|
val attached_pic: Int,
|
||||||
@ -173,7 +174,7 @@ data class Tags(
|
|||||||
val title: String?,
|
val title: String?,
|
||||||
val BPS: String?,
|
val BPS: String?,
|
||||||
val DURATION: String?,
|
val DURATION: String?,
|
||||||
val NUMBER_OF_FRAMES: String?,
|
val NUMBER_OF_FRAMES: Int? = 0,
|
||||||
val NUMBER_OF_BYTES: String?,
|
val NUMBER_OF_BYTES: String?,
|
||||||
val _STATISTICS_WRITING_APP: String?,
|
val _STATISTICS_WRITING_APP: String?,
|
||||||
val _STATISTICS_WRITING_DATE_UTC: String?,
|
val _STATISTICS_WRITING_DATE_UTC: String?,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user