Updated subtitle stream picker
This commit is contained in:
parent
5cf7dff3f2
commit
ff21cb9595
@ -2,6 +2,7 @@ package no.iktdev.mediaprocessing.coordinator.tasksV2.mapping.streams
|
||||
|
||||
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleArgumentsDto
|
||||
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleStream
|
||||
import kotlin.math.sqrt
|
||||
|
||||
class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
||||
/**
|
||||
@ -18,18 +19,27 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
||||
}
|
||||
|
||||
/**
 * Tells whether this subtitle stream is a closed-caption track.
 *
 * A stream qualifies when its disposition has a positive `captions` value, or, failing that,
 * when its lowercased title contains "cc" or "closed caption".
 *
 * @return `true` for a closed-caption stream; `false` otherwise, including when the stream has no title.
 */
private fun SubtitleStream.isCC(): Boolean {
    val captionsFlag = disposition?.captions ?: 0
    if (captionsFlag > 0) return true

    // Without a title there is nothing left to inspect.
    val normalizedTitle = tags.title?.lowercase() ?: return false

    // NOTE(review): plain substring match, so any title containing the letters "cc" also matches.
    return "cc" in normalizedTitle || "closed caption" in normalizedTitle
}
|
||||
|
||||
/**
 * Tells whether this subtitle stream is aimed at hard-of-hearing viewers.
 *
 * A stream qualifies when its disposition has a positive `hearing_impaired` value, or, failing that,
 * when its lowercased title contains one of the known keywords.
 *
 * @return `true` for a hard-of-hearing stream; `false` otherwise, including when the stream has no title.
 */
private fun SubtitleStream.isSHD(): Boolean {
    if ((this.disposition?.hearing_impaired ?: 0) > 0) {
        return true
    }
    val title = this.tags.title?.lowercase() ?: return false
    // The title is lowercased above, so every keyword must be lowercase too; the previous
    // mixed-case entries ("Hard-of-Hearing", "Hard of Hearing") could never match.
    val keywords = listOf("shd", "hh", "hard-of-hearing", "hard of hearing")
    return keywords.any { title.contains(it) }
}
|
||||
|
||||
private fun SubtitleStream.isSignOrSong(): Boolean {
|
||||
if ((this.disposition?.lyrics ?: 0) > 0) {
|
||||
return true
|
||||
}
|
||||
val title = this.tags.title?.lowercase() ?: return false
|
||||
val keywords = listOf("song", "songs", "sign", "signs")
|
||||
return keywords.any { title.contains(it) }
|
||||
@ -48,16 +58,19 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
||||
fun getSubtitleArguments(): List<SubtitleArgumentsDto> {
|
||||
val acceptable = subtitleStreams.filter { !it.isSignOrSong() }
|
||||
val codecFiltered = acceptable.filter { getFormatToCodec(it.codec_name) != null }
|
||||
val mappedToType =
|
||||
codecFiltered.map { getSubtitleType(it) to it }.filter { it.first in SubtitleType.entries }
|
||||
.groupBy { it.second.tags.language ?: "eng" }
|
||||
.mapValues { entry ->
|
||||
val languageStreams = entry.value
|
||||
val sortedStreams = languageStreams.sortedBy { SubtitleType.entries.indexOf(it.first) }
|
||||
sortedStreams.firstOrNull()?.second
|
||||
}.mapNotNull { it.value }
|
||||
|
||||
return mappedToType.mapNotNull { stream ->
|
||||
val languageGrouped = codecFiltered.groupBy { it.tags.language ?: "eng" }
|
||||
|
||||
val streamsToExtract = languageGrouped.mapNotNull { item ->
|
||||
val types = item.value.map { getSubtitleType(it) }
|
||||
if (types.none { t -> t == SubtitleType.DEFAULT } || types.count { t -> t == SubtitleType.DEFAULT} > 1) {
|
||||
excludeLowFrameCount(item.value).sortedBy { it.tags.NUMBER_OF_FRAMES }.firstOrNull()
|
||||
} else {
|
||||
item.value.minByOrNull { s -> getSubtitleType(s) }
|
||||
}
|
||||
}
|
||||
|
||||
return streamsToExtract.mapNotNull { stream ->
|
||||
getFormatToCodec(stream.codec_name)?.let { format ->
|
||||
SubtitleArgumentsDto(
|
||||
index = subtitleStreams.indexOf(stream),
|
||||
@ -69,6 +82,22 @@ class SubtitleArguments(val subtitleStreams: List<SubtitleStream>) {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Filters out subtitle streams whose frame count is missing, non-positive or outside the accepted band.
 *
 * Streams without a positive `NUMBER_OF_FRAMES` tag are dropped first. The mean and population standard
 * deviation of the remaining frame counts are then computed, and only streams whose frame count lies in
 * `standardDeviation..(mean + 2 * standardDeviation)` are kept, in their original order.
 *
 * @param streams candidate subtitle streams.
 * @return the streams that pass the frame-count filter; empty when no stream has a positive frame count.
 */
fun excludeLowFrameCount(streams: List<SubtitleStream>): List<SubtitleStream> {
    // Pair each usable stream with its frame count once, so no `!!` is needed further down.
    val countedStreams = streams.mapNotNull { stream ->
        stream.tags.NUMBER_OF_FRAMES?.takeIf { it > 0 }?.let { frames -> stream to frames }
    }
    if (countedStreams.isEmpty()) return emptyList()

    val frameCounts = countedStreams.map { it.second }
    val mean = frameCounts.average()
    val variance = frameCounts.map { (it - mean) * (it - mean) }.average()
    val standardDeviation = sqrt(variance)

    // Upper end of the accepted band: mean + 2 * standard deviation.
    val upperBound = mean + 2 * standardDeviation

    // NOTE(review): the lower end of the band is the standard deviation itself, not
    // mean - 2 * standardDeviation as the earlier comment suggested. Kept unchanged because it
    // decides which streams are selected - confirm this is the intended threshold.
    return countedStreams
        .filter { (_, frames) -> frames.toDouble() in standardDeviation..upperBound }
        .map { (stream, _) -> stream }
}
|
||||
|
||||
fun getFormatToCodec(codecName: String): String? {
|
||||
return when (codecName) {
|
||||
"ass" -> "ass"
|
||||
|
||||
@ -0,0 +1,265 @@
|
||||
package no.iktdev.mediaprocessing.coordinator.tasksV2.mapping.streams
|
||||
|
||||
import com.google.gson.Gson
|
||||
import com.google.gson.reflect.TypeToken
|
||||
import no.iktdev.mediaprocessing.shared.common.contract.ffmpeg.SubtitleStream
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class SubtitleArgumentsTest {
|
||||
val type = object : TypeToken<List<SubtitleStream>>() {}.type
|
||||
|
||||
/**
 * Checks that the same-language fixture deserializes into a non-empty list of [SubtitleStream].
 */
@Test
fun validate1() {
    val data = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    // Null/empty check comes first: `all { }` would throw on null and is vacuously true on an empty list.
    assertThat(data).isNotNull().isNotEmpty()
    assertThat(data.all { it is SubtitleStream }).isTrue()
}
|
||||
|
||||
/**
 * Checks that `excludeLowFrameCount` keeps exactly the streams with index 4 and 5 from the
 * same-language fixture.
 */
@Test
fun validate2() {
    val data = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    val args = SubtitleArguments(data)
    val selectable = args.excludeLowFrameCount(data)
    assertThat(selectable).hasSize(2)
    // assertThat(...) on its own verifies nothing; an assertion must be chained onto it.
    assertThat(selectable.find { it.index == 4 }).isNotNull()
    assertThat(selectable.find { it.index == 5 }).isNotNull()
}
|
||||
|
||||
/**
 * Runs the full argument selection on the same-language fixture and expects a single result
 * whose index is 1.
 */
@Test
fun validate3() {
    val streams = Gson().fromJson<List<SubtitleStream>>(multipleSubtitleStreamsWithSameLanguage, type)
    val selection = SubtitleArguments(streams).getSubtitleArguments()

    assertThat(selection).hasSize(1)

    val picked = selection.firstOrNull()
    assertThat(picked?.index).isEqualTo(1)
}
|
||||
|
||||
/**
 * Runs the full argument selection on the fixture that carries disposition data and expects a
 * single result whose index is 1.
 */
@Test
fun validate3_2() {
    val streams = Gson().fromJson<List<SubtitleStream>>(
        multipleSubtitleStreamsWithSameLanguageWithDisposition,
        type
    )
    val selection = SubtitleArguments(streams).getSubtitleArguments()

    assertThat(selection).hasSize(1)

    val picked = selection.firstOrNull()
    assertThat(picked?.index).isEqualTo(1)
}
|
||||
|
||||
|
||||
val multipleSubtitleStreamsWithSameLanguage = """
|
||||
[{
|
||||
"index": 3,
|
||||
"codec_name": "ass",
|
||||
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 0,
|
||||
"start_time": "0.000000",
|
||||
"duration_ts": 1437083,
|
||||
"duration": "1437.083000",
|
||||
"extradata_size": 1967,
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"title": "Forced",
|
||||
"BPS": "5",
|
||||
"DURATION": "00:21:42.640000000",
|
||||
"NUMBER_OF_FRAMES": "14",
|
||||
"NUMBER_OF_BYTES": "835",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
},
|
||||
{
|
||||
"index": 4,
|
||||
"codec_name": "ass",
|
||||
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 0,
|
||||
"start_time": "0.000000",
|
||||
"duration_ts": 1437083,
|
||||
"duration": "1437.083000",
|
||||
"extradata_size": 1791,
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"BPS": "129",
|
||||
"DURATION": "00:22:26.550000000",
|
||||
"NUMBER_OF_FRAMES": "356",
|
||||
"NUMBER_OF_BYTES": "21787",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
},
|
||||
{
|
||||
"index": 5,
|
||||
"codec_name": "subrip",
|
||||
"codec_long_name": "SubRip subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 790,
|
||||
"start_time": "0.790000",
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"title": "CC",
|
||||
"BPS": "83",
|
||||
"DURATION": "00:23:56.060000000",
|
||||
"NUMBER_OF_FRAMES": "495",
|
||||
"NUMBER_OF_BYTES": "14954",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
}]
|
||||
""".trimIndent()
|
||||
|
||||
val multipleSubtitleStreamsWithSameLanguageWithDisposition = """
|
||||
[{
|
||||
"index": 3,
|
||||
"codec_name": "ass",
|
||||
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 0,
|
||||
"start_time": "0.000000",
|
||||
"duration_ts": 1437083,
|
||||
"duration": "1437.083000",
|
||||
"extradata_size": 1967,
|
||||
"disposition": {
|
||||
"default": 1,
|
||||
"dub": 0,
|
||||
"original": 0,
|
||||
"comment": 0,
|
||||
"lyrics": 0,
|
||||
"karaoke": 0,
|
||||
"forced": 1,
|
||||
"hearing_impaired": 0,
|
||||
"visual_impaired": 0,
|
||||
"clean_effects": 0,
|
||||
"attached_pic": 0,
|
||||
"timed_thumbnails": 0,
|
||||
"non_diegetic": 0,
|
||||
"captions": 0,
|
||||
"descriptions": 0,
|
||||
"metadata": 0,
|
||||
"dependent": 0,
|
||||
"still_image": 0
|
||||
},
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"title": "Forced",
|
||||
"BPS": "5",
|
||||
"DURATION": "00:21:42.640000000",
|
||||
"NUMBER_OF_FRAMES": "14",
|
||||
"NUMBER_OF_BYTES": "835",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
},
|
||||
{
|
||||
"index": 4,
|
||||
"codec_name": "ass",
|
||||
"codec_long_name": "ASS (Advanced SSA) subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 0,
|
||||
"start_time": "0.000000",
|
||||
"duration_ts": 1437083,
|
||||
"duration": "1437.083000",
|
||||
"extradata_size": 1791,
|
||||
"disposition": {
|
||||
"default": 0,
|
||||
"dub": 0,
|
||||
"original": 0,
|
||||
"comment": 0,
|
||||
"lyrics": 0,
|
||||
"karaoke": 0,
|
||||
"forced": 0,
|
||||
"hearing_impaired": 0,
|
||||
"visual_impaired": 0,
|
||||
"clean_effects": 0,
|
||||
"attached_pic": 0,
|
||||
"timed_thumbnails": 0,
|
||||
"non_diegetic": 0,
|
||||
"captions": 0,
|
||||
"descriptions": 0,
|
||||
"metadata": 0,
|
||||
"dependent": 0,
|
||||
"still_image": 0
|
||||
},
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"BPS": "129",
|
||||
"DURATION": "00:22:26.550000000",
|
||||
"NUMBER_OF_FRAMES": "356",
|
||||
"NUMBER_OF_BYTES": "21787",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
},
|
||||
{
|
||||
"index": 5,
|
||||
"codec_name": "subrip",
|
||||
"codec_long_name": "SubRip subtitle",
|
||||
"codec_type": "subtitle",
|
||||
"codec_tag_string": "[0][0][0][0]",
|
||||
"codec_tag": "0x0000",
|
||||
"r_frame_rate": "0/0",
|
||||
"avg_frame_rate": "0/0",
|
||||
"time_base": "1/1000",
|
||||
"start_pts": 790,
|
||||
"start_time": "0.790000",
|
||||
"disposition": {
|
||||
"default": 0,
|
||||
"dub": 0,
|
||||
"original": 0,
|
||||
"comment": 0,
|
||||
"lyrics": 0,
|
||||
"karaoke": 0,
|
||||
"forced": 0,
|
||||
"hearing_impaired": 0,
|
||||
"visual_impaired": 0,
|
||||
"clean_effects": 0,
|
||||
"attached_pic": 0,
|
||||
"timed_thumbnails": 0,
|
||||
"non_diegetic": 0,
|
||||
"captions": 0,
|
||||
"descriptions": 0,
|
||||
"metadata": 0,
|
||||
"dependent": 0,
|
||||
"still_image": 0
|
||||
},
|
||||
"tags": {
|
||||
"language": "eng",
|
||||
"title": "CC",
|
||||
"BPS": "83",
|
||||
"DURATION": "00:23:56.060000000",
|
||||
"NUMBER_OF_FRAMES": "495",
|
||||
"NUMBER_OF_BYTES": "14954",
|
||||
"_STATISTICS_WRITING_APP": "mkvmerge v69.0.0 ('Day And Age') 64-bit",
|
||||
"_STATISTICS_WRITING_DATE_UTC": "2024-10-04 08:12:59",
|
||||
"_STATISTICS_TAGS": "BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES"
|
||||
}
|
||||
}]
|
||||
""".trimIndent()
|
||||
}
|
||||
@ -24,7 +24,7 @@ sealed class Stream(
|
||||
@Transient open val start_time: String,
|
||||
@Transient open val duration_ts: Long? = null,
|
||||
@Transient open val duration: String? = null,
|
||||
@Transient open val disposition: Disposition,
|
||||
@Transient open val disposition: Disposition? = null,
|
||||
@Transient open val tags: Tags
|
||||
)
|
||||
|
||||
@ -133,7 +133,7 @@ data class SubtitleStream(
|
||||
override val start_time: String,
|
||||
override val duration: String?,
|
||||
override val duration_ts: Long?,
|
||||
override val disposition: Disposition,
|
||||
override val disposition: Disposition? = null,
|
||||
override val tags: Tags,
|
||||
val subtitle_tags: SubtitleTags
|
||||
) : Stream(
|
||||
@ -163,6 +163,7 @@ data class Disposition(
|
||||
val karaoke: Int,
|
||||
val forced: Int,
|
||||
val hearing_impaired: Int,
|
||||
val captions: Int,
|
||||
val visual_impaired: Int,
|
||||
val clean_effects: Int,
|
||||
val attached_pic: Int,
|
||||
@ -173,7 +174,7 @@ data class Tags(
|
||||
val title: String?,
|
||||
val BPS: String?,
|
||||
val DURATION: String?,
|
||||
val NUMBER_OF_FRAMES: String?,
|
||||
val NUMBER_OF_FRAMES: Int? = 0,
|
||||
val NUMBER_OF_BYTES: String?,
|
||||
val _STATISTICS_WRITING_APP: String?,
|
||||
val _STATISTICS_WRITING_DATE_UTC: String?,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user