More Health

This commit is contained in:
Brage Skjønborg 2026-01-31 11:25:36 +01:00
parent c8aeb3759f
commit 7cb2eed79d
11 changed files with 76 additions and 32 deletions

View File

@ -1,7 +1,9 @@
package no.iktdev.mediaprocessing.coordinator.controller
import no.iktdev.mediaprocessing.coordinator.dto.CoordinatorHealth
import no.iktdev.mediaprocessing.coordinator.dto.health.CoordinatorHealth
import no.iktdev.mediaprocessing.coordinator.dto.rate.EventRate
import no.iktdev.mediaprocessing.coordinator.services.CoordinatorHealthService
import no.iktdev.mediaprocessing.coordinator.util.DiskInfo
import org.springframework.web.bind.annotation.GetMapping
import org.springframework.web.bind.annotation.RequestMapping
import org.springframework.web.bind.annotation.RestController
@ -16,4 +18,10 @@ class HealthController(
fun getHealth(): CoordinatorHealth {
return healthService.getHealth()
}
@GetMapping("/health/events")
fun getEventRate(): EventRate = healthService.getEventRate()
@GetMapping("/health/storage")
fun getDiskStatus(): List<DiskInfo> = healthService.getDiskHealth()
}

View File

@ -3,8 +3,8 @@ package no.iktdev.mediaprocessing.coordinator.controller
import no.iktdev.mediaprocessing.coordinator.services.EventService
import no.iktdev.mediaprocessing.coordinator.services.TaskService
import no.iktdev.mediaprocessing.coordinator.translateDto.CoordinatorTaskTransferDto
import no.iktdev.mediaprocessing.coordinator.translateDto.toCoordinatorTransferDto
import no.iktdev.mediaprocessing.coordinator.dto.translate.CoordinatorTaskTransferDto
import no.iktdev.mediaprocessing.coordinator.dto.translate.toCoordinatorTransferDto
import no.iktdev.mediaprocessing.ffmpeg.util.UtcNow
import no.iktdev.mediaprocessing.shared.common.dto.Paginated
import no.iktdev.mediaprocessing.shared.common.dto.ResetTaskResponse

View File

@ -1,4 +1,4 @@
package no.iktdev.mediaprocessing.coordinator.dto
package no.iktdev.mediaprocessing.coordinator.dto.health
import java.time.Instant
@ -8,6 +8,7 @@ data class CoordinatorHealth(
val stalledTasks: Int,
val activeTasks: Int,
val queuedTasks: Int,
val failedTasks: Int,
val lastActivity: Instant?,
// IDs for UI linking

View File

@ -1,4 +1,4 @@
package no.iktdev.mediaprocessing.coordinator.dto
package no.iktdev.mediaprocessing.coordinator.dto.health
enum class CoordinatorHealthStatus {
HEALTHY,

View File

@ -1,4 +1,4 @@
package no.iktdev.mediaprocessing.coordinator.dto
package no.iktdev.mediaprocessing.coordinator.dto.health
import java.time.Duration
import java.time.Instant

View File

@ -0,0 +1,6 @@
package no.iktdev.mediaprocessing.coordinator.dto.rate
data class EventRate(
val lastMinute: Long,
val lastFiveMinutes: Long
)

View File

@ -1,4 +1,4 @@
package no.iktdev.mediaprocessing.coordinator.translateDto
package no.iktdev.mediaprocessing.coordinator.dto.translate
import no.iktdev.eventi.models.store.PersistedTask
import no.iktdev.mediaprocessing.shared.common.rules.TaskLifecycleRules

View File

@ -1,8 +1,12 @@
package no.iktdev.mediaprocessing.coordinator.services
import no.iktdev.mediaprocessing.coordinator.dto.CoordinatorHealth
import no.iktdev.mediaprocessing.coordinator.dto.CoordinatorHealthStatus
import no.iktdev.mediaprocessing.coordinator.dto.SequenceHealth
import no.iktdev.mediaprocessing.coordinator.CoordinatorEnv
import no.iktdev.mediaprocessing.coordinator.dto.health.CoordinatorHealth
import no.iktdev.mediaprocessing.coordinator.dto.health.CoordinatorHealthStatus
import no.iktdev.mediaprocessing.coordinator.dto.health.SequenceHealth
import no.iktdev.mediaprocessing.coordinator.dto.rate.EventRate
import no.iktdev.mediaprocessing.coordinator.util.DiskInfo
import no.iktdev.mediaprocessing.coordinator.util.getDiskInfoFor
import no.iktdev.mediaprocessing.shared.common.rules.EventLifecycleRules
import no.iktdev.mediaprocessing.shared.common.rules.TaskLifecycleRules
import no.iktdev.mediaprocessing.shared.database.stores.TaskStore
@ -13,7 +17,8 @@ import java.time.Instant
@Service
class CoordinatorHealthService(
private val taskService: TaskService,
private val eventService: EventService
private val eventService: EventService,
private val coordinatorEnv: CoordinatorEnv
) {
fun getHealth(): CoordinatorHealth {
@ -31,6 +36,8 @@ class CoordinatorHealthService(
.filter { TaskLifecycleRules.isStalled(it) }
.map { it.taskId }
val failedTasks = taskService.getFailedTasks()
// --- SEQUENCE HEALTH ---
val overdueSequences = incompleteSequences
.filter { EventLifecycleRules.isOverdue(it) }
@ -75,8 +82,6 @@ class CoordinatorHealthService(
else -> CoordinatorHealthStatus.HEALTHY
}
val eventsLastMinute = eventService.getEventsLast(1)
val eventsLastFive = eventService.getEventsLast(5)
val lastActivityCandidates = listOfNotNull(
tasks.maxOfOrNull { it.persistedAt },
@ -88,6 +93,7 @@ class CoordinatorHealthService(
abandonedTasks = abandonedTaskIds.size,
stalledTasks = stalledTaskIds.size,
activeTasks = tasks.count { !it.consumed },
failedTasks = failedTasks.count(),
queuedTasks = TaskStore.getPendingTasks().size,
lastActivity = lastActivityCandidates.maxOrNull(),
@ -99,10 +105,23 @@ class CoordinatorHealthService(
details = mapOf(
"oldestActiveTaskAgeMinutes" to tasks.minOfOrNull {
Duration.between(it.persistedAt, Instant.now()).toMinutes()
},
"eventsLastMinute" to eventsLastMinute,
"eventsLastFiveMinutes" to eventsLastFive,
}
)
)
}
fun getEventRate(): EventRate {
return EventRate(
lastMinute = eventService.getEventsLast(1),
lastFiveMinutes = eventService.getEventsLast(5)
)
}
fun getDiskHealth(): List<DiskInfo> {
val paths = listOf(coordinatorEnv.incomingContent,
coordinatorEnv.cachedContent,
coordinatorEnv.outgoingContent)
.map { it -> it.absolutePath }
return getDiskInfoFor(paths)
}
}

View File

@ -28,4 +28,8 @@ class TaskService {
val resetSuccess = TaskStore.resetTaskById(taskId).isSuccess
return resetSuccess
}
fun getFailedTasks(): List<PersistedTask> {
return TaskStore.getFailedTasks()
}
}

View File

@ -1,35 +1,33 @@
package no.iktdev.mediaprocessing.coordinator.util
import java.nio.file.FileSystems
import java.nio.file.Files
import java.nio.file.Paths.get
import java.nio.file.Paths
data class DiskInfo(
val mount: String,
val device: String,
val totalBytes: Long,
val freeBytes: Long
val freeBytes: Long,
val usedBytes: Long,
val usedPercent: Double
)
fun getDiskInfoFor(mounts: List<String>): List<DiskInfo> {
val fileStores = FileSystems.getDefault().fileStores
return mounts.mapNotNull { mount ->
val path = get(mount)
fun getDiskInfoFor(mounts: List<String>): List<DiskInfo> =
mounts.mapNotNull { mount ->
val path = Paths.get(mount)
val store = fileStores.find { fs ->
try {
Files.getFileStore(path) == fs
} catch (e: Exception) {
false
}
} ?: return@mapNotNull null
val store = runCatching { Files.getFileStore(path) }.getOrNull()
?: return@mapNotNull null
DiskInfo(
mount = mount,
device = store.name(),
totalBytes = store.totalSpace,
freeBytes = store.usableSpace
freeBytes = store.usableSpace,
usedBytes = store.totalSpace - store.usableSpace,
usedPercent = if (store.totalSpace > 0)
((store.totalSpace - store.usableSpace).toDouble() / store.totalSpace.toDouble()) * 100
else 0.0
)
}
}

View File

@ -218,5 +218,13 @@ object TaskStore: TaskStore {
}.getOrDefault(emptyList())
}
fun getFailedTasks(): List<PersistedTask> {
return withTransaction {
TasksTable.getWhere {
(TasksTable.status eq TaskStatus.Failed)
}
}.getOrDefault(emptyList())
}
}