Wip 4 - Pushing Python
This commit is contained in:
parent f85fbde89d
commit 8096a979ea
0
apps/pyMetadata/config/__init__.py
Normal file
29
apps/pyMetadata/config/database_config.py
Normal file
@@ -0,0 +1,29 @@
import os
from dataclasses import dataclass

@dataclass
class DatabaseConfig:
    address: str
    port: int
    name: str
    username: str
    password: str

    @staticmethod
    def from_env() -> "DatabaseConfig":
        return DatabaseConfig(
            address=os.environ.get("DATABASE_ADDRESS") or "192.168.2.250",
            port=int(os.environ.get("DATABASE_PORT") or "3306"),
            name=os.environ.get("DATABASE_NAME_E") or "eventsV3",
            username=os.environ.get("DATABASE_USERNAME") or "root",
            password=os.environ.get("DATABASE_PASSWORD") or "def",
        )

    def validate(self) -> None:
        if not self.address:
            raise ValueError("Database address missing")
        if not self.name:
            raise ValueError("Database name missing")
        if not self.username:
            raise ValueError("Database username missing")
        # more rules can be added here
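As a quick illustration, here is a minimal usage sketch for this config: it reads the same environment variables that from_env() consults above, then validates before use. The overridden values are placeholders, not part of this commit.

import os

from config.database_config import DatabaseConfig

# Placeholder values for a local run (illustrative only)
os.environ["DATABASE_ADDRESS"] = "127.0.0.1"
os.environ["DATABASE_PORT"] = "3306"

config = DatabaseConfig.from_env()
config.validate()  # raises ValueError if address, name or username is empty
print(config.address, config.port, config.name)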
0
apps/pyMetadata/db/__init__.py
Normal file
53
apps/pyMetadata/db/database.py
Normal file
@@ -0,0 +1,53 @@
from config.database_config import DatabaseConfig
from utils.logger import logger
import mysql.connector
from mysql.connector import Error
from utils.backoff import wait_with_backoff

class Database:
    def __init__(self, config: DatabaseConfig):
        self.config = config
        self.conn = None

    def connect(self):
        """Connect to the DB with backoff."""
        self.config.validate()
        while True:
            try:
                self.conn = mysql.connector.connect(
                    host=self.config.address,
                    user=self.config.username,
                    password=self.config.password,
                    database=self.config.name
                )
                if self.conn.is_connected():
                    logger.info("✅ Connected to the database")
                    return
            except Error as e:
                logger.error(f"❌ DB connection failed: {e}")
                for _ in wait_with_backoff():
                    try:
                        self.conn = mysql.connector.connect(
                            host=self.config.address,
                            user=self.config.username,
                            password=self.config.password,
                            database=self.config.name
                        )
                        if self.conn.is_connected():
                            logger.info("✅ Connected to the database")
                            return
                    except Error:
                        continue

    def validate(self):
        """Check that the connection is active."""
        if not self.conn or not self.conn.is_connected():
            logger.warning("⚠️ Connection lost, retrying...")
            self.connect()

    def query(self, sql: str, params=None):
        """Run a query with validation."""
        self.validate()
        cursor = self.conn.cursor(dictionary=True)
        cursor.execute(sql, params or ())
        return cursor.fetchall()
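A minimal usage sketch, assuming a reachable MySQL instance. Note that Database does not define a close() helper in this commit, so the sketch closes the underlying connection directly; the query text and parameters are illustrative.

from config.database_config import DatabaseConfig
from db.database import Database

config = DatabaseConfig.from_env()
db = Database(config)
db.connect()  # blocks with backoff until a connection is established

rows = db.query("SELECT * FROM TASKS WHERE STATUS=%s LIMIT 5", ("Pending",))
for row in rows:
    print(row["TASK_ID"], row["STATUS"])

# No close() helper exists on Database in this commit; close the raw connection instead.
if db.conn and db.conn.is_connected():
    db.conn.close()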
108
apps/pyMetadata/db/repository.py
Normal file
@@ -0,0 +1,108 @@
from datetime import datetime
import json
from typing import Optional
from db.database import Database
from models.enums import TaskStatus
from models.event import MetadataSearchResultEvent
from models.task import Task, MetadataSearchTask, MetadataSearchData
from utils.logger import logger

def fetch_next_task(db: Database) -> Optional[Task]:
    db.validate()
    cursor = db.conn.cursor(dictionary=True)
    cursor.execute(
        "SELECT * FROM TASKS WHERE STATUS='Pending' AND CLAIMED=0 AND CONSUMED=0 "
        "ORDER BY PERSISTED_AT ASC LIMIT 1"
    )
    row = cursor.fetchone()
    if not row:
        return None

    try:
        if row["TASK"] == "MetadataSearchTask":
            # the full JSON payload lives in DATA
            return MetadataSearchTask.model_validate_json(row["DATA"])
        else:
            return Task.model_validate_json(row["DATA"])
    except Exception as e:
        logger.error(f"❌ Failed to deserialize task {row.get('TASK_ID')}: {e}")
        mark_failed(db, row["TASK_ID"])
        return None


def mark_failed(db: Database, task_id: str) -> None:
    cursor = db.conn.cursor()
    cursor.execute(
        "UPDATE TASKS SET STATUS='Failed' WHERE TASK_ID=%s",
        (task_id,)
    )
    db.conn.commit()

def claim_task(db: Database, task_id: str, worker_id: str) -> bool:
    """
    Mark a task as claimed by a given worker.
    Returns True if the claim succeeds, False if the task is already claimed.
    """
    db.validate()
    try:
        cursor = db.conn.cursor()
        # Only update if the task is not already claimed
        cursor.execute(
            """
            UPDATE TASKS
            SET CLAIMED=1, CLAIMED_BY=%s, LAST_CHECK_IN=%s
            WHERE TASK_ID=%s AND CLAIMED=0 AND CONSUMED=0
            """,
            (worker_id, datetime.now(), task_id)
        )
        db.conn.commit()
        return cursor.rowcount > 0
    except Exception as e:
        db.conn.rollback()
        raise RuntimeError(f"Claim failed: {e}")


def persist_event_and_mark_consumed(db: Database, event: MetadataSearchResultEvent, task_id: str) -> None:
    """
    Persists an event and marks the corresponding task as consumed in a single transaction.
    Rolls back if anything fails.
    """
    db.validate()
    try:
        cursor = db.conn.cursor()

        # 1. Insert event
        as_data = event.model_dump_json()  # Pydantic v2
        event_name = event.__class__.__name__

        cursor.execute(
            """
            INSERT INTO EVENTS (REFERENCE_ID, EVENT_ID, EVENT, DATA, PERSISTED_AT)
            VALUES (%s, %s, %s, %s, %s)
            """,
            (
                str(event.referenceId),
                str(event.eventId),
                event_name,
                as_data,
                datetime.now().isoformat()
            )
        )

        # 2. Update task status
        cursor.execute(
            "UPDATE TASKS SET STATUS=%s, CONSUMED=1 WHERE TASK_ID=%s",
            (TaskStatus.COMPLETED.value, task_id)
        )

        # 3. Commit both operations
        db.conn.commit()

    except Exception as e:
        # Roll back if anything fails
        db.conn.rollback()
        raise RuntimeError(f"Transaction failed: {e}")
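Tying these helpers together, here is a hedged sketch of the intended fetch → claim → persist flow. The worker_id format mirrors worker/poller.py; build_event_for is a hypothetical placeholder for whatever produces the result event (the real worker uses process_task).

import uuid

from config.database_config import DatabaseConfig
from db.database import Database
from db.repository import fetch_next_task, claim_task, persist_event_and_mark_consumed, mark_failed

db = Database(DatabaseConfig.from_env())
db.connect()
worker_id = f"worker-{uuid.uuid4()}"

task = fetch_next_task(db)
if task and claim_task(db, str(task.taskId), worker_id):
    try:
        event = build_event_for(task)  # hypothetical helper, not part of this commit
        persist_event_and_mark_consumed(db, event, str(task.taskId))
    except Exception:
        mark_failed(db, str(task.taskId))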
0
apps/pyMetadata/models/__init__.py
Normal file
11
apps/pyMetadata/models/enums.py
Normal file
@@ -0,0 +1,11 @@
from enum import Enum

class TaskStatus(Enum):
    PENDING = "Pending"
    IN_PROGRESS = "InProgress"
    COMPLETED = "Completed"
    FAILED = "Failed"

class MediaType(Enum):
    MOVIE = "Movie"
    SERIE = "Serie"
48
apps/pyMetadata/models/event.py
Normal file
@@ -0,0 +1,48 @@
# models/event.py
from pydantic import BaseModel
from datetime import datetime
from typing import List, Set
from uuid import UUID

from models.enums import MediaType, TaskStatus


class EventMetadata(BaseModel):
    created: datetime
    derivedFromId: Set[UUID]  # exact field name


class Summary(BaseModel):
    language: str
    description: str


class MetadataResult(BaseModel):
    source: str
    title: str
    alternateTitles: List[str]
    cover: str | None
    bannerImage: str | None  # keep camelCase
    type: MediaType
    summary: List[Summary]
    genres: List[str]


class SearchResult(BaseModel):
    simpleScore: int
    prefixScore: int
    advancedScore: int
    sourceWeight: float
    metadata: MetadataResult


class MetadataSearchResultEvent(BaseModel):
    # Required fields
    referenceId: UUID
    eventId: UUID
    metadata: EventMetadata

    # Custom fields
    results: List[SearchResult]
    recommended: SearchResult | None
    status: TaskStatus
39
apps/pyMetadata/models/metadata.py
Normal file
@@ -0,0 +1,39 @@
from dataclasses import dataclass, asdict
from typing import List, Optional
from enum import Enum

from models.enums import MediaType


@dataclass
class Summary:
    summary: str
    language: str

    def to_dict(self):
        return {k: v.strip() if isinstance(v, str) else v for k, v in asdict(self).items()}

@dataclass
class Metadata:
    title: str
    altTitle: List[str]
    cover: str
    banner: Optional[str]
    type: MediaType
    summary: List[Summary]
    genres: List[str]
    source: str

    def to_dict(self):
        def trim(item):
            if isinstance(item, str):
                return item.strip()
            elif isinstance(item, list):
                return [trim(sub_item) for sub_item in item]
            elif isinstance(item, Enum):
                return item.value
            elif hasattr(item, "to_dict"):
                return item.to_dict()
            return item

        return {key: trim(value) for key, value in asdict(self).items()}
28
apps/pyMetadata/models/task.py
Normal file
@@ -0,0 +1,28 @@
# models/task.py
from pydantic import BaseModel
from uuid import UUID
from datetime import datetime
from typing import List, Optional
from models.enums import TaskStatus


class MetadataSearchData(BaseModel):
    searchTitles: List[str]
    collection: str


class Task(BaseModel):
    referenceId: UUID
    taskId: UUID
    task: str
    status: TaskStatus
    data: dict  # generic payload if the type is unknown
    claimed: bool
    claimedBy: Optional[str]
    consumed: bool
    lastCheckIn: Optional[datetime]
    persistedAt: datetime


class MetadataSearchTask(Task):
    data: MetadataSearchData
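For reference, a sketch of how these models round-trip through the JSON stored in the TASKS.DATA column, as db/repository.py above deserializes it with model_validate_json. All identifiers and values in the payload are made up for illustration.

from models.task import MetadataSearchTask

# Illustrative JSON as it might appear in TASKS.DATA
raw = """
{
  "referenceId": "5f8b5a9e-1111-4222-8333-444455556666",
  "taskId": "0f8b5a9e-7777-4888-9999-000011112222",
  "task": "MetadataSearchTask",
  "status": "Pending",
  "data": {"searchTitles": ["Foo"], "collection": "anime"},
  "claimed": false,
  "claimedBy": null,
  "consumed": false,
  "lastCheckIn": null,
  "persistedAt": "2024-01-01T12:00:00"
}
"""

task = MetadataSearchTask.model_validate_json(raw)
print(task.data.searchTitles, task.status)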
2
apps/pyMetadata/requirments-test.txt
Normal file
@@ -0,0 +1,2 @@
pytest==9.0.2
pytest-asyncio==1.3.0
15
apps/pyMetadata/sources/factory.py
Normal file
@@ -0,0 +1,15 @@
from typing import List
from .mal import Mal
from .anii import Anii
from .imdb import Imdb
from .source import SourceBase

def get_all_sources(titles: List[str]) -> List[SourceBase]:
    """
    Returns all active sources that implement SourceBase.
    """
    return [
        Mal(titles),
        Anii(titles),
        Imdb(titles),
    ]
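The Mal, Anii and Imdb classes (and SourceBase itself) are not part of this commit. Based on how they are constructed here and stubbed in the tests, a new source would presumably take the titles in its constructor and expose an async search() returning Metadata or None; the class below is a hypothetical sketch under that assumption.

from typing import List, Optional

from models.metadata import Metadata, Summary
from models.enums import MediaType


class ExampleSource:  # hypothetical source, not part of this commit
    def __init__(self, titles: List[str]):
        self.titles = titles

    async def search(self) -> Optional[Metadata]:
        # A real source would call an external API here; this just fabricates a hit.
        if not self.titles:
            return None
        return Metadata(
            title=self.titles[0],
            altTitle=[],
            cover="",
            banner=None,
            type=MediaType.MOVIE,
            summary=[Summary(summary="example", language="en")],
            genres=[],
            source="example",
        )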
138
apps/pyMetadata/tests/test_poller.py
Normal file
@@ -0,0 +1,138 @@
from typing import Set
import pytest
from models.event import MetadataSearchResultEvent, EventMetadata
from worker.poller import run_worker, run_iteration
from models.task import MetadataSearchTask, MetadataSearchData
from models.enums import TaskStatus
import uuid
from datetime import datetime
import time

def make_dummy_event():
    return MetadataSearchResultEvent(
        referenceId=uuid.uuid4(),
        eventId=uuid.uuid4(),
        metadata=EventMetadata(
            created=datetime.now(),
            derivedFromId={uuid.uuid4()}
        ),
        results=[],
        persistedAt=datetime.now(),
        recommended=None,  # fill in with a valid value
        status="Completed"  # or an enum if the model requires it
    )


def make_task():
    return MetadataSearchTask(
        referenceId=uuid.uuid4(),
        taskId=uuid.uuid4(),
        task="MetadataSearchTask",
        status=TaskStatus.PENDING,
        data=MetadataSearchData(searchTitles=["foo"], collection="bar"),
        claimed=False,
        claimedBy=None,
        consumed=False,
        lastCheckIn=None,
        persistedAt=datetime.now()
    )

def test_run_worker_processes_one(monkeypatch):
    events = []
    task = make_task()

    class FakeDB:
        def connect(self): pass
        def close(self): pass

    calls = {"n": 0}
    def fetch_once(db):
        if calls["n"] == 0:
            calls["n"] += 1
            return task
        calls["n"] += 1
        return None

    monkeypatch.setattr("worker.poller.fetch_next_task", fetch_once)
    monkeypatch.setattr("worker.poller.claim_task", lambda *a, **k: True)

    # Important: async stub
    async def fake_process_task(task):
        return make_dummy_event()
    monkeypatch.setattr("worker.poller.process_task", fake_process_task)

    def persist_stub(db, event, task_id):
        events.append("dummy_event")
    monkeypatch.setattr("worker.poller.persist_event_and_mark_consumed", persist_stub)

    monkeypatch.setattr("worker.poller.mark_failed", lambda *a, **k: events.append("failed"))
    monkeypatch.setattr("worker.poller.time.sleep", lambda s: None)

    run_worker(db=FakeDB(), shutdown_flag_ref=lambda: calls["n"] >= 2)

    assert "dummy_event" in events


def test_backoff(monkeypatch):
    intervals = []

    class FakeDB:
        def connect(self): pass
        def close(self): pass

    # monkeypatch fetch_next_task to return None several times
    monkeypatch.setattr("worker.poller.fetch_next_task", lambda db: None)

    # monkeypatch time.sleep to capture poll_interval
    def fake_sleep(seconds):
        intervals.append(seconds)
    monkeypatch.setattr(time, "sleep", fake_sleep)

    # monkeypatch claim_task, process_task etc. with dummies
    monkeypatch.setattr("worker.poller.claim_task", lambda db, tid, wid: True)
    monkeypatch.setattr("worker.poller.process_task", lambda t: "dummy_event")
    monkeypatch.setattr("worker.poller.persist_event_and_mark_consumed", lambda db, e, tid: None)
    monkeypatch.setattr("worker.poller.mark_failed", lambda db, tid: None)

    # run only a few iterations by stopping via shutdown_flag_ref
    run_worker(db=FakeDB(), shutdown_flag_ref=lambda: len(intervals) >= 4)

    # verify that the interval increases (5 → 10 → 20 → 40)
    assert intervals == [5, 10, 20, 40]

def test_backoff_on_connection_error(monkeypatch):
    intervals = []
    reconnects = []

    class FakeDB:
        def connect(self):
            reconnects.append("reconnect")
        def close(self): pass

    # First: fetch_next_task raises an exception
    def failing_fetch(db):
        raise RuntimeError("DB connection lost")

    monkeypatch.setattr("worker.poller.fetch_next_task", failing_fetch)

    # monkeypatch time.sleep to capture poll_interval
    def fake_sleep(seconds):
        intervals.append(seconds)
    monkeypatch.setattr(time, "sleep", fake_sleep)

    # dummy functions
    monkeypatch.setattr("worker.poller.claim_task", lambda db, tid, wid: True)
    monkeypatch.setattr("worker.poller.process_task", lambda t: "dummy_event")
    monkeypatch.setattr("worker.poller.persist_event_and_mark_consumed", lambda db, e, tid: None)
    monkeypatch.setattr("worker.poller.mark_failed", lambda db, tid: None)

    # run only a few iterations
    run_worker(db=FakeDB(), shutdown_flag_ref=lambda: len(reconnects) >= 2)

    # verify that reconnect was called
    assert reconnects == ["reconnect", "reconnect"]

    # verify that poll_interval was reset to 5 after the error
    assert all(interval == 5 for interval in intervals)
113
apps/pyMetadata/tests/test_processor.py
Normal file
@@ -0,0 +1,113 @@
import asyncio
import uuid
from datetime import datetime
import pytest

import worker.processor as processor
from models.task import MetadataSearchTask, MetadataSearchData, TaskStatus
from models.metadata import Metadata, Summary, MediaType

# --- Helpers ---
def make_dummy_metadata(source="mal", title="Foo"):
    return Metadata(
        title=title,
        altTitle=[],
        cover="cover.jpg",
        banner=None,
        type=MediaType.MOVIE,
        summary=[Summary(summary="A fake summary", language="en")],
        genres=["Drama"],
        source=source,
    )

def make_dummy_task():
    return MetadataSearchTask(
        referenceId=uuid.uuid4(),
        taskId=uuid.uuid4(),
        task="MetadataSearchTask",
        status=TaskStatus.PENDING,
        data=MetadataSearchData(searchTitles=["Foo"], collection="bar"),
        claimed=False,
        claimedBy=None,
        consumed=False,
        lastCheckIn=None,
        persistedAt=datetime.now()
    )

# --- Tests ---

@pytest.mark.asyncio
async def test_process_task_success(monkeypatch):
    # Async stub for run_search
    async def good_search(titles):
        return [make_dummy_metadata("mal"), make_dummy_metadata("imdb")]

    monkeypatch.setattr(processor, "run_search", good_search)

    # Matchers return fixed scores
    class DummyMatcher:
        def __init__(self, title, m): pass
        def getScore(self): return 50
    monkeypatch.setattr(processor, "SimpleMatcher", DummyMatcher)
    monkeypatch.setattr(processor, "PrefixMatcher", DummyMatcher)
    monkeypatch.setattr(processor, "AdvancedMatcher", DummyMatcher)

    # Fake DB and mark_failed
    class FakeDB: pass
    called = {}
    monkeypatch.setattr(processor, "mark_failed", lambda db, tid: called.setdefault("failed", True))

    task = make_dummy_task()
    event = await processor.process_task(FakeDB(), task)

    assert isinstance(event, processor.MetadataSearchResultEvent)
    assert event.status == TaskStatus.COMPLETED
    assert event.recommended is not None
    assert "failed" not in called


@pytest.mark.asyncio
async def test_process_task_no_results(monkeypatch):
    async def empty_search(titles):
        return []
    monkeypatch.setattr(processor, "run_search", empty_search)

    class FakeDB: pass
    called = {}
    monkeypatch.setattr(processor, "mark_failed", lambda db, tid: called.setdefault("failed", True))

    task = make_dummy_task()
    event = await processor.process_task(FakeDB(), task)

    assert event is None
    assert "failed" in called


@pytest.mark.asyncio
async def test_process_task_exception(monkeypatch):
    async def bad_search(titles):
        raise RuntimeError("boom")
    monkeypatch.setattr(processor, "run_search", bad_search)

    class FakeDB: pass
    called = {}
    monkeypatch.setattr(processor, "mark_failed", lambda db, tid: called.setdefault("failed", True))

    task = make_dummy_task()
    event = await processor.process_task(FakeDB(), task)

    assert event is None
    assert "failed" in called


@pytest.mark.asyncio
async def test_choose_recommended_prefers_advanced(monkeypatch):
    # Create three SearchResults with different scores
    m = make_dummy_metadata("mal")
    r1 = processor.SearchResult(simpleScore=10, prefixScore=10, advancedScore=90, sourceWeight=1.0, metadata=processor.MetadataResult(source="mal", title="Foo", alternateTitles=None, cover="", bannerImage=None, type=MediaType.MOVIE, summary=[], genres=[]))
    r2 = processor.SearchResult(simpleScore=50, prefixScore=50, advancedScore=20, sourceWeight=1.0, metadata=processor.MetadataResult(source="imdb", title="Foo", alternateTitles=None, cover="", bannerImage=None, type=MediaType.MOVIE, summary=[], genres=[]))
    r3 = processor.SearchResult(simpleScore=80, prefixScore=80, advancedScore=80, sourceWeight=1.0, metadata=processor.MetadataResult(source="anii", title="Foo", alternateTitles=None, cover="", bannerImage=None, type=MediaType.MOVIE, summary=[], genres=[]))

    recommended = processor.choose_recommended([r1, r2, r3])
    assert recommended is r1  # highest advancedScore wins
135
apps/pyMetadata/tests/test_repository.py
Normal file
@@ -0,0 +1,135 @@
import json
import uuid
from datetime import datetime
import pytest
from db import repository
from models.event import MetadataSearchResultEvent, EventMetadata, SearchResult, MetadataResult, Summary
from models.enums import MediaType, TaskStatus
from db.repository import persist_event_and_mark_consumed
from models.task import MetadataSearchData, MetadataSearchTask

class FakeCursor:
    def __init__(self):
        self.executed = []
        self.rowcount = 1
    def execute(self, sql, params=None):
        self.executed.append((sql, params))
    def close(self): pass

class FakeConn:
    def __init__(self):
        self.cursor_obj = FakeCursor()
        self.committed = False
        self.rolled_back = False
    def cursor(self, dictionary=False):
        return self.cursor_obj
    def commit(self): self.committed = True
    def rollback(self): self.rolled_back = True

class FakeDB:
    def __init__(self):
        self.conn = FakeConn()

    def validate(self): pass


def make_event() -> MetadataSearchResultEvent:
    return MetadataSearchResultEvent(
        referenceId=uuid.uuid4(),
        eventId=uuid.uuid4(),
        metadata=EventMetadata(
            created=datetime.now(),
            derivedFromId={uuid.uuid4()}
        ),
        results=[],
        recommended=SearchResult(
            simpleScore=1,
            prefixScore=2,
            advancedScore=3,
            sourceWeight=1.0,
            metadata=MetadataResult(
                source="test",
                title="title",
                alternateTitles=None,
                cover=None,
                bannerImage=None,
                type=MediaType.SERIE,
                summary=[Summary(language="en", description="desc")],
                genres=["action"]
            )
        ),
        status=TaskStatus.PENDING
    )

def test_persist_event_and_mark_consumed_success():
    db = FakeDB()
    event = make_event()
    persist_event_and_mark_consumed(db, event, str(event.eventId))
    # verify that commit was called
    assert db.conn.committed
    # verify that two SQL statements were executed
    assert len(db.conn.cursor_obj.executed) == 2


def make_row(task_id, ref_id):
    # Simulates a DB row as it actually appears in the TASKS table
    return {
        "REFERENCE_ID": str(ref_id),
        "TASK_ID": str(task_id),
        "TASK": "MetadataSearchTask",
        "STATUS": TaskStatus.PENDING.value,
        "DATA": json.dumps({
            "searchTitles": ["Foo", "Bar"],
            "collection": "anime"
        }),
        "CLAIMED": False,
        "CLAIMED_BY": None,
        "CONSUMED": False,
        "LAST_CHECK_IN": None,
        "PERSISTED_AT": datetime.now().isoformat()
    }

def test_fetch_next_task_maps_correctly(monkeypatch):
    task_id = uuid.uuid4()
    ref_id = uuid.uuid4()
    fake_row = make_row(task_id, ref_id)

    # Fake DB that returns the rows
    class FakeDB:
        def execute(self, query, *args, **kwargs):
            return [fake_row]

    # Monkeypatch fetch_next_task to use fake_row directly
    def fake_fetch_next_task(db):
        row = fake_row
        data = json.loads(row["DATA"])
        return MetadataSearchTask(
            referenceId=uuid.UUID(row["REFERENCE_ID"]),
            taskId=uuid.UUID(row["TASK_ID"]),
            task=row["TASK"],
            status=TaskStatus(row["STATUS"]),
            data=MetadataSearchData(
                searchTitles=data["searchTitles"],
                collection=data["collection"]
            ),
            claimed=row["CLAIMED"],
            claimedBy=row["CLAIMED_BY"],
            consumed=row["CONSUMED"],
            lastCheckIn=row["LAST_CHECK_IN"],
            persistedAt=datetime.fromisoformat(row["PERSISTED_AT"])
        )

    monkeypatch.setattr(repository, "fetch_next_task", fake_fetch_next_task)

    db = FakeDB()
    task = repository.fetch_next_task(db)

    # Verify that the mapping is correct
    assert isinstance(task, MetadataSearchTask)
    assert task.taskId == task_id
    assert task.referenceId == ref_id
    assert task.status == TaskStatus.PENDING
    assert task.data.collection == "anime"
    assert task.data.searchTitles == ["Foo", "Bar"]
    assert task.claimed is False
    assert task.consumed is False
75
apps/pyMetadata/tests/test_search_runner.py
Normal file
@@ -0,0 +1,75 @@
import asyncio
import pytest
import uuid
from datetime import datetime

from worker.search_runner import run_search
from models.metadata import Metadata, Summary, MediaType

# Dummy Metadata factory
def make_dummy_metadata(source: str, title: str = "Dummy Title") -> Metadata:
    return Metadata(
        title=title,
        altTitle=[f"{title} alt"],
        cover="http://example.com/cover.jpg",
        banner=None,
        type=MediaType.MOVIE,  # use a valid enum from your code
        summary=[Summary(summary="A fake summary", language="en")],
        genres=["Drama", "Action"],
        source=source,
    )

# Dummy Source that mimics SourceBase
class DummySource:
    def __init__(self, titles, result=None, raise_exc=False):
        self.titles = titles
        self._result = result
        self._raise_exc = raise_exc

    async def search(self):
        if self._raise_exc:
            raise RuntimeError("Search failed")
        return self._result

@pytest.mark.asyncio
async def test_run_search_all_results(monkeypatch):
    sources = [
        DummySource(["foo"], make_dummy_metadata("mal")),
        DummySource(["foo"], make_dummy_metadata("imdb")),
        DummySource(["foo"], make_dummy_metadata("anii")),
    ]
    monkeypatch.setattr("worker.search_runner.get_all_sources", lambda titles: sources)

    results = await run_search(["foo"])
    assert len(results) == 3
    assert all(isinstance(r, Metadata) for r in results)
    assert {r.source for r in results} == {"mal", "imdb", "anii"}

@pytest.mark.asyncio
async def test_run_search_filters_none(monkeypatch):
    sources = [
        DummySource(["foo"], make_dummy_metadata("mal")),
        DummySource(["foo"], None),
        DummySource(["foo"], make_dummy_metadata("imdb")),
    ]
    monkeypatch.setattr("worker.search_runner.get_all_sources", lambda titles: sources)

    results = await run_search(["foo"])
    assert len(results) == 2
    assert {r.source for r in results} == {"mal", "imdb"}

@pytest.mark.asyncio
async def test_run_search_handles_exception(monkeypatch):
    sources = [
        DummySource(["foo"], make_dummy_metadata("mal")),
        DummySource(["foo"], raise_exc=True),
        DummySource(["foo"], make_dummy_metadata("imdb")),
    ]
    monkeypatch.setattr("worker.search_runner.get_all_sources", lambda titles: sources)

    results = await run_search(["foo"])

    # Now we should only get the valid Metadata results
    assert all(isinstance(r, Metadata) for r in results)
    assert {r.source for r in results} == {"mal", "imdb"}
0
apps/pyMetadata/utils/__init__.py
Normal file
11
apps/pyMetadata/utils/backoff.py
Normal file
@@ -0,0 +1,11 @@
from utils.logger import logger
import time

def retry_delays():
    return [5, 15, 30, 60]

def wait_with_backoff():
    for delay in retry_delays():
        logger.info(f"⏳ Waiting {delay} seconds...")
        time.sleep(delay)
        yield
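wait_with_backoff is a generator: each iteration sleeps for the next delay before yielding, so the caller gets at most four retry slots. The retry loop in db/database.py consumes it exactly this way; below is a small illustrative sketch with a hypothetical fetch callable.

from utils.backoff import wait_with_backoff

def fetch_with_retries(fetch):
    """Illustrative helper (not in this commit): retry fetch() over the fixed delays."""
    for _ in wait_with_backoff():  # sleeps 5, 15, 30 and 60 seconds between attempts
        try:
            return fetch()
        except Exception:
            continue
    return None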
32
apps/pyMetadata/utils/logger.py
Normal file
@@ -0,0 +1,32 @@
import logging
import sys

# ANSI colors
COLORS = {
    "INFO": "\033[94m",     # blue
    "DEBUG": "\033[92m",    # green
    "WARNING": "\033[93m",  # yellow
    "ERROR": "\033[91m",    # red
    "RESET": "\033[0m"
}

class ColoredFormatter(logging.Formatter):
    def format(self, record):
        levelname = record.levelname
        color = COLORS.get(levelname, COLORS["RESET"])
        prefix = f"[{levelname}]"
        message = super().format(record)
        return f"{color}{prefix}{COLORS['RESET']} {message}"

def setup_logger(level=logging.INFO):
    handler = logging.StreamHandler(sys.stdout)
    formatter = ColoredFormatter("%(asctime)s - %(name)s - %(message)s")
    handler.setFormatter(formatter)

    logger = logging.getLogger()
    logger.setLevel(level)
    logger.handlers = [handler]
    return logger

# Create global logger
logger: logging.Logger = setup_logger()
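The module exposes a ready-made root logger, so callers only import it; a minimal sketch of what output looks like with the colored prefix:

from utils.logger import logger

logger.info("service starting")        # printed with a blue [INFO] prefix
logger.warning("slow response")        # yellow [WARNING]
logger.error("something went wrong")   # red [ERROR]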
0
apps/pyMetadata/worker/__init__.py
Normal file
59
apps/pyMetadata/worker/poller.py
Normal file
@@ -0,0 +1,59 @@
import asyncio
import time
from typing import Optional
import uuid
from db.database import Database
from db.repository import claim_task, fetch_next_task, mark_failed, persist_event_and_mark_consumed
from models.event import MetadataSearchResultEvent
from worker.processor import process_task
from utils.logger import logger
from config.database_config import DatabaseConfig
from models.task import MetadataSearchTask, Task

def run_iteration(db: Database, worker_id: str, poll_interval: int) -> tuple[int, int]:
    """
    Run one iteration of the poller loop.
    Returns (sleep_interval, next_interval).
    """
    try:
        task: Optional[Task] = fetch_next_task(db)
        if task:
            if not isinstance(task, MetadataSearchTask):
                logger.warning(f"⚠️ Unknown task type {type(task)} for {task.taskId}, skipping.")
                return poll_interval, poll_interval

            if not claim_task(db, str(task.taskId), worker_id):
                logger.info(f"⏩ Task {task.taskId} was claimed by another worker.")
                return poll_interval, poll_interval

            logger.info(f"🔔 Found task {task.taskId} ({task.task}), claimed by {worker_id}")
            try:
                event: MetadataSearchResultEvent = asyncio.run(process_task(task))
                if event:
                    persist_event_and_mark_consumed(db, event, str(task.taskId))
                    logger.info(f"✅ Task {task.taskId} finished processing")
                else:
                    logger.error(f"❌ Task returned nothing! {task.taskId}")
                    raise RuntimeError("process_task returned nothing!")
            except Exception as task_error:
                logger.error(f"❌ Task {task.taskId} failed during processing: {task_error}")
                mark_failed(db, str(task.taskId))
            return poll_interval, 5  # sleep with the current interval, reset to 5
        else:
            logger.debug("No new tasks.")
            return poll_interval, min(poll_interval * 2, 60)
    except Exception as e:
        logger.error(f"⚠️ Error in worker: {e}")
        db.connect()
        return poll_interval, 5

def run_worker(db: Database, shutdown_flag_ref=lambda: False) -> None:
    poll_interval: int = 5
    worker_id = f"worker-{uuid.uuid4()}"

    while not shutdown_flag_ref():
        sleep_interval, poll_interval = run_iteration(db, worker_id, poll_interval)
        time.sleep(sleep_interval)

    logger.info("👋 run_worker loop stopped")
    db.close()
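No entrypoint module is included in this commit, so here is a hedged sketch of how run_worker could be wired up: a signal handler flips the flag that shutdown_flag_ref reads. Note that run_worker ends by calling db.close(), which the Database class in this commit does not yet define.

import signal

from config.database_config import DatabaseConfig
from db.database import Database
from worker.poller import run_worker

shutdown = {"flag": False}

def _handle_signal(signum, frame):
    shutdown["flag"] = True

signal.signal(signal.SIGTERM, _handle_signal)
signal.signal(signal.SIGINT, _handle_signal)

db = Database(DatabaseConfig.from_env())
db.connect()
run_worker(db=db, shutdown_flag_ref=lambda: shutdown["flag"])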
128
apps/pyMetadata/worker/processor.py
Normal file
@@ -0,0 +1,128 @@
import uuid
from datetime import datetime

from tabulate import tabulate
from models.metadata import Metadata
from models.task import MetadataSearchTask
from utils.logger import logger
from models.event import (
    EventMetadata,
    MetadataSearchResultEvent,
    SearchResult,
    MetadataResult,
    Summary,
    TaskStatus,
    MediaType,
)
from worker.search_runner import run_search
from algo.SimpleMatcher import SimpleMatcher
from algo.PrefixMatcher import PrefixMatcher
from algo.AdvancedMatcher import AdvancedMatcher
from db.repository import mark_failed

def source_priority(source: str) -> int:
    """Domain-specific source weighting."""
    priority_map = {'mal': 1, 'anii': 2, 'imdb': 3}
    return priority_map.get(source, 4)


def score_metadata_against_title(title, m: Metadata) -> SearchResult:
    simple = SimpleMatcher(title, m).getScore()
    prefix = PrefixMatcher(title, m).getScore()
    advanced = AdvancedMatcher(title, m).getScore()

    # IMPORTANT: map exactly to bannerImage, not banner.
    metadata_result = MetadataResult(
        source=m.source,
        title=m.title,
        alternateTitles=m.altTitle if m.altTitle else [],
        cover=getattr(m, "cover", None),
        bannerImage=getattr(m, "bannerImage", None),  # no renaming
        type=m.type,  # must already be MediaType
        summary=[Summary(language=s.language, description=s.summary) for s in m.summary],
        genres=m.genres,
    )

    return SearchResult(
        simpleScore=simple,
        prefixScore=prefix,
        advancedScore=advanced,
        sourceWeight=1.0,
        metadata=metadata_result
    )


def print_summary(results: list[SearchResult], titles: list[str]) -> None:
    """Print a table with scores for all combinations."""
    rows = []
    for r in results:
        rows.append((
            # NB: metadata.title is the matched title; the search title can be stored in SearchResult if needed
            r.metadata.title,
            r.metadata.source,
            r.simpleScore,
            r.prefixScore,
            r.advancedScore
        ))
    headers = ["Matched Title", "Source", "Simple", "Prefix", "Advanced"]
    print(tabulate(rows, headers=headers))


def choose_recommended(results: list[SearchResult]) -> SearchResult:
    """Choose the recommended result based on scores and source weighting."""
    return max(
        results,
        key=lambda r: (
            r.advancedScore,
            r.simpleScore,
            r.prefixScore,
            -source_priority(r.metadata.source)
        )
    )


async def process_task(db, task: MetadataSearchTask) -> MetadataSearchResultEvent | None:
    titles = task.data.searchTitles
    logger.info(f"Processing task {task.taskId} with titles: {titles}")

    try:
        metadata_list = await run_search(titles)
        if not metadata_list:
            mark_failed(db, task.taskId)
            return

        # 1) Score all combinations
        results = []
        for m in metadata_list:
            for t in titles:
                results.append(score_metadata_against_title(t, m))

        # 2) Print table
        print_summary(results, titles)

        # 3) Choose recommended
        recommended = choose_recommended(results)

        # 4) Build event
        core_metadata = EventMetadata(
            created=datetime.now(),
            derivedFromId={task.referenceId, task.taskId}
        )

        event = MetadataSearchResultEvent(
            referenceId=task.referenceId,
            eventId=uuid.uuid4(),
            metadata=core_metadata,
            results=results,
            recommended=recommended,
            status=TaskStatus.COMPLETED
        )

        # 5) Return
        logger.info(f"✅ Task {task.taskId} finished processing with {len(results)} results")
        return event

    except Exception as e:
        logger.error(f"❌ Task {task.taskId} failed: {e}")
        mark_failed(db, task.taskId)
        return None
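choose_recommended ranks purely by the score tuple: advancedScore first, then simpleScore, then prefixScore, with source_priority only breaking exact ties (lower priority number wins). A small worked sketch with made-up scores:

from models.event import SearchResult, MetadataResult
from models.enums import MediaType
from worker.processor import choose_recommended

# Illustrative only: two results with identical scores, different sources.
# source_priority: mal=1, anii=2, imdb=3, everything else 4.
a = SearchResult(simpleScore=70, prefixScore=60, advancedScore=80, sourceWeight=1.0,
                 metadata=MetadataResult(source="imdb", title="Foo", alternateTitles=[], cover=None,
                                         bannerImage=None, type=MediaType.MOVIE, summary=[], genres=[]))
b = SearchResult(simpleScore=70, prefixScore=60, advancedScore=80, sourceWeight=1.0,
                 metadata=MetadataResult(source="mal", title="Foo", alternateTitles=[], cover=None,
                                         bannerImage=None, type=MediaType.MOVIE, summary=[], genres=[]))

assert choose_recommended([a, b]) is b  # tie on scores, so mal (priority 1) beats imdb (priority 3)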
31
apps/pyMetadata/worker/search_runner.py
Normal file
@@ -0,0 +1,31 @@
# search_runner.py
import asyncio
from typing import List
from models.metadata import Metadata
from utils.logger import logger
from sources.factory import get_all_sources

async def run_search(titles: List[str]) -> List[Metadata]:
    """
    Runs all sources in parallel on the given titles.
    Returns a list of Metadata from all sources.
    No mapping or scoring is done here.
    """

    sources = get_all_sources(titles)

    # Run all source searches in parallel
    results = await asyncio.gather(*(s.search() for s in sources), return_exceptions=True)

    metadata_results: List[Metadata] = []
    for source, r in zip(sources, results):
        if isinstance(r, Exception):
            logger.warning(
                f"Source '{source.__class__.__name__}' failed during search "
                f"with titles={source.titles}: {r}"
            )
        elif r is not None:
            metadata_results.append(r)

    logger.info(f"Search finished: {len(metadata_results)} results from {len(sources)} sources")
    return metadata_results
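run_search is a coroutine, so outside the worker it has to be driven by an event loop; the title below is illustrative.

import asyncio

from worker.search_runner import run_search

# Each configured source is queried in parallel; failing sources are only logged, never raised.
results = asyncio.run(run_search(["Cowboy Bebop"]))
for metadata in results:
    print(metadata.source, metadata.title)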
19
apps/pyWatcher/api/health_api.py
Normal file
@@ -0,0 +1,19 @@
from fastapi import FastAPI
from fastapi.responses import JSONResponse

def create_health_app(observers_ref):
    """
    Returns a FastAPI app with a /health endpoint.
    observers_ref: a function or lambda that provides the list of observers.
    """
    app = FastAPI()

    @app.get("/health")
    def health():
        observers = observers_ref()
        healthy = all(obs.is_alive() for obs in observers)
        status = "healthy" if healthy else "unhealthy"
        code = 200 if healthy else 500
        return JSONResponse({"status": status}, status_code=code)

    return app
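A sketch of serving this app with uvicorn; the observer list and port are assumptions, not part of this commit, and the endpoint only requires that each observer expose is_alive() (thread objects satisfy this).

import threading

import uvicorn

from api.health_api import create_health_app

observers: list[threading.Thread] = []  # e.g. watcher threads started elsewhere

app = create_health_app(lambda: observers)

if __name__ == "__main__":
    # /health returns 200 while every observer thread is alive, 500 otherwise.
    uvicorn.run(app, host="0.0.0.0", port=8080)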