Updated metadata

This commit is contained in:
bskjon 2024-12-30 04:14:28 +01:00
parent ff21cb9595
commit c2cd6f451e
3 changed files with 50 additions and 33 deletions

View File

@ -6,7 +6,7 @@ from typing import List, Optional
import uuid
import threading
import json
import time
import time, asyncio
from fuzzywuzzy import fuzz
from algo.AdvancedMatcher import AdvancedMatcher
@ -15,6 +15,7 @@ from algo.PrefixMatcher import PrefixMatcher
from clazz.Metadata import Metadata
from clazz.shared import EventData, EventMetadata, MediaEvent
from app import MetadataEventHandler
from sources.anii import Anii
from sources.imdb import Imdb
from sources.mal import Mal
@ -56,32 +57,37 @@ else:
class DryRun():
titles: List[str] = []
searchTitles: List[str] = []
title: str
sanitizedName: str
def __init__(self, titles: List[str]) -> None:
self.titles = titles
def __init__(self, title: str, sanitizedName: str, searchTitles: List[str]) -> None:
self.title = title
self.sanitizedName = sanitizedName
self.searchTitles = searchTitles
def run(self) -> None:
combined_titles = ", ".join(self.titles)
logger.info("Searching for %s", combined_titles)
result: Metadata | None = self.__getMetadata(self.titles)
message: str | None = None
if (result is None):
message = f"No result for {combined_titles}"
logger.info(message)
message = MediaEvent(
metadata = EventMetadata(
referenceId="00000000-0000-0000-0000-000000000000",
eventId="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX",
derivedFromEventId=None,
status= "Failed" if result is None else "Success",
evnet = MediaEvent(
metadata=EventMetadata(
derivedFromEventId="ccccccccc-cccc-cccc-cccc-cccccccccccc",
eventId="eeeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
referenceId="rrrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr",
status="Success",
created="2024-12-28T16:19:31.917684523",
source="DryRun"
),
data=result
eventType="DryRun",
data=EventData(
title=self.title,
sanitizedName=self.sanitizedName,
searchTitles=self.searchTitles
)
)
logger.info(message)
handler = MetadataEventHandler(evnet)
asyncio.run(handler.run())
def __getMetadata(self, titles: List[str]) -> Metadata | None:
mal = Mal(titles=titles)

View File

@ -1,22 +1,32 @@
from fuzzywuzzy import fuzz
import re
from .AlgorithmBase import AlgorithmBase, MatchResult
from clazz.Metadata import Metadata
class AdvancedMatcher(AlgorithmBase):
def clean_title(self, title: str) -> str:
    """Return *title* with any trailing subtitle removed.

    Everything from the first subtitle delimiter to the end of the line is
    dropped and surrounding whitespace is stripped. A delimiter is:

    * a colon or an em dash anywhere in the title, or
    * a hyphen with whitespace on at least one side.

    A hyphen inside a word ("Spider-Man", "Re-Zero") is part of the name and
    is kept; cutting there would reduce unrelated titles to the same short
    prefix and make them compare as identical.

    Args:
        title: The raw title to clean.

    Returns:
        The title without its subtitle. If removing the subtitle would leave
        nothing (the title starts with a delimiter), the stripped original
        title is returned so callers never compare against an empty string.
    """
    # Removes any extra text after a colon or other delimiters.
    cleaned = re.sub(r'(?:[:—]|\s-|-\s).*', '', title).strip()
    # Fall back to the untouched title instead of returning "".
    return cleaned or title.strip()
def getBestMatch(self) -> Metadata | None:
best_match = None
best_score = -1
match_results = []
for title in self.titles:
cleaned_title = self.clean_title(title) # Renset tittel uten ekstra tekst
for metadata in self.metadata:
# Compute different match ratios
title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
alt_title_ratios = [fuzz.token_sort_ratio(title.lower(), alt_title.lower()) for alt_title in metadata.altTitle]
cleaned_metadata_title = self.clean_title(metadata.title) # Renset metadata-tittel
# Compute different match ratios for both the original and cleaned titles
original_title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
cleaned_title_ratio = fuzz.token_sort_ratio(cleaned_title.lower(), cleaned_metadata_title.lower())
alt_title_ratios = [fuzz.token_sort_ratio(cleaned_title.lower(), self.clean_title(alt_title).lower()) for alt_title in metadata.altTitle]
max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0
# Combine ratios as desired
combined_score = max(title_ratio, max_alt_title_ratio)
# Combine ratios: take the best of original vs cleaned title, and alt title match
combined_score = max(original_title_ratio, cleaned_title_ratio, max_alt_title_ratio)
match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata))

View File

@ -250,14 +250,15 @@ class MetadataEventHandler:
event: MediaEvent = self.mediaEvent
searchableTitles: List[str] = event.data.searchTitles
searchableTitles.extend([
unique_titles = set(event.data.searchTitles)
unique_titles.update([
event.data.title,
event.data.sanitizedName
])
searchableTitles = list(unique_titles)
joinedTitles = "\n".join(searchableTitles)
logger.info("Searching for: %s", joinedTitles)
logger.info("Searching for:\n%s", joinedTitles)
# Kjør den asynkrone søkemetoden
result: Metadata | None = await self.__getMetadata(searchableTitles)
@ -300,10 +301,10 @@ class MetadataEventHandler:
logger.info("\nPrefix matcher")
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
if simpleSelector is not None:
return simpleSelector
if advancedSelector is not None:
return advancedSelector
if simpleSelector is not None:
return simpleSelector
if prefixSelector is not None:
return prefixSelector
return None