Updated metadata

This commit is contained in:
bskjon 2024-12-30 04:14:28 +01:00
parent ff21cb9595
commit c2cd6f451e
3 changed files with 50 additions and 33 deletions

View File

@@ -6,7 +6,7 @@ from typing import List, Optional
import uuid import uuid
import threading import threading
import json import json
import time import time, asyncio
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from algo.AdvancedMatcher import AdvancedMatcher from algo.AdvancedMatcher import AdvancedMatcher
@@ -15,6 +15,7 @@ from algo.PrefixMatcher import PrefixMatcher
from clazz.Metadata import Metadata from clazz.Metadata import Metadata
from clazz.shared import EventData, EventMetadata, MediaEvent from clazz.shared import EventData, EventMetadata, MediaEvent
from app import MetadataEventHandler
from sources.anii import Anii from sources.anii import Anii
from sources.imdb import Imdb from sources.imdb import Imdb
from sources.mal import Mal from sources.mal import Mal
@@ -56,32 +57,37 @@ else:
class DryRun(): class DryRun():
titles: List[str] = [] searchTitles: List[str] = []
title: str
sanitizedName: str
def __init__(self, titles: List[str]) -> None: def __init__(self, title: str, sanitizedName: str, searchTitles: List[str]) -> None:
self.titles = titles self.title = title
self.sanitizedName = sanitizedName
self.searchTitles = searchTitles
def run(self) -> None: def run(self) -> None:
combined_titles = ", ".join(self.titles) evnet = MediaEvent(
logger.info("Searching for %s", combined_titles) metadata=EventMetadata(
result: Metadata | None = self.__getMetadata(self.titles) derivedFromEventId="ccccccccc-cccc-cccc-cccc-cccccccccccc",
eventId="eeeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
message: str | None = None referenceId="rrrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr",
if (result is None): status="Success",
message = f"No result for {combined_titles}" created="2024-12-28T16:19:31.917684523",
logger.info(message) source="DryRun"
message = MediaEvent(
metadata = EventMetadata(
referenceId="00000000-0000-0000-0000-000000000000",
eventId="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX",
derivedFromEventId=None,
status= "Failed" if result is None else "Success",
), ),
data=result eventType="DryRun",
data=EventData(
title=self.title,
sanitizedName=self.sanitizedName,
searchTitles=self.searchTitles
)
) )
logger.info(message) handler = MetadataEventHandler(evnet)
asyncio.run(handler.run())
def __getMetadata(self, titles: List[str]) -> Metadata | None: def __getMetadata(self, titles: List[str]) -> Metadata | None:
mal = Mal(titles=titles) mal = Mal(titles=titles)

View File

@@ -1,22 +1,32 @@
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
import re
from .AlgorithmBase import AlgorithmBase, MatchResult from .AlgorithmBase import AlgorithmBase, MatchResult
from clazz.Metadata import Metadata from clazz.Metadata import Metadata
class AdvancedMatcher(AlgorithmBase): class AdvancedMatcher(AlgorithmBase):
 def clean_title(self, title: str) -> str:
 # Drop the first colon, hyphen, or em dash and everything after it
 # (e.g. a subtitle), then trim surrounding whitespace.
 # NOTE(review): a plain hyphen inside a title (e.g. "Spider-Man") also
 # truncates it here — confirm this is intended.
 return re.sub(r'[:\-\—].*', '', title).strip()
def getBestMatch(self) -> Metadata | None: def getBestMatch(self) -> Metadata | None:
best_match = None best_match = None
best_score = -1 best_score = -1
match_results = [] match_results = []
for title in self.titles: for title in self.titles:
cleaned_title = self.clean_title(title) # Renset tittel uten ekstra tekst
for metadata in self.metadata: for metadata in self.metadata:
# Compute different match ratios cleaned_metadata_title = self.clean_title(metadata.title) # Renset metadata-tittel
title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
alt_title_ratios = [fuzz.token_sort_ratio(title.lower(), alt_title.lower()) for alt_title in metadata.altTitle] # Compute different match ratios for both the original and cleaned titles
original_title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
cleaned_title_ratio = fuzz.token_sort_ratio(cleaned_title.lower(), cleaned_metadata_title.lower())
alt_title_ratios = [fuzz.token_sort_ratio(cleaned_title.lower(), self.clean_title(alt_title).lower()) for alt_title in metadata.altTitle]
max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0 max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0
# Combine ratios as desired # Combine ratios: take the best of original vs cleaned title, and alt title match
combined_score = max(title_ratio, max_alt_title_ratio) combined_score = max(original_title_ratio, cleaned_title_ratio, max_alt_title_ratio)
match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata)) match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata))
@@ -28,4 +38,4 @@ class AdvancedMatcher(AlgorithmBase):
# Print match summary # Print match summary
self.print_match_summary(match_results) self.print_match_summary(match_results)
return best_match return best_match

View File

@@ -250,14 +250,15 @@ class MetadataEventHandler:
event: MediaEvent = self.mediaEvent event: MediaEvent = self.mediaEvent
searchableTitles: List[str] = event.data.searchTitles unique_titles = set(event.data.searchTitles)
searchableTitles.extend([ unique_titles.update([
event.data.title, event.data.title,
event.data.sanitizedName event.data.sanitizedName
]) ])
searchableTitles = list(unique_titles)
joinedTitles = "\n".join(searchableTitles) joinedTitles = "\n".join(searchableTitles)
logger.info("Searching for: %s", joinedTitles) logger.info("Searching for:\n%s", joinedTitles)
 # Run the asynchronous search method # Run the asynchronous search method
result: Metadata | None = await self.__getMetadata(searchableTitles) result: Metadata | None = await self.__getMetadata(searchableTitles)
@@ -300,10 +301,10 @@ class MetadataEventHandler:
logger.info("\nPrefix matcher") logger.info("\nPrefix matcher")
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch() prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
if simpleSelector is not None:
return simpleSelector
if advancedSelector is not None: if advancedSelector is not None:
return advancedSelector return advancedSelector
if simpleSelector is not None:
return simpleSelector
if prefixSelector is not None: if prefixSelector is not None:
return prefixSelector return prefixSelector
return None return None