Updated metadata
This commit is contained in:
parent
ff21cb9595
commit
c2cd6f451e
@ -6,7 +6,7 @@ from typing import List, Optional
|
||||
import uuid
|
||||
import threading
|
||||
import json
|
||||
import time
|
||||
import time, asyncio
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
from algo.AdvancedMatcher import AdvancedMatcher
|
||||
@ -15,6 +15,7 @@ from algo.PrefixMatcher import PrefixMatcher
|
||||
from clazz.Metadata import Metadata
|
||||
|
||||
from clazz.shared import EventData, EventMetadata, MediaEvent
|
||||
from app import MetadataEventHandler
|
||||
from sources.anii import Anii
|
||||
from sources.imdb import Imdb
|
||||
from sources.mal import Mal
|
||||
@ -56,32 +57,37 @@ else:
|
||||
|
||||
|
||||
class DryRun():
|
||||
titles: List[str] = []
|
||||
searchTitles: List[str] = []
|
||||
title: str
|
||||
sanitizedName: str
|
||||
|
||||
def __init__(self, titles: List[str]) -> None:
|
||||
self.titles = titles
|
||||
def __init__(self, title: str, sanitizedName: str, searchTitles: List[str]) -> None:
|
||||
self.title = title
|
||||
self.sanitizedName = sanitizedName
|
||||
self.searchTitles = searchTitles
|
||||
|
||||
def run(self) -> None:
|
||||
combined_titles = ", ".join(self.titles)
|
||||
logger.info("Searching for %s", combined_titles)
|
||||
result: Metadata | None = self.__getMetadata(self.titles)
|
||||
|
||||
message: str | None = None
|
||||
if (result is None):
|
||||
message = f"No result for {combined_titles}"
|
||||
logger.info(message)
|
||||
|
||||
message = MediaEvent(
|
||||
metadata = EventMetadata(
|
||||
referenceId="00000000-0000-0000-0000-000000000000",
|
||||
eventId="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX",
|
||||
derivedFromEventId=None,
|
||||
status= "Failed" if result is None else "Success",
|
||||
evnet = MediaEvent(
|
||||
metadata=EventMetadata(
|
||||
derivedFromEventId="ccccccccc-cccc-cccc-cccc-cccccccccccc",
|
||||
eventId="eeeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
|
||||
referenceId="rrrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr",
|
||||
status="Success",
|
||||
created="2024-12-28T16:19:31.917684523",
|
||||
source="DryRun"
|
||||
),
|
||||
data=result
|
||||
eventType="DryRun",
|
||||
data=EventData(
|
||||
title=self.title,
|
||||
sanitizedName=self.sanitizedName,
|
||||
searchTitles=self.searchTitles
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(message)
|
||||
handler = MetadataEventHandler(evnet)
|
||||
|
||||
asyncio.run(handler.run())
|
||||
|
||||
|
||||
def __getMetadata(self, titles: List[str]) -> Metadata | None:
|
||||
mal = Mal(titles=titles)
|
||||
|
||||
@ -1,22 +1,32 @@
|
||||
from fuzzywuzzy import fuzz
|
||||
import re
|
||||
from .AlgorithmBase import AlgorithmBase, MatchResult
|
||||
from clazz.Metadata import Metadata
|
||||
|
||||
class AdvancedMatcher(AlgorithmBase):
|
||||
def clean_title(self, title: str) -> str:
    """Return *title* with any trailing subtitle text removed.

    Everything from the first colon, hyphen or em dash up to the end of
    the line is dropped, and surrounding whitespace is stripped, so that
    "Naruto: Shippuden" becomes "Naruto".

    If the title starts with a separator, cleaning would leave an empty
    string. In that case the stripped original title is returned instead,
    so callers never compare two empty strings against each other.

    Args:
        title: The raw title to clean.

    Returns:
        The cleaned title, or the stripped original title when cleaning
        would otherwise produce an empty string.
    """
    # NOTE(review): the pattern also cuts at separators inside a word
    # ("Re:Zero" -> "Re", "Spider-Man" -> "Spider"); confirm that this is
    # the intended behaviour for the matcher.
    cleaned = re.sub(r'[:\-\—].*', '', title).strip()
    # Fall back to the untouched title rather than returning "".
    return cleaned or title.strip()
|
||||
|
||||
def getBestMatch(self) -> Metadata | None:
|
||||
best_match = None
|
||||
best_score = -1
|
||||
match_results = []
|
||||
|
||||
for title in self.titles:
|
||||
cleaned_title = self.clean_title(title) # Renset tittel uten ekstra tekst
|
||||
for metadata in self.metadata:
|
||||
# Compute different match ratios
|
||||
title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
|
||||
alt_title_ratios = [fuzz.token_sort_ratio(title.lower(), alt_title.lower()) for alt_title in metadata.altTitle]
|
||||
cleaned_metadata_title = self.clean_title(metadata.title) # Renset metadata-tittel
|
||||
|
||||
# Compute different match ratios for both the original and cleaned titles
|
||||
original_title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
|
||||
cleaned_title_ratio = fuzz.token_sort_ratio(cleaned_title.lower(), cleaned_metadata_title.lower())
|
||||
|
||||
alt_title_ratios = [fuzz.token_sort_ratio(cleaned_title.lower(), self.clean_title(alt_title).lower()) for alt_title in metadata.altTitle]
|
||||
max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0
|
||||
|
||||
# Combine ratios as desired
|
||||
combined_score = max(title_ratio, max_alt_title_ratio)
|
||||
# Combine ratios: take the best of original vs cleaned title, and alt title match
|
||||
combined_score = max(original_title_ratio, cleaned_title_ratio, max_alt_title_ratio)
|
||||
|
||||
match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata))
|
||||
|
||||
|
||||
@ -250,14 +250,15 @@ class MetadataEventHandler:
|
||||
|
||||
event: MediaEvent = self.mediaEvent
|
||||
|
||||
searchableTitles: List[str] = event.data.searchTitles
|
||||
searchableTitles.extend([
|
||||
unique_titles = set(event.data.searchTitles)
|
||||
unique_titles.update([
|
||||
event.data.title,
|
||||
event.data.sanitizedName
|
||||
])
|
||||
searchableTitles = list(unique_titles)
|
||||
|
||||
joinedTitles = "\n".join(searchableTitles)
|
||||
logger.info("Searching for: %s", joinedTitles)
|
||||
logger.info("Searching for:\n%s", joinedTitles)
|
||||
|
||||
# Run the asynchronous search method
|
||||
result: Metadata | None = await self.__getMetadata(searchableTitles)
|
||||
@ -300,10 +301,10 @@ class MetadataEventHandler:
|
||||
logger.info("\nPrefix matcher")
|
||||
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
||||
|
||||
if simpleSelector is not None:
|
||||
return simpleSelector
|
||||
if advancedSelector is not None:
|
||||
return advancedSelector
|
||||
if simpleSelector is not None:
|
||||
return simpleSelector
|
||||
if prefixSelector is not None:
|
||||
return prefixSelector
|
||||
return None
|
||||
|
||||
Loading…
Reference in New Issue
Block a user