Updated metadata
This commit is contained in:
parent
ff21cb9595
commit
c2cd6f451e
@ -6,7 +6,7 @@ from typing import List, Optional
|
|||||||
import uuid
|
import uuid
|
||||||
import threading
|
import threading
|
||||||
import json
|
import json
|
||||||
import time
|
import time, asyncio
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
from algo.AdvancedMatcher import AdvancedMatcher
|
from algo.AdvancedMatcher import AdvancedMatcher
|
||||||
@ -15,6 +15,7 @@ from algo.PrefixMatcher import PrefixMatcher
|
|||||||
from clazz.Metadata import Metadata
|
from clazz.Metadata import Metadata
|
||||||
|
|
||||||
from clazz.shared import EventData, EventMetadata, MediaEvent
|
from clazz.shared import EventData, EventMetadata, MediaEvent
|
||||||
|
from app import MetadataEventHandler
|
||||||
from sources.anii import Anii
|
from sources.anii import Anii
|
||||||
from sources.imdb import Imdb
|
from sources.imdb import Imdb
|
||||||
from sources.mal import Mal
|
from sources.mal import Mal
|
||||||
@ -56,32 +57,37 @@ else:
|
|||||||
|
|
||||||
|
|
||||||
class DryRun():
|
class DryRun():
|
||||||
titles: List[str] = []
|
searchTitles: List[str] = []
|
||||||
|
title: str
|
||||||
|
sanitizedName: str
|
||||||
|
|
||||||
def __init__(self, titles: List[str]) -> None:
|
def __init__(self, title: str, sanitizedName: str, searchTitles: List[str]) -> None:
|
||||||
self.titles = titles
|
self.title = title
|
||||||
|
self.sanitizedName = sanitizedName
|
||||||
|
self.searchTitles = searchTitles
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
combined_titles = ", ".join(self.titles)
|
evnet = MediaEvent(
|
||||||
logger.info("Searching for %s", combined_titles)
|
metadata=EventMetadata(
|
||||||
result: Metadata | None = self.__getMetadata(self.titles)
|
derivedFromEventId="ccccccccc-cccc-cccc-cccc-cccccccccccc",
|
||||||
|
eventId="eeeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
|
||||||
message: str | None = None
|
referenceId="rrrrrrrrr-rrrr-rrrr-rrrr-rrrrrrrrrrrr",
|
||||||
if (result is None):
|
status="Success",
|
||||||
message = f"No result for {combined_titles}"
|
created="2024-12-28T16:19:31.917684523",
|
||||||
logger.info(message)
|
source="DryRun"
|
||||||
|
|
||||||
message = MediaEvent(
|
|
||||||
metadata = EventMetadata(
|
|
||||||
referenceId="00000000-0000-0000-0000-000000000000",
|
|
||||||
eventId="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX",
|
|
||||||
derivedFromEventId=None,
|
|
||||||
status= "Failed" if result is None else "Success",
|
|
||||||
),
|
),
|
||||||
data=result
|
eventType="DryRun",
|
||||||
|
data=EventData(
|
||||||
|
title=self.title,
|
||||||
|
sanitizedName=self.sanitizedName,
|
||||||
|
searchTitles=self.searchTitles
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(message)
|
handler = MetadataEventHandler(evnet)
|
||||||
|
|
||||||
|
asyncio.run(handler.run())
|
||||||
|
|
||||||
|
|
||||||
def __getMetadata(self, titles: List[str]) -> Metadata | None:
|
def __getMetadata(self, titles: List[str]) -> Metadata | None:
|
||||||
mal = Mal(titles=titles)
|
mal = Mal(titles=titles)
|
||||||
|
|||||||
@ -1,22 +1,32 @@
|
|||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
import re
|
||||||
from .AlgorithmBase import AlgorithmBase, MatchResult
|
from .AlgorithmBase import AlgorithmBase, MatchResult
|
||||||
from clazz.Metadata import Metadata
|
from clazz.Metadata import Metadata
|
||||||
|
|
||||||
class AdvancedMatcher(AlgorithmBase):
|
class AdvancedMatcher(AlgorithmBase):
|
||||||
|
def clean_title(self, title: str) -> str:
|
||||||
|
# Fjerner eventuelle ekstra tekster etter kolon eller andre skilletegn
|
||||||
|
return re.sub(r'[:\-\—].*', '', title).strip()
|
||||||
|
|
||||||
def getBestMatch(self) -> Metadata | None:
|
def getBestMatch(self) -> Metadata | None:
|
||||||
best_match = None
|
best_match = None
|
||||||
best_score = -1
|
best_score = -1
|
||||||
match_results = []
|
match_results = []
|
||||||
|
|
||||||
for title in self.titles:
|
for title in self.titles:
|
||||||
|
cleaned_title = self.clean_title(title) # Renset tittel uten ekstra tekst
|
||||||
for metadata in self.metadata:
|
for metadata in self.metadata:
|
||||||
# Compute different match ratios
|
cleaned_metadata_title = self.clean_title(metadata.title) # Renset metadata-tittel
|
||||||
title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
|
|
||||||
alt_title_ratios = [fuzz.token_sort_ratio(title.lower(), alt_title.lower()) for alt_title in metadata.altTitle]
|
# Compute different match ratios for both the original and cleaned titles
|
||||||
|
original_title_ratio = fuzz.token_sort_ratio(title.lower(), metadata.title.lower())
|
||||||
|
cleaned_title_ratio = fuzz.token_sort_ratio(cleaned_title.lower(), cleaned_metadata_title.lower())
|
||||||
|
|
||||||
|
alt_title_ratios = [fuzz.token_sort_ratio(cleaned_title.lower(), self.clean_title(alt_title).lower()) for alt_title in metadata.altTitle]
|
||||||
max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0
|
max_alt_title_ratio = max(alt_title_ratios) if alt_title_ratios else 0
|
||||||
|
|
||||||
# Combine ratios as desired
|
# Combine ratios: take the best of original vs cleaned title, and alt title match
|
||||||
combined_score = max(title_ratio, max_alt_title_ratio)
|
combined_score = max(original_title_ratio, cleaned_title_ratio, max_alt_title_ratio)
|
||||||
|
|
||||||
match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata))
|
match_results.append(MatchResult(title, metadata.title, combined_score, metadata.source, metadata))
|
||||||
|
|
||||||
@ -28,4 +38,4 @@ class AdvancedMatcher(AlgorithmBase):
|
|||||||
# Print match summary
|
# Print match summary
|
||||||
self.print_match_summary(match_results)
|
self.print_match_summary(match_results)
|
||||||
|
|
||||||
return best_match
|
return best_match
|
||||||
@ -250,14 +250,15 @@ class MetadataEventHandler:
|
|||||||
|
|
||||||
event: MediaEvent = self.mediaEvent
|
event: MediaEvent = self.mediaEvent
|
||||||
|
|
||||||
searchableTitles: List[str] = event.data.searchTitles
|
unique_titles = set(event.data.searchTitles)
|
||||||
searchableTitles.extend([
|
unique_titles.update([
|
||||||
event.data.title,
|
event.data.title,
|
||||||
event.data.sanitizedName
|
event.data.sanitizedName
|
||||||
])
|
])
|
||||||
|
searchableTitles = list(unique_titles)
|
||||||
|
|
||||||
joinedTitles = "\n".join(searchableTitles)
|
joinedTitles = "\n".join(searchableTitles)
|
||||||
logger.info("Searching for: %s", joinedTitles)
|
logger.info("Searching for:\n%s", joinedTitles)
|
||||||
|
|
||||||
# Kjør den asynkrone søkemetoden
|
# Kjør den asynkrone søkemetoden
|
||||||
result: Metadata | None = await self.__getMetadata(searchableTitles)
|
result: Metadata | None = await self.__getMetadata(searchableTitles)
|
||||||
@ -300,10 +301,10 @@ class MetadataEventHandler:
|
|||||||
logger.info("\nPrefix matcher")
|
logger.info("\nPrefix matcher")
|
||||||
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
||||||
|
|
||||||
if simpleSelector is not None:
|
|
||||||
return simpleSelector
|
|
||||||
if advancedSelector is not None:
|
if advancedSelector is not None:
|
||||||
return advancedSelector
|
return advancedSelector
|
||||||
|
if simpleSelector is not None:
|
||||||
|
return simpleSelector
|
||||||
if prefixSelector is not None:
|
if prefixSelector is not None:
|
||||||
return prefixSelector
|
return prefixSelector
|
||||||
return None
|
return None
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user