"""Rank metadata candidates against known titles using weighted fuzzy matching."""

from dataclasses import dataclass
import logging
import re
from typing import List, Optional

from fuzzywuzzy import fuzz
import pandas as pd
from unidecode import unidecode

from clazz.Metadata import Metadata

log = logging.getLogger(__name__)

# Compiled once at import time; normalisation runs for every title/candidate pair.
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')
_MULTI_SPACE_RE = re.compile(r'\s{2,}')
# Path separators in a title must not be interpreted as directories when the
# title is used as a log file name.
_PATH_SEP_RE = re.compile(r'[\\/]')


@dataclass
class WeightedData:
    """A metadata candidate together with the trust weight of its source."""

    result: Metadata
    weight: float


@dataclass
class DataAndScore:
    """A scored metadata candidate.

    ``score`` is the best fuzzy-match ratio multiplied by ``weight``;
    ``matched_title`` is the query title the score was computed for.
    """

    result: Metadata
    score: float
    weight: float
    matched_title: str


class UseSource:
    """Score metadata candidates from several sources against a list of titles."""

    # Annotation-only on purpose: a class-level ``= []`` would be one list
    # shared by every instance, so sources appended in one instance would leak
    # into all others. The lists are created per instance in ``__init__``.
    titles: List[str]
    dataWeighed: List[WeightedData]

    def __init__(self, titles: List[str], mal: Optional[Metadata] = None, imdb: Optional[Metadata] = None, anii: Optional[Metadata] = None) -> None:
        """Register the query titles and whichever source results are present.

        Args:
            titles: Titles to match the candidates against.
            mal: Candidate from the ``mal`` source (weight 1.5), or ``None``.
            imdb: Candidate from the ``imdb`` source (weight 1), or ``None``.
            anii: Candidate from the ``anii`` source (weight 1.3), or ``None``.
        """
        self.titles = titles
        self.dataWeighed = []
        if mal is not None:
            self.dataWeighed.append(WeightedData(mal, 1.5))
        if imdb is not None:
            self.dataWeighed.append(WeightedData(imdb, 1))
        if anii is not None:
            self.dataWeighed.append(WeightedData(anii, 1.3))

    def stripped(self, input_string: str) -> str:
        """Return ``input_string`` transliterated to ASCII with punctuation removed.

        Every character that is not an ASCII letter, digit or whitespace is
        replaced by a space, runs of whitespace are collapsed to a single
        space, and the result is trimmed. Letter case is left untouched.
        """
        unitext = unidecode(input_string)
        unitext = _NON_ALNUM_RE.sub(' ', unitext)
        unitext = _MULTI_SPACE_RE.sub(' ', unitext)
        return unitext.strip()

    def _comparable(self, text: str) -> str:
        """Return the case-insensitive comparison form of ``text``."""
        # Lower-case AFTER transliteration: unidecode emits capitalised ASCII
        # for some scripts (e.g. CJK), which lower-casing beforehand would
        # miss, and fuzz.ratio is case-sensitive.
        return self.stripped(text).lower()

    def __calculate_score(self, title: str, weightData: List[WeightedData]) -> List[DataAndScore]:
        """Score every candidate in ``weightData`` against ``title``.

        For each candidate the best fuzzy ratio over its main title and all of
        its alternative titles is multiplied by the candidate's weight.

        Args:
            title: The query title to match.
            weightData: Candidates with their source weights; entries whose
                ``result`` is ``None`` are skipped.

        Returns:
            One ``DataAndScore`` per usable candidate, highest score first.
        """
        query = self._comparable(title)
        result: List[DataAndScore] = []
        for wd in weightData:
            if wd.result is None:
                continue
            high_score = fuzz.ratio(query, self._comparable(wd.result.title))
            for alt_title in wd.result.altTitle:
                try:
                    alt_score = fuzz.ratio(query, self._comparable(alt_title))
                except Exception as e:
                    # Best effort: a malformed alternative title must not
                    # abort scoring of the remaining ones.
                    log.debug("Unntak: %s", e)
                    log.debug(f"type(title): {type(title)}, value: {title}")
                    log.debug(f"type(alt_title): {type(alt_title)}, value: {alt_title}")
                    log.debug("Metadata objekt:")
                    log.debug(weightData)
                    continue
                high_score = max(high_score, alt_score)
            result.append(DataAndScore(wd.result, high_score * wd.weight, wd.weight, title))
        result.sort(key=lambda x: x.score, reverse=True)
        return result

    def select_result_table(self) -> Optional[pd.DataFrame]:
        """Build a table of every title/candidate score, best score first.

        As a side effect the table is also written as JSON to
        ``./logs/<first title>.json``; a failure to write is logged and does
        not affect the return value.

        Returns:
            A DataFrame with the columns ``Title``, ``Alt Title``, ``Score``,
            ``Weight`` and ``Matched Title`` sorted by descending ``Score``,
            or ``None`` when there is nothing to score.
        """
        all_results = [
            scored
            for title in self.titles
            for scored in self.__calculate_score(title=title, weightData=self.dataWeighed)
        ]
        if not all_results:
            return None

        df = pd.DataFrame({
            "Title": [ds.result.title for ds in all_results],
            "Alt Title": [", ".join(ds.result.altTitle) for ds in all_results],
            "Score": [ds.score for ds in all_results],
            "Weight": [ds.weight for ds in all_results],
            "Matched Title": [ds.matched_title for ds in all_results],
        })
        df = df.sort_values(by="Score", ascending=False).reset_index(drop=True)

        # all_results is non-empty here, so self.titles has at least one entry.
        file_name = _PATH_SEP_RE.sub('_', self.titles[0])
        try:
            df.to_json(f"./logs/{file_name}.json", orient="records", indent=4)
        except Exception as e:
            log.error(f"Failed to dump JSON: {e}")
        return df