diff --git a/pyMetadata/app.py b/pyMetadata/app.py
index 8c502486..01576231 100644
--- a/pyMetadata/app.py
+++ b/pyMetadata/app.py
@@ -14,6 +14,7 @@ from sources.anii import metadata as AniiMetadata
 from sources.imdb import metadata as ImdbMetadata
 from sources.mal import metadata as MalMetadata
 from sources.cache import ResultCache
+from sources.select import UseSource
 
 # Konfigurer Kafka-forbindelsen
 bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
@@ -157,68 +158,15 @@ class MessageHandlerThread(threading.Thread):
         else:
             logger.info("Not in cache: %s", name)
             logger.info("Searching in sources for information about %s", name)
-            result = self.perform_action(title=name)
-            if (result.statusType == "SUCCESS"):
+            # perform_action() is deleted by this patch; source selection now lives in UseSource.
+            result: Optional[DataResult] = UseSource(title=name).select_result()
+            # select_result() is declared Optional, so guard before reading statusType.
+            if (result is not None) and (result.statusType == "SUCCESS"):
                 logger.info("Storing response for %s in in-memory cache", name)
                 ResultCache.add(name, result)
         return result
 
-    def perform_action(self, title) -> DataResult:
-        anii = AniiMetadata(title)
-        imdb = ImdbMetadata(title)
-        mal = MalMetadata(title)
-
-        mal_result = mal.lookup()
-        anii_result = anii.lookup()
-        imdb_result = imdb.lookup()
-
-        # Sammenlign resultater basert på likheter og sammenhenger med tittelen
-        if anii_result.statusType == "SUCCESS" and imdb_result.statusType == "SUCCESS" and mal_result.statusType == "SUCCESS":
-            # Begge registrene ga suksessresultater, bruk fuzzy matching for å gjøre en vurdering
-            title_similarity_anii = fuzz.ratio(title.lower(), anii_result.data.title.lower())
-            title_similarity_imdb = fuzz.ratio(title.lower(), imdb_result.data.title.lower())
-            title_similarity_mal = fuzz.ratio(title.lower(), mal_result.data.title.lower())
-
-            alt_titles_anii = anii_result.data.altTitle
-            alt_titles_imdb = imdb_result.data.altTitle
-            alt_titles_mal = mal_result.data.altTitle
-
-            # Sammenlign likheter mellom tittel og registertitler, inkludert alternative titler
-            if (
-                title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
-                < title_similarity_mal * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_mal) * 0.2
-            ):
-                most_likely_result = mal_result
-            elif (
-                title_similarity_imdb * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_imdb) * 0.2
-                > title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
-            ):
-                most_likely_result = imdb_result
-            else:
-                most_likely_result = anii_result
-
-        elif anii_result.statusType == "SUCCESS":
-            # AniList ga suksessresultat, bruk det som det mest sannsynlige
-            most_likely_result = anii_result
-
-        elif imdb_result.statusType == "SUCCESS":
-            # IMDb ga suksessresultat, bruk det som det mest sannsynlige
-            most_likely_result = imdb_result
-
-        elif mal_result.statusType == "SUCCESS":
-            # MAL ga suksessresultat, bruk det som det mest sannsynlige
-            most_likely_result = mal_result
-
-        else:
-            # Ingen resultater, bruk AniList hvis tilgjengelig
-            most_likely_result = anii_result
-
-        # Returner det mest sannsynlige resultatet
-        return most_likely_result
-
-
-
 
     def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema:
         return ProducerDataValueSchema(
             referenceId=referenceId,
diff --git a/pyMetadata/sources/select.py b/pyMetadata/sources/select.py
new file mode 100644
index 00000000..b04d0ba4
--- /dev/null
+++ b/pyMetadata/sources/select.py
@@ -0,0 +1,65 @@
+from dataclasses import dataclass
+from typing import List, Optional
+from .result import Metadata, DataResult
+from .anii import metadata as AniiMetadata
+from .imdb import metadata as ImdbMetadata
+from .mal import metadata as MalMetadata
+from fuzzywuzzy import fuzz
+
+
+@dataclass
+class WeightedData:
+    """A lookup result paired with the weight assigned to the source it came from."""
+    result: DataResult
+    weight: int = 1
+
+@dataclass
+class DataAndScore:
+    """A lookup result paired with its weighted title-similarity score."""
+    # Optional: the default is None, which a bare DataResult annotation does not admit.
+    result: Optional[DataResult] = None
+    # float: the stored score is a fuzz ratio multiplied by weight / 10.
+    score: float = 0
+
+class UseSource():
+    """Selects the most likely metadata result for a title across the lookup sources."""
+    title: str
+    def __init__(self, title) -> None:
+        self.title = title
+
+    def __perform_search(self, title) -> List[WeightedData]:
+        """Look up `title` in AniList, IMDb and MAL and keep the successful results.
+
+        Returns one WeightedData per source whose lookup is not None and has
+        statusType "SUCCESS", in the order AniList (weight 4), IMDb (1), MAL (8).
+        """
+        lookups = (
+            (AniiMetadata(title).lookup(), 4),
+            (ImdbMetadata(title).lookup(), 1),
+            (MalMetadata(title).lookup(), 8),
+        )
+        # Bug fix: the list used to be built but never returned, so callers got None.
+        return [
+            WeightedData(found, weight)
+            for found, weight in lookups
+            if found is not None and found.statusType == "SUCCESS"
+        ]
+
+    def __calculate_score(self, title: str, weightData: List[WeightedData]) -> List[DataAndScore]:
+        """Score each result: best fuzz.ratio over its title and altTitle, times weight / 10."""
+        wanted = title.lower()
+        result: List[DataAndScore] = []
+        for wd in weightData:
+            # NOTE(review): altTitle is assumed to be a list of strings or None - confirm.
+            names = [wd.result.data.title, *(wd.result.data.altTitle or [])]
+            highScore = max(fuzz.ratio(wanted, name.lower()) for name in names)
+            result.append(DataAndScore(wd.result, highScore * (wd.weight / 10)))
+        return result
+
+    def select_result(self) -> Optional[DataResult]:
+        """Return the highest-scoring successful result for self.title, or None if there is none."""
+        weightResult = self.__perform_search(title=self.title)
+        scored = self.__calculate_score(title=self.title, weightData=weightResult)
+        # Highest score first; the sort is stable, so ties keep the search order.
+        scored.sort(key=lambda x: x.score, reverse=True)
+        return scored[0].result if scored else None