Adjusted metadata

This commit is contained in:
Brage 2023-07-31 19:36:10 +02:00
parent 6ca4a08018
commit 1296e2532f
2 changed files with 62 additions and 56 deletions

View File

@ -14,6 +14,7 @@ from sources.anii import metadata as AniiMetadata
from sources.imdb import metadata as ImdbMetadata from sources.imdb import metadata as ImdbMetadata
from sources.mal import metadata as MalMetadata from sources.mal import metadata as MalMetadata
from sources.cache import ResultCache from sources.cache import ResultCache
from sources.select import UseSource
# Konfigurer Kafka-forbindelsen # Konfigurer Kafka-forbindelsen
bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092" bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
@ -157,68 +158,13 @@ class MessageHandlerThread(threading.Thread):
else: else:
logger.info("Not in cache: %s", name) logger.info("Not in cache: %s", name)
logger.info("Searching in sources for information about %s", name) logger.info("Searching in sources for information about %s", name)
result = self.perform_action(title=name) result: Optional[DataResult] = self.perform_action(title=name)
if (result.statusType == "SUCCESS"): if (result.statusType == "SUCCESS"):
logger.info("Storing response for %s in in-memory cache", name) logger.info("Storing response for %s in in-memory cache", name)
ResultCache.add(name, result) ResultCache.add(name, result)
return result return result
def perform_action(self, title) -> DataResult:
    """Look up ``title`` in MAL, AniList and IMDb and return the most likely result.

    The three lookups are executed in the order MAL, AniList, IMDb.

    When all three report ``statusType == "SUCCESS"`` each source is scored as
    ``0.8 * fuzz.ratio(title, source title) + 0.2 * sum(fuzz.ratio(title, alt))``
    over the source's alternative titles (all comparisons lower-cased), and the
    scores are compared pairwise as described on the branches below.

    Otherwise the first successful source in the order AniList, IMDb, MAL is
    returned; when none succeeded, the (unsuccessful) AniList result is
    returned.

    NOTE(review): the alt-title term is a sum, not an average, so a source
    with more alternative titles gets a larger contribution -- confirm this
    is intended.
    """
    anii = AniiMetadata(title)
    imdb = ImdbMetadata(title)
    mal = MalMetadata(title)
    mal_result = mal.lookup()
    anii_result = anii.lookup()
    imdb_result = imdb.lookup()
    # Compare the results based on their similarity to the requested title.
    if anii_result.statusType == "SUCCESS" and imdb_result.statusType == "SUCCESS" and mal_result.statusType == "SUCCESS":
        # All three sources succeeded; use fuzzy matching to decide between them.
        title_similarity_anii = fuzz.ratio(title.lower(), anii_result.data.title.lower())
        title_similarity_imdb = fuzz.ratio(title.lower(), imdb_result.data.title.lower())
        title_similarity_mal = fuzz.ratio(title.lower(), mal_result.data.title.lower())
        alt_titles_anii = anii_result.data.altTitle
        alt_titles_imdb = imdb_result.data.altTitle
        alt_titles_mal = mal_result.data.altTitle
        # Compare the weighted similarity of the main title and the alternative titles.
        # NOTE(review): MAL is chosen as soon as it beats AniList; IMDb is only
        # considered when MAL does not beat AniList, so an IMDb result scoring
        # higher than both can still lose to MAL -- confirm this is intended.
        if (
            title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
            < title_similarity_mal * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_mal) * 0.2
        ):
            most_likely_result = mal_result
        elif (
            title_similarity_imdb * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_imdb) * 0.2
            > title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
        ):
            most_likely_result = imdb_result
        else:
            most_likely_result = anii_result
    elif anii_result.statusType == "SUCCESS":
        # AniList succeeded; use it as the most likely result.
        most_likely_result = anii_result
    elif imdb_result.statusType == "SUCCESS":
        # IMDb succeeded; use it as the most likely result.
        most_likely_result = imdb_result
    elif mal_result.statusType == "SUCCESS":
        # MAL succeeded; use it as the most likely result.
        most_likely_result = mal_result
    else:
        # No source succeeded; fall back to the AniList result, which is
        # itself not a SUCCESS at this point.
        most_likely_result = anii_result
    # Return the most likely result.
    return most_likely_result
def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema: def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema:
return ProducerDataValueSchema( return ProducerDataValueSchema(
referenceId=referenceId, referenceId=referenceId,

View File

@ -0,0 +1,60 @@
from dataclasses import dataclass
from typing import List, Optional
from .result import Metadata, DataResult
from .anii import metadata as AniiMetadata
from .imdb import metadata as ImdbMetadata
from .mal import metadata as MalMetadata
from fuzzywuzzy import fuzz
@dataclass
class WeightedData:
    """A source lookup result paired with the weight given to that source."""

    # Lookup result as returned by one metadata source.
    result: DataResult
    # Source weight; UseSource multiplies the fuzzy-match score by weight / 10.
    weight: int = 1
@dataclass
class DataAndScore:
    """A source lookup result together with its weighted match score."""

    # Lookup result being ranked; defaults to None, hence Optional.
    result: Optional[DataResult] = None
    # Weighted score; UseSource stores ``ratio * (weight / 10)``, a float.
    score: float = 0
class UseSource():
    """Select the most likely metadata result for a title across all sources.

    Each source (anii, imdb, mal) is queried once. Every successful result is
    scored by its best fuzzy match against the requested title, scaled by a
    per-source weight, and the highest-scoring result is returned.
    """

    title: str

    def __init__(self, title) -> None:
        """Store the title that ``select_result`` will search for."""
        self.title = title

    def __perform_search(self, title) -> List[WeightedData]:
        """Query every source and return the successful results with weights.

        Lookups run in the order anii, imdb, mal. A result is kept only when
        it is not ``None`` and its ``statusType`` is ``"SUCCESS"``. The
        returned list keeps that same source order.
        """
        anii = AniiMetadata(title).lookup()
        imdb = ImdbMetadata(title).lookup()
        mal = MalMetadata(title).lookup()
        # Per-source weights: anii 4, imdb 1, mal 8 (scaled by 1/10 when scoring).
        weighted_sources = ((anii, 4), (imdb, 1), (mal, 8))
        # The original built this list but never returned it, so callers got None.
        return [
            WeightedData(found, weight)
            for found, weight in weighted_sources
            if found is not None and found.statusType == "SUCCESS"
        ]

    def __calculate_score(self, title: str, weightData: List[WeightedData]) -> List[DataAndScore]:
        """Score each weighted result against ``title``.

        The score is the highest ``fuzz.ratio`` between the lower-cased
        ``title`` and the result's main title or any of its alternative
        titles, multiplied by ``weight / 10``.
        """
        wanted = title.lower()
        scored: List[DataAndScore] = []
        for wd in weightData:
            # Treat a missing alternative-title list as empty instead of crashing.
            candidates = [wd.result.data.title, *(wd.result.data.altTitle or [])]
            best_ratio = max(fuzz.ratio(wanted, name.lower()) for name in candidates)
            scored.append(DataAndScore(wd.result, best_ratio * (wd.weight / 10)))
        return scored

    def select_result(self) -> Optional[DataResult]:
        """Return the highest-scoring successful result, or ``None`` if none.

        Ties are resolved in favour of the source queried first
        (anii, then imdb, then mal).
        """
        weighted = self.__perform_search(title=self.title)
        scored = self.__calculate_score(title=self.title, weightData=weighted)
        # max() returns the first maximal entry, matching a stable descending sort.
        best = max(scored, key=lambda entry: entry.score, default=None)
        return best.result if best is not None else None