Adjusted metadata
This commit is contained in:
parent
6ca4a08018
commit
1296e2532f
@ -14,6 +14,7 @@ from sources.anii import metadata as AniiMetadata
|
|||||||
from sources.imdb import metadata as ImdbMetadata
|
from sources.imdb import metadata as ImdbMetadata
|
||||||
from sources.mal import metadata as MalMetadata
|
from sources.mal import metadata as MalMetadata
|
||||||
from sources.cache import ResultCache
|
from sources.cache import ResultCache
|
||||||
|
from sources.select import UseSource
|
||||||
|
|
||||||
# Konfigurer Kafka-forbindelsen
|
# Konfigurer Kafka-forbindelsen
|
||||||
bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
|
bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
|
||||||
@ -157,68 +158,13 @@ class MessageHandlerThread(threading.Thread):
|
|||||||
else:
|
else:
|
||||||
logger.info("Not in cache: %s", name)
|
logger.info("Not in cache: %s", name)
|
||||||
logger.info("Searching in sources for information about %s", name)
|
logger.info("Searching in sources for information about %s", name)
|
||||||
result = self.perform_action(title=name)
|
result: Optional[DataResult] = self.perform_action(title=name)
|
||||||
if (result.statusType == "SUCCESS"):
|
if (result.statusType == "SUCCESS"):
|
||||||
logger.info("Storing response for %s in in-memory cache", name)
|
logger.info("Storing response for %s in in-memory cache", name)
|
||||||
ResultCache.add(name, result)
|
ResultCache.add(name, result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def perform_action(self, title) -> DataResult:
|
|
||||||
anii = AniiMetadata(title)
|
|
||||||
imdb = ImdbMetadata(title)
|
|
||||||
mal = MalMetadata(title)
|
|
||||||
|
|
||||||
mal_result = mal.lookup()
|
|
||||||
anii_result = anii.lookup()
|
|
||||||
imdb_result = imdb.lookup()
|
|
||||||
|
|
||||||
# Sammenlign resultater basert på likheter og sammenhenger med tittelen
|
|
||||||
if anii_result.statusType == "SUCCESS" and imdb_result.statusType == "SUCCESS" and mal_result.statusType == "SUCCESS":
|
|
||||||
# Begge registrene ga suksessresultater, bruk fuzzy matching for å gjøre en vurdering
|
|
||||||
title_similarity_anii = fuzz.ratio(title.lower(), anii_result.data.title.lower())
|
|
||||||
title_similarity_imdb = fuzz.ratio(title.lower(), imdb_result.data.title.lower())
|
|
||||||
title_similarity_mal = fuzz.ratio(title.lower(), mal_result.data.title.lower())
|
|
||||||
|
|
||||||
alt_titles_anii = anii_result.data.altTitle
|
|
||||||
alt_titles_imdb = imdb_result.data.altTitle
|
|
||||||
alt_titles_mal = mal_result.data.altTitle
|
|
||||||
|
|
||||||
# Sammenlign likheter mellom tittel og registertitler, inkludert alternative titler
|
|
||||||
if (
|
|
||||||
title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
|
|
||||||
< title_similarity_mal * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_mal) * 0.2
|
|
||||||
):
|
|
||||||
most_likely_result = mal_result
|
|
||||||
elif (
|
|
||||||
title_similarity_imdb * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_imdb) * 0.2
|
|
||||||
> title_similarity_anii * 0.8 + sum(fuzz.ratio(title.lower(), alt_title.lower()) for alt_title in alt_titles_anii) * 0.2
|
|
||||||
):
|
|
||||||
most_likely_result = imdb_result
|
|
||||||
else:
|
|
||||||
most_likely_result = anii_result
|
|
||||||
|
|
||||||
elif anii_result.statusType == "SUCCESS":
|
|
||||||
# AniList ga suksessresultat, bruk det som det mest sannsynlige
|
|
||||||
most_likely_result = anii_result
|
|
||||||
|
|
||||||
elif imdb_result.statusType == "SUCCESS":
|
|
||||||
# IMDb ga suksessresultat, bruk det som det mest sannsynlige
|
|
||||||
most_likely_result = imdb_result
|
|
||||||
|
|
||||||
elif mal_result.statusType == "SUCCESS":
|
|
||||||
# MAL ga suksessresultat, bruk det som det mest sannsynlige
|
|
||||||
most_likely_result = mal_result
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Ingen resultater, bruk AniList hvis tilgjengelig
|
|
||||||
most_likely_result = anii_result
|
|
||||||
|
|
||||||
# Returner det mest sannsynlige resultatet
|
|
||||||
return most_likely_result
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema:
|
def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema:
|
||||||
return ProducerDataValueSchema(
|
return ProducerDataValueSchema(
|
||||||
referenceId=referenceId,
|
referenceId=referenceId,
|
||||||
|
|||||||
60
pyMetadata/sources/select.py
Normal file
60
pyMetadata/sources/select.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Optional
|
||||||
|
from .result import Metadata, DataResult
|
||||||
|
from .anii import metadata as AniiMetadata
|
||||||
|
from .imdb import metadata as ImdbMetadata
|
||||||
|
from .mal import metadata as MalMetadata
|
||||||
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class WeightedData:
|
||||||
|
result: DataResult
|
||||||
|
weight: int = 1
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class DataAndScore:
|
||||||
|
result: DataResult = None
|
||||||
|
score: int = 0
|
||||||
|
|
||||||
|
class UseSource():
|
||||||
|
title: str
|
||||||
|
def __init__(self, title) -> None:
|
||||||
|
self.title = title
|
||||||
|
|
||||||
|
def __perform_search(self, title)-> List[WeightedData]:
|
||||||
|
anii = AniiMetadata(title).lookup()
|
||||||
|
imdb = ImdbMetadata(title).lookup()
|
||||||
|
mal = MalMetadata(title).lookup()
|
||||||
|
|
||||||
|
result: List[WeightedData] = []
|
||||||
|
if (anii is not None) and (anii.statusType == "SUCCESS"):
|
||||||
|
result.append(WeightedData(anii, 4))
|
||||||
|
if (imdb is not None) and (imdb.statusType == "SUCCESS"):
|
||||||
|
result.append(WeightedData(imdb, 1))
|
||||||
|
if (mal is not None) and (mal.statusType == "SUCCESS"):
|
||||||
|
result.append(WeightedData(mal, 8))
|
||||||
|
|
||||||
|
def __calculate_score(self, title: str, weightData: List[WeightedData]) -> List[DataAndScore]:
|
||||||
|
""""""
|
||||||
|
result: List[WeightedData] = []
|
||||||
|
for wd in weightData:
|
||||||
|
highScore = fuzz.ratio(title.lower(), wd.result.data.title.lower())
|
||||||
|
for name in wd.result.data.altTitle:
|
||||||
|
altScore = fuzz.ratio(title.lower(), name.lower())
|
||||||
|
if (altScore > highScore):
|
||||||
|
highScore = altScore
|
||||||
|
givenScore = highScore * (wd.weight / 10)
|
||||||
|
result.append(DataAndScore(wd.result, givenScore))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def select_result(self) -> Optional[DataResult]:
|
||||||
|
""""""
|
||||||
|
weightResult = self.__perform_search(title=self.title)
|
||||||
|
scored = self.__calculate_score(title=self.title, weightData=weightResult)
|
||||||
|
scored.sort(key=lambda x: x.score, reverse=True)
|
||||||
|
|
||||||
|
# Return the result with the highest score (most likely result)
|
||||||
|
return scored[0].result if scored else None
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user