Correction2

This commit is contained in:
bskjon 2024-04-15 18:03:27 +02:00
parent 5e52385c68
commit fd61dcbeeb
3 changed files with 38 additions and 12 deletions

View File

@ -10,7 +10,7 @@ import time
from kafka import KafkaConsumer, KafkaProducer from kafka import KafkaConsumer, KafkaProducer
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from sources.result import DataResult, Metadata from sources.result import DataResult, Metadata, NamedDataResult
from sources.anii import metadata as AniiMetadata from sources.anii import metadata as AniiMetadata
from sources.imdb import metadata as ImdbMetadata from sources.imdb import metadata as ImdbMetadata
from sources.mal import metadata as MalMetadata from sources.mal import metadata as MalMetadata
@ -191,10 +191,10 @@ class MessageHandlerThread(threading.Thread):
if result is None: if result is None:
logger.info("Not in cache: %s or %s", name, baseName) logger.info("Not in cache: %s or %s", name, baseName)
logger.info("Searching in sources for information about %s", name) logger.info("Searching in sources for information about %s", name)
result: Optional[DataResult] = UseSource(title=name, baseName=baseName, eventId=evnetId).select_result() result: Optional[NamedDataResult] = UseSource(title=name, baseName=baseName, eventId=evnetId).select_result()
if (result.status == "COMPLETED"): if (result.data.status == "COMPLETED"):
logger.info("Storing response for %s in in-memory cache", name) logger.info("Storing response for %s in in-memory cache", name)
ResultCache.add(name, result) ResultCache.add(title=result.name, result=result.data)
return result return result

View File

@ -23,7 +23,6 @@ class Metadata:
def to_dict(self): def to_dict(self):
return asdict(self) return asdict(self)
@dataclass @dataclass
class DataResult: class DataResult:
status: str # COMPLETED / ERROR status: str # COMPLETED / ERROR
@ -32,3 +31,11 @@ class DataResult:
def to_dict(self): def to_dict(self):
return asdict(self) return asdict(self)
@dataclass
class NamedDataResult:
    """Pair a lookup result with the name it is associated with.

    The class only stores the two fields; it performs no validation.
    """

    # Name the result is associated with.
    name: str
    # The result payload itself.
    data: DataResult

    def to_dict(self):
        """Return the instance as a plain dict.

        Uses ``dataclasses.asdict``, which also converts nested dataclass
        instances (such as ``data``) into dicts recursively.
        """
        return asdict(self)

View File

@ -1,7 +1,7 @@
import logging import logging
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from typing import List, Optional from typing import List, Optional
from .result import Metadata, DataResult from .result import Metadata, DataResult, NamedDataResult
from .anii import metadata as AniiMetadata from .anii import metadata as AniiMetadata
from .imdb import metadata as ImdbMetadata from .imdb import metadata as ImdbMetadata
from .mal import metadata as MalMetadata from .mal import metadata as MalMetadata
@ -39,7 +39,9 @@ class UseSource():
def stripped(self, input_string) -> str:
    """Return ``input_string`` reduced to ASCII letters, digits and whitespace.

    Steps:
      1. Pass the text through ``unidecode``.
      2. Remove every character that is not ``a-z``, ``A-Z``, ``0-9`` or
         whitespace.
      3. Collapse each run of two or more whitespace characters into a
         single space.

    Leading and trailing whitespace is not trimmed.
    """
    unitext = unidecode(input_string)
    unitext = re.sub(r'[^a-zA-Z0-9\s]', '', unitext)
    # Removing punctuation can leave gaps such as "Foo  Bar" (from
    # "Foo - Bar"). Replace the gap with one space rather than an empty
    # string, otherwise the neighbouring words are glued into "FooBar".
    unitext = re.sub(r'\s{2,}', ' ', unitext)
    return unitext
def __perform_search(self, title)-> List[WeightedData]: def __perform_search(self, title)-> List[WeightedData]:
anii = AniiMetadata(title).lookup() anii = AniiMetadata(title).lookup()
@ -75,7 +77,7 @@ class UseSource():
return result return result
def select_result(self) -> Optional[DataResult]: def select_result(self) -> Optional[NamedDataResult]:
"""""" """"""
scored: List[DataAndScore] = [] scored: List[DataAndScore] = []
titleResult = self.__perform_search(title=self.title) titleResult = self.__perform_search(title=self.title)
@ -83,12 +85,29 @@ class UseSource():
titleScoreResult = self.__calculate_score(title=self.title, weightData=titleResult) titleScoreResult = self.__calculate_score(title=self.title, weightData=titleResult)
baseNameScoreResult = self.__calculate_score(title=self.baseName, weightData=baseNameResult) baseNameScoreResult = self.__calculate_score(title=self.baseName, weightData=baseNameResult)
titleScoreResult.sort(key=lambda x: x.score, reverse=True)
baseNameScoreResult.sort(key=lambda x: x.score, reverse=True)
scored.extend(titleScoreResult) scored.extend(titleScoreResult)
scored.extend(baseNameScoreResult) scored.extend(baseNameScoreResult)
scored.sort(key=lambda x: x.score, reverse=True) selected: NamedDataResult|None = None
ht = titleScoreResult[0]
bt = baseNameScoreResult[0]
if (bt is not None and ht is not None):
if (bt.score >= ht.score):
selected = NamedDataResult(self.baseName, bt)
else:
selected = NamedDataResult(self.title, ht)
else:
if len(titleScoreResult) > 0:
selected = NamedDataResult(self.title, titleScoreResult[0])
elif len(baseNameScoreResult) > 0:
selected = NamedDataResult(self.baseName, baseNameScoreResult[0])
else:
selected = None
jsr = "" jsr = ""
try: try:
jsr = json.dumps([obj.to_dict() for obj in scored], indent=4) jsr = json.dumps([obj.to_dict() for obj in scored], indent=4)
@ -108,11 +127,11 @@ class UseSource():
titles.append(wd.result.data.title) titles.append(wd.result.data.title)
titles.extend(wd.result.data.altTitle) titles.extend(wd.result.data.altTitle)
joinedTitles = "\n\t" + "\n\t".join(titles) joinedTitles = "\n\t" + "\n\t".join(titles)
logger.info(f"\nTitle: {self.title} \nBaseName: {self.baseName} \nFound: {joinedTitles} \nTitle selected: \n\t{scored[0].result.data.title}\n") logger.info(f"\nTitle: {self.title} \nBaseName: {self.baseName} \nFound: {joinedTitles} \nTitle selected: \n\t{selected.result.data.title}\n")
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
pass pass
# Return the result with the highest score (most likely result) # Return the result with the highest score (most likely result)
return scored[0].result if scored else None return selected