From fd61dcbeebf535341bee2fb11e1d365b05845f0d Mon Sep 17 00:00:00 2001 From: bskjon Date: Mon, 15 Apr 2024 18:03:27 +0200 Subject: [PATCH] Correction2 --- apps/pyMetadata/app.py | 8 ++++---- apps/pyMetadata/sources/result.py | 9 ++++++++- apps/pyMetadata/sources/select.py | 33 ++++++++++++++++++++++++------- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/apps/pyMetadata/app.py b/apps/pyMetadata/app.py index 9a1e4240..96d036dc 100644 --- a/apps/pyMetadata/app.py +++ b/apps/pyMetadata/app.py @@ -10,7 +10,7 @@ import time from kafka import KafkaConsumer, KafkaProducer from fuzzywuzzy import fuzz -from sources.result import DataResult, Metadata +from sources.result import DataResult, Metadata, NamedDataResult from sources.anii import metadata as AniiMetadata from sources.imdb import metadata as ImdbMetadata from sources.mal import metadata as MalMetadata @@ -191,10 +191,10 @@ class MessageHandlerThread(threading.Thread): if result is None: logger.info("Not in cache: %s or %s", name, baseName) logger.info("Searching in sources for information about %s", name) - result: Optional[DataResult] = UseSource(title=name, baseName=baseName, eventId=evnetId).select_result() - if (result.status == "COMPLETED"): + result: Optional[NamedDataResult] = UseSource(title=name, baseName=baseName, eventId=evnetId).select_result() + if (result.data.status == "COMPLETED"): logger.info("Storing response for %s in in-memory cache", name) - ResultCache.add(name, result) + ResultCache.add(title=result.name, result=result.data) return result diff --git a/apps/pyMetadata/sources/result.py b/apps/pyMetadata/sources/result.py index 13457cad..ef245358 100644 --- a/apps/pyMetadata/sources/result.py +++ b/apps/pyMetadata/sources/result.py @@ -23,7 +23,6 @@ class Metadata: def to_dict(self): return asdict(self) - @dataclass class DataResult: status: str # COMPLETED / ERROR @@ -32,3 +31,11 @@ class DataResult: def to_dict(self): return asdict(self) + +@dataclass +class NamedDataResult: + name: str + data: DataResult + + def to_dict(self): + return asdict(self) \ No newline at end of file diff --git a/apps/pyMetadata/sources/select.py b/apps/pyMetadata/sources/select.py index 16701537..47d024c4 100644 --- a/apps/pyMetadata/sources/select.py +++ b/apps/pyMetadata/sources/select.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass, asdict from typing import List, Optional -from .result import Metadata, DataResult +from .result import Metadata, DataResult, NamedDataResult from .anii import metadata as AniiMetadata from .imdb import metadata as ImdbMetadata from .mal import metadata as MalMetadata @@ -39,7 +39,9 @@ class UseSource(): def stripped(self, input_string) -> str: unitext = unidecode(input_string) - return re.sub(r'[^a-zA-Z0-9\s]', '', unitext) + unitext = re.sub(r'[^a-zA-Z0-9\s]', '', unitext) + unitext = re.sub(r'\s{2,}', '', unitext) + return unitext def __perform_search(self, title)-> List[WeightedData]: anii = AniiMetadata(title).lookup() @@ -75,7 +77,7 @@ class UseSource(): return result - def select_result(self) -> Optional[DataResult]: + def select_result(self) -> Optional[NamedDataResult]: """""" scored: List[DataAndScore] = [] titleResult = self.__perform_search(title=self.title) @@ -83,12 +85,29 @@ class UseSource(): titleScoreResult = self.__calculate_score(title=self.title, weightData=titleResult) baseNameScoreResult = self.__calculate_score(title=self.baseName, weightData=baseNameResult) + + titleScoreResult.sort(key=lambda x: x.score, reverse=True) + baseNameScoreResult.sort(key=lambda x: x.score, reverse=True) scored.extend(titleScoreResult) scored.extend(baseNameScoreResult) - scored.sort(key=lambda x: x.score, reverse=True) - + selected: NamedDataResult|None = None + ht = titleScoreResult[0] + bt = baseNameScoreResult[0] + if (bt is not None and ht is not None): + if (bt.score >= ht.score): + selected = NamedDataResult(self.baseName, bt) + else: + selected = NamedDataResult(self.title, ht) + else: + if len(titleScoreResult) > 0: + selected = NamedDataResult(self.title, titleScoreResult[0]) + elif len(baseNameScoreResult) > 0: + selected = NamedDataResult(self.baseName, baseNameScoreResult[0]) + else: + selected = None + jsr = "" try: jsr = json.dumps([obj.to_dict() for obj in scored], indent=4) @@ -108,11 +127,11 @@ class UseSource(): titles.append(wd.result.data.title) titles.extend(wd.result.data.altTitle) joinedTitles = "\n\t" + "\n\t".join(titles) - logger.info(f"\nTitle: {self.title} \nBaseName: {self.baseName} \nFound: {joinedTitles} \nTitle selected: \n\t{scored[0].result.data.title}\n") + logger.info(f"\nTitle: {self.title} \nBaseName: {self.baseName} \nFound: {joinedTitles} \nTitle selected: \n\t{selected.result.data.title}\n") except Exception as e: logger.error(e) pass # Return the result with the highest score (most likely result) - return scored[0].result if scored else None + return selected