Refactor python
This commit is contained in:
parent
583c0613a5
commit
e46029bedd
@ -7,15 +7,19 @@ import uuid
|
|||||||
import threading
|
import threading
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
from shared import ConsumerRecord, MediaEvent, decode_key, decode_value, suppress_ignore, consume_on_key
|
||||||
from kafka import KafkaConsumer, KafkaProducer
|
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
from sources.result import DataResult, Metadata
|
|
||||||
from sources.anii import metadata as AniiMetadata
|
from algo.AdvancedMatcher import AdvancedMatcher
|
||||||
from sources.imdb import metadata as ImdbMetadata
|
from algo.SimpleMatcher import SimpleMatcher
|
||||||
from sources.mal import metadata as MalMetadata
|
from algo.PrefixMatcher import PrefixMatcher
|
||||||
from sources.cache import ResultCache
|
from clazz.KafkaMessageSchema import KafkaMessage, MessageDataWrapper
|
||||||
from sources.select import UseSource
|
from clazz.Metadata import Metadata
|
||||||
|
from kafka import KafkaConsumer, KafkaProducer
|
||||||
|
|
||||||
|
from sources.anii import Anii
|
||||||
|
from sources.imdb import Imdb
|
||||||
|
from sources.mal import Mal
|
||||||
|
|
||||||
# Konfigurer Kafka-forbindelsen
|
# Konfigurer Kafka-forbindelsen
|
||||||
bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
|
bootstrap_servers = os.environ.get("KAFKA_BOOTSTRAP_SERVER") or "127.0.0.1:9092"
|
||||||
@ -23,26 +27,6 @@ consumer_group = os.environ.get("KAFKA_CONSUMER_ID") or f"MetadataConsumer"
|
|||||||
kafka_topic = os.environ.get("KAFKA_TOPIC") or "mediaEvents"
|
kafka_topic = os.environ.get("KAFKA_TOPIC") or "mediaEvents"
|
||||||
|
|
||||||
|
|
||||||
suppress_ignore: List[str] = [
|
|
||||||
"event:media-process:started",
|
|
||||||
"event:request-process:started",
|
|
||||||
"event::save",
|
|
||||||
"event:media-process:completed",
|
|
||||||
"event:work-encode:created",
|
|
||||||
"event:work-extract:created",
|
|
||||||
"event:work-convert:created",
|
|
||||||
"event:work-encode:performed",
|
|
||||||
"event:work-extract:performed",
|
|
||||||
"event:work-convert:performed",
|
|
||||||
"event:media-read-out-cover:performed",
|
|
||||||
"event:work-download-cover:performed",
|
|
||||||
"event:media-read-out-name-and-type:performed",
|
|
||||||
"event:media-parse-stream:performed",
|
|
||||||
"event:media-extract-parameter:created",
|
|
||||||
"event:media-encode-parameter:created",
|
|
||||||
"event:media-metadata-search:performed"
|
|
||||||
]
|
|
||||||
|
|
||||||
# Konfigurer logging
|
# Konfigurer logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
@ -53,28 +37,6 @@ logging.basicConfig(
|
|||||||
)
|
)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class ProducerDataValueSchema:
|
|
||||||
def __init__(self, referenceId, data: DataResult):
|
|
||||||
self.referenceId = referenceId
|
|
||||||
self.data = data
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return {
|
|
||||||
'referenceId': self.referenceId,
|
|
||||||
'eventId': str(uuid.uuid4()),
|
|
||||||
'data': self.data.to_dict() if self.data else None
|
|
||||||
}
|
|
||||||
|
|
||||||
def to_json(self):
|
|
||||||
data_dict = self.to_dict()
|
|
||||||
return json.dumps(data_dict)
|
|
||||||
|
|
||||||
def decode_key(key_bytes):
|
|
||||||
return key_bytes.decode('utf-8') if key_bytes else None
|
|
||||||
|
|
||||||
def decode_value(value_bytes):
|
|
||||||
return json.loads(value_bytes.decode('utf-8')) if value_bytes else None
|
|
||||||
|
|
||||||
|
|
||||||
# Kafka consumer-klasse
|
# Kafka consumer-klasse
|
||||||
class KafkaConsumerThread(threading.Thread):
|
class KafkaConsumerThread(threading.Thread):
|
||||||
@ -104,21 +66,21 @@ class KafkaConsumerThread(threading.Thread):
|
|||||||
|
|
||||||
|
|
||||||
while not self.shutdown.is_set():
|
while not self.shutdown.is_set():
|
||||||
for message in consumer:
|
for cm in consumer:
|
||||||
if self.shutdown.is_set():
|
if self.shutdown.is_set():
|
||||||
break
|
break
|
||||||
|
message: ConsumerRecord = ConsumerRecord(cm)
|
||||||
|
|
||||||
|
|
||||||
# Sjekk om meldingen har målnøkkelen
|
# Sjekk om meldingen har målnøkkelen
|
||||||
if message.key == "request:metadata:obtain" or message.key == "event:media-read-base-info:performed":
|
if message.key in consume_on_key:
|
||||||
logger.info("==> Incoming message: %s \n%s", message.key, message.value)
|
logger.info("==> Incoming message: %s \n%s", message.key, message.value)
|
||||||
# Opprett en ny tråd for å håndtere meldingen
|
# Opprett en ny tråd for å håndtere meldingen
|
||||||
handler_thread = MessageHandlerThread(message)
|
handler_thread = MessageHandlerThread(message)
|
||||||
handler_thread.start()
|
handler_thread.start()
|
||||||
else:
|
else:
|
||||||
if (message.key not in suppress_ignore):
|
if (message.key not in suppress_ignore):
|
||||||
logger.info("Ignored message: key=%s", message.key)
|
logger.debug("Ignored message: key=%s", message.key)
|
||||||
# Introduce a small sleep to reduce CPU usage
|
# Introduce a small sleep to reduce CPU usage
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
if consumer is not None:
|
if consumer is not None:
|
||||||
@ -132,89 +94,86 @@ class KafkaConsumerThread(threading.Thread):
|
|||||||
|
|
||||||
# Kafka message handler-klasse
|
# Kafka message handler-klasse
|
||||||
class MessageHandlerThread(threading.Thread):
|
class MessageHandlerThread(threading.Thread):
|
||||||
def __init__(self, message):
|
producerMessageKey = "event:media-metadata-search:performed"
|
||||||
|
def __init__(self, message: ConsumerRecord):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
logger.info("Handling message: key=%s, value=%s", self.message.key, self.message.value)
|
|
||||||
if 'data' not in self.message.value:
|
|
||||||
logger.error("data is not present in message!")
|
|
||||||
messageData = self.message.value["data"]
|
|
||||||
# Sjekk om meldingen har en Status
|
|
||||||
if 'status' in messageData:
|
|
||||||
status_type = messageData['status']
|
|
||||||
|
|
||||||
# Sjekk om statusen er COMPLETED
|
mediaEvent = MediaEvent(message=self.message)
|
||||||
if status_type == 'COMPLETED':
|
|
||||||
baseName = messageData["sanitizedName"]
|
|
||||||
title = messageData["title"]
|
|
||||||
|
|
||||||
eventId = self.message.value["eventId"]
|
if mediaEvent.data is None:
|
||||||
|
logger.error("No data present for %s", self.message.value)
|
||||||
|
return
|
||||||
|
if mediaEvent.isConsumable() == False:
|
||||||
|
logger.info("Message status is not of 'COMPLETED', %s", self.message.value)
|
||||||
|
return
|
||||||
|
|
||||||
logger.info("Searching for %s", title)
|
logger.info("Processing record: key=%s, value=%s", self.message.key, self.message.value)
|
||||||
result = self.get_metadata(title, baseName, eventId)
|
|
||||||
if (result is None):
|
|
||||||
logger.info("No result for %s or %s", title, baseName)
|
|
||||||
|
|
||||||
producerMessage = self.compose_message(referenceId=self.message.value["referenceId"], result=result)
|
|
||||||
|
|
||||||
# Serialiser resultatet til JSON som strenger
|
|
||||||
result_json = json.dumps(producerMessage.to_dict())
|
|
||||||
|
|
||||||
# Send resultatet tilbake ved hjelp av Kafka-producer
|
|
||||||
producer = KafkaProducer(
|
|
||||||
bootstrap_servers=bootstrap_servers,
|
|
||||||
key_serializer=lambda k: k.encode('utf-8') if isinstance(k, str) else None,
|
|
||||||
value_serializer=lambda v: v.encode('utf-8') if isinstance(v, str) else None
|
|
||||||
)
|
|
||||||
producer.send(kafka_topic, key="event:media-metadata-search:performed", value=result_json)
|
|
||||||
producer.close()
|
|
||||||
else:
|
|
||||||
logger.info("Message status is not of 'COMPLETED', %s", self.message.value)
|
|
||||||
else:
|
|
||||||
logger.warn("No status present for %s", self.message.value)
|
|
||||||
|
|
||||||
def get_metadata(self, name: str, baseName: str, evnetId: str) -> Optional[DataResult]:
|
|
||||||
result = None
|
|
||||||
logger.info("Checking cache")
|
|
||||||
titleCache = ResultCache.get(name)
|
|
||||||
if (titleCache is None):
|
|
||||||
titleCache = UseSource(title=name, eventId=evnetId).select_result()
|
|
||||||
if titleCache is not None:
|
|
||||||
logger.info("Storing response for %s in in-memory cache", name)
|
|
||||||
ResultCache.add(title=name, result=titleCache)
|
|
||||||
else:
|
|
||||||
logger.info("Cache hit for %s", name)
|
|
||||||
|
|
||||||
baseNameCache = ResultCache.get(baseName)
|
|
||||||
if (baseNameCache is None):
|
|
||||||
baseNameCache = UseSource(title=baseName, eventId=evnetId).select_result()
|
|
||||||
if baseNameCache is not None:
|
|
||||||
logger.info("Storing response for %s in in-memory cache", baseName)
|
|
||||||
ResultCache.add(title=baseName, result=baseNameCache)
|
|
||||||
else:
|
|
||||||
logger.info("Cache hit for %s", baseName)
|
|
||||||
|
|
||||||
if titleCache is not None and baseNameCache is not None:
|
|
||||||
if (titleCache.data.type.lower() == "movie" or baseNameCache.data.type.lower() == "movie"):
|
|
||||||
result = baseNameCache
|
|
||||||
else:
|
|
||||||
result = titleCache
|
|
||||||
elif titleCache is not None:
|
|
||||||
result = titleCache
|
|
||||||
elif baseNameCache is not None:
|
|
||||||
result = baseNameCache
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def compose_message(self, referenceId: str, result: DataResult) -> ProducerDataValueSchema:
|
searchableTitles: List[str] = mediaEvent.data["searchTitles"]
|
||||||
return ProducerDataValueSchema(
|
joinedTitles = ", ".join(searchableTitles)
|
||||||
referenceId=referenceId,
|
logger.info("Searching for %s", joinedTitles)
|
||||||
data=result
|
result: Metadata | None = self.__getMetadata(searchableTitles)
|
||||||
|
|
||||||
|
result_message: str | None = None
|
||||||
|
if (result is None):
|
||||||
|
result_message = f"No result for {joinedTitles}"
|
||||||
|
logger.info(result_message)
|
||||||
|
|
||||||
|
messageData = MessageDataWrapper(
|
||||||
|
status = "ERROR" if result is None else "COMPLETED",
|
||||||
|
message = result_message,
|
||||||
|
data = result,
|
||||||
|
derivedFromEventId = mediaEvent.eventId
|
||||||
)
|
)
|
||||||
|
|
||||||
|
producerMessage = KafkaMessage(referenceId=mediaEvent.referenceId, data=messageData).to_json()
|
||||||
|
|
||||||
|
# Serialiser resultatet til JSON som strenger
|
||||||
|
result_json = json.dumps(producerMessage)
|
||||||
|
|
||||||
|
logger.info("<== Outgoing message: %s \n%s", self.producerMessageKey, result_json)
|
||||||
|
|
||||||
|
# Send resultatet tilbake ved hjelp av Kafka-producer
|
||||||
|
producer = KafkaProducer(
|
||||||
|
bootstrap_servers=bootstrap_servers,
|
||||||
|
key_serializer=lambda k: k.encode('utf-8') if isinstance(k, str) else None,
|
||||||
|
value_serializer=lambda v: v.encode('utf-8') if isinstance(v, str) else None
|
||||||
|
)
|
||||||
|
producer.send(kafka_topic, key=self.producerMessageKey, value=result_json)
|
||||||
|
producer.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def __getMetadata(self, titles: List[str]) -> Metadata | None:
|
||||||
|
mal = Mal(titles=titles)
|
||||||
|
anii = Anii(titles=titles)
|
||||||
|
imdb = Imdb(titles=titles)
|
||||||
|
|
||||||
|
results: List[Metadata] = [
|
||||||
|
mal.search(),
|
||||||
|
anii.search(),
|
||||||
|
imdb.search()
|
||||||
|
]
|
||||||
|
filtered_results = [result for result in results if result is not None]
|
||||||
|
logger.info("Simple matcher")
|
||||||
|
simpleSelector = SimpleMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
||||||
|
logger.info("Advanced matcher")
|
||||||
|
advancedSelector = AdvancedMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
||||||
|
logger.info("Prefrix matcher")
|
||||||
|
prefixSelector = PrefixMatcher(titles=titles, metadata=filtered_results).getBestMatch()
|
||||||
|
if simpleSelector is not None:
|
||||||
|
return simpleSelector
|
||||||
|
if advancedSelector is not None:
|
||||||
|
return advancedSelector
|
||||||
|
if prefixSelector is not None:
|
||||||
|
return prefixSelector
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# Global variabel for å indikere om applikasjonen skal avsluttes
|
# Global variabel for å indikere om applikasjonen skal avsluttes
|
||||||
should_stop = False
|
should_stop = False
|
||||||
|
|||||||
@ -6,3 +6,4 @@ requests>=2.31.0
|
|||||||
python-Levenshtein>=0.21.1
|
python-Levenshtein>=0.21.1
|
||||||
mal-api>=0.5.3
|
mal-api>=0.5.3
|
||||||
Unidecode>=1.3.8
|
Unidecode>=1.3.8
|
||||||
|
tabulate>=0.9.0
|
||||||
@ -1,40 +1,83 @@
|
|||||||
|
import logging, sys
|
||||||
|
import hashlib
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
from clazz.Metadata import Metadata, Summary
|
||||||
|
from .source import SourceBase
|
||||||
|
|
||||||
from AnilistPython import Anilist
|
from AnilistPython import Anilist
|
||||||
from .result import Metadata, DataResult, Summary
|
|
||||||
|
|
||||||
class metadata():
|
log = logging.getLogger(__name__)
|
||||||
name: str = None
|
|
||||||
anilist = Anilist()
|
|
||||||
|
|
||||||
def __init__(self, name) -> None:
|
class Anii(SourceBase):
|
||||||
self.name = name
|
|
||||||
|
|
||||||
def lookup(self) -> DataResult:
|
def __init__(self, titles: List[str]) -> None:
|
||||||
""""""
|
super().__init__(titles)
|
||||||
|
|
||||||
|
def search(self) -> Metadata | None:
|
||||||
|
idToTitle: dict[str, str] = {}
|
||||||
|
results: dict[str, str] = {}
|
||||||
try:
|
try:
|
||||||
result = self.anilist.get_anime(self.name)
|
for title in self.titles:
|
||||||
|
try:
|
||||||
|
result = Anilist().get_anime(title)
|
||||||
|
if result:
|
||||||
|
_title = result.get("name_english", None)
|
||||||
|
givenId = self.generate_id(_title)
|
||||||
|
idToTitle[givenId] = _title
|
||||||
|
results[givenId] = result
|
||||||
|
except IndexError as notFound:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
except IndexError as notFound:
|
||||||
|
self.logNoMatch("Anii", titles=self.titles)
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
|
||||||
meta = Metadata(
|
|
||||||
|
if not idToTitle or not results:
|
||||||
|
self.logNoMatch("Anii", titles=self.titles)
|
||||||
|
return None
|
||||||
|
|
||||||
|
best_match_id, best_match_title = self.findBestMatchAcrossTitles(idToTitle, self.titles)
|
||||||
|
|
||||||
|
return self.__getMetadata(results[best_match_id])
|
||||||
|
|
||||||
|
def queryIds(self, title: str) -> dict[str, str]:
|
||||||
|
return super().queryIds(title)
|
||||||
|
|
||||||
|
|
||||||
|
def __getMetadata(self, result: dict) -> Metadata:
|
||||||
|
try:
|
||||||
|
summary = result.get("desc", None)
|
||||||
|
return Metadata(
|
||||||
title = result.get("name_english", None),
|
title = result.get("name_english", None),
|
||||||
altTitle = [result.get("name_romaji", [])],
|
altTitle = [result.get("name_romaji", [])],
|
||||||
cover = result.get("cover_image", None),
|
cover = result.get("cover_image", None),
|
||||||
summary = [
|
banner = None,
|
||||||
|
summary = [] if summary is None else [
|
||||||
Summary(
|
Summary(
|
||||||
language = "eng",
|
language = "eng",
|
||||||
summary = result.get("desc", None)
|
summary = summary
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
type = 'movie' if result.get('airing_format', '').lower() == 'movie' else 'serie',
|
type = self.getMediaType(result.get('airing_format', '')),
|
||||||
genres = result.get('genres', []),
|
genres = result.get('genres', []),
|
||||||
source="anii",
|
source="anii",
|
||||||
usedTitle=self.name
|
|
||||||
)
|
)
|
||||||
if (meta.title is None) or (meta.type is None):
|
|
||||||
return DataResult(status="COMPLETED", message= None, data= None)
|
|
||||||
|
|
||||||
return DataResult(status="COMPLETED", message= None, data=meta)
|
|
||||||
|
|
||||||
except IndexError as ingore:
|
|
||||||
return DataResult(status="COMPLETED", message=f"No result for {self.name}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return DataResult(status="ERROR", message=str(e))
|
log.exception(e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def generate_id(self, text: str):
|
||||||
|
return hashlib.md5(text.encode()).hexdigest()
|
||||||
|
|
||||||
|
def getMediaType(self, type: str) -> str:
|
||||||
|
return 'movie' if type.lower() == 'movie' else 'serie'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,14 +0,0 @@
|
|||||||
from typing import Optional
|
|
||||||
from .result import DataResult
|
|
||||||
|
|
||||||
|
|
||||||
class ResultCache:
|
|
||||||
_cache = {}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def add(cls, title: str, result: DataResult):
|
|
||||||
cls._cache[title] = result
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get(cls, title) -> Optional[DataResult]:
|
|
||||||
return cls._cache.get(title)
|
|
||||||
@ -1,38 +1,70 @@
|
|||||||
import imdb
|
import logging
|
||||||
from .result import Metadata, DataResult, Summary
|
from imdb import Cinemagoer
|
||||||
|
from imdb.Movie import Movie
|
||||||
|
|
||||||
class metadata():
|
from typing import List
|
||||||
name: str = None
|
|
||||||
imdb = imdb.Cinemagoer()
|
|
||||||
|
|
||||||
def __init__(self, name) -> None:
|
from clazz.Metadata import Metadata, Summary
|
||||||
self.name = name
|
from .source import SourceBase
|
||||||
|
|
||||||
|
|
||||||
def lookup(self) -> DataResult:
|
log = logging.getLogger(__name__)
|
||||||
""""""
|
|
||||||
|
class Imdb(SourceBase):
|
||||||
|
|
||||||
|
def __init__(self, titles: List[str]) -> None:
|
||||||
|
super().__init__(titles)
|
||||||
|
|
||||||
|
def search(self) -> Metadata | None:
|
||||||
|
idToTitle: dict[str, str] = {}
|
||||||
|
for title in self.titles:
|
||||||
|
receivedIds = self.queryIds(title)
|
||||||
|
for id, title in receivedIds.items():
|
||||||
|
idToTitle[id] = title
|
||||||
|
|
||||||
|
if not idToTitle:
|
||||||
|
self.logNoMatch("Imdb", titles=self.titles)
|
||||||
|
return None
|
||||||
|
|
||||||
|
best_match_id, best_match_title = self.findBestMatchAcrossTitles(idToTitle, self.titles)
|
||||||
|
|
||||||
|
return self.__getMetadata(best_match_id)
|
||||||
|
|
||||||
|
def queryIds(self, title: str) -> dict[str, str]:
|
||||||
|
idToTitle: dict[str, str] = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query = self.imdb.search_movie(self.name)
|
search = Cinemagoer().search_movie(title)
|
||||||
imdbId = query[0].movieID
|
cappedResult: List[Movie] = search[:5]
|
||||||
result = self.imdb.get_movie(imdbId)
|
usable: List[Movie] = [found for found in cappedResult if self.isMatchOrPartial("Imdb", title, found._getitem("title"))]
|
||||||
meta = Metadata(
|
for item in usable:
|
||||||
|
idToTitle[item.movieID] = item._getitem("title")
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
return idToTitle
|
||||||
|
|
||||||
|
def __getMetadata(self, id: str) -> Metadata | None:
|
||||||
|
try:
|
||||||
|
result = Cinemagoer().get_movie(id)
|
||||||
|
summary = result.get("plot outline", None)
|
||||||
|
return Metadata(
|
||||||
title = result.get("title", None),
|
title = result.get("title", None),
|
||||||
altTitle = [result.get("localized title", [])],
|
altTitle = [result.get("localized title", [])],
|
||||||
cover = result.get("cover url", None),
|
cover = result.get("cover url", None),
|
||||||
summary = [
|
banner = None,
|
||||||
|
summary = [] if summary is None else [
|
||||||
Summary(
|
Summary(
|
||||||
language = "eng",
|
language = "eng",
|
||||||
summary = result.get("plot outline", None)
|
summary = summary
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
type = 'movie' if result.get('kind', '').lower() == 'movie' else 'serie',
|
type = self.getMediaType(result.get('kind', '')),
|
||||||
genres = result.get('genres', []),
|
genres = result.get('genres', []),
|
||||||
source="imdb",
|
source="imdb",
|
||||||
usedTitle=self.name
|
|
||||||
)
|
)
|
||||||
if (meta.title is None) or (meta.type is None):
|
|
||||||
return DataResult(status="COMPLETED", message= None, data= None)
|
|
||||||
|
|
||||||
return DataResult(status="COMPLETED", message= None, data= meta)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return DataResult(status="ERROR", data=None, message=str(e))
|
log.exception(e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getMediaType(self, type: str) -> str:
|
||||||
|
return 'movie' if type.lower() == 'movie' else 'serie'
|
||||||
@ -1,36 +1,71 @@
|
|||||||
from mal import *
|
import logging, sys
|
||||||
from .result import Metadata, DataResult, Summary
|
from typing import List
|
||||||
|
|
||||||
class metadata():
|
from clazz.Metadata import Metadata, Summary
|
||||||
name: str = None
|
from .source import SourceBase
|
||||||
|
|
||||||
def __init__(self, name: str) -> None:
|
from mal import Anime, AnimeSearch, AnimeSearchResult
|
||||||
self.name = name
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Mal(SourceBase):
|
||||||
|
""""""
|
||||||
|
def __init__(self, titles: List[str]) -> None:
|
||||||
|
super().__init__(titles)
|
||||||
|
|
||||||
|
def search(self) -> Metadata | None:
|
||||||
|
idToTitle: dict[str, str] = {}
|
||||||
|
|
||||||
|
for title in self.titles:
|
||||||
|
receivedIds = self.queryIds(title)
|
||||||
|
for id, title in receivedIds.items():
|
||||||
|
idToTitle[id] = title
|
||||||
|
|
||||||
|
if not idToTitle:
|
||||||
|
self.logNoMatch("MAL", titles=self.titles)
|
||||||
|
return None
|
||||||
|
|
||||||
|
best_match_id, best_match_title = self.findBestMatchAcrossTitles(idToTitle, self.titles)
|
||||||
|
|
||||||
|
return self.__getMetadata(best_match_id)
|
||||||
|
|
||||||
|
def queryIds(self, title: str) -> dict[str, str]:
|
||||||
|
idToTitle: dict[str, str] = {}
|
||||||
|
|
||||||
def lookup(self) -> DataResult:
|
|
||||||
try:
|
try:
|
||||||
search = AnimeSearch(self.name)
|
search = AnimeSearch(title)
|
||||||
if (len(search.results) == 0):
|
cappedResult: List[AnimeSearchResult] = search.results[:5]
|
||||||
return DataResult(status="SKIPPED", message="No results")
|
usable: List[AnimeSearchResult] = [found for found in cappedResult if self.isMatchOrPartial("MAL", title, found.title)]
|
||||||
anime = Anime(search.results[0].mal_id)
|
for item in usable:
|
||||||
meta = Metadata(
|
log.info(f"malId: {item.mal_id} to {item.title}")
|
||||||
title = anime.title,
|
idToTitle[item.mal_id] = item.title
|
||||||
altTitle = [altName for altName in [anime.title_english, *anime.title_synonyms] if altName],
|
|
||||||
cover = anime.image_url,
|
|
||||||
summary = [
|
|
||||||
Summary(
|
|
||||||
language = "eng",
|
|
||||||
summary = anime.synopsis
|
|
||||||
)
|
|
||||||
],
|
|
||||||
type = 'movie' if anime.type.lower() == 'movie' else 'serie',
|
|
||||||
genres = anime.genres,
|
|
||||||
source="mal",
|
|
||||||
usedTitle=self.name
|
|
||||||
)
|
|
||||||
if (meta.title is None) or (meta.type is None):
|
|
||||||
return DataResult(status="COMPLETED", message = None, data = None)
|
|
||||||
|
|
||||||
return DataResult(status = "COMPLETED", message = None, data = meta)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return DataResult(status="ERROR", message=str(e))
|
log.exception(e)
|
||||||
|
return idToTitle
|
||||||
|
|
||||||
|
def __getMetadata(self, id: str):
|
||||||
|
try:
|
||||||
|
anime = Anime(id)
|
||||||
|
return Metadata(
|
||||||
|
title = anime.title,
|
||||||
|
altTitle = [altName for altName in [anime.title_english, *anime.title_synonyms] if altName],
|
||||||
|
cover = anime.image_url,
|
||||||
|
banner = None,
|
||||||
|
summary = [] if anime.synopsis is None else [
|
||||||
|
Summary(
|
||||||
|
language = "eng",
|
||||||
|
summary = anime.synopsis
|
||||||
|
)
|
||||||
|
],
|
||||||
|
type = self.getMediaType(anime.type),
|
||||||
|
genres = anime.genres,
|
||||||
|
source="mal",
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getMediaType(self, type: str) -> str:
|
||||||
|
return 'movie' if type.lower() == 'movie' else 'serie'
|
||||||
@ -1,33 +0,0 @@
|
|||||||
from typing import List, Optional
|
|
||||||
from dataclasses import dataclass, asdict
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Summary:
|
|
||||||
summary: str
|
|
||||||
language: str
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Metadata:
|
|
||||||
title: str
|
|
||||||
altTitle: List[str]
|
|
||||||
cover: str
|
|
||||||
type: str # Serie/Movie
|
|
||||||
summary: List[Summary]
|
|
||||||
genres: List[str]
|
|
||||||
source: str
|
|
||||||
usedTitle: str
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class DataResult:
|
|
||||||
status: str # COMPLETED / ERROR
|
|
||||||
message: str | None = None
|
|
||||||
data: Metadata = None
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return asdict(self)
|
|
||||||
@ -1,112 +0,0 @@
|
|||||||
import logging
|
|
||||||
from dataclasses import dataclass, asdict
|
|
||||||
from typing import List, Optional
|
|
||||||
from .result import Metadata, DataResult
|
|
||||||
from .anii import metadata as AniiMetadata
|
|
||||||
from .imdb import metadata as ImdbMetadata
|
|
||||||
from .mal import metadata as MalMetadata
|
|
||||||
from fuzzywuzzy import fuzz
|
|
||||||
from unidecode import unidecode
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class WeightedData:
|
|
||||||
result: DataResult
|
|
||||||
weight: int = 1
|
|
||||||
|
|
||||||
def to_dict(self):
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class DataAndScore:
|
|
||||||
result: DataResult = None
|
|
||||||
score: int = 0
|
|
||||||
def to_dict(self):
|
|
||||||
|
|
||||||
return asdict(self)
|
|
||||||
|
|
||||||
class UseSource():
|
|
||||||
title: str
|
|
||||||
eventId: str
|
|
||||||
def __init__(self, title, eventId) -> None:
|
|
||||||
self.title = title
|
|
||||||
self.eventId = eventId
|
|
||||||
|
|
||||||
def stripped(self, input_string) -> str:
|
|
||||||
unitext = unidecode(input_string)
|
|
||||||
unitext = re.sub(r'[^a-zA-Z0-9\s]', ' ', unitext)
|
|
||||||
unitext = re.sub(r'\s{2,}', ' ', unitext)
|
|
||||||
return unitext
|
|
||||||
|
|
||||||
def __perform_search(self, title)-> List[WeightedData]:
|
|
||||||
anii = AniiMetadata(title).lookup()
|
|
||||||
imdb = ImdbMetadata(title).lookup()
|
|
||||||
mal = MalMetadata(title).lookup()
|
|
||||||
|
|
||||||
result: List[WeightedData] = []
|
|
||||||
if (anii is not None) and (anii.status == "COMPLETED" and anii.data is not None):
|
|
||||||
result.append(WeightedData(anii, 1.2))
|
|
||||||
if (imdb is not None) and (imdb.status == "COMPLETED" and imdb.data is not None):
|
|
||||||
imdb_weight = 1
|
|
||||||
if (imdb.data.title == title or self.stripped(imdb.data.title) == self.stripped(title)):
|
|
||||||
imdb_weight = 100
|
|
||||||
result.append(WeightedData(imdb, imdb_weight))
|
|
||||||
if (mal is not None) and (mal.status == "COMPLETED" and mal.data is not None):
|
|
||||||
result.append(WeightedData(mal, 1.8))
|
|
||||||
return result
|
|
||||||
|
|
||||||
def __calculate_score(self, title: str, weightData: List[WeightedData]) -> List[DataAndScore]:
|
|
||||||
""""""
|
|
||||||
result: List[WeightedData] = []
|
|
||||||
for wd in weightData:
|
|
||||||
highScore = fuzz.ratio(self.stripped(title.lower()), self.stripped(wd.result.data.title.lower()))
|
|
||||||
logger.info(f"[H:{highScore}]\t{self.stripped(wd.result.data.title.lower())} => {self.stripped(title.lower())}")
|
|
||||||
for name in wd.result.data.altTitle:
|
|
||||||
altScore = fuzz.ratio(self.stripped(title.lower()), self.stripped(name.lower()))
|
|
||||||
if (altScore > highScore):
|
|
||||||
highScore = altScore
|
|
||||||
logger.info(f"[A:{highScore}]\t{self.stripped(wd.result.data.title.lower())} => {self.stripped(title.lower())}")
|
|
||||||
givenScore = highScore * wd.weight
|
|
||||||
logger.info(f"[G:{givenScore}]\t{self.stripped(wd.result.data.title.lower())} => {self.stripped(title.lower())} Weight:{wd.weight}")
|
|
||||||
result.append(DataAndScore(wd.result, givenScore))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def select_result(self) -> Optional[DataResult]:
|
|
||||||
""""""
|
|
||||||
result = self.__perform_search(title=self.title)
|
|
||||||
|
|
||||||
scoredResult = self.__calculate_score(title=self.title, weightData=result)
|
|
||||||
|
|
||||||
scoredResult.sort(key=lambda x: x.score, reverse=True)
|
|
||||||
|
|
||||||
selected: DataResult|None = scoredResult[0].result if len(scoredResult) > 0 else None
|
|
||||||
|
|
||||||
jsr = ""
|
|
||||||
try:
|
|
||||||
jsr = json.dumps([obj.to_dict() for obj in scoredResult], indent=4)
|
|
||||||
with open(f"./logs/{self.eventId}-{self.title}.json", "w", encoding="utf-8") as f:
|
|
||||||
f.write(jsr)
|
|
||||||
except Exception as e:
|
|
||||||
logger.info("Couldn't dump log..")
|
|
||||||
logger.error(e)
|
|
||||||
logger.info(jsr)
|
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
titles: List[str] = []
|
|
||||||
for wd in scoredResult:
|
|
||||||
titles.append(wd.result.data.title)
|
|
||||||
titles.extend(wd.result.data.altTitle)
|
|
||||||
joinedTitles = "\n\t" + "\n\t".join(titles)
|
|
||||||
logger.info(f"\nName: {self.title} \n \nFound: {joinedTitles} \nTitle selected: \n\t{selected.data.title} \n")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(e)
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Return the result with the highest score (most likely result)
|
|
||||||
return selected
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user