#!/usr/bin/env python3
# Copyright Penta (c) 2018/2020 - Under BSD License
# Compatible for Python 3.6.X
#
# Check and update all the thumbnails for manga/anime in the MyAnimeBot database.
# Can be pretty long and send a lot of requests to MyAnimeList.net,
# use it only once in a while to clean the database.
#
# Dependencies (for CentOS 7):
# yum install python3 mariadb-devel gcc python3-devel
# python3.6 -m pip install --upgrade pip
# pip3.6 install mysql python-dateutil asyncio html2text bs4 aiodns cchardet configparser
# pip3.6 install mysql.connector

# Library import
import logging
import os
import re
import asyncio
import urllib.request
import mysql.connector as mariadb
import string
import time
import socket
import sys

from html2text import HTML2Text
from bs4 import BeautifulSoup
from configparser import ConfigParser

# Custom library
import utils


class ImproperlyConfigured(Exception):
    """Raised when no usable configuration file can be located."""


BASE_DIR = os.path.dirname(os.path.abspath(__file__))
HOME_DIR = os.path.expanduser("~")

# Candidate configuration files, checked in order; the first existing one wins.
DEFAULT_CONFIG_PATHS = [
    "myanimebot.conf",
    os.path.join(BASE_DIR, "myanimebot.conf"),
    "/etc/malbot/myanimebot.conf",
    os.path.join(HOME_DIR, "myanimebot.conf"),
]

# Seconds to wait between two medias, to avoid flooding the remote site.
REQUEST_DELAY = 3

# Seconds before giving up when probing an existing thumbnail URL.
URL_TIMEOUT = 10


def get_config():
    """Load the first configuration file found in DEFAULT_CONFIG_PATHS.

    Returns:
        A ConfigParser populated from the first existing candidate file.

    Raises:
        ImproperlyConfigured: if none of the candidate files exists.
    """
    for path in DEFAULT_CONFIG_PATHS:
        if os.path.isfile(path):
            break
    else:
        # The loop finished without finding any existing file.
        raise ImproperlyConfigured("No configuration file found")

    config = ConfigParser()
    config.read([path])
    return config


# Loading configuration
try:
    config = get_config()
    # Looked up inside the try so a missing section is reported cleanly
    # instead of surfacing as an unhandled KeyError.
    CONFIG = config["MYANIMEBOT"]
except Exception as e:
    print("Cannot read configuration: " + str(e))
    sys.exit(1)

logLevel = CONFIG.get("logLevel", "INFO")
dbHost = CONFIG.get("dbHost", "127.0.0.1")
dbUser = CONFIG.get("dbUser", "myanimebot")
dbPassword = CONFIG.get("dbPassword")
dbName = CONFIG.get("dbName", "myanimebot")
logPath = CONFIG.get("logPath", "myanimebot.log")


# Class that sends logs to the DB
class LogDBHandler(logging.Handler):
    """Logging handler that stores every record in the t_logs table.

    The handler is given an already opened DB-API connection and cursor
    (mysql.connector in this script) and commits after each insert.
    """

    def __init__(self, sql_conn, sql_cursor):
        logging.Handler.__init__(self)
        self.sql_cursor = sql_cursor
        self.sql_conn = sql_conn

    def emit(self, record):
        """Insert *record* into t_logs; never raises.

        Failures are printed rather than logged, since logging from inside
        a handler would re-enter this handler.
        """
        try:
            # getMessage() merges lazy %-args and copes with non-string
            # messages. No manual quote escaping: the query is parameterized,
            # so the driver escapes values itself.
            log_msg = record.getMessage().strip()
            self.sql_cursor.execute(
                "INSERT INTO t_logs (host, level, type, log, date, source) VALUES (%s, %s, %s, %s, NOW(), %s)",
                (
                    str(socket.gethostname()),
                    str(record.levelno),
                    str(record.levelname),
                    log_msg,
                    str(record.name),
                ),
            )
            self.sql_conn.commit()
        except Exception as e:
            print('Error while logging into DB: ' + str(e))


# Log configuration
log_format = '%(asctime)-13s : %(name)-15s : %(levelname)-8s : %(message)s'
logging.basicConfig(handlers=[logging.FileHandler(logPath, 'a', 'utf-8')], format=log_format, level=logLevel)

console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(logging.Formatter(log_format))

logger = logging.getLogger("thumbnailer")
logger.setLevel(logLevel)

logging.getLogger('').addHandler(console)

# Script version
VERSION = "1.1"

logger.info("Booting the MyAnimeBot Thumbnail Refresher " + VERSION + "...")

# Initialization of the database
try:
    conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)

    # The DB log handler gets its own connection so that its commits never
    # interfere with the transaction state of the main connection.
    log_conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)
    log_cursor = log_conn.cursor()
    logdb = LogDBHandler(log_conn, log_cursor)
    logging.getLogger('').addHandler(logdb)
except Exception as e:
    logger.critical("Can't connect to the database: " + str(e))
    sys.exit(1)


def _url_is_reachable(url):
    """Return True if *url* can be opened, False on any failure."""
    try:
        # The context manager closes the HTTP response right away.
        with urllib.request.urlopen(url, timeout=URL_TIMEOUT):
            return True
    except Exception:
        # Covers network/HTTP errors as well as empty or malformed URLs.
        return False


def _refresh_thumbnail(cursor, guid, title, current):
    """Compare the stored thumbnail of one media with the freshly fetched one.

    Args:
        cursor: open cursor on the main connection, used for the UPDATE.
        guid: value of the guid column, passed to utils.getThumbnail.
        title: media title, used in log messages only.
        current: thumbnail value currently stored in the database.

    Returns:
        True if the database row was updated, False otherwise.
    """
    image = utils.getThumbnail(guid)

    if image == current:
        if image != "":
            logger.debug("Thumbnail for %s already up to date.", title)
        else:
            logger.info("Thumbnail for %s still empty.", title)
        return False

    if image != "":
        cursor.execute("UPDATE t_animes SET thumbnail = %s WHERE guid = %s", [image, guid])
        conn.commit()
        logger.info("Updated thumbnail found for \"%s\": %s", title, image)
        return True

    # Nothing was fetched: keep the stored value and only report whether
    # the stored URL still answers.
    if _url_is_reachable(current):
        logger.info("Thumbnail for \"%s\" is now empty, avoiding change.", title)
    else:
        logger.info("Thumbnail for \"%s\" has been deleted!", title)
    return False


def main():
    """Check the thumbnail of every row in t_animes and update changed ones.

    Errors on a single media are logged as warnings and do not stop the run.
    """
    logger.info("Starting the refresher task...")

    # Timed locally so main() does not depend on a global that only exists
    # when the file is run as a script.
    start_time = time.time()
    count = 0

    cursor = conn.cursor(buffered=True)
    try:
        cursor.execute("SELECT guid, title, thumbnail FROM t_animes")
        datas = cursor.fetchall()

        logger.info(str(len(datas)) + " medias are going to be checked.")

        for guid, title, thumbnail in datas:
            try:
                if _refresh_thumbnail(cursor, guid, title, thumbnail):
                    count += 1
            except Exception as e:
                logger.warning("Error while updating thumbnail for '%s': %s", title, e)

            # Throttle between medias.
            # NOTE(review): the original indentation was lost; the sleep is
            # assumed to apply to every iteration - confirm.
            time.sleep(REQUEST_DELAY)

        logger.info("All thumbnails checked!")
    finally:
        cursor.close()

    logger.info("%s new thumbnails, time taken: %ss", count, round(time.time() - start_time, 2))


# Starting main function
if __name__ == "__main__":
    try:
        main()
        logger.info("Thumbnail refresher script stopped")
    finally:
        # We close all the resources. The DB handler is detached first so
        # that no later record is written through a closed connection.
        logging.getLogger('').removeHandler(logdb)
        log_cursor.close()
        log_conn.close()
        conn.close()