| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- #!/usr/bin/env python3
- # Copyright Penta (c) 2018/2020 - Under BSD License
- # Compatible for Python 3.6.X
- #
- # Check and update all the thumbnails for manga/anime in the MyAnimeBot database.
- # This can take a long time and sends a lot of requests to MyAnimeList.net,
- # so use it only once in a while to clean the database.
- #
- # Dependencies (for CentOS 7):
- # yum install python3 mariadb-devel gcc python3-devel
- # python3.6 -m pip install --upgrade pip
- # pip3.6 install mysql python-dateutil asyncio html2text bs4 aiodns cchardet configparser
- # pip3.6 install mysql.connector
- # Library import
- import logging
- import os
- import re
- import asyncio
- import urllib.request
- import mysql.connector as mariadb
- import string
- import time
- import socket
- from html2text import HTML2Text
- from bs4 import BeautifulSoup
- from configparser import ConfigParser
- # Custom library
- import utils
class ImproperlyConfigured(Exception):
    '''
    Raised when no usable configuration file can be located.
    '''
# Directory containing this script, and the current user's home directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
HOME_DIR = os.path.expanduser("~")

# Candidate configuration files, listed in lookup order: current working
# directory, script directory, system-wide location, then the home directory.
DEFAULT_CONFIG_PATHS = [
    "myanimebot.conf",
    os.path.join(BASE_DIR, "myanimebot.conf"),
    "/etc/malbot/myanimebot.conf",
    os.path.join(HOME_DIR, "myanimebot.conf"),
]
def get_config():
    '''
    Load the first existing configuration file from DEFAULT_CONFIG_PATHS.

    Returns:
        A ConfigParser populated from the first candidate path that is a file.

    Raises:
        ImproperlyConfigured: when none of the candidate paths is a file.
    '''
    # Only the first match is read; later candidates are ignored.
    first_found = next(
        (candidate for candidate in DEFAULT_CONFIG_PATHS if os.path.isfile(candidate)),
        None,
    )
    if first_found is None:
        raise ImproperlyConfigured("No configuration file found")

    parser = ConfigParser()
    parser.read([first_found])
    return parser
# Loading configuration
try:
    config = get_config()
except Exception as e:
    print("Cannot read configuration: " + str(e))
    # `exit` is an interactive helper injected by the site module and is not
    # meant for programs; raising SystemExit gives the same exit status.
    raise SystemExit(1)

# A file without the expected section would otherwise surface as a bare
# KeyError traceback; report it like any other configuration problem.
if not config.has_section("MYANIMEBOT"):
    print("Cannot read configuration: missing [MYANIMEBOT] section")
    raise SystemExit(1)

CONFIG = config["MYANIMEBOT"]

# Settings read from the [MYANIMEBOT] section; every option except the
# database password falls back to a default when absent.
logLevel = CONFIG.get("logLevel", "INFO")
dbHost = CONFIG.get("dbHost", "127.0.0.1")
dbUser = CONFIG.get("dbUser", "myanimebot")
dbPassword = CONFIG.get("dbPassword")  # None when not configured
dbName = CONFIG.get("dbName", "myanimebot")
logPath = CONFIG.get("logPath", "myanimebot.log")
# Logging handler that mirrors log records into the database
class LogDBHandler(logging.Handler):
    '''
    Customized logging handler that writes each log record to the t_logs table.

    The connection and cursor are supplied by the caller; the handler only
    needs ``sql_cursor.execute(sql, params)`` and ``sql_conn.commit()``.
    '''

    def __init__(self, sql_conn, sql_cursor):
        '''
        Keep the connection/cursor pair used for every insert.

        sql_conn   -- connection object, committed after each insert
        sql_cursor -- cursor object used to run the INSERT statement
        '''
        logging.Handler.__init__(self)
        self.sql_cursor = sql_cursor
        self.sql_conn = sql_conn

    def emit(self, record):
        '''
        Insert one log record (host, level, level name, message, logger name).

        Failures are reported on stdout and never propagate, so a database
        problem cannot break the code that is logging.
        '''
        try:
            # getMessage() merges the %-style arguments into the message and
            # also copes with non-string messages (e.g. exception objects).
            # No manual quote escaping: the query below is parameterized, so
            # doubling apostrophes would corrupt the stored text.
            self.log_msg = record.getMessage().strip()

            # Make the SQL insert
            self.sql_cursor.execute("INSERT INTO t_logs (host, level, type, log, date, source) VALUES (%s, %s, %s, %s, NOW(), %s)", (str(socket.gethostname()), str(record.levelno), str(record.levelname), self.log_msg, str(record.name)))
            self.sql_conn.commit()
        except Exception as e:
            print('Error while logging into DB: ' + str(e))
# Log configuration
log_format = '%(asctime)-13s : %(name)-15s : %(levelname)-8s : %(message)s'

# Root logger: append to the UTF-8 log file at the configured level.
logging.basicConfig(
    level=logLevel,
    format=log_format,
    handlers=[logging.FileHandler(logPath, mode='a', encoding='utf-8')],
)

# Logger used by this script.
logger = logging.getLogger("thumbnailer")
logger.setLevel(logLevel)

# Console output: same format as the file, limited to INFO and above.
console = logging.StreamHandler()
console.setFormatter(logging.Formatter(log_format))
console.setLevel(logging.INFO)
logging.getLogger().addHandler(console)

# Script version
VERSION = "1.1"

logger.info("Booting the MyAnimeBot Thumbnail Refresher " + VERSION + "...")
# Initialization of the database
try:
    # Main connection, used by main() to read and update t_animes.
    conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)

    # We initialize the logs into the DB, on a dedicated connection so that
    # committing a log line never commits pending work on the main connection.
    log_conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)
    log_cursor = log_conn.cursor()
    logdb = LogDBHandler(log_conn, log_cursor)
    logging.getLogger('').addHandler(logdb)
except Exception as e:
    logger.critical("Can't connect to the database: " + str(e))

    # This script has no HTTP client to close (the former `httpclient.close()`
    # call referenced an undefined name and raised NameError here). Stop with
    # a non-zero status so callers can detect the failure.
    raise SystemExit(1)
def _thumbnail_still_reachable(url):
    '''
    Return True when `url` can still be opened, False on any error.
    '''
    try:
        # The response is opened only to probe availability; the context
        # manager closes it immediately instead of leaking the connection.
        with urllib.request.urlopen(url):
            return True
    except Exception:
        # Best effort: any failure (HTTP error, malformed or empty URL, ...)
        # counts as "not reachable". KeyboardInterrupt/SystemExit are not
        # caught, so the script can still be interrupted.
        return False


def main():
    '''
    Check the stored thumbnail of every row of t_animes and update changed ones.

    For each (guid, title, thumbnail) row, utils.getThumbnail(guid) is called
    (presumably it fetches the current thumbnail URL from MyAnimeList; see
    the utils module) and compared with the stored value:
      - identical: nothing to do;
      - different and non-empty: the row is updated and committed;
      - different and empty: the stored value is kept, and the old URL is
        probed only to log whether it is still reachable.
    An error on one row is logged and does not stop the run. The loop sleeps
    3 seconds after each row.
    '''
    logger.info("Starting the refresher task...")

    # Measured locally so main() does not depend on a global that only exists
    # when the file is executed as a script.
    started = time.monotonic()
    count = 0

    cursor = conn.cursor(buffered=True)
    try:
        cursor.execute("SELECT guid, title, thumbnail FROM t_animes")
        datas = cursor.fetchall()

        logger.info(str(len(datas)) + " medias are going to be checked.")
        for guid, title, thumbnail in datas:
            title = str(title)
            try:
                image = utils.getThumbnail(guid)

                if image == thumbnail:
                    if image != "":
                        logger.debug("Thumbnail for " + title + " already up to date.")
                    else:
                        logger.info("Thumbnail for " + title + " still empty.")
                elif image != "":
                    cursor.execute("UPDATE t_animes SET thumbnail = %s WHERE guid = %s", [image, guid])
                    conn.commit()

                    # The message is built eagerly and passed without logging
                    # arguments, so a '%' inside a title cannot be mistaken
                    # for a format directive.
                    logger.info("Updated thumbnail found for \"" + title + "\": " + str(image))
                    count += 1
                elif _thumbnail_still_reachable(thumbnail):
                    logger.info("Thumbnail for \"" + title + "\" is now empty, avoiding change.")
                else:
                    logger.info("Thumbnail for \"" + title + "\" has been deleted!")
            except Exception as e:
                logger.warning("Error while updating thumbnail for '" + title + "': " + str(e))

            # Pause between rows.
            time.sleep(3)

        logger.info("All thumbnails checked!")
    finally:
        # Release the cursor even when the SELECT or the loop fails.
        cursor.close()

    logger.info(str(count) + " new thumbnails, time taken: %ss" % round(time.monotonic() - started, 2))
# Starting main function
if __name__ == "__main__":
    # Kept as a module-level name for code that reads the script start time.
    startTime = time.time()
    try:
        main()
        logger.info("Thumbnail refresher script stopped")
    finally:
        # We close all the resources, even when main() fails. The DB log
        # handler is detached first so nothing is logged through a closed
        # connection.
        logging.getLogger('').removeHandler(logdb)
        log_cursor.close()
        log_conn.close()
        conn.close()
|