malbot-refresh-thumbnail.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #!/usr/bin/env python3
  2. # Copyright Penta (c) 2018/2020 - Under BSD License
  3. # Compatible for Python 3.6.X
  4. #
  5. # Check and update all the thumbnails for manga/anime in the MyAnimeBot database.
  6. # This can take a long time and sends a lot of requests to MyAnimeList.net,
  7. # so use it only once in a while to clean the database.
  8. #
  9. # Dependencies (for CentOS 7):
  10. # yum install python3 mariadb-devel gcc python3-devel
  11. # python3.6 -m pip install --upgrade pip
  12. # pip3.6 install mysql python-dateutil asyncio html2text bs4 aiodns cchardet configparser
  13. # pip3.6 install mysql.connector
  14. # Library import
  15. import logging
  16. import os
  17. import re
  18. import asyncio
  19. import urllib.request
  20. import mysql.connector as mariadb
  21. import string
  22. import time
  23. import socket
  24. from html2text import HTML2Text
  25. from bs4 import BeautifulSoup
  26. from configparser import ConfigParser
  27. # Custom library
  28. import utils
  29. class ImproperlyConfigured(Exception): pass
  30. BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  31. HOME_DIR = os.path.expanduser("~")
  32. DEFAULT_CONFIG_PATHS = [
  33. os.path.join("myanimebot.conf"),
  34. os.path.join(BASE_DIR, "myanimebot.conf"),
  35. os.path.join("/etc/malbot/myanimebot.conf"),
  36. os.path.join(HOME_DIR, "myanimebot.conf")
  37. ]
  38. def get_config():
  39. config = ConfigParser()
  40. config_paths = []
  41. for path in DEFAULT_CONFIG_PATHS:
  42. if os.path.isfile(path):
  43. config_paths.append(path)
  44. break
  45. else: raise ImproperlyConfigured("No configuration file found")
  46. config.read(config_paths)
  47. return config
  48. # Loading configuration
  49. try:
  50. config=get_config()
  51. except Exception as e:
  52. print ("Cannot read configuration: " + str(e))
  53. exit (1)
  54. CONFIG=config["MYANIMEBOT"]
  55. logLevel=CONFIG.get("logLevel", "INFO")
  56. dbHost=CONFIG.get("dbHost", "127.0.0.1")
  57. dbUser=CONFIG.get("dbUser", "myanimebot")
  58. dbPassword=CONFIG.get("dbPassword")
  59. dbName=CONFIG.get("dbName", "myanimebot")
  60. logPath=CONFIG.get("logPath", "myanimebot.log")
  61. # class that send logs to DB
  62. class LogDBHandler(logging.Handler):
  63. '''
  64. Customized logging handler that puts logs to the database.
  65. pymssql required
  66. '''
  67. def __init__(self, sql_conn, sql_cursor):
  68. logging.Handler.__init__(self)
  69. self.sql_cursor = sql_cursor
  70. self.sql_conn = sql_conn
  71. def emit(self, record):
  72. # Clear the log message so it can be put to db via sql (escape quotes)
  73. self.log_msg = str(record.msg.strip().replace('\'', '\'\''))
  74. # Make the SQL insert
  75. try:
  76. self.sql_cursor.execute("INSERT INTO t_logs (host, level, type, log, date, source) VALUES (%s, %s, %s, %s, NOW(), %s)", (str(socket.gethostname()), str(record.levelno), str(record.levelname), self.log_msg, str(record.name)))
  77. self.sql_conn.commit()
  78. except Exception as e:
  79. print ('Error while logging into DB: ' + str(e))
  80. # Log configuration
  81. log_format='%(asctime)-13s : %(name)-15s : %(levelname)-8s : %(message)s'
  82. logging.basicConfig(handlers=[logging.FileHandler(logPath, 'a', 'utf-8')], format=log_format, level=logLevel)
  83. console = logging.StreamHandler()
  84. console.setLevel(logging.INFO)
  85. console.setFormatter(logging.Formatter(log_format))
  86. logger = logging.getLogger("thumbnailer")
  87. logger.setLevel(logLevel)
  88. logging.getLogger('').addHandler(console)
  89. # Script version
  90. VERSION = "1.1"
  91. logger.info("Booting the MyAnimeBot Thumbnail Refresher " + VERSION + "...")
  92. # Initialization of the database
  93. try:
  94. conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)
  95. # We initialize the logs into the DB.
  96. log_conn = mariadb.connect(host=dbHost, user=dbUser, password=dbPassword, database=dbName, buffered=True)
  97. log_cursor = log_conn.cursor()
  98. logdb = LogDBHandler(log_conn, log_cursor)
  99. logging.getLogger('').addHandler(logdb)
  100. except Exception as e :
  101. logger.critical("Can't connect to the database: " + str(e))
  102. httpclient.close()
  103. quit()
  104. def main() :
  105. logger.info("Starting the refresher task...")
  106. count = 0
  107. cursor = conn.cursor(buffered=True)
  108. cursor.execute("SELECT guid, title, thumbnail FROM t_animes")
  109. datas = cursor.fetchall()
  110. logger.info(str(len(datas)) + " medias are going to be checked.")
  111. for data in datas:
  112. try:
  113. image = utils.getThumbnail(data[0])
  114. if (image == data[2]) :
  115. if (image != "") :
  116. logger.debug("Thumbnail for " + str(data[1]) + " already up to date.")
  117. else :
  118. logger.info("Thumbnail for " + str(data[1]) + " still empty.")
  119. else :
  120. if (image != "") :
  121. cursor.execute("UPDATE t_animes SET thumbnail = %s WHERE guid = %s", [image, data[0]])
  122. conn.commit()
  123. logger.info("Updated thumbnail found for \"" + str(data[1]) + "\": %s", image)
  124. count += 1
  125. else :
  126. try :
  127. urllib.request.urlopen(data[2])
  128. logger.info("Thumbnail for \"" + str(data[1]) + "\" is now empty, avoiding change.")
  129. except :
  130. logger.info("Thumbnail for \"" + str(data[1]) + "\" has been deleted!")
  131. except Exception as e :
  132. logger.warning("Error while updating thumbnail for '" + str(data[1]) + "': " + str(e))
  133. time.sleep(3)
  134. logger.info("All thumbnails checked!")
  135. cursor.close()
  136. logger.info(str(count) + " new thumbnails, time taken: %ss" % round((time.time() - startTime), 2))
  137. # Starting main function
  138. if __name__ == "__main__" :
  139. startTime = time.time()
  140. main()
  141. logger.info("Thumbnail refresher script stopped")
  142. # We close all the ressources
  143. conn.close()