|
|
@@ -5,6 +5,10 @@ import json
|
|
|
import urllib3
|
|
|
import base64
|
|
|
import re
|
|
|
+import asyncio
|
|
|
+import websockets
|
|
|
+import numpy as np
|
|
|
+import time
|
|
|
|
|
|
from dotenv import load_dotenv
|
|
|
from openai import AsyncOpenAI, OpenAIError
|
|
|
@@ -12,6 +16,11 @@ from PIL import Image
|
|
|
from io import BytesIO
|
|
|
from datetime import datetime, timezone, timedelta
|
|
|
from charset_normalizer import from_bytes
|
|
|
+from enum import Enum
|
|
|
+from discord.sinks import Sink
|
|
|
+from scipy.signal import resample_poly
|
|
|
+
|
|
|
+
|
|
|
from discord.ext import tasks
|
|
|
|
|
|
# Charger les variables d'environnement depuis le fichier .env
|
|
|
@@ -31,11 +40,12 @@ LOG_LEVEL = os.getenv('LOG_LEVEL', "INFO").upper()
|
|
|
HISTORY_ANALYSIS_IMAGE = os.getenv('HISTORY_ANALYSIS_IMAGE', "false").lower()
|
|
|
PROMPT_STATUS_CHANGE = str(os.getenv('PROMPT_STATUS_CHANGE', "Rédige un message court qui sera utilisé en tant que status sur Discord"))
|
|
|
DELAY_TASK_UPDATE_STATUS = int(os.getenv('DELAY_TASK_UPDATE_STATUS', '30'))
|
|
|
+WHISPER_WS_URL = os.getenv("WHISPER_WS_URL", "ws://whisper-stt:8000/ws/transcribe")
|
|
|
|
|
|
# Initialiser le client OpenAI asynchrone ici
|
|
|
openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY, base_url=URL_OPENAI_API)
|
|
|
|
|
|
-BOT_VERSION = "2.9.0"
|
|
|
+BOT_VERSION = "3.0.0-alpha1"
|
|
|
|
|
|
# Vérifier que les tokens et le prompt de personnalité sont récupérés
|
|
|
if DISCORD_TOKEN is None or OPENAI_API_KEY is None or DISCORD_CHANNEL_ID is None:
|
|
|
@@ -66,13 +76,137 @@ httpx_logger.setLevel(logging.DEBUG)
|
|
|
|
|
|
urllib3.disable_warnings()
|
|
|
|
|
|
+update_status_started = False
|
|
|
+
|
|
|
# Initialiser les intents
|
|
|
intents = discord.Intents.default()
|
|
|
intents.message_content = True # Activer l'intent pour les contenus de message
|
|
|
+intents.voice_states = True
|
|
|
+
|
|
|
class ReplyMode(Enum):
    """Channel through which the bot answers while it sits in a voice channel."""

    # Answers are announced as spoken ("en voix") by the /join command.
    VOICE = "voice"

    # Answers are posted as messages in the text channel where /join was used.
    TEXT = "text"
|
|
|
+
|
|
|
+reply_mode = ReplyMode.VOICE
|
|
|
+reply_text_channel = None
|
|
|
+
|
|
|
class STTSink(Sink):
    """Voice sink that forwards each speaker's audio to a speech-to-text WebSocket.

    Incoming PCM is converted with ``discord_pcm_to_whisper_int16`` and buffered
    per user. Once a user has at least one second buffered, a delayed flush is
    scheduled on ``MAIN_LOOP``; the flush sends the buffer to ``WHISPER_WS_URL``
    only if the user has been silent long enough and the buffer is long enough.
    Messages of type ``"final"`` coming back on the socket are either posted to
    ``reply_text_channel`` (text mode) or logged.

    NOTE(review): ``write`` schedules work with ``run_coroutine_threadsafe``,
    which suggests it runs outside the event-loop thread; the per-user dicts
    are shared with the loop without a lock — confirm this is acceptable.
    """

    # Bytes per second of the converted audio (16 kHz, 16-bit samples).
    _BYTES_PER_SECOND = 16000 * 2
    # Minimum buffered duration before a flush gets scheduled.
    _MIN_BUFFER_SEC = 1.0
    # Delay between scheduling a flush and checking for silence.
    _FLUSH_DELAY_SEC = 1.2
    # A user who spoke more recently than this is still considered talking.
    _SILENCE_SEC = 0.6
    # Chunks shorter than two seconds are kept in the buffer instead of sent.
    _MIN_CHUNK_BYTES = 16000 * 2 * 2
    # Transcripts containing any of these substrings are dropped.
    # NOTE(review): presumably typical STT hallucinations on noise — confirm.
    _IGNORED_SUBSTRINGS = (
        "amara",
        "sous-titres",
        "merci",
        "musique",
        "applaudissements",
    )

    def __init__(self):
        super().__init__()
        self.user_ws = {}       # user_id -> open WebSocket connection
        self.buffers = {}       # user_id -> bytearray of converted PCM
        self.last_voice = {}    # user_id -> time.time() of the last audio packet
        self.flush_tasks = {}   # user_id -> pending flush future
        # Strong references to listener tasks: the event loop only keeps weak
        # references, so an unreferenced task may be garbage-collected mid-run.
        self._listener_tasks = {}

    async def _get_ws(self, user_id):
        """Return the user's WebSocket, connecting and starting a listener if needed."""
        ws = self.user_ws.get(user_id)
        if ws is None:
            ws = await websockets.connect(WHISPER_WS_URL)
            self.user_ws[user_id] = ws
            self._listener_tasks[user_id] = asyncio.create_task(
                self._listen_ws(user_id, ws)
            )
        return ws

    async def _listen_ws(self, user_id, ws):
        """Consume transcription messages for one user until the socket closes."""
        try:
            async for msg in ws:
                # A single malformed frame must not end the listener.
                try:
                    data = json.loads(msg)
                except (TypeError, ValueError) as e:
                    logger.warning(f"[STT][{user_id}] message ignoré (JSON invalide) : {e}")
                    continue

                if not isinstance(data, dict) or data.get("type") != "final":
                    continue

                text = str(data.get("text") or "").strip()

                if self._ignore_text(text):
                    continue

                if reply_mode == ReplyMode.TEXT and reply_text_channel:
                    member = reply_text_channel.guild.get_member(user_id)
                    name = member.display_name if member else f"User {user_id}"

                    # A failed post (permissions, rate limit...) is not a
                    # socket failure: log it and keep listening.
                    try:
                        await reply_text_channel.send(f"🗣️ **{name}** : {text}")
                    except discord.HTTPException as e:
                        logger.warning(f"[STT][{user_id}] envoi Discord échoué : {e}")
                else:
                    logger.info(f"[STT][{user_id}] {text}")

        except Exception as e:
            logger.warning(f"[STT][{user_id}] WS fermé : {e}")
        finally:
            # Forget the dead socket so the next flush reconnects, but never
            # evict a newer connection/listener registered for the same user.
            if self.user_ws.get(user_id) is ws:
                self.user_ws.pop(user_id, None)
            if self._listener_tasks.get(user_id) is asyncio.current_task():
                self._listener_tasks.pop(user_id, None)

    def write(self, pcm_bytes: bytes, user_id: int):
        """Buffer one packet of a user's audio and schedule a flush when due."""
        if not pcm_bytes:
            return

        audio = discord_pcm_to_whisper_int16(pcm_bytes)
        if not audio:
            return

        self.last_voice[user_id] = time.time()

        buffer = self.buffers.setdefault(user_id, bytearray())
        buffer.extend(audio)

        buffer_sec = len(buffer) / self._BYTES_PER_SECOND

        # At most one pending flush per user.
        if buffer_sec >= self._MIN_BUFFER_SEC and user_id not in self.flush_tasks:
            self.flush_tasks[user_id] = asyncio.run_coroutine_threadsafe(
                self._flush_if_silence(user_id),
                MAIN_LOOP
            )

    async def _flush_if_silence(self, user_id):
        """Send the user's buffered audio if they stopped talking.

        Nothing is sent when the user spoke less than 0.6 s ago or when fewer
        than two seconds are buffered; in both cases the audio stays buffered
        and a later ``write`` schedules a new flush.
        """
        await asyncio.sleep(self._FLUSH_DELAY_SEC)

        self.flush_tasks.pop(user_id, None)

        if time.time() - self.last_voice.get(user_id, 0) < self._SILENCE_SEC:
            return

        buffer = self.buffers.get(user_id)
        # Check the length BEFORE detaching the buffer: clearing it and then
        # re-appending a short chunk could reorder audio written in between.
        if buffer is None or len(buffer) < self._MIN_CHUNK_BYTES:
            return

        chunk = bytes(buffer)
        self.buffers[user_id] = bytearray()

        try:
            ws = await self._get_ws(user_id)
            await ws.send(chunk)
            logger.debug(f"[STT] chunk envoyé user={user_id} bytes={len(chunk)}")
        except Exception as e:
            # The chunk is dropped; the next flush will open a new connection.
            logger.warning(f"[STT] envoi échoué user={user_id}: {e}")
            self.user_ws.pop(user_id, None)

    def cleanup(self):
        """Close every open WebSocket when the recording ends.

        NOTE(review): relies on the base ``Sink.cleanup`` hook being invoked
        by the voice client when recording stops — confirm against the
        installed py-cord version.
        """
        super().cleanup()

        sockets = list(self.user_ws.values())
        self.user_ws.clear()

        for ws in sockets:
            try:
                asyncio.run_coroutine_threadsafe(ws.close(), MAIN_LOOP)
            except RuntimeError as e:
                # Raised when the event loop is already closed.
                logger.debug(f"[STT] fermeture WS impossible : {e}")

    def _ignore_text(self, text: str) -> bool:
        """Return True for transcripts that are too short or contain an ignored substring."""
        lowered = text.lower()
        return len(lowered) < 3 or any(bad in lowered for bad in self._IGNORED_SUBSTRINGS)
|
|
|
+
|
|
|
|
|
|
# Liste pour stocker l'historique des conversations
|
|
|
conversation_history = []
|
|
|
|
|
|
def discord_pcm_to_whisper_int16(pcm_bytes: bytes, channels: int = 2) -> bytes:
    """Convert 48 kHz int16 PCM into 16 kHz mono int16 PCM.

    Args:
        pcm_bytes: Raw little-endian int16 PCM sampled at 48 kHz, with
            ``channels`` interleaved channels.
        channels: Number of interleaved channels in ``pcm_bytes``. Defaults
            to 2 because Discord voice is decoded as stereo; pass 1 for
            input that is already mono.

    Returns:
        Mono int16 PCM at 16 kHz, or ``b""`` when the input does not hold at
        least one complete frame.

    Raises:
        ValueError: If ``channels`` is smaller than 1.
    """
    if channels < 1:
        raise ValueError("channels must be >= 1")

    # Drop a trailing partial frame: np.frombuffer rejects odd-sized buffers
    # and reshape needs a whole number of frames.
    frame_size = 2 * channels
    usable = len(pcm_bytes) - (len(pcm_bytes) % frame_size)
    if usable == 0:
        return b""

    samples = np.frombuffer(pcm_bytes[:usable], dtype=np.int16)

    # int16 -> float32, then average the interleaved channels into mono.
    # Resampling the interleaved stream directly would blend the channels and
    # yield twice as many samples as a 16 kHz mono signal should contain.
    frames = samples.astype(np.float32).reshape(-1, channels)
    mono = frames.mean(axis=1) / 32768.0

    # Resample 48 kHz -> 16 kHz.
    audio_16k = resample_poly(mono, up=1, down=3)

    # float32 -> int16, clipped to the valid range.
    audio_16k_int16 = np.clip(audio_16k * 32768.0, -32768, 32767).astype(np.int16)

    return audio_16k_int16.tobytes()
|
|
|
+
|
|
|
def filter_message(message):
|
|
|
"""Filtre le contenu d'un retour de modèle de language, comme pour enlever les pensées dans le cas par exemple de DeepSeek"""
|
|
|
|
|
|
@@ -172,20 +306,16 @@ def has_text(text):
|
|
|
def resize_image(image_bytes, attachment_filename=None):
    """Shrink an image to fit within 2000x2000 and return it re-encoded.

    Args:
        image_bytes: Encoded image data.
        attachment_filename: Optional filename, used to infer the output
            format when Pillow does not report one for the image.

    Returns:
        The bytes of the resized image, saved in its detected (or inferred)
        format.

    Raises:
        Any exception raised by Pillow while opening or saving the image is
        propagated; the caller is responsible for handling it.
    """
    with Image.open(BytesIO(image_bytes)) as img:
        # thumbnail() resizes in place so the image is no larger than the box.
        img.thumbnail((2000, 2000))

        # Fall back to the filename when the image carries no format.
        img_format = img.format or _infer_image_format(attachment_filename)

        buffer = BytesIO()
        img.save(buffer, format=img_format)

        return buffer.getvalue()
|
|
|
|
|
|
async def encode_image_from_attachment(attachment):
|
|
|
"""Encode une image depuis une pièce jointe en base64 après redimensionnement."""
|
|
|
@@ -229,22 +359,11 @@ try:
|
|
|
except ValueError:
|
|
|
raise ValueError("L'ID du channel Discord est invalide. Assurez-vous qu'il s'agit d'un entier.")
|
|
|
|
|
|
-class MyDiscordClient(discord.Client):
|
|
|
- async def setup_hook(self):
|
|
|
- update_status.start()
|
|
|
-
|
|
|
- async def close(self):
|
|
|
- global openai_client
|
|
|
-
|
|
|
- if openai_client is not None:
|
|
|
- await openai_client.close()
|
|
|
- openai_client = None
|
|
|
-
|
|
|
- await super().close()
|
|
|
|
|
|
# Initialiser le client Discord avec les intents modifiés
|
|
|
-client_discord = MyDiscordClient(intents=intents)
|
|
|
+client_discord = discord.Bot(intents=intents)
|
|
|
|
|
|
+MAIN_LOOP = asyncio.get_event_loop()
|
|
|
|
|
|
# Appeler la fonction pour charger l'historique au démarrage
|
|
|
load_conversation_history()
|
|
|
@@ -382,9 +501,15 @@ async def call_openai_api_system(system_text):
|
|
|
|
|
|
@client_discord.event
|
|
|
async def on_ready():
|
|
|
+ global update_status_started
|
|
|
+
|
|
|
logger.info(f'{BOT_NAME} connecté en tant que {client_discord.user}')
|
|
|
logger.info(f'Utilisation du modèle {MODEL}')
|
|
|
|
|
|
+ if not update_status_started:
|
|
|
+ update_status.start()
|
|
|
+ update_status_started = True
|
|
|
+
|
|
|
if not conversation_history:
|
|
|
logger.info("Aucun historique trouvé. L'historique commence vide.")
|
|
|
|
|
|
@@ -448,8 +573,12 @@ async def on_message(message):
|
|
|
for attachment in message.attachments:
|
|
|
# Vérifier si c'est un fichier avec une extension autorisée
|
|
|
if attachment.content_type and attachment.content_type.startswith('image/'):
|
|
|
- image_data = await encode_image_from_attachment(attachment)
|
|
|
- break
|
|
|
+ try:
|
|
|
+ image_data = await encode_image_from_attachment(attachment)
|
|
|
+ break
|
|
|
+ except Exception as e:
|
|
|
+ await message.channel.send("Il semble qu'il y ai un souci avec ton image, je ne peux pas l'ouvrir.")
|
|
|
+ logger.error(f"Erreur lors de la conversion de l'image : {e}")
|
|
|
else:
|
|
|
try:
|
|
|
file_content = await read_text_file(attachment)
|
|
|
@@ -553,6 +682,49 @@ async def add_to_conversation_history(new_message):
|
|
|
logger.info(f"{excess_messages} messages les plus anciens ont été supprimés.")
|
|
|
|
|
|
|
|
|
async def _on_recording_finished(sink, *args):
    """No-op completion callback for ``start_recording``.

    It is a coroutine function because the voice client schedules the
    callback's result on the event loop when the recording stops.
    """


@client_discord.slash_command(name="join", description="Le bot rejoint le vocal (réponse voix ou texte)")
async def join(ctx: discord.ApplicationContext, mode: str = discord.Option(str, choices=["voice", "texte"], default="voice", description="Mode de réponse du bot")):
    """Join (or move to) the caller's voice channel and start listening.

    Args:
        ctx: Invocation context of the slash command.
        mode: ``"texte"`` to post replies in the channel where the command was
            used; any other value selects voice replies.

    Side effects:
        Updates the module-level ``reply_mode`` and ``reply_text_channel``.
    """
    global reply_mode, reply_text_channel

    if not ctx.author.voice:
        await ctx.respond("❌ Tu n'es pas dans un salon vocal.", ephemeral=True)
        return

    channel = ctx.author.voice.channel

    vc = ctx.guild.voice_client
    if vc:
        await vc.move_to(channel)
    else:
        vc = await channel.connect()

    # Start listening. A repeated /join on an existing connection must not
    # call start_recording again: it raises when a recording is in progress.
    if not getattr(vc, "recording", False):
        vc.start_recording(STTSink(), _on_recording_finished)

    if mode == "texte":
        reply_mode = ReplyMode.TEXT
        reply_text_channel = ctx.channel

        await ctx.respond("🎧 Connecté au vocal — réponses **en texte** ici.")
    else:
        reply_mode = ReplyMode.VOICE
        reply_text_channel = None

        await ctx.respond("🎧 Connecté au vocal — réponses **en voix**.")
|
|
|
+
|
|
|
+
|
|
|
@client_discord.slash_command(name="quit", description="Le bot quitte le salon vocal")
async def quit(ctx: discord.ApplicationContext):
    """Disconnect the bot from the guild's voice channel, if it is in one."""
    voice_client = ctx.guild.voice_client

    if voice_client:
        await voice_client.disconnect()
        await ctx.respond("👋 Déconnecté du salon vocal.")
    else:
        # Nothing to leave: tell only the caller.
        await ctx.respond("❌ Je ne suis pas dans un salon vocal.", ephemeral=True)
|
|
|
+
|
|
|
+
|
|
|
@tasks.loop(minutes=DELAY_TASK_UPDATE_STATUS)
|
|
|
async def update_status():
|
|
|
try:
|