|
|
@@ -17,6 +17,7 @@ from datetime import datetime, timezone, timedelta
|
|
|
from charset_normalizer import from_bytes
|
|
|
from enum import Enum
|
|
|
from discord.sinks import Sink
|
|
|
+from scipy.signal import resample_poly
|
|
|
|
|
|
|
|
|
from discord.ext import tasks
|
|
|
@@ -114,14 +115,25 @@ class STTSink(Sink):
|
|
|
except Exception as e:
|
|
|
logger.warning(f"[STT][{user_id}] WS fermé : {e}")
|
|
|
|
|
|
- def write(self, data, user):
|
|
|
- """
|
|
|
- data.pcm : bytes PCM int16 48kHz (Discord natif)
|
|
|
- """
|
|
|
def write(self, data):
    """Convert one received voice packet and schedule it for transcription.

    Method of ``STTSink``. ``data`` must expose ``pcm`` (raw int16 PCM at
    48 kHz, as delivered by Discord) and ``user``. The packet is dropped
    silently when it is falsy, has no ``pcm`` attribute, has no user, or
    converts to an empty buffer.
    """
    has_payload = bool(data) and hasattr(data, "pcm")
    if not has_payload:
        return

    speaker = data.user
    if not speaker:
        return

    # Convert to 16 kHz float32 before handing the audio to the STT sender.
    whisper_chunk = discord_pcm_to_whisper_float32(data.pcm)
    if not whisper_chunk:
        return

    # NOTE(review): the task handle is not retained, so the event loop only
    # holds a weak reference to it; it also requires a running loop in the
    # calling thread -- confirm both against how the voice client calls us.
    send_coro = self._send_audio(speaker.id, whisper_chunk)
    asyncio.create_task(send_coro)

    logger.debug(f"[STT] audio envoyé user={speaker.id} bytes={len(whisper_chunk)}")
|
|
|
|
|
|
async def _send_audio(self, user_id, pcm_bytes):
|
|
|
ws = await self._get_ws(user_id)
|
|
|
@@ -130,6 +142,21 @@ class STTSink(Sink):
|
|
|
# List used to store the conversation history
conversation_history = []
|
|
|
|
|
|
def discord_pcm_to_whisper_float32(pcm_bytes: bytes, *, channels: int = 2) -> bytes:
    """Convert Discord 48 kHz int16 PCM into 16 kHz mono float32 bytes.

    Args:
        pcm_bytes: Raw signed 16-bit PCM sampled at 48 kHz. With more than
            one channel the samples are expected to be interleaved.
        channels: Number of interleaved channels in ``pcm_bytes``. Defaults
            to 2 because Discord's Opus decoder produces stereo frames; pass
            ``channels=1`` if the caller has already down-mixed the audio.

    Returns:
        The audio as raw float32 samples in [-1.0, 1.0) at 16 kHz, mono.
        ``b""`` when the input does not contain a single complete frame.

    Raises:
        ValueError: If ``channels`` is smaller than 1.
    """
    if channels < 1:
        raise ValueError("channels must be >= 1")

    # A truncated packet would make np.frombuffer raise (buffer size must be
    # a multiple of the element size), so only whole frames are decoded.
    bytes_per_frame = 2 * channels
    usable = len(pcm_bytes) - len(pcm_bytes) % bytes_per_frame
    if usable == 0:
        return b""

    # int16 PCM -> numpy
    audio_int16 = np.frombuffer(pcm_bytes, dtype=np.int16, count=usable // 2)

    # int16 -> float32 [-1.0, 1.0)
    audio_float32 = audio_int16.astype(np.float32) / 32768.0

    # Down-mix before resampling: feeding interleaved L/R samples to the
    # resampler as if they were one mono stream yields audio at the wrong
    # effective sample rate.
    if channels > 1:
        audio_float32 = audio_float32.reshape(-1, channels).mean(axis=1)

    # resample 48kHz -> 16kHz
    audio_16k = resample_poly(audio_float32, up=1, down=3)

    return audio_16k.astype(np.float32).tobytes()
|
|
|
+
|
|
|
def filter_message(message):
|
|
|
"""Filtre le contenu d'un retour de modèle de language, comme pour enlever les pensées dans le cas par exemple de DeepSeek"""
|
|
|
|