feat: whisper STT and record screen (#1363)

2026-02-05 15:03:02 -05:00 · 2025-09-01 15:11:38 +03:00
parent 8cbeb72dd2
commit 817e16ac60
52 changed files with 1179 additions and 339 deletions
--- a/files/models/__init__.py
+++ b/files/models/__init__.py
@@ -6,7 +6,7 @@ from .license import License  # noqa: F401
 from .media import Media, MediaPermission  # noqa: F401
 from .playlist import Playlist, PlaylistMedia  # noqa: F401
 from .rating import Rating, RatingCategory  # noqa: F401
-from .subtitle import Language, Subtitle  # noqa: F401
+from .subtitle import Language, Subtitle, TranscriptionRequest  # noqa: F401
 from .utils import CODECS  # noqa: F401
 from .utils import ENCODE_EXTENSIONS  # noqa: F401
 from .utils import ENCODE_EXTENSIONS_KEYS  # noqa: F401
--- a/files/models/media.py
+++ b/files/models/media.py
@@ -23,6 +23,7 @@ from imagekit.processors import ResizeToFit
 from .. import helpers
 from ..stop_words import STOP_WORDS
 from .encoding import EncodeProfile, Encoding
+from .subtitle import TranscriptionRequest
 from .utils import (
    ENCODE_RESOLUTIONS_KEYS,
    MEDIA_ENCODING_STATUS,
@@ -205,6 +206,9 @@ class Media(models.Model):

    views = models.IntegerField(db_index=True, default=1)

+    allow_whisper_transcribe = models.BooleanField("Transcribe auto-detected language", default=False)  # opt-in flag, read by transcribe_function()
+    allow_whisper_transcribe_and_translate = models.BooleanField("Transcribe auto-detected language and translate to English", default=False)  # opt-in flag, read by transcribe_function()
+
    # keep track if media file has changed, on saves
    __original_media_file = None
    __original_thumbnail_time = None
@@ -297,6 +301,26 @@ class Media(models.Model):
                thumbnail_name = helpers.get_file_name(self.uploaded_poster.path)
                self.uploaded_thumbnail.save(content=myfile, name=thumbnail_name)

+    def transcribe_function(self):
+        """Record a TranscriptionRequest and queue whisper_transcribe for each enabled mode that has none yet."""
+        if not (self.allow_whisper_transcribe or self.allow_whisper_transcribe_and_translate):
+            return
+
+        # (translate_to_english, enabled) per mode: plain transcription first, then transcription + translation.
+        modes = ((False, self.allow_whisper_transcribe), (True, self.allow_whisper_transcribe_and_translate))
+        # All existence checks run before anything is created; a mode with an existing request is skipped.
+        pending = [
+            translate
+            for translate, enabled in modes
+            if enabled and not TranscriptionRequest.objects.filter(media=self, translate_to_english=translate).exists()
+        ]
+
+        from .. import tasks  # function-scope import (presumably avoids a circular import with tasks; confirm)
+
+        for translate in pending:
+            TranscriptionRequest.objects.create(media=self, translate_to_english=translate)
+            tasks.whisper_transcribe.delay(self.friendly_token, translate_to_english=translate)
+
    def update_search_vector(self):
        """
        Update SearchVector field of SearchModel using raw SQL
@@ -965,6 +989,8 @@ def media_save(sender, instance, created, **kwargs):
            tag.update_tag_media()

    instance.update_search_vector()
+    if instance.media_type == "video":
+        instance.transcribe_function()


@receiver(pre_delete, sender=Media)
--- a/files/models/subtitle.py
+++ b/files/models/subtitle.py
@@ -6,7 +6,7 @@ from django.db import models
 from django.urls import reverse

 from .. import helpers
-from .utils import subtitles_file_path
+from .utils import MEDIA_ENCODING_STATUS, subtitles_file_path


 class Language(models.Model):
@@ -14,7 +14,7 @@ class Language(models.Model):
    to be used with Subtitles
    """

-    code = models.CharField(max_length=12, help_text="language code")
+    code = models.CharField(max_length=30, help_text="language code")

    title = models.CharField(max_length=100, help_text="language code")

@@ -70,3 +70,15 @@ class Subtitle(models.Model):
            else:
                raise Exception("Could not convert to srt")
        return True
+
+
+class TranscriptionRequest(models.Model):
+    """Whisper transcription request for one Media; Media.transcribe_function creates these before queuing the task."""
+    media = models.ForeignKey("Media", on_delete=models.CASCADE, related_name="transcriptionrequests")  # CASCADE: requests are deleted with their media
+    add_date = models.DateTimeField(auto_now_add=True)  # set automatically when the row is first created
+    status = models.CharField(max_length=20, choices=MEDIA_ENCODING_STATUS, default="pending", db_index=True)  # reuses the MEDIA_ENCODING_STATUS choices from .utils
+    translate_to_english = models.BooleanField(default=False)  # same flag that is passed to the whisper_transcribe task
+    logs = models.TextField(blank=True, null=True)  # optional free text; presumably filled by the transcription task (confirm)
+
+    def __str__(self):
+        return f"Transcription request for {self.media.title} - {self.status}"