feat: whisper STT and record screen (#1363)

This commit is contained in:
Markos Gogoulos
2025-09-01 15:11:38 +03:00
committed by GitHub
parent 8cbeb72dd2
commit 817e16ac60
52 changed files with 1179 additions and 339 deletions

View File

@@ -6,7 +6,7 @@ from .license import License # noqa: F401
from .media import Media, MediaPermission # noqa: F401
from .playlist import Playlist, PlaylistMedia # noqa: F401
from .rating import Rating, RatingCategory # noqa: F401
from .subtitle import Language, Subtitle # noqa: F401
from .subtitle import Language, Subtitle, TranscriptionRequest # noqa: F401
from .utils import CODECS # noqa: F401
from .utils import ENCODE_EXTENSIONS # noqa: F401
from .utils import ENCODE_EXTENSIONS_KEYS # noqa: F401

View File

@@ -23,6 +23,7 @@ from imagekit.processors import ResizeToFit
from .. import helpers
from ..stop_words import STOP_WORDS
from .encoding import EncodeProfile, Encoding
from .subtitle import TranscriptionRequest
from .utils import (
ENCODE_RESOLUTIONS_KEYS,
MEDIA_ENCODING_STATUS,
@@ -205,6 +206,9 @@ class Media(models.Model):
views = models.IntegerField(db_index=True, default=1)
allow_whisper_transcribe = models.BooleanField("Transcribe auto-detected language", default=False)
allow_whisper_transcribe_and_translate = models.BooleanField("Transcribe auto-detected language and translate to English", default=False)
# keep track if media file has changed, on saves
__original_media_file = None
__original_thumbnail_time = None
@@ -297,6 +301,26 @@ class Media(models.Model):
thumbnail_name = helpers.get_file_name(self.uploaded_poster.path)
self.uploaded_thumbnail.save(content=myfile, name=thumbnail_name)
def transcribe_function(self):
    """
    Request Whisper transcription for this media where it is enabled
    and has not been requested before.

    Two modes are handled independently: plain transcription
    (`allow_whisper_transcribe`) and transcription with translation to
    English (`allow_whisper_transcribe_and_translate`). For each enabled
    mode that has no TranscriptionRequest row yet, a row is created and
    `tasks.whisper_transcribe` is dispatched through `.delay` with this
    media's friendly_token.
    """
    # Nothing to do unless at least one of the two options is switched on.
    if not (self.allow_whisper_transcribe or self.allow_whisper_transcribe_and_translate):
        return

    # (option enabled on this media, value of translate_to_english)
    modes = (
        (self.allow_whisper_transcribe, False),
        (self.allow_whisper_transcribe_and_translate, True),
    )

    # Decide every mode up front, before any row is created, so that an
    # existing request for a mode suppresses a second one.
    pending = [
        translate
        for enabled, translate in modes
        if enabled and not TranscriptionRequest.objects.filter(media=self, translate_to_english=translate).exists()
    ]

    # NOTE(review): imported at call time, presumably to avoid a circular
    # import between the models and tasks modules - confirm.
    from .. import tasks

    for translate in pending:
        TranscriptionRequest.objects.create(media=self, translate_to_english=translate)
        tasks.whisper_transcribe.delay(self.friendly_token, translate_to_english=translate)
def update_search_vector(self):
"""
Update SearchVector field of SearchModel using raw SQL
@@ -965,6 +989,8 @@ def media_save(sender, instance, created, **kwargs):
tag.update_tag_media()
instance.update_search_vector()
if instance.media_type == "video":
instance.transcribe_function()
@receiver(pre_delete, sender=Media)

View File

@@ -6,7 +6,7 @@ from django.db import models
from django.urls import reverse
from .. import helpers
from .utils import subtitles_file_path
from .utils import MEDIA_ENCODING_STATUS, subtitles_file_path
class Language(models.Model):
@@ -14,7 +14,7 @@ class Language(models.Model):
to be used with Subtitles
"""
code = models.CharField(max_length=12, help_text="language code")
code = models.CharField(max_length=30, help_text="language code")
title = models.CharField(max_length=100, help_text="language code")
@@ -70,3 +70,15 @@ class Subtitle(models.Model):
else:
raise Exception("Could not convert to srt")
return True
class TranscriptionRequest(models.Model):
    """Whisper transcription request for a media item."""

    # Media the request belongs to; requests are deleted with the media.
    media = models.ForeignKey(
        "Media",
        related_name="transcriptionrequests",
        on_delete=models.CASCADE,
    )
    # Set once, when the row is first created.
    add_date = models.DateTimeField(auto_now_add=True)
    # Uses the same status choices as media encoding; starts as "pending".
    status = models.CharField(
        choices=MEDIA_ENCODING_STATUS,
        default="pending",
        max_length=20,
        db_index=True,
    )
    # True when the transcription should also be translated to English.
    translate_to_english = models.BooleanField(default=False)
    # Optional free text.
    # NOTE(review): presumably output captured from the transcription task - confirm.
    logs = models.TextField(
        null=True,
        blank=True,
    )

    def __str__(self):
        """Return a label built from the media title and the request status."""
        return f"Transcription request for {self.media.title} - {self.status}"