index subtitles too

This commit is contained in:
Markos Gogoulos 2025-10-25 14:08:33 +03:00
parent f65338562e
commit 030e3cbe68
2 changed files with 16 additions and 0 deletions

View File

@ -357,6 +357,10 @@ class Media(models.Model):
a_tags, a_tags,
b_tags, b_tags,
] ]
for subtitle in self.subtitles.all():
items.append(subtitle.subtitle_text)
items = [item for item in items if item] items = [item for item in items if item]
text = " ".join(items) text = " ".join(items)
text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS]) text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])

View File

@ -1,6 +1,7 @@
import os import os
import tempfile import tempfile
import pysubs2
from django.conf import settings from django.conf import settings
from django.db import models from django.db import models
from django.urls import reverse from django.urls import reverse
@ -73,6 +74,17 @@ class Subtitle(models.Model):
raise Exception("Could not convert to srt") raise Exception("Could not convert to srt")
return True return True
@property
def subtitle_text(self):
    """Return the subtitle file's text as one flat, space-separated string.

    The file at ``self.subtitle_file.path`` is loaded with pysubs2 (decoded
    as UTF-8) and the text of every event is joined together. The result is
    normalized for indexing: formatting override tags and line-break
    markers are dropped, "-" and "." are turned into spaces, and every run
    of whitespace is collapsed to a single space with no leading or
    trailing whitespace.

    Returns:
        str: The normalized text; an empty string when the file contains
        no events.

    Errors raised while resolving the path or loading/parsing the file are
    not caught here and propagate to the caller.
    """
    sub = pysubs2.load(self.subtitle_file.path, encoding="utf-8")
    # ``plaintext`` (rather than ``text``) strips override tags such as
    # ``{\i1}`` and converts the ``\N`` / ``\n`` / ``\h`` markers to real
    # whitespace, so markup does not end up in the returned text.
    text = " ".join(line.plaintext for line in sub)
    # Treat hyphens and full stops as word separators, in a single pass.
    text = text.translate(str.maketrans("-.", "  "))
    # split()/join collapses whitespace runs of any length (including the
    # newlines produced above); a single replace("  ", " ") pass would
    # leave doubled spaces behind for runs of three or more.
    return " ".join(text.split())
class TranscriptionRequest(models.Model): class TranscriptionRequest(models.Model):
# Whisper transcription request # Whisper transcription request