index subtitles too

This commit is contained in:
Markos Gogoulos 2025-10-25 14:08:33 +03:00
parent f65338562e
commit 030e3cbe68
2 changed files with 16 additions and 0 deletions

View File

@ -357,6 +357,10 @@ class Media(models.Model):
a_tags,
b_tags,
]
for subtitle in self.subtitles.all():
items.append(subtitle.subtitle_text)
items = [item for item in items if item]
text = " ".join(items)
text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])

View File

@ -1,6 +1,7 @@
import os
import tempfile
import pysubs2
from django.conf import settings
from django.db import models
from django.urls import reverse
@ -73,6 +74,17 @@ class Subtitle(models.Model):
raise Exception("Could not convert to srt")
return True
@property
def subtitle_text(self):
    """Return the subtitle file's text as one flat, space-separated string.

    The file at ``self.subtitle_file.path`` is parsed with pysubs2 and the
    text of every event is joined. Line-break markers, hyphens and full
    stops are replaced by spaces, then every run of whitespace is collapsed
    to a single space and the ends are trimmed, so consumers that split on
    ``" "`` do not receive empty tokens.

    Returns:
        str: the flattened text, or ``""`` when the file cannot be located,
        read, decoded or parsed.
    """
    try:
        sub = pysubs2.load(self.subtitle_file.path, encoding="utf-8")
    except (OSError, ValueError, pysubs2.exceptions.Pysubs2Error):
        # OSError: file missing or unreadable on disk.
        # ValueError: covers UnicodeDecodeError for non-UTF-8 files; presumably
        # also what Django's file field raises when no file is attached
        # (NOTE(review): confirm subtitle_file is a FileField).
        # Pysubs2Error: format could not be detected or parsed.
        # An unreadable subtitle contributes no text rather than raising.
        return ""

    text = " ".join(line.text for line in sub)
    # "\\N" is the hard line break pysubs2 stores in event text; "\\n" and
    # "\\h" are the ASS soft break and hard space. Hyphens and full stops are
    # treated as word separators.
    for separator in ("\\N", "\\n", "\\h", "-", "."):
        text = text.replace(separator, " ")
    # split() with no argument drops empty fields, so this collapses any run
    # of spaces, tabs or newlines into a single space.
    return " ".join(text.split())
class TranscriptionRequest(models.Model):
# Whisper transcription request