mirror of
https://github.com/mediacms-io/mediacms.git
synced 2025-11-05 23:18:53 -05:00
index subtitles too
This commit is contained in:
parent
f65338562e
commit
030e3cbe68
@ -357,6 +357,10 @@ class Media(models.Model):
|
||||
a_tags,
|
||||
b_tags,
|
||||
]
|
||||
|
||||
for subtitle in self.subtitles.all():
|
||||
items.append(subtitle.subtitle_text)
|
||||
|
||||
items = [item for item in items if item]
|
||||
text = " ".join(items)
|
||||
text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pysubs2
|
||||
from django.conf import settings
|
||||
from django.db import models
|
||||
from django.urls import reverse
|
||||
@ -73,6 +74,17 @@ class Subtitle(models.Model):
|
||||
raise Exception("Could not convert to srt")
|
||||
return True
|
||||
|
||||
@property
def subtitle_text(self):
    """Return the subtitle file's dialogue as one whitespace-normalized string.

    The file at ``self.subtitle_file.path`` is parsed with pysubs2 as UTF-8
    and the text of every event is concatenated. Hyphens and periods are
    turned into spaces so that the words around them become separate
    tokens, and every run of whitespace is collapsed to a single space.

    Returns:
        str: the flattened subtitle text; an empty string when the file
        contains no text.

    Raises:
        Whatever ``pysubs2.load`` raises (for example on a missing or
        undecodable file); errors are deliberately not swallowed here.
    """
    sub = pysubs2.load(self.subtitle_file.path, encoding="utf-8")

    # ``plaintext`` drops SubStation override tags such as ``{\i1}`` and
    # converts ``\N`` / ``\n`` / ``\h`` into real whitespace, so markup
    # does not end up in the returned text.
    text = " ".join(line.plaintext for line in sub)

    # Single pass instead of chained ``replace`` calls.
    text = text.translate(str.maketrans("-.", "  "))

    # Collapse newlines and repeated spaces, and trim both ends. A single
    # ``replace`` of two spaces by one would leave longer runs behind.
    return " ".join(text.split())
|
||||
|
||||
|
||||
class TranscriptionRequest(models.Model):
|
||||
# Whisper transcription request
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user