mirror of
https://github.com/mediacms-io/mediacms.git
synced 2025-11-06 07:28:53 -05:00
index subtitles too
This commit is contained in:
parent
f65338562e
commit
030e3cbe68
@ -357,6 +357,10 @@ class Media(models.Model):
|
|||||||
a_tags,
|
a_tags,
|
||||||
b_tags,
|
b_tags,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
for subtitle in self.subtitles.all():
|
||||||
|
items.append(subtitle.subtitle_text)
|
||||||
|
|
||||||
items = [item for item in items if item]
|
items = [item for item in items if item]
|
||||||
text = " ".join(items)
|
text = " ".join(items)
|
||||||
text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])
|
text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
import pysubs2
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
@ -73,6 +74,17 @@ class Subtitle(models.Model):
|
|||||||
raise Exception("Could not convert to srt")
|
raise Exception("Could not convert to srt")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
def subtitle_text(self):
    """Return the subtitle file's text as a single space-separated string.

    The file at ``self.subtitle_file.path`` is loaded with ``pysubs2``
    (decoded as UTF-8) and the text of every line is joined together.
    ``\\N`` markers, hyphens and full stops are replaced by spaces, and
    every run of whitespace is then collapsed to one space so that a
    caller splitting on ``" "`` does not receive empty tokens.

    Returns:
        str: the flattened subtitle text, without leading or trailing
        whitespace; an empty string when the file has no text.

    Raises:
        Any exception raised by ``pysubs2.load`` (for example for a
        missing or unparsable file) propagates to the caller unchanged.
    """
    subs = pysubs2.load(self.subtitle_file.path, encoding="utf-8")
    text = " ".join(line.text for line in subs)

    # "\N" here is the literal two-character sequence backslash + N found
    # in the line text (presumably the in-line break marker - confirm
    # against the pysubs2 documentation), not a newline character.
    text = text.replace("\\N", " ")

    # Treat hyphens and full stops as word separators.
    text = text.replace("-", " ").replace(".", " ")

    # A single replace of two spaces with one leaves longer runs behind
    # (e.g. "a - b" becomes "a   b"); split/join collapses runs of any
    # length in one pass.
    return " ".join(text.split())
|
||||||
|
|
||||||
|
|
||||||
class TranscriptionRequest(models.Model):
|
class TranscriptionRequest(models.Model):
|
||||||
# Whisper transcription request
|
# Whisper transcription request
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user