diff --git a/files/models/media.py b/files/models/media.py
index 01303875..0e101a21 100644
--- a/files/models/media.py
+++ b/files/models/media.py
@@ -357,6 +357,10 @@ class Media(models.Model):
             a_tags,
             b_tags,
         ]
+
+        for subtitle in self.subtitles.all():
+            items.append(subtitle.subtitle_text)
+
         items = [item for item in items if item]
         text = " ".join(items)
         text = " ".join([token for token in text.lower().split(" ") if token not in STOP_WORDS])
diff --git a/files/models/subtitle.py b/files/models/subtitle.py
index b671b286..998c7dbd 100644
--- a/files/models/subtitle.py
+++ b/files/models/subtitle.py
@@ -1,6 +1,7 @@
 import os
 import tempfile
 
+import pysubs2
 from django.conf import settings
 from django.db import models
 from django.urls import reverse
@@ -73,6 +74,17 @@ class Subtitle(models.Model):
                 raise Exception("Could not convert to srt")
         return True
 
+    @property
+    def subtitle_text(self):
+        """Return the text of every subtitle line as one whitespace-normalized string."""
+        # NOTE(review): pysubs2 appears to mark in-cue line breaks with a literal "\N" - confirm against its docs.
+        sub = pysubs2.load(self.subtitle_file.path, encoding="utf-8")
+        text = " ".join(line.text for line in sub)
+        for separator in ("\\N", "-", "."):
+            text = text.replace(separator, " ")
+        # split()/join collapses runs of any length; one replace("  ", " ") pass left "a - b" as "a  b".
+        return " ".join(text.split())
+
 
 class TranscriptionRequest(models.Model):
     # Whisper transcription request