Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ filedurations:
learningactivities:
python contentcuration/manage.py set_default_learning_activities

hascaptions:
python contentcuration/manage.py set_orm_based_has_captions

export COMPOSE_PROJECT_NAME=studio_$(shell git rev-parse --abbrev-ref HEAD)

purge-postgres:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
CHUNKSIZE = 10000


def extract_duration_of_media(f_in, extension):
def extract_duration_of_media(f_in, extension): # noqa C901
"""
For more details on these commands, refer to the ffmpeg Wiki:
https://trac.ffmpeg.org/wiki/FFprobeTips#Formatcontainerduration
Expand Down Expand Up @@ -55,9 +55,12 @@ def extract_duration_of_media(f_in, extension):
stdin=f_in,
stderr=subprocess.PIPE
)
second_last_line = result.stderr.decode("utf-8").strip().splitlines()[-2]
time_code = second_last_line.split(" time=")[1].split(" ")[0]
hours, minutes, seconds = time_code.split(":")
try:
second_last_line = result.stderr.decode("utf-8").strip().splitlines()[-2]
time_code = second_last_line.split(" time=")[1].split(" ")[0]
hours, minutes, seconds = time_code.split(":")
except IndexError:
raise RuntimeError("Unable to determine media length")
try:
hours = int(hours)
except ValueError:
Expand Down Expand Up @@ -103,7 +106,7 @@ def handle(self, *args, **options):
except FileNotFoundError:
logging.warning("File {} not found".format(file))
excluded_files.add(file.file_on_disk.name)
except subprocess.CalledProcessError:
except (subprocess.CalledProcessError, RuntimeError):
logging.warning("File {} could not be read for duration".format(file))
excluded_files.add(file.file_on_disk.name)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import logging as logmodule
import time

from django.core.management.base import BaseCommand
from django.db.models import Exists
from django.db.models import OuterRef
from le_utils.constants import content_kinds
from le_utils.constants import format_presets
from le_utils.constants.labels import accessibility_categories

from contentcuration.models import ContentNode
from contentcuration.models import File

# Configure logging at INFO level so the progress messages emitted by this
# command are shown.
logmodule.basicConfig(level=logmodule.INFO)
# NOTE: `logging` here is a Logger instance named 'command', not the stdlib
# module (which is imported as `logmodule` above).
logging = logmodule.getLogger('command')


# Upper bound on how many node ids are selected for a single bulk update.
CHUNKSIZE = 10000


class Command(BaseCommand):
    """Management command that records 'has captions' on video nodes with subtitle files."""

    def handle(self, *args, **options):
        """
        Set the captions/subtitles accessibility label on matching video nodes.

        A node matches when it is of kind video, its accessibility_labels is
        still null, and it has a video subtitle file whose language equals the
        node's own language. Matching nodes are updated in batches of at most
        CHUNKSIZE ids until none remain.
        """
        started_at = time.time()

        logging.info("Setting 'has captions' for video kinds")

        # Correlated per-node lookup: subtitle files attached to the node and
        # carrying the same language as the node itself.
        matching_subtitles = File.objects.filter(
            contentnode=OuterRef("id"),
            language=OuterRef("language"),
            preset_id=format_presets.VIDEO_SUBTITLE,
        )

        # Restrict to video nodes whose accessibility_labels has never been set.
        # Already-processed nodes drop out of this queryset, so the command can
        # be rerun to resume where it left off, and labels edited previously
        # are never overwritten.
        pending_videos = ContentNode.objects.filter(
            Exists(matching_subtitles),
            kind=content_kinds.VIDEO,
            accessibility_labels__isnull=True,
        )

        captions_label = {accessibility_categories.CAPTIONS_SUBTITLES: True}
        total_updated = 0

        while pending_videos.exists():
            # Lazy slice of the next batch of ids; it is rebuilt each pass
            # because the previous batch no longer matches the filter.
            batch_ids = pending_videos.values_list("id", flat=True)[0:CHUNKSIZE]

            updated = ContentNode.objects.filter(id__in=batch_ids).update(
                accessibility_labels=captions_label
            )

            logging.info("Set has captions metadata for {} nodes".format(updated))

            total_updated += updated

        elapsed = time.time() - started_at
        logging.info('Finished setting all has captions metadata for {} nodes in {} seconds'.format(total_updated, elapsed))