diff --git a/contentcuration/contentcuration/tests/views/test_views_internal.py b/contentcuration/contentcuration/tests/views/test_views_internal.py index 58a6792884..3a23654ee6 100644 --- a/contentcuration/contentcuration/tests/views/test_views_internal.py +++ b/contentcuration/contentcuration/tests/views/test_views_internal.py @@ -231,6 +231,33 @@ def test_tag_greater_than_30_chars_excluded(self): self.assertEqual(response.status_code, 400, response.content) + def test_add_nodes__not_a_topic(self): + resource_node = self._make_node_data() + test_data = { + "root_id": self.root_node.id, + "content_data": [ + resource_node, + ], + } + response = self.admin_client().post( + reverse_lazy("api_add_nodes_to_tree"), data=test_data, format="json" + ) + # should succeed + self.assertEqual(response.status_code, 200, response.content) + resource_node_id = next(iter(response.json().get('root_ids').values())) + + invalid_child = self._make_node_data() + test_data = { + "root_id": resource_node_id, + "content_data": [ + invalid_child, + ], + } + response = self.admin_client().post( + reverse_lazy("api_add_nodes_to_tree"), data=test_data, format="json" + ) + self.assertEqual(response.status_code, 400, response.content) + def test_invalid_metadata_label_excluded(self): invalid_metadata_labels = self._make_node_data() invalid_metadata_labels["title"] = "invalid_metadata_labels" diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index b0e9bab857..a2bb5dde5c 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -31,6 +31,7 @@ from django.utils.translation import gettext_lazy as _ from django.utils.translation import override from kolibri_content import models as kolibrimodels +from kolibri_content.base_models import MAX_TAG_LENGTH from kolibri_content.router import get_active_content_database from kolibri_content.router import using_content_database from kolibri_public.utils.mapper import ChannelMapper @@ -760,7 +761,7 @@ def map_tags_to_node(kolibrinode, ccnode): for tag in ccnode.tags.all(): t, _new = kolibrimodels.ContentTag.objects.get_or_create(pk=tag.pk, tag_name=tag.tag_name) - if len(t.tag_name) <= 30: + if len(t.tag_name) <= MAX_TAG_LENGTH: tags_to_add.append(t) kolibrinode.tags.set(tags_to_add) diff --git a/contentcuration/contentcuration/views/internal.py b/contentcuration/contentcuration/views/internal.py index 4dbc51c8ef..5767928719 100644 --- a/contentcuration/contentcuration/views/internal.py +++ b/contentcuration/contentcuration/views/internal.py @@ -2,8 +2,8 @@ import logging from builtins import str from collections import namedtuple - from distutils.version import LooseVersion + from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import PermissionDenied from django.core.exceptions import SuspiciousOperation @@ -632,11 +632,14 @@ def handle_remote_node(user, node_data, parent_node): @delay_user_storage_calculation -def convert_data_to_nodes(user, content_data, parent_node): +def convert_data_to_nodes(user, content_data, parent_node): # noqa: C901 """ Parse dict and create nodes accordingly """ try: root_mapping = {} parent_node = ContentNode.objects.get(pk=parent_node) + if parent_node.kind_id != content_kinds.TOPIC: + raise NodeValidationError("Parent node must be a topic/folder | actual={}".format(parent_node.kind_id)) + sort_order = parent_node.children.count() + 1 existing_node_ids = ContentNode.objects.filter( parent_id=parent_node.pk diff --git a/contentcuration/kolibri_content/base_models.py b/contentcuration/kolibri_content/base_models.py index 9fb574fa78..0fbe59c710 100644 --- a/contentcuration/kolibri_content/base_models.py +++ b/contentcuration/kolibri_content/base_models.py @@ -20,9 +20,12 @@ from mptt.models import TreeForeignKey +MAX_TAG_LENGTH = 30 + + class ContentTag(models.Model): id = UUIDField(primary_key=True) - tag_name = models.CharField(max_length=30, blank=True) + tag_name = models.CharField(max_length=MAX_TAG_LENGTH, blank=True) class Meta: abstract = True diff --git a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py index b4b88ab4ba..076053ad90 100644 --- a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py +++ b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py @@ -27,6 +27,50 @@ class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--channel-id", + type=str, + dest="channel_id", + help="The channel_id for which generate kolibri_public models [default: all channels]" + ) + + def handle(self, *args, **options): + ids_to_export = [] + + if options["channel_id"]: + ids_to_export.append(options["channel_id"]) + else: + self._republish_problem_channels() + public_channel_ids = set(Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True)) + kolibri_public_channel_ids = set(ChannelMetadata.objects.all().values_list("id", flat=True)) + ids_to_export = public_channel_ids.difference(kolibri_public_channel_ids) + + count = 0 + for channel_id in ids_to_export: + try: + self._export_channel(channel_id) + count += 1 + except FileNotFoundError: + logger.warning("Tried to export channel {} to kolibri_public but its published channel database could not be found".format(channel_id)) + except Exception as e: + logger.exception("Failed to export channel {} to kolibri_public because of error: {}".format(channel_id, e)) + logger.info("Successfully put {} channels into kolibri_public".format(count)) + + def _export_channel(self, channel_id): + logger.info("Putting channel {} into kolibri_public".format(channel_id)) + db_location = os.path.join(settings.DB_ROOT, "{id}.sqlite3".format(id=channel_id)) + with storage.open(db_location) as storage_file: + with tempfile.NamedTemporaryFile(suffix=".sqlite3") as db_file: + shutil.copyfileobj(storage_file, db_file) + db_file.seek(0) + with using_content_database(db_file.name): + # Run migration to handle old content databases published prior to current fields being added. + call_command("migrate", app_label=KolibriContentConfig.label, database=get_active_content_database()) + channel = ExportedChannelMetadata.objects.get(id=channel_id) + logger.info("Found channel {} for id: {} mapping now".format(channel.name, channel_id)) + mapper = ChannelMapper(channel) + mapper.run() def _republish_problem_channels(self): twenty_19 = datetime(year=2019, month=1, day=1) @@ -53,34 +97,3 @@ def _republish_problem_channels(self): channel.save() except Exception as e: logger.exception("Failed to export channel {} to kolibri_public because of error: {}".format(channel.id, e)) - - def _export_channel(self, channel_id): - logger.info("Putting channel {} into kolibri_public".format(channel_id)) - db_location = os.path.join(settings.DB_ROOT, "{id}.sqlite3".format(id=channel_id)) - with storage.open(db_location) as storage_file: - with tempfile.NamedTemporaryFile(suffix=".sqlite3") as db_file: - shutil.copyfileobj(storage_file, db_file) - db_file.seek(0) - with using_content_database(db_file.name): - # Run migration to handle old content databases published prior to current fields being added. - call_command("migrate", app_label=KolibriContentConfig.label, database=get_active_content_database()) - channel = ExportedChannelMetadata.objects.get(id=channel_id) - logger.info("Found channel {} for id: {} mapping now".format(channel.name, channel_id)) - mapper = ChannelMapper(channel) - mapper.run() - - def handle(self, *args, **options): - self._republish_problem_channels() - public_channel_ids = set(Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True)) - kolibri_public_channel_ids = set(ChannelMetadata.objects.all().values_list("id", flat=True)) - ids_to_export = public_channel_ids.difference(kolibri_public_channel_ids) - count = 0 - for channel_id in ids_to_export: - try: - self._export_channel(channel_id) - count += 1 - except FileNotFoundError: - logger.warning("Tried to export channel {} to kolibri_public but its published channel database could not be found".format(channel_id)) - except Exception as e: - logger.exception("Failed to export channel {} to kolibri_public because of error: {}".format(channel_id, e)) - logger.info("Successfully put {} channels into kolibri_public".format(count)) diff --git a/contentcuration/kolibri_public/tests/base.py b/contentcuration/kolibri_public/tests/base.py index 044755591d..1e5f341ab2 100644 --- a/contentcuration/kolibri_public/tests/base.py +++ b/contentcuration/kolibri_public/tests/base.py @@ -32,6 +32,17 @@ def choices(sequence, k): return [random.choice(sequence) for _ in range(0, k)] +OKAY_TAG = "okay_tag" +BAD_TAG = "tag_is_too_long_because_it_is_over_30_characters" + +PROBLEMATIC_HTML5_NODE = "ab9d3fd905c848a6989936c609405abb" + +BUILDER_DEFAULT_OPTIONS = { + "problematic_tags": False, + "problematic_nodes": False, +} + + class ChannelBuilder(object): """ This class is purely to generate all the relevant data for a single @@ -49,11 +60,15 @@ class ChannelBuilder(object): "root_node", ) - def __init__(self, levels=3, num_children=5, models=kolibri_public_models): + def __init__(self, levels=3, num_children=5, models=kolibri_public_models, options=None): self.levels = levels self.num_children = num_children self.models = models + self.options = BUILDER_DEFAULT_OPTIONS.copy() + if options: + self.options.update(options) + self.content_tags = {} self._excluded_channel_fields = None self._excluded_node_fields = None @@ -75,8 +90,16 @@ def generate_new_tree(self): self.channel = self.channel_data() self.files = {} self.localfiles = {} + self.node_to_files_map = {} self.localfile_to_files_map = {} + self.content_tag_map = {} + + tags = [OKAY_TAG] + if self.options["problematic_tags"]: + tags.append(BAD_TAG) + for tag_name in tags: + self.content_tag_data(tag_name) self.root_node = self.generate_topic() if "root_id" in self.channel: @@ -88,6 +111,22 @@ def generate_new_tree(self): self.root_node["children"] = self.recurse_and_generate( self.root_node["id"], self.levels ) + if self.options["problematic_nodes"]: + self.root_node["children"].extend(self.generate_problematic_nodes()) + + def generate_problematic_nodes(self): + nodes = [] + html5_not_a_topic = self.contentnode_data( + node_id=PROBLEMATIC_HTML5_NODE, + kind=content_kinds.HTML5, + parent_id=self.root_node["id"], + ) + # the problem: this node is not a topic, but it has children + html5_not_a_topic["children"] = [ + self.contentnode_data(parent_id=PROBLEMATIC_HTML5_NODE) + ] + nodes.append(html5_not_a_topic) + return nodes def load_data(self): try: @@ -117,7 +156,19 @@ def generate_nodes_from_root_node(self): self.nodes = {n["id"]: n for n in map(to_dict, self._django_nodes)} def insert_into_default_db(self): + self.models.ContentTag.objects.bulk_create( + (self.models.ContentTag(**tag) for tag in self.content_tags.values()) + ) self.models.ContentNode.objects.bulk_create(self._django_nodes) + self.models.ContentNode.tags.through.objects.bulk_create( + ( + self.models.ContentNode.tags.through( + contentnode_id=node["id"], contenttag_id=tag["id"] + ) + for node in self.nodes.values() + for tag in self.content_tags.values() + ) + ) self.models.ChannelMetadata.objects.create(**self.channel) self.models.LocalFile.objects.bulk_create( (self.models.LocalFile(**local) for local in self.localfiles.values()) @@ -153,6 +204,7 @@ def data(self): "content_contentnode": list(self.nodes.values()), "content_file": list(self.files.values()), "content_localfile": list(self.localfiles.values()), + "content_contenttag": list(self.content_tags.values()), } def recurse_and_generate(self, parent_id, levels): @@ -190,6 +242,8 @@ def generate_leaf(self, parent_id): thumbnail=True, preset=format_presets.VIDEO_THUMBNAIL, ) + for tag_id in self.content_tags: + self.content_tag_map[node["id"]] = [tag_id] return node def channel_data(self, channel_id=None, version=1): @@ -218,6 +272,14 @@ def channel_data(self, channel_id=None, version=1): del channel_data[field] return channel_data + def content_tag_data(self, tag_name): + data = { + "id": uuid4_hex(), + "tag_name": tag_name, + } + self.content_tags[data["id"]] = data + return data + def localfile_data(self, extension="mp4"): data = { "file_size": random.randint(1, 1000), diff --git a/contentcuration/kolibri_public/tests/test_content_app.py b/contentcuration/kolibri_public/tests/test_content_app.py index c6f4327972..50c4a6a35b 100644 --- a/contentcuration/kolibri_public/tests/test_content_app.py +++ b/contentcuration/kolibri_public/tests/test_content_app.py @@ -11,6 +11,7 @@ from django.utils.http import http_date from kolibri_public import models from kolibri_public.tests.base import ChannelBuilder +from kolibri_public.tests.base import OKAY_TAG from le_utils.constants import content_kinds from rest_framework.test import APITestCase @@ -373,6 +374,8 @@ def test_contentnode_tags(self): response = self.client.get( reverse("publiccontentnode-detail", kwargs={"pk": self.root.id}) ) + # added by channel builder + tags.append(OKAY_TAG) self.assertEqual(set(response.data["tags"]), set(tags)) def test_channelmetadata_list(self): diff --git a/contentcuration/kolibri_public/tests/test_mapper.py b/contentcuration/kolibri_public/tests/test_mapper.py index 13066ede63..4411032ba4 100644 --- a/contentcuration/kolibri_public/tests/test_mapper.py +++ b/contentcuration/kolibri_public/tests/test_mapper.py @@ -9,7 +9,9 @@ from kolibri_content.router import using_content_database from kolibri_public import models as kolibri_public_models from kolibri_public.tests.base import ChannelBuilder +from kolibri_public.tests.base import OKAY_TAG from kolibri_public.utils.mapper import ChannelMapper +from le_utils.constants import content_kinds from contentcuration.models import Channel @@ -19,14 +21,14 @@ class ChannelMapperTest(TestCase): @property def overrides(self): return { - kolibri_public_models.ContentNode: { - "available": True, - "tree_id": self.mapper.tree_id, - }, - kolibri_public_models.LocalFile: { - "available": True, + kolibri_public_models.ContentNode: { + "available": True, + "tree_id": self.mapper.tree_id, + }, + kolibri_public_models.LocalFile: { + "available": True, + } } - } @classmethod def setUpClass(cls): @@ -36,7 +38,10 @@ def setUpClass(cls): with using_content_database(cls.tempdb): call_command("migrate", "content", database=get_active_content_database(), no_input=True) - builder = ChannelBuilder(models=kolibri_content_models) + builder = ChannelBuilder(models=kolibri_content_models, options={ + "problematic_tags": True, + "problematic_nodes": True, + }) builder.insert_into_default_db() cls.source_root = kolibri_content_models.ContentNode.objects.get(id=builder.root_node["id"]) cls.channel = kolibri_content_models.ChannelMetadata.objects.get(id=builder.channel["id"]) @@ -57,6 +62,10 @@ def _assert_model(self, source, mapped, Model): self.assertEqual(getattr(source, column), getattr(mapped, column)) def _assert_node(self, source, mapped): + """ + :param source: kolibri_content_models.ContentNode + :param mapped: kolibri_public_models.ContentNode + """ self._assert_model(source, mapped, kolibri_public_models.ContentNode) for src, mpd in zip(source.assessmentmetadata.all(), mapped.assessmentmetadata.all()): @@ -66,13 +75,21 @@ def _assert_node(self, source, mapped): self._assert_model(src, mpd, kolibri_public_models.File) self._assert_model(src.local_file, mpd.local_file, kolibri_public_models.LocalFile) + # should only map OKAY_TAG and not BAD_TAG + for mapped_tag in mapped.tags.all(): + self.assertEqual(OKAY_TAG, mapped_tag.tag_name) + def _recurse_and_assert(self, sources, mappeds, recursion_depth=0): recursion_depths = [] for source, mapped in zip(sources, mappeds): self._assert_node(source, mapped) source_children = source.children.all() mapped_children = mapped.children.all() - self.assertEqual(len(source_children), len(mapped_children)) + if mapped.kind == content_kinds.TOPIC: + self.assertEqual(len(source_children), len(mapped_children)) + else: + self.assertEqual(0, len(mapped_children)) + recursion_depths.append( self._recurse_and_assert( source_children, diff --git a/contentcuration/kolibri_public/utils/annotation.py b/contentcuration/kolibri_public/utils/annotation.py index a2fca0289f..3cb800a3d9 100644 --- a/contentcuration/kolibri_public/utils/annotation.py +++ b/contentcuration/kolibri_public/utils/annotation.py @@ -75,6 +75,9 @@ def calculate_next_order(channel, public=False): # Ensure that this channel is always included in the list. Q(public=True, deleted=False, main_tree__published=True) | Q(id=channel.id) ).order_by("-priority").values_list("id", flat=True)) - order = channel_list_order.index(channel.id) - channel.order = order - channel.save() + # this shouldn't happen, but if we're exporting a channel database to Kolibri Public + # and the channel does not actually exist locally, then this would fail + if channel.id in channel_list_order: + order = channel_list_order.index(channel.id) + channel.order = order + channel.save() diff --git a/contentcuration/kolibri_public/utils/mapper.py b/contentcuration/kolibri_public/utils/mapper.py index 2f3db0811f..a74f369403 100644 --- a/contentcuration/kolibri_public/utils/mapper.py +++ b/contentcuration/kolibri_public/utils/mapper.py @@ -1,5 +1,7 @@ from django.db import transaction +from django.db.models.functions import Length from kolibri_content import models as kolibri_content_models +from kolibri_content.base_models import MAX_TAG_LENGTH from kolibri_public import models as kolibri_public_models from kolibri_public.search import annotate_label_bitmasks from kolibri_public.utils.annotation import set_channel_metadata_fields @@ -24,14 +26,14 @@ def __init__(self, channel, public=True): @property def overrides(self): return { - kolibri_public_models.ContentNode: { - "available": True, - "tree_id": self.tree_id, - }, - kolibri_public_models.LocalFile: { - "available": True, + kolibri_public_models.ContentNode: { + "available": True, + "tree_id": self.tree_id, + }, + kolibri_public_models.LocalFile: { + "available": True, + } } - } def _handle_old_tree_if_exists(self): try: @@ -135,17 +137,31 @@ def _map( return [node_copy] def _copy_tags(self, node_ids): - source_tags_mappings = kolibri_content_models.ContentNode.tags.through.objects.filter( + initial_source_tag_mappings = kolibri_content_models.ContentNode.tags.through.objects.filter( contentnode_id__in=node_ids ) - source_tags = kolibri_content_models.ContentTag.objects.filter( - id__in=source_tags_mappings.values_list("contenttag_id", flat=True) + source_tags = ( + kolibri_content_models.ContentTag.objects + .annotate( + tag_name_len=Length("tag_name"), + ) + .filter( + id__in=initial_source_tag_mappings.values_list("contenttag_id", flat=True), + tag_name_len__lte=MAX_TAG_LENGTH, + ) + ) + + source_tag_mappings = ( + initial_source_tag_mappings + .filter( + contenttag_id__in=source_tags.values_list("id", flat=True), + ) ) self._map_and_bulk_create_model(source_tags, kolibri_public_models.ContentTag) - self._map_and_bulk_create_model(source_tags_mappings, kolibri_public_models.ContentNode.tags.through) + self._map_and_bulk_create_model(source_tag_mappings, kolibri_public_models.ContentNode.tags.through) def _copy_assessment_metadata(self, node_ids): node_assessmentmetadata = kolibri_content_models.AssessmentMetaData.objects.filter(contentnode_id__in=node_ids) @@ -200,6 +216,10 @@ def _deep_map( mapped_nodes = kolibri_public_models.ContentNode.objects.bulk_create(nodes_to_create) - self._copy_associated_objects(source_nodes) + # filter to only the nodes that were created, since some source nodes could have + # been problematic + self._copy_associated_objects(source_nodes.filter( + id__in=[mapped_node.id for mapped_node in mapped_nodes], + )) return mapped_nodes