diff --git a/cms/djangoapps/contentstore/management/commands/migrate_to_split.py b/cms/djangoapps/contentstore/management/commands/migrate_to_split.py deleted file mode 100644 index 38fed6265c0c..000000000000 --- a/cms/djangoapps/contentstore/management/commands/migrate_to_split.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Django management command to migrate a course from the old Mongo modulestore -to the new split-Mongo modulestore. -""" - - -from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user -from django.core.management.base import BaseCommand, CommandError -from opaque_keys import InvalidKeyError -from opaque_keys.edx.keys import CourseKey - -from cms.djangoapps.contentstore.management.commands.utils import user_from_str -from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order -from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order -from xmodule.modulestore.split_migrator import SplitMigrator # lint-amnesty, pylint: disable=wrong-import-order - - -class Command(BaseCommand): - """ - Migrate a course from old-Mongo to split-Mongo. It reuses the old course id except where overridden. - """ - - help = "Migrate a course from old-Mongo to split-Mongo. The new org, course, and run will " \ - "default to the old one unless overridden." - - def add_arguments(self, parser): - parser.add_argument('course_key') - parser.add_argument('email') - parser.add_argument('--org', help='New org to migrate to.') - parser.add_argument('--course', help='New course key to migrate to.') - parser.add_argument('--run', help='New run to migrate to.') - - def parse_args(self, **options): - """ - Return a 5-tuple of passed in values for (course_key, user, org, course, run). - """ - try: - course_key = CourseKey.from_string(options['course_key']) - except InvalidKeyError: - raise CommandError("Invalid location string") # lint-amnesty, pylint: disable=raise-missing-from - - try: - user = user_from_str(options['email']) - except User.DoesNotExist: - raise CommandError("No user found identified by {}".format(options['email'])) # lint-amnesty, pylint: disable=raise-missing-from - - return course_key, user.id, options['org'], options['course'], options['run'] - - def handle(self, *args, **options): - course_key, user, org, course, run = self.parse_args(**options) - - migrator = SplitMigrator( - source_modulestore=modulestore(), - split_modulestore=modulestore()._get_modulestore_by_type(ModuleStoreEnum.Type.split), # lint-amnesty, pylint: disable=protected-access - ) - - migrator.migrate_mongo_course(course_key, user, org, course, run) diff --git a/xmodule/modulestore/mixed.py b/xmodule/modulestore/mixed.py index 88bb8215fe26..17b5e8a0869d 100644 --- a/xmodule/modulestore/mixed.py +++ b/xmodule/modulestore/mixed.py @@ -15,10 +15,9 @@ from xmodule.assetstore import AssetMetadata -from . import XMODULE_FIELDS_WITH_USAGE_KEYS, ModuleStoreEnum, ModuleStoreWriteBase +from . import XMODULE_FIELDS_WITH_USAGE_KEYS, ModuleStoreWriteBase from .draft_and_published import ModuleStoreDraftAndPublished from .exceptions import DuplicateCourseError, ItemNotFoundError -from .split_migrator import SplitMigrator log = logging.getLogger(__name__) @@ -717,17 +716,9 @@ def clone_course(self, source_course_id, dest_course_id, user_id, fields=None, * if source_modulestore == dest_modulestore: return source_modulestore.clone_course(source_course_id, dest_course_id, user_id, fields, **kwargs) - if dest_modulestore.get_modulestore_type() == ModuleStoreEnum.Type.split: - split_migrator = SplitMigrator(dest_modulestore, source_modulestore) - split_migrator.migrate_mongo_course(source_course_id, user_id, dest_course_id.org, - dest_course_id.course, dest_course_id.run, fields, **kwargs) - - # the super handles assets and any other necessities - super().clone_course(source_course_id, dest_course_id, user_id, fields, **kwargs) - else: - raise NotImplementedError("No code for cloning from {} to {}".format( - source_modulestore, dest_modulestore - )) + raise NotImplementedError("No code for cloning from {} to {}".format( + source_modulestore, dest_modulestore + )) @strip_key @prepare_asides diff --git a/xmodule/modulestore/split_migrator.py b/xmodule/modulestore/split_migrator.py deleted file mode 100644 index c773648bbbad..000000000000 --- a/xmodule/modulestore/split_migrator.py +++ /dev/null @@ -1,231 +0,0 @@ -''' -Code for migrating from other modulestores to the split_mongo modulestore. - -Exists at the top level of modulestore b/c it needs to know about and access each modulestore. - -In general, it's strategy is to treat the other modulestores as read-only and to never directly -manipulate storage but use existing api's. -''' - - -import logging - -from opaque_keys.edx.locator import CourseLocator -from xblock.fields import Reference, ReferenceList, ReferenceValueDict - -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.exceptions import ItemNotFoundError - -log = logging.getLogger(__name__) - - -class SplitMigrator: - """ - Copies courses from old mongo to split mongo and sets up location mapping so any references to the old - name will be able to find the new elements. - """ - def __init__(self, split_modulestore, source_modulestore): - super().__init__() - self.split_modulestore = split_modulestore - self.source_modulestore = source_modulestore - - def migrate_mongo_course( - self, source_course_key, user_id, new_org=None, new_course=None, new_run=None, fields=None, **kwargs - ): - """ - Create a new course in split_mongo representing the published and draft versions of the course from the - original mongo store. And return the new CourseLocator - - If the new course already exists, this raises DuplicateItemError - - :param source_course_key: which course to migrate - :param user_id: the user whose action is causing this migration - :param new_org, new_course, new_run: (optional) identifiers for the new course. Defaults to - the source_course_key's values. - """ - # the only difference in data between the old and split_mongo xblocks are the locations; - # so, any field which holds a location must change to a Locator; otherwise, the persistence - # layer and kvs's know how to store it. - # locations are in location, children, conditionals, course.tab - - # create the course: set fields to explicitly_set for each scope, id_root = new_course_locator, master_branch = 'production' # lint-amnesty, pylint: disable=line-too-long - original_course = self.source_modulestore.get_course(source_course_key, **kwargs) - if original_course is None: - raise ItemNotFoundError(str(source_course_key)) - - if new_org is None: - new_org = source_course_key.org - if new_course is None: - new_course = source_course_key.course - if new_run is None: - new_run = source_course_key.run - - new_course_key = CourseLocator(new_org, new_course, new_run, branch=ModuleStoreEnum.BranchName.published) - with self.split_modulestore.bulk_operations(new_course_key): - new_fields = self._get_fields_translate_references(original_course, new_course_key, None) - if fields: - new_fields.update(fields) - new_course = self.split_modulestore.create_course( - new_org, new_course, new_run, user_id, - fields=new_fields, - master_branch=ModuleStoreEnum.BranchName.published, - skip_auto_publish=True, - **kwargs - ) - - self._copy_published_modules_to_course( - new_course, original_course.location, source_course_key, user_id, **kwargs - ) - - # TODO: This should be merged back into the above transaction, but can't be until split.py - # is refactored to have more coherent access patterns - with self.split_modulestore.bulk_operations(new_course_key): - - # create a new version for the drafts - self._add_draft_modules_to_course(new_course.location, source_course_key, user_id, **kwargs) - - return new_course.id - - def _copy_published_modules_to_course(self, new_course, old_course_loc, source_course_key, user_id, **kwargs): - """ - Copy all of the modules from the 'direct' version of the course to the new split course. - """ - course_version_locator = new_course.id.version_agnostic() - - # iterate over published course elements. Wildcarding rather than descending b/c some elements are orphaned (e.g., # lint-amnesty, pylint: disable=line-too-long - # course about pages, conditionals) - for module in self.source_modulestore.get_items( - source_course_key, revision=ModuleStoreEnum.RevisionOption.published_only, **kwargs - ): - # don't copy the course again. - if module.location != old_course_loc: - # create split_xblock using split.create_item - # NOTE: the below auto populates the children when it migrates the parent; so, - # it doesn't need the parent as the first arg. That is, it translates and populates - # the 'children' field as it goes. - _new_module = self.split_modulestore.create_item( - user_id, - course_version_locator, - module.location.block_type, - block_id=module.location.block_id, - fields=self._get_fields_translate_references( - module, course_version_locator, new_course.location.block_id - ), - skip_auto_publish=True, - **kwargs - ) - # after done w/ published items, add version for DRAFT pointing to the published structure - index_info = self.split_modulestore.get_course_index_info(course_version_locator) - versions = index_info['versions'] - versions[ModuleStoreEnum.BranchName.draft] = versions[ModuleStoreEnum.BranchName.published] - self.split_modulestore.update_course_index(course_version_locator, index_info) - - # clean up orphans in published version: in old mongo, parents pointed to the union of their published and draft - # children which meant some pointers were to non-existent locations in 'direct' - self.split_modulestore.fix_not_found(course_version_locator, user_id) - - def _add_draft_modules_to_course(self, published_course_usage_key, source_course_key, user_id, **kwargs): - """ - update each draft. Create any which don't exist in published and attach to their parents. - """ - # each true update below will trigger a new version of the structure. We may want to just have one new version - # but that's for a later date. - new_draft_course_loc = published_course_usage_key.course_key.for_branch(ModuleStoreEnum.BranchName.draft) - # to prevent race conditions of grandchilden being added before their parents and thus having no parent to - # add to - awaiting_adoption = {} - for module in self.source_modulestore.get_items( - source_course_key, revision=ModuleStoreEnum.RevisionOption.draft_only, **kwargs - ): - new_locator = new_draft_course_loc.make_usage_key(module.category, module.location.block_id) - if self.split_modulestore.has_item(new_locator): - # was in 'direct' so draft is a new version - split_module = self.split_modulestore.get_item(new_locator, **kwargs) - # need to remove any no-longer-explicitly-set values and add/update any now set values. - for name, field in split_module.fields.items(): - if field.is_set_on(split_module) and not module.fields[name].is_set_on(module): - field.delete_from(split_module) - for field, value in self._get_fields_translate_references( - module, new_draft_course_loc, published_course_usage_key.block_id, field_names=False - ).items(): - field.write_to(split_module, value) - - _new_module = self.split_modulestore.update_item(split_module, user_id, **kwargs) - else: - # only a draft version (aka, 'private'). - _new_module = self.split_modulestore.create_item( - user_id, new_draft_course_loc, - new_locator.block_type, - block_id=new_locator.block_id, - fields=self._get_fields_translate_references( - module, new_draft_course_loc, published_course_usage_key.block_id - ), - **kwargs - ) - awaiting_adoption[module.location] = new_locator - for draft_location, new_locator in awaiting_adoption.items(): - parent_loc = self.source_modulestore.get_parent_location( - draft_location, revision=ModuleStoreEnum.RevisionOption.draft_preferred, **kwargs - ) - if parent_loc is None: - log.warning('No parent found in source course for %s', draft_location) - continue - old_parent = self.source_modulestore.get_item(parent_loc, **kwargs) - split_parent_loc = new_draft_course_loc.make_usage_key( - parent_loc.block_type, - parent_loc.block_id if parent_loc.block_type != 'course' else published_course_usage_key.block_id - ) - new_parent = self.split_modulestore.get_item(split_parent_loc, **kwargs) - # this only occurs if the parent was also awaiting adoption: skip this one, go to next - if any(new_locator.block_id == child.block_id for child in new_parent.children): - continue - # find index for module: new_parent may be missing quite a few of old_parent's children - new_parent_cursor = 0 - for old_child_loc in old_parent.children: - if old_child_loc.block_id == draft_location.block_id: - break # moved cursor enough, insert it here - # sibling may move cursor - for idx in range(new_parent_cursor, len(new_parent.children)): - if new_parent.children[idx].block_id == old_child_loc.block_id: - new_parent_cursor = idx + 1 - break # skipped sibs enough, pick back up scan - new_parent.children.insert(new_parent_cursor, new_locator) - new_parent = self.split_modulestore.update_item(new_parent, user_id) - - def _get_fields_translate_references(self, xblock, new_course_key, course_block_id, field_names=True): - """ - Return a dictionary of field: value pairs for explicitly set fields - but convert all references to their BlockUsageLocators - Args: - field_names: if Truthy, the dictionary keys are the field names. If falsey, the keys are the - field objects. - """ - def get_translation(location): - """ - Convert the location - """ - return new_course_key.make_usage_key( - location.block_type, - location.block_id if location.block_type != 'course' else course_block_id - ) - - result = {} - for field_name, field in xblock.fields.items(): - if field.is_set_on(xblock): - field_value = field.read_from(xblock) - field_key = field_name if field_names else field - if isinstance(field, Reference) and field_value is not None: - result[field_key] = get_translation(field_value) - elif isinstance(field, ReferenceList): - result[field_key] = [ - get_translation(ele) for ele in field_value - ] - elif isinstance(field, ReferenceValueDict): - result[field_key] = { - key: get_translation(subvalue) - for key, subvalue in field_value.items() - } - else: - result[field_key] = field_value - - return result diff --git a/xmodule/modulestore/tests/test_split_migrator.py b/xmodule/modulestore/tests/test_split_migrator.py deleted file mode 100644 index a322e2b3962f..000000000000 --- a/xmodule/modulestore/tests/test_split_migrator.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -Tests for split_migrator - -""" - - -import random -import uuid -from unittest import mock - -from xblock.fields import UNIQUE_ID, Reference, ReferenceList, ReferenceValueDict - -from openedx.core.lib.tests import attr -from xmodule.modulestore.split_migrator import SplitMigrator -from xmodule.modulestore.tests.test_split_w_old_mongo import SplitWMongoCourseBootstrapper - - -@attr('mongo') -class TestMigration(SplitWMongoCourseBootstrapper): - """ - Test the split migrator - """ - - def setUp(self): - super().setUp() - self.migrator = SplitMigrator(self.split_mongo, self.draft_mongo) - - def _create_course(self): # lint-amnesty, pylint: disable=arguments-differ - """ - A course testing all of the conversion mechanisms: - * some inheritable settings - * sequences w/ draft and live intermixed children to ensure all get to the draft but - only the live ones get to published. Some are only draft, some are both, some are only live. - * about, static_tab, and conditional documents - """ - super()._create_course(split=False) - - # chapters - chapter1_name = uuid.uuid4().hex - self._create_item('chapter', chapter1_name, {}, {'display_name': 'Chapter 1'}, 'course', 'runid', split=False) - chap2_loc = self.old_course_key.make_usage_key('chapter', uuid.uuid4().hex) - self._create_item( - chap2_loc.block_type, chap2_loc.block_id, {}, {'display_name': 'Chapter 2'}, 'course', 'runid', split=False - ) - # vertical in live only - live_vert_name = uuid.uuid4().hex - self._create_item( - 'vertical', live_vert_name, {}, {'display_name': 'Live vertical'}, 'chapter', chapter1_name, - draft=False, split=False - ) - self.create_random_units(False, self.old_course_key.make_usage_key('vertical', live_vert_name)) - # vertical in both live and draft - both_vert_loc = self.old_course_key.make_usage_key('vertical', uuid.uuid4().hex) - self._create_item( - both_vert_loc.block_type, both_vert_loc.block_id, {}, {'display_name': 'Both vertical'}, 'chapter', chapter1_name, # lint-amnesty, pylint: disable=line-too-long - draft=False, split=False - ) - self.create_random_units(False, both_vert_loc) - draft_both = self.draft_mongo.get_item(both_vert_loc) - draft_both.display_name = 'Both vertical renamed' - self.draft_mongo.update_item(draft_both, self.user_id) - self.create_random_units(True, both_vert_loc) - # vertical in draft only (x2) - draft_vert_loc = self.old_course_key.make_usage_key('vertical', uuid.uuid4().hex) - self._create_item( - draft_vert_loc.block_type, draft_vert_loc.block_id, {}, {'display_name': 'Draft vertical'}, 'chapter', chapter1_name, # lint-amnesty, pylint: disable=line-too-long - draft=True, split=False - ) - self.create_random_units(True, draft_vert_loc) - draft_vert_loc = self.old_course_key.make_usage_key('vertical', uuid.uuid4().hex) - self._create_item( - draft_vert_loc.block_type, draft_vert_loc.block_id, {}, {'display_name': 'Draft vertical2'}, 'chapter', chapter1_name, # lint-amnesty, pylint: disable=line-too-long - draft=True, split=False - ) - self.create_random_units(True, draft_vert_loc) - - # and finally one in live only (so published has to skip 2 preceding sibs) - live_vert_loc = self.old_course_key.make_usage_key('vertical', uuid.uuid4().hex) - self._create_item( - live_vert_loc.block_type, live_vert_loc.block_id, {}, {'display_name': 'Live vertical end'}, 'chapter', chapter1_name, # lint-amnesty, pylint: disable=line-too-long - draft=False, split=False - ) - self.create_random_units(False, live_vert_loc) - - # now the other chapter w/ the conditional - # create pointers to children (before the children exist) - indirect1_loc = self.old_course_key.make_usage_key('discussion', uuid.uuid4().hex) - indirect2_loc = self.old_course_key.make_usage_key('html', uuid.uuid4().hex) - conditional_loc = self.old_course_key.make_usage_key('conditional', uuid.uuid4().hex) - self._create_item( - conditional_loc.block_type, conditional_loc.block_id, - { - 'show_tag_list': [indirect1_loc, indirect2_loc], - 'sources_list': [live_vert_loc, ], - }, - { - 'xml_attributes': { - 'completed': True, - }, - }, - chap2_loc.block_type, chap2_loc.block_id, - draft=False, split=False - ) - # create the children - self._create_item( - indirect1_loc.block_type, indirect1_loc.block_id, {'data': ""}, {'display_name': 'conditional show 1'}, - conditional_loc.block_type, conditional_loc.block_id, - draft=False, split=False - ) - self._create_item( - indirect2_loc.block_type, indirect2_loc.block_id, {'data': ""}, {'display_name': 'conditional show 2'}, - conditional_loc.block_type, conditional_loc.block_id, - draft=False, split=False - ) - - # add direct children - self.create_random_units(False, conditional_loc) - - # and the ancillary docs (not children) - self._create_item( - 'static_tab', uuid.uuid4().hex, {'data': ""}, {'display_name': 'Tab uno'}, - None, None, draft=False, split=False - ) - self._create_item( - 'about', 'overview', {'data': "

test

"}, {}, - None, None, draft=False, split=False - ) - self._create_item( - 'course_info', 'updates', {'data': "
  1. Sep 22

    test

"}, {}, - None, None, draft=False, split=False - ) - - def create_random_units(self, draft, parent_loc): - """ - Create a random selection of units under the given parent w/ random names & attrs - :param store: which store (e.g., direct/draft) to create them in - :param parent: the parent to have point to them - (only makes sense if store is 'direct' and this is 'draft' or vice versa) - """ - for _ in range(random.randrange(6)): - location = parent_loc.replace( - category=random.choice(['html', 'video', 'problem', 'discussion']), - name=uuid.uuid4().hex - ) - metadata = {'display_name': str(uuid.uuid4()), 'graded': True} - data = {} - self._create_item( - location.block_type, location.block_id, data, metadata, parent_loc.block_type, parent_loc.block_id, - draft=draft, split=False - ) - - def compare_courses(self, presplit, new_course_key, published): # lint-amnesty, pylint: disable=missing-function-docstring - # descend via children to do comparison - old_root = presplit.get_course(self.old_course_key) - new_root = self.split_mongo.get_course(new_course_key) - self.compare_dags(presplit, old_root, new_root, published) - - # grab the detached items to compare they should be in both published and draft - for category in ['conditional', 'about', 'course_info', 'static_tab']: - for conditional in presplit.get_items(self.old_course_key, qualifiers={'category': category}): - locator = new_course_key.make_usage_key(category, conditional.location.block_id) - self.compare_dags(presplit, conditional, self.split_mongo.get_item(locator), published) - - def compare_dags(self, presplit, presplit_dag_root, split_dag_root, published): # lint-amnesty, pylint: disable=missing-function-docstring - if split_dag_root.category != 'course': - assert presplit_dag_root.location.block_id == split_dag_root.location.block_id - # compare all fields but references - for name, field in presplit_dag_root.fields.items(): - # fields generated from UNIQUE_IDs are unique to an XBlock's scope, - # so if such a field is unset on an XBlock, we don't expect it - # to persist across courses - field_generated_from_unique_id = not field.is_set_on(presplit_dag_root) and field.default == UNIQUE_ID - should_check_field = not ( - field_generated_from_unique_id or isinstance(field, (Reference, ReferenceList, ReferenceValueDict)) - ) - if should_check_field: - assert getattr(presplit_dag_root, name) == getattr(split_dag_root, name), f'{split_dag_root.location}/{name}: {getattr(presplit_dag_root, name)} != {getattr(split_dag_root, name)}' # pylint: disable=line-too-long - - # compare children - if presplit_dag_root.has_children: - assert len(presplit_dag_root.get_children()) == len(split_dag_root.children), f"{presplit_dag_root.category} '{presplit_dag_root.display_name}': children {presplit_dag_root.children} != {split_dag_root.children}" # pylint: disable=line-too-long - for pre_child, split_child in zip(presplit_dag_root.get_children(), split_dag_root.get_children()): - self.compare_dags(presplit, pre_child, split_child, published) - - def test_migrator(self): - user = mock.Mock(id=1) - new_course_key = self.migrator.migrate_mongo_course(self.old_course_key, user.id, new_run='new_run') - # now compare the migrated to the original course - self.compare_courses(self.draft_mongo, new_course_key, True) # published - self.compare_courses(self.draft_mongo, new_course_key, False) # draft