diff --git a/docs/chefops.md b/docs/chefops.md index b25e15f8..95b726d1 100644 --- a/docs/chefops.md +++ b/docs/chefops.md @@ -11,9 +11,6 @@ This listing shows the `ricecooker` command line interface (CLI) arguments: usage: sushichef.py [-h] [--token TOKEN] [-u] [-v] [--quiet] [--warn] [--debug] [--compress] [--thumbnails] - [--resume] [--step {CONSTRUCT_CHANNEL, CREATE_TREE, - DOWNLOAD_FILES, GET_FILE_DIFF, - START_UPLOAD, UPLOAD_CHANNEL}] [--deploy] [--publish] required arguments: @@ -26,8 +23,6 @@ This listing shows the `ricecooker` command line interface (CLI) arguments: -v, --verbose Verbose mode (default). --compress Compress videos using ffmpeg -crf=32 -b:a 32k mono. --thumbnails Automatically generate thumbnails for content nodes. - --resume Resume chef session from a specified step. - --step {INIT, ... Step to resume progress from (must be used with --resume flag) --update Force re-download of files (skip .ricecookerfilecache/ check) --sample SIZE Upload a sample of SIZE nodes from the channel. --deploy Immediately deploy changes to channel's main tree. @@ -55,7 +50,7 @@ This is required if you suspect the files on the source website have been update Note that some chef scripts implement their own caching mechanism, so you need to disable those caches as well if you want to make sure you're getting new content. Use the commands `rm -rf .webcache` to clear the webcache if it is present, -and `rm -rf .ricecookerfilecache/* storage/* restore/*` to clean all ricecooker +and `rm -rf .ricecookerfilecache/* storage/*` to clean all ricecooker directories and start from scratch. diff --git a/docs/developer/design_cli.md b/docs/developer/design_cli.md index f36c0e8f..0e980598 100644 --- a/docs/developer/design_cli.md +++ b/docs/developer/design_cli.md @@ -112,8 +112,8 @@ There are three types of arguments involved in a chef run: - `args` (dict): command line args as parsed by the sushi chef class and its parents - SushiChef: the `SushiChef.__init__` method configures argparse for the following: - - `compress`, `download_attempts`, `prompt`, `publish`, `resume`, - `stage`, `step`, `thumbnails`, `token`, `update`, `verbose`, `warn` + - `compress`, `download_attempts`, `prompt`, `publish`, + `stage`, `thumbnails`, `token`, `update`, `verbose`, `warn` - MySushiChef: the chef's `__init__` method can define additional cli args - `options` (dict): additional [OPTIONS...] passed at the end of the command line diff --git a/docs/tutorial/gettingstarted.rst b/docs/tutorial/gettingstarted.rst index 23eb3ec1..759270ba 100644 --- a/docs/tutorial/gettingstarted.rst +++ b/docs/tutorial/gettingstarted.rst @@ -108,7 +108,7 @@ If the command succeeds, you should see something like this printed in your term .. parsed-literal:: - In SushiChef.run method. args={'command': 'uploadchannel', 'token': '', 'update': False, 'resume': False, 'stage': True, 'publish': False} options={} + In SushiChef.run method. args={'command': 'uploadchannel', 'token': '', 'update': False, 'stage': True, 'publish': False} options={} Logged in with username you@yourdomain.org Ricecooker v0.6.42 is up-to-date. diff --git a/ricecooker/chefs.py b/ricecooker/chefs.py index 9b75a247..a24367d7 100644 --- a/ricecooker/chefs.py +++ b/ricecooker/chefs.py @@ -16,7 +16,6 @@ from .commands import uploadchannel_wrapper from .exceptions import InvalidUsageException from .exceptions import raise_for_invalid_channel -from .managers.progress import Status from .utils.downloader import get_archive_filename from .utils.jsontrees import build_tree_from_json from .utils.jsontrees import get_channel_node_from_json @@ -142,18 +141,6 @@ def __init__(self, *args, **kwargs): default=3, help="Maximum number of times to retry downloading files.", ) - parser.add_argument( - "--resume", - action="store_true", - help="Resume chef session from a specified step.", - ) - allsteps = [step.name.upper() for step in Status] - parser.add_argument( - "--step", - choices=allsteps, - default="LAST", - help="Step to resume progress from (use with the --resume).", - ) parser.add_argument( "--prompt", action="store_true", diff --git a/ricecooker/commands.py b/ricecooker/commands.py index 5a2766d3..7c77f339 100644 --- a/ricecooker/commands.py +++ b/ricecooker/commands.py @@ -9,8 +9,6 @@ from . import __version__ from . import config from .classes.nodes import ChannelNode -from .managers.progress import RestoreManager -from .managers.progress import Status from .managers.tree import ChannelManager from .utils.slack import send_slack_notification @@ -33,8 +31,6 @@ def uploadchannel( # noqa: C901 update=False, thumbnails=False, download_attempts=3, - resume=False, - step=Status.LAST.name, token="#", prompt=False, publish=False, @@ -49,8 +45,6 @@ def uploadchannel( # noqa: C901 update (bool): indicates whether to re-download files (optional) thumbnails (bool): indicates whether to automatically derive thumbnails from content (optional) download_attempts (int): number of times to retry downloading files (optional) - resume (bool): indicates whether to resume last session automatically (optional) - step (str): step to resume process from (optional) token (str): content server authorization token prompt (bool): indicates whether to prompt user to open channel when done (optional) publish (bool): indicates whether to automatically publish channel (optional) @@ -79,9 +73,6 @@ def uploadchannel( # noqa: C901 config.DOWNLOAD_SESSION.auth = chef.auth - # Get domain to upload to - config.init_file_mapping_store() - if not command == "dryrun": # Authenticate user and check current Ricecooker version username, token = authenticate_user(token) @@ -96,62 +87,35 @@ def uploadchannel( # noqa: C901 config.LOGGER.info("\n\n***** Starting channel build process *****\n\n") - # Set up progress tracker - config.PROGRESS_MANAGER = RestoreManager() - if ( - not resume or not config.PROGRESS_MANAGER.check_for_session() - ) and step.upper() != Status.DONE.name: - config.PROGRESS_MANAGER.init_session() - else: - if resume or prompt_yes_or_no( - "Previous session detected. Would you like to resume your last session?" - ): - config.LOGGER.info("Resuming your last session...") - step = Status.LAST.name if step is None else step - config.PROGRESS_MANAGER = config.PROGRESS_MANAGER.load_progress( - step.upper() - ) - else: - config.PROGRESS_MANAGER.init_session() - if hasattr(chef, "download_content"): chef.download_content() # TODO load csv if exists metadata_dict = chef.load_channel_metadata_from_csv() - # Construct channel if it hasn't been constructed already - if config.PROGRESS_MANAGER.get_status_val() <= Status.CONSTRUCT_CHANNEL.value: - config.LOGGER.info("Calling construct_channel... ") - channel = chef.construct_channel(**kwargs) - if "sample" in kwargs and kwargs["sample"]: - channel = select_sample_nodes(channel, size=kwargs["sample"]) - config.PROGRESS_MANAGER.set_channel(channel) - channel = config.PROGRESS_MANAGER.channel + # Construct channel + config.LOGGER.info("Calling construct_channel... ") + channel = chef.construct_channel(**kwargs) + if "sample" in kwargs and kwargs["sample"]: + channel = select_sample_nodes(channel, size=kwargs["sample"]) - # Set initial tree if it hasn't been set already - if config.PROGRESS_MANAGER.get_status_val() <= Status.CREATE_TREE.value: - config.PROGRESS_MANAGER.set_tree(create_initial_tree(channel)) - tree = config.PROGRESS_MANAGER.tree + # Set initial tree + tree = create_initial_tree(channel) # Early permission check: Try creating the channel before downloading/uploading files # This will fail fast if the user lacks edit permissions # Fixes issues #95 and #434 by avoiding wasted downloads/uploads - if ( - config.PROGRESS_MANAGER.get_status_val() <= Status.CREATE_TREE.value - and command != "dryrun" - ): + if command != "dryrun": config.LOGGER.info("Checking channel permissions...") try: tree.root_id, tree.channel_id = tree.add_channel() except Exception: sys.exit(1) - # Download files if they haven't been downloaded already - if config.PROGRESS_MANAGER.get_status_val() <= Status.DOWNLOAD_FILES.value: - config.LOGGER.info("") - config.LOGGER.info("Downloading files...") - config.PROGRESS_MANAGER.set_files(*process_tree_files(tree)) + # Download files + config.LOGGER.info("") + config.LOGGER.info("Downloading files...") + files_to_diff = process_tree_files(tree) # Apply any modifications to chef chef.apply_modifications(channel, metadata_dict) @@ -164,43 +128,26 @@ def uploadchannel( # noqa: C901 config.LOGGER.info("Command is dryrun so we are not uploading channel.") return - # Set download manager in case steps were skipped - files_to_diff = config.PROGRESS_MANAGER.files_downloaded - config.FAILED_FILES = config.PROGRESS_MANAGER.files_failed + # Get file diff + config.LOGGER.info("") + config.LOGGER.info("Getting file diff...") + file_diff = get_file_diff(tree, files_to_diff) - # Get file diff if it hasn't been generated already - if config.PROGRESS_MANAGER.get_status_val() <= Status.GET_FILE_DIFF.value: - config.LOGGER.info("") - config.LOGGER.info("Getting file diff...") - config.PROGRESS_MANAGER.set_diff(get_file_diff(tree, files_to_diff)) - file_diff = config.PROGRESS_MANAGER.file_diff + # Upload files + config.LOGGER.info("") + config.LOGGER.info("Uploading files...") + upload_files(tree, file_diff) - # Set which files have already been uploaded - tree.uploaded_files = config.PROGRESS_MANAGER.files_uploaded - - # Upload files if they haven't been uploaded already - if config.PROGRESS_MANAGER.get_status_val() <= Status.UPLOADING_FILES.value: - config.LOGGER.info("") - config.LOGGER.info("Uploading files...") - config.PROGRESS_MANAGER.set_uploaded(upload_files(tree, file_diff)) - - # Create channel on Kolibri Studio if it hasn't been created already - if config.PROGRESS_MANAGER.get_status_val() <= Status.UPLOAD_CHANNEL.value: - config.LOGGER.info("") - config.LOGGER.info("Creating channel...") - config.PROGRESS_MANAGER.set_channel_created(*create_tree(tree)) - channel_link = config.PROGRESS_MANAGER.channel_link - channel_id = config.PROGRESS_MANAGER.channel_id + # Create channel on Kolibri Studio + config.LOGGER.info("") + config.LOGGER.info("Creating channel...") + channel_link, channel_id = create_tree(tree) # Publish tree if flag is set to True - if ( - config.PUBLISH - and config.PROGRESS_MANAGER.get_status_val() <= Status.PUBLISH_CHANNEL.value - ): + if config.PUBLISH: config.LOGGER.info("") config.LOGGER.info("Publishing channel...") publish_tree(tree, channel_id) - config.PROGRESS_MANAGER.set_published() # Open link on web browser (if specified) and return new link config.LOGGER.info("\n\nDONE: Channel created at {0}\n".format(channel_link)) @@ -211,7 +158,6 @@ def uploadchannel( # noqa: C901 if config.SLACK_WEBHOOK_URL: send_slack_notification(tree.channel, channel_link) - config.PROGRESS_MANAGER.set_done() return channel_link @@ -292,13 +238,13 @@ def process_tree_files(tree): """process_tree_files: Download files from nodes Args: tree (ChannelManager): manager to handle communication to Kolibri Studio - Returns: None + Returns: list of files to check against Kolibri Studio """ # Fill in values necessary for next steps config.LOGGER.info("Processing content...") files_to_diff = tree.process_tree() tree.check_for_files_failed() - return files_to_diff, config.FAILED_FILES + return files_to_diff def get_file_diff(tree, files_to_diff): diff --git a/ricecooker/config.py b/ricecooker/config.py index 9d4aeca9..e25d2121 100644 --- a/ricecooker/config.py +++ b/ricecooker/config.py @@ -3,7 +3,6 @@ """ import atexit -import hashlib import logging.config import os import shutil @@ -19,7 +18,6 @@ VIDEO_HEIGHT = None THUMBNAILS = False PUBLISH = False -PROGRESS_MANAGER = None SUSHI_BAR_CLIENT = None FILE_PIPELINE = None STAGE = False @@ -140,7 +138,7 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers setup_logging() -# Domain and file store location for uploading to production Studio server +# Domain for uploading to production Studio server DEFAULT_DOMAIN = "https://api.studio.learningequality.org" DOMAIN_ENV = os.getenv("STUDIO_URL", None) if DOMAIN_ENV is None: # check old ENV varable for backward compatibility @@ -148,7 +146,6 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers DOMAIN = DOMAIN_ENV if DOMAIN_ENV else DEFAULT_DOMAIN if DOMAIN.endswith("/"): DOMAIN = DOMAIN.rstrip("/") -FILE_STORE_LOCATION = hashlib.md5(DOMAIN.encode("utf-8")).hexdigest() try: TASK_THREADS = int(os.environ.get("TASK_THREADS")) @@ -192,9 +189,6 @@ def setup_logging(level=logging.INFO, main_log=None, error_log=None, add_loggers "RICECOOKER_STORAGE", os.path.join(CURRENT_CWD, "storage") ) -# Folder to store progress tracking information -RESTORE_DIRECTORY = "restore" - # Session for communicating to Kolibri Studio SESSION = requests.Session() @@ -389,29 +383,6 @@ def authentication_url(): return AUTHENTICATION_URL.format(domain=DOMAIN) -def init_file_mapping_store(): - """init_file_mapping_store: creates log to keep track of downloaded files - Args: None - Returns: None - """ - # Make storage directory for restore files if it doesn't already exist - path = os.path.join(RESTORE_DIRECTORY, FILE_STORE_LOCATION) - if not os.path.exists(path): - os.makedirs(path) - - -def get_restore_path(filename): - """get_restore_path: returns path to directory for restoration points - Args: - filename (str): Name of file to store - Returns: string path to file - """ - path = os.path.join(RESTORE_DIRECTORY, FILE_STORE_LOCATION) - if not os.path.exists(path): - os.makedirs(path) - return os.path.join(path, filename + ".pickle") - - def check_version_url(): """check_version_url: returns url to check ricecooker version Args: None diff --git a/ricecooker/managers/progress.py b/ricecooker/managers/progress.py deleted file mode 100644 index 507ba709..00000000 --- a/ricecooker/managers/progress.py +++ /dev/null @@ -1,240 +0,0 @@ -import os -import pickle -import time -from enum import Enum - -from .. import config - - -class Status(Enum): - """Enum containing all statuses Ricecooker can have - - Steps: - INIT: Ricecooker process has been started - CONSTRUCT_CHANNEL: Ricecooker is ready to call sushi chef's construct_channel method - CREATE_TREE: Ricecooker is ready to create relationships between nodes - DOWNLOAD_FILES: Ricecooker is ready to start downloading files - GET_FILE_DIFF: Ricecooker is ready to get file diff from Kolibri Studio - START_UPLOAD: Ricecooker is ready to start uploading files to Kolibri Studio - UPLOADING_FILES: Ricecooker is in the middle of uploading files - UPLOAD_CHANNEL: Ricecooker is ready to upload the channel to Kolibri Studio - PUBLISH_CHANNEL: Ricecooker is ready to publish the channel to Kolibri - DONE: Ricecooker is done - LAST: Place where Ricecooker left off - """ - - INIT = 0 - CONSTRUCT_CHANNEL = 1 - CREATE_TREE = 2 - DOWNLOAD_FILES = 3 - GET_FILE_DIFF = 4 - START_UPLOAD = 5 - UPLOADING_FILES = 6 - UPLOAD_CHANNEL = 7 - PUBLISH_CHANNEL = 8 - DONE = 9 - LAST = 10 - - -class RestoreManager: - """Manager for handling resuming rice cooking process - - Attributes: - restore_path (str): path to .pickle file to store progress - channel (Channel): channel Ricecooker is creating - tree (ChannelManager): manager Ricecooker is using - files_downloaded ([str]): list of files that have been downloaded - file_mapping ({filename:...}): filenames mapped to metadata - files_failed ([str]): list of files that failed to download - file_diff ([str]): list of files that don't exist on Kolibri Studio - files_uploaded ([str]): list of files that have been successfully uploaded - channel_link (str): link to uploaded channel - channel_id (str): id of channel that has been uploaded - status (str): status of Ricecooker - """ - - def __init__(self): - self.channel = None - self.tree = None - self.files_downloaded = [] - self.file_mapping = {} - self.files_failed = [] - self.file_diff = [] - self.files_uploaded = [] - self.channel_link = None - self.channel_id = None - self.status = Status.INIT - self.timestamp = time.time() - - def check_for_session(self, status=None): - """check_for_session: see if session is in progress - Args: - status (str): step to check if last session reached (optional) - Returns: boolean indicating if session exists - """ - status = Status.LAST if status is None else status - return ( - os.path.isfile(self.get_restore_path(status)) - and os.path.getsize(self.get_restore_path(status)) > 0 - ) - - def get_restore_path(self, status=None): - """get_restore_path: get path to restoration file - Args: - status (str): step to get restore file (optional) - Returns: string path to restoration file - """ - status = self.get_status() if status is None else status - return config.get_restore_path(status.name.lower()) - - def __record_progress(self, next_step=None): - """__record_progress: save progress to respective restoration file - Args: None - Returns: None - """ - with ( - open(self.get_restore_path(Status.LAST), "wb") as handle, - open(self.get_restore_path(), "wb") as step_handle, - ): - pickle.dump(self, handle) - pickle.dump(self, step_handle) - - def load_progress(self, resume_step): - """load_progress: loads progress from restoration file - Args: resume_step (str): step at which to resume session - Returns: manager with progress from step - """ - resume_step = Status[resume_step] - progress_path = self.get_restore_path(resume_step) - - # If progress is corrupted, revert to step before - while not self.check_for_session(resume_step): - config.LOGGER.error( - "Ricecooker has not reached {0} status. Reverting to earlier step...".format( - resume_step.name - ) - ) - # All files are corrupted or absent, restart process - if resume_step.value - 1 < 0: - self.init_session() - return self - resume_step = Status(resume_step.value - 1) - progress_path = self.get_restore_path(resume_step) - config.LOGGER.error("Starting from status {0}".format(resume_step.name)) - - # Load manager - with open(progress_path, "rb") as handle: - manager = pickle.load(handle) - if isinstance(manager, RestoreManager): - return manager - else: - return self - - def get_status(self): - """get_status: retrieves current status of Ricecooker - Args: None - Returns: string status of Ricecooker - """ - return self.status - - def get_status_val(self): - """get_status_val: retrieves value of status of Ricecooker - Args: None - Returns: number value of status of Ricecooker - """ - return self.status.value - - def init_session(self): - """init_session: sets session to beginning status - Args: None - Returns: None - """ - # Clear out previous session's restoration files - for status in Status: - path = self.get_restore_path(status) - if os.path.isfile(path): - os.remove(path) - - self.__record_progress() - self.__record_progress(Status.CONSTRUCT_CHANNEL) - - def set_channel(self, channel): - """set_channel: records progress from constructed channel - Args: channel (Channel): channel Ricecooker is creating - Returns: None - """ - self.channel = channel - self.__record_progress(Status.CREATE_TREE) - - def set_tree(self, tree): - """set_channel: records progress from creating the tree - Args: tree (ChannelManager): manager Ricecooker is using - Returns: None - """ - self.tree = tree - self.__record_progress(Status.DOWNLOAD_FILES) - - def set_files(self, files_downloaded, files_failed): - """set_files: records progress from downloading files - Args: - files_downloaded ([str]): list of files that have been downloaded - files_failed ([str]): list of files that failed to download - Returns: None - """ - self.files_downloaded = files_downloaded - self.files_failed = files_failed - self.__record_progress(Status.GET_FILE_DIFF) - - def set_diff(self, file_diff): - """set_diff: records progress from getting file diff - Args: file_diff ([str]): list of files that don't exist on Kolibri Studio - Returns: None - """ - self.file_diff = file_diff - self.__record_progress(Status.START_UPLOAD) - - def set_uploading(self, files_uploaded): - """set_uploading: records progress during uploading files - Args: files_uploaded ([str]): list of files that have been successfully uploaded - Returns: None - """ - self.files_uploaded = files_uploaded - self.__record_progress(Status.UPLOADING_FILES) - - def set_uploaded(self, files_uploaded): - """set_uploaded: records progress after uploading files - Args: files_uploaded ([str]): list of files that have been successfully uploaded - Returns: None - """ - self.files_uploaded = files_uploaded - self.__record_progress(Status.UPLOAD_CHANNEL) - - def set_channel_created(self, channel_link, channel_id): - """set_channel_created: records progress after creating channel on Kolibri Studio - Args: - channel_link (str): link to uploaded channel - channel_id (str): id of channel that has been uploaded - Returns: None - """ - self.channel_link = channel_link - self.channel_id = channel_id - self.__record_progress( - Status.PUBLISH_CHANNEL if config.PUBLISH else Status.DONE - ) - - def set_published(self): - """set_published: records progress after channel has been published - Args: None - Returns: None - """ - self.__record_progress(Status.DONE) - - def set_done(self): - """set_done: records progress after Ricecooker process has been completed - Args: None - Returns: None - """ - self.__record_progress(Status.DONE) - - # Delete restoration point for last step to indicate process has been completed - os.remove(self.get_restore_path(Status.LAST)) diff --git a/ricecooker/managers/tree.py b/ricecooker/managers/tree.py index 1bd6c8db..1f557113 100644 --- a/ricecooker/managers/tree.py +++ b/ricecooker/managers/tree.py @@ -231,24 +231,19 @@ def upload_files(self, file_list): Returns: None """ counter = 0 - files_to_upload = list( - set(file_list) - set(self.uploaded_files) - ) # In case restoring from previous session - try: - with concurrent.futures.ThreadPoolExecutor( - max_workers=config.TASK_THREADS - ) as executor: - # Start the upload operations - for filename in executor.map(self._handle_upload, files_to_upload): - if filename is not None: - counter += 1 - config.LOGGER.info( - "\tUploaded {0} ({count}/{total}) ".format( - filename, count=counter, total=len(files_to_upload) - ) + files_to_upload = list(set(file_list) - set(self.uploaded_files)) + with concurrent.futures.ThreadPoolExecutor( + max_workers=config.TASK_THREADS + ) as executor: + # Start the upload operations + for filename in executor.map(self._handle_upload, files_to_upload): + if filename is not None: + counter += 1 + config.LOGGER.info( + "\tUploaded {0} ({count}/{total}) ".format( + filename, count=counter, total=len(files_to_upload) ) - finally: - config.PROGRESS_MANAGER.set_uploading(self.uploaded_files) + ) def reattempt_upload_fails(self): """reattempt_upload_fails: uploads failed files to server diff --git a/tests/test_argparse.py b/tests/test_argparse.py index f87d48c6..f18a684c 100644 --- a/tests/test_argparse.py +++ b/tests/test_argparse.py @@ -19,8 +19,6 @@ def cli_args_and_expected(): "compress": False, "thumbnails": False, "download_attempts": 3, - "resume": False, - "step": "LAST", "prompt": False, "reset_deprecated": False, "stage": True, @@ -39,13 +37,6 @@ def cli_args_and_expected(): "expected_args": dict(defaults, token="letoken"), "expected_options": {}, }, - { - "cli_input": "./sushichef.py --token=letoken --resume --step=START_UPLOAD", - "expected_args": dict( - defaults, token="letoken", resume=True, step="START_UPLOAD" - ), - "expected_options": {}, - }, { "cli_input": "./sushichef.py --token=letoken lang=fr", "expected_args": dict(defaults, token="letoken"), @@ -59,7 +50,7 @@ def cli_args_and_expected(): { "cli_input": ( "./sushichef.py -uv --warn --compress --download-attempts=4 " - "--token=besttokenever --resume --step=PUBLISH_CHANNEL --prompt --deploy --publish" + "--token=besttokenever --prompt --deploy --publish" ), "expected_args": dict( defaults, @@ -68,8 +59,6 @@ def cli_args_and_expected(): compress=True, download_attempts=4, token="besttokenever", - resume=True, - step="PUBLISH_CHANNEL", prompt=True, stage=False, publish=True,