From 49f5add2f9fb393db5d7436230cc0cf2536f0b57 Mon Sep 17 00:00:00 2001 From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr> Date: Fri, 28 May 2021 23:19:47 +0200 Subject: [PATCH] Refactor run_wf (WIP) --- ...constants.py => phaeoexplorer_constants.py | 18 +- run_workflow_phaeoexplorer.py | 242 +++++++----------- 2 files changed, 112 insertions(+), 148 deletions(-) rename phaoexplorer_constants.py => phaeoexplorer_constants.py (75%) diff --git a/phaoexplorer_constants.py b/phaeoexplorer_constants.py similarity index 75% rename from phaoexplorer_constants.py rename to phaeoexplorer_constants.py index 1cecdb1..2773e2b 100644 --- a/phaoexplorer_constants.py +++ b/phaeoexplorer_constants.py @@ -6,11 +6,23 @@ import constants ### Workflows WORKFLOWS_PATH = "workflows_phaeoexplorer/" -WORKFLOW_LOAD_FASTA_GFF_JBROWSE = "load_fasta_gff_jbrowse" -WORKFLOW_LOAD_FASTA_GFF_JBROWSE_FILE_1_ORG = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga" +WF_LOAD_GFF_JB = "load_fasta_gff_jbrowse" + +WF_LOAD_GFF_JB_1_ORG_FILE = "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v4.ga" +WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME = "0" +WF_LOAD_GFF_JB_1_ORG_INPUT_GFF = "1" +WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS = "2" +WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA = "3" +WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE = "4" +WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF = "5" +WF_LOAD_GFF_JB_1_ORG_STEP_JB_TO_CONTAINER = "6" +WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC = "7" +WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS = "8" +WF_LOAD_GFF_JB_1_ORG_STEP_INDEX = "9" + WORKFLOW_BLAST = "blast" WORKFLOW_INTERPRO = "interpro" -WORKFLOW_VALID_TYPES = [WORKFLOW_LOAD_FASTA_GFF_JBROWSE, WORKFLOW_BLAST, WORKFLOW_INTERPRO] +WORKFLOW_VALID_TYPES = [WF_LOAD_GFF_JB, WORKFLOW_BLAST, WORKFLOW_INTERPRO] ### Galaxy tools diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index c03b917..60c0ed3 100755 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -16,7 +16,7 @@ from bioblend import galaxy import utilities import speciesData import constants -import phaoexplorer_constants +import phaeoexplorer_constants """ gga_init.py @@ -27,7 +27,7 @@ Usage: $ python3 gga_init.py -i input_example.yml --config [config file] [OPTION class StrainWorkflowParam: def __init__(self, genus_species, strain_sex, genus, species, sex, strain, full_name, species_folder_name, org_id, - genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, hda_ids, history_id, + genome_analysis_id, ogs_analysis_id, blastp_analysis_id, interpro_analysis_id, history_id, instance, instance_url, email, password): self.genus_species = genus_species self.strain_sex = strain_sex @@ -42,7 +42,6 @@ class StrainWorkflowParam: self.ogs_analysis_id = ogs_analysis_id self.blastp_analysis_id = blastp_analysis_id self.interpro_analysis_id = interpro_analysis_id - self.hda_ids = hda_ids self.history_id = history_id self.instance = instance self.instance_url = instance_url @@ -58,21 +57,21 @@ class StrainWorkflowParam: def check_param_for_workflow_load_fasta_gff_jbrowse(self, params): params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name, self.species_folder_name, self.org_id, - self.genome_analysis_id, self.ogs_analysis_id, self.hda_ids, self.history_id, + self.genome_analysis_id, self.ogs_analysis_id, self.history_id, self.instance, self.instance_url, self.email, self.password] self.check_param(params) def check_param_for_workflow_blast(self, params): params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name, self.species_folder_name, self.org_id, - self.blastp_analysis_id, self.hda_ids, self.history_id, + self.blastp_analysis_id, self.history_id, self.instance, self.instance_url, self.email, self.password] self.check_param(params) def check_param_for_workflow_interpro(self, params): params = [self.genus_species, self.strain_sex, self.genus, self.species, self.sex, self.strain, self.full_name, self.species_folder_name, self.org_id, - self.interpro_analysis_id, self.hda_ids, self.history_id, + self.interpro_analysis_id, self.history_id, self.instance, self.instance_url, self.email, self.password] self.check_param(params) @@ -111,6 +110,14 @@ class RunWorkflow(speciesData.SpeciesData): self.ogs_analysis_programversion = "OGS{0}".format(self.ogs_version) self.ogs_analysis_sourcename = self.full_name + self.genome_hda_id = None + self.gff_hda_id = None + self.transcripts_hda_id = None + self.proteins_hda_id = None + self.blastp_hda_id = None + self.blastx_hda_id = None + self.interproscan_hda_id = None + def set_history(self): """ Create or set the working history to the current species one @@ -165,50 +172,50 @@ class RunWorkflow(speciesData.SpeciesData): # Verify that the add_organism and add_analysis versions are correct in the instance - add_organism_tool = self.instance.tools.show_tool(phaoexplorer_constants.ADD_ORGANISM_TOOL_ID) - add_analysis_tool = self.instance.tools.show_tool(phaoexplorer_constants.ADD_ANALYSIS_TOOL_ID) - get_organisms_tool = self.instance.tools.show_tool(phaoexplorer_constants.GET_ORGANISMS_TOOL_ID) - get_analyses_tool = self.instance.tools.show_tool(phaoexplorer_constants.GET_ANALYSES_TOOL_ID) - analysis_sync_tool = self.instance.tools.show_tool(phaoexplorer_constants.ANALYSIS_SYNC_TOOL_ID) - organism_sync_tool = self.instance.tools.show_tool(phaoexplorer_constants.ORGANISM_SYNC_TOOL_ID) + add_organism_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID) + add_analysis_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID) + get_organisms_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID) + get_analyses_tool = self.instance.tools.show_tool(phaeoexplorer_constants.GET_ANALYSES_TOOL_ID) + analysis_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID) + organism_sync_tool = self.instance.tools.show_tool(phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID) # changeset for 2.3.4+galaxy0 has to be manually found because there is no way to get the wanted changeset of a non installed tool via bioblend # except for workflows (.ga) that already contain the changeset revisions inside the steps ids utilities.install_repository_revision(current_version=get_organisms_tool["version"], toolshed_dict=get_organisms_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.GET_ORGANISMS_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.GET_ORGANISMS_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.GET_ORGANISMS_TOOL_CHANGESET_REVISION, instance=self.instance) utilities.install_repository_revision(current_version=get_analyses_tool["version"], toolshed_dict=get_analyses_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.GET_ANALYSES_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.GET_ANALYSES_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.GET_ANALYSES_TOOL_CHANGESET_REVISION, instance=self.instance) utilities.install_repository_revision(current_version=add_organism_tool["version"], toolshed_dict=add_organism_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.ADD_ORGANISM_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.ADD_ORGANISM_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.ADD_ORGANISM_TOOL_CHANGESET_REVISION, instance=self.instance) utilities.install_repository_revision(current_version=add_analysis_tool["version"], toolshed_dict=add_analysis_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_CHANGESET_REVISION, instance=self.instance) utilities.install_repository_revision(current_version=analysis_sync_tool["version"], toolshed_dict=analysis_sync_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_CHANGESET_REVISION, instance=self.instance) utilities.install_repository_revision(current_version=organism_sync_tool["version"], toolshed_dict=organism_sync_tool["tool_shed_repository"], - version_to_install=phaoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION, - changeset_revision=phaoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION, + version_to_install=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_VERSION, + changeset_revision=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_CHANGESET_REVISION, instance=self.instance) logging.info("Success: individual tools versions and changesets validated") @@ -217,13 +224,13 @@ class RunWorkflow(speciesData.SpeciesData): add_analysis_tool_dataset = utilities.run_tool_and_download_single_output_dataset( instance=self.instance, - tool_id=phaoexplorer_constants.ADD_ANALYSIS_TOOL_ID, + tool_id=phaeoexplorer_constants.ADD_ANALYSIS_TOOL_ID, history_id=self.history_id, tool_inputs={"name": name, - "program": phaoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM, + "program": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_PROGRAM, "programversion": programversion, "sourcename": sourcename, - "date_executed": phaoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE}) + "date_executed": phaeoexplorer_constants.ADD_ANALYSIS_TOOL_PARAM_DATE}) analysis_dict = json.loads(add_analysis_tool_dataset) analysis_id = str(analysis_dict["analysis_id"]) @@ -234,7 +241,7 @@ class RunWorkflow(speciesData.SpeciesData): time.sleep(60) utilities.run_tool( instance=self.instance, - tool_id=phaoexplorer_constants.ANALYSIS_SYNC_TOOL_ID, + tool_id=phaeoexplorer_constants.ANALYSIS_SYNC_TOOL_ID, history_id=self.history_id, tool_inputs={"analysis_id": analysis_id}) @@ -242,7 +249,7 @@ class RunWorkflow(speciesData.SpeciesData): get_organisms_tool_dataset = utilities.run_tool_and_download_single_output_dataset( instance=self.instance, - tool_id=phaoexplorer_constants.GET_ORGANISMS_TOOL_ID, + tool_id=phaeoexplorer_constants.GET_ORGANISMS_TOOL_ID, history_id=self.history_id, tool_inputs={}, time_sleep=10 @@ -259,7 +266,7 @@ class RunWorkflow(speciesData.SpeciesData): if org_id is None: add_organism_tool_dataset = utilities.run_tool_and_download_single_output_dataset( instance=self.instance, - tool_id=phaoexplorer_constants.ADD_ORGANISM_TOOL_ID, + tool_id=phaeoexplorer_constants.ADD_ORGANISM_TOOL_ID, history_id=self.history_id, tool_inputs={"abbr": self.abbreviation, "genus": self.genus_uppercase, @@ -273,7 +280,7 @@ class RunWorkflow(speciesData.SpeciesData): time.sleep(60) utilities.run_tool( instance=self.instance, - tool_id=phaoexplorer_constants.ORGANISM_SYNC_TOOL_ID, + tool_id=phaeoexplorer_constants.ORGANISM_SYNC_TOOL_ID, history_id=self.history_id, tool_inputs={"organism_id": org_id}) @@ -283,7 +290,7 @@ class RunWorkflow(speciesData.SpeciesData): get_analyses_tool_dataset = utilities.run_tool_and_download_single_output_dataset( instance=self.instance, - tool_id=phaoexplorer_constants.GET_ANALYSES_TOOL_ID, + tool_id=phaeoexplorer_constants.GET_ANALYSES_TOOL_ID, history_id=self.history_id, tool_inputs={}, time_sleep=10 @@ -672,77 +679,13 @@ class RunWorkflow(speciesData.SpeciesData): except Exception as exc: logging.debug("blastp file not found in library (history: {0})".format(self.history_id)) - hda_ids = {"genome_hda_id": genome_hda_id, - "gff_hda_id": gff_hda_id, - "transcripts_hda_id": transcripts_hda_id, - "proteins_hda_id": proteins_hda_id, - "blastp_hda_id": blastp_hda_id, - "blastx_hda_id": blastx_hda_id, - "interproscan_hda_id": interproscan_hda_id} - - # logging.debug("History dataset IDs (hda_id) for %s:" % self.full_name) - # logging.debug(hda_ids) - - # Return a dict made of the hda ids - return hda_ids - -def run_workflow(workflow_path, workflow_parameters, datamap, config, input_species_number): - """ - Run a workflow in galaxy - Requires the .ga file to be loaded as a dictionary (optionally could be uploaded as a raw file) - - :param workflow_name: - :param workflow_parameters: - :param datamap: - :return: - """ - - logging.info("Importing workflow %s" % str(workflow_path)) - - # Load the workflow file (.ga) in a buffer - with open(workflow_path, 'r') as ga_in_file: - - # Then store the decoded json dictionary - workflow_dict = json.load(ga_in_file) - - # In case of the Jbrowse workflow, we unfortunately have to manually edit the parameters instead of setting them - # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error) - # Scratchgmod test: need "http" (or "https"), the hostname (+ port) - if "jbrowse_menu_url" not in config.keys(): - jbrowse_menu_url = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=self.config["hostname"], genus_sp=self.genus_species, Genus=self.genus_uppercase, species=self.species, id="{id}") - else: - jbrowse_menu_url = config["jbrowse_menu_url"] - if workflow_name == "Jbrowse": - workflow_dict["steps"]["2"]["tool_state"] = workflow_dict["steps"]["2"]["tool_state"].replace("__MENU_URL__", jbrowse_menu_url) - # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow - # in galaxy --> define a naming method for these workflows - workflow_dict["steps"]["3"]["tool_state"] = workflow_dict["steps"]["3"]["tool_state"].replace("__FULL_NAME__", self.full_name).replace("__UNIQUE_ID__", self.species_folder_name) - - # Import the workflow in galaxy as a dict - self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) - - # Get its attributes - workflow_attributes = self.instance.workflows.get_workflows(name=workflow_name) - # Then get its ID (required to invoke the workflow) - workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want) - show_workflow = self.instance.workflows.show_workflow(workflow_id=workflow_id) - # Check if the workflow is found - try: - logging.debug("Workflow ID: %s" % workflow_id) - except bioblend.ConnectionError: - logging.warning("Error retrieving workflow attributes for workflow %s" % workflow_name) - - # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - self.instance.workflows.invoke_workflow(workflow_id=workflow_id, - history_id=self.history_id, - params=workflow_parameters, - inputs=datamap, - allow_tool_state_corrections=True) - - logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, self.instance_url)) - - - + self.genome_hda_id = genome_hda_id + self.gff_hda_id = gff_hda_id + self.transcripts_hda_id = transcripts_hda_id + self.proteins_hda_id = proteins_hda_id + self.blastp_hda_id = blastp_hda_id + self.blastx_hda_id = blastx_hda_id + self.interproscan_hda_id = interproscan_hda_id def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): """ @@ -775,7 +718,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): history_id = run_workflow_for_current_organism.set_history() run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools() - if workflow_type == phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE: + if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB: analyses_dict_list = run_workflow_for_current_organism.get_analyses() @@ -793,7 +736,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): analysis_sourcename=run_workflow_for_current_organism.ogs_analysis_sourcename ) - hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + run_workflow_for_current_organism.import_datasets_into_history() # Create the StrainWorkflowParam object holding all attributes needed for the workflow sp_wf_param = StrainWorkflowParam( @@ -808,7 +751,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): org_id=org_id, genome_analysis_id=genome_analysis_id, ogs_analysis_id=ogs_analysis_id, - hda_ids=hda_ids, + genome_hda_id=run_workflow_for_current_organism.genome_hda_id, + gff_hda_id=run_workflow_for_current_organism.gff_hda_id, + transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id, + proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id, + blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id, + blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id, + interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id, history_id=history_id, instance=run_workflow_for_current_organism.instance, instance_url=run_workflow_for_current_organism.instance_url, @@ -823,7 +772,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): org_id = ids["org_id"] blastp_analysis_id = ids["blastp_analysis_id"] - hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + run_workflow_for_current_organism.import_datasets_into_history() # Create the StrainWorkflowParam object holding all attributes needed for the workflow sp_wf_param = StrainWorkflowParam( @@ -837,7 +786,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): strain=run_workflow_for_current_organism.strain, org_id=org_id, blastp_analysis_id=genome_analysis_id, - hda_ids=hda_ids, + genome_hda_id=run_workflow_for_current_organism.genome_hda_id, + gff_hda_id=run_workflow_for_current_organism.gff_hda_id, + transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id, + proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id, + blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id, + blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id, + interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id, history_id=history_id, instance=run_workflow_for_current_organism.instance, instance_url=run_workflow_for_current_organism.instance_url, @@ -852,7 +807,7 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): org_id = ids["org_id"] interpro_analysis_id = ids["interpro_analysis_id"] - hda_ids = run_workflow_for_current_organism.import_datasets_into_history() + run_workflow_for_current_organism.import_datasets_into_history() # Create the StrainWorkflowParam object holding all attributes needed for the workflow sp_wf_param = StrainWorkflowParam( @@ -866,7 +821,13 @@ def get_sp_workflow_param(sp_dict, main_dir, config, workflow_type): strain=run_workflow_for_current_organism.strain, org_id=org_id, interpro_analysis_id=interpro_analysis_id, - hda_ids=hda_ids, + genome_hda_id=run_workflow_for_current_organism.genome_hda_id, + gff_hda_id=run_workflow_for_current_organism.gff_hda_id, + transcripts_hda_id=run_workflow_for_current_organism.transcripts_hda_id, + proteins_hda_id=run_workflow_for_current_organism.proteins_hda_id, + blastp_hda_id=run_workflow_for_current_organism.blastp_hda_id, + blastx_hda_id=run_workflow_for_current_organism.blastx_hda_id, + interproscan_hda_id=run_workflow_for_current_organism.interproscan_hda_id, history_id=history_id, instance=run_workflow_for_current_organism.instance, instance_url=run_workflow_for_current_organism.instance_url, @@ -981,14 +942,14 @@ if __name__ == "__main__": if not args.workflow: logging.critical("No workflow type specified, exiting") sys.exit() - elif args.workflow in phaoexplorer_constants.WORKFLOW_VALID_TYPES: + elif args.workflow in phaeoexplorer_constants.WORKFLOW_VALID_TYPES: workflow_type = args.workflow logging.info("Workflow type set to '%s'" % workflow_type) script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) all_sp_workflow_dict = {} - if workflow_type == phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE: + if workflow_type == phaeoexplorer_constants.WF_LOAD_GFF_JB: for sp_dict in sp_dict_list: # Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary @@ -996,7 +957,7 @@ if __name__ == "__main__": sp_dict, main_dir=main_dir, config=config, - workflow_type=phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE) + workflow_type=phaeoexplorer_constants.WF_LOAD_GFF_JB) current_sp_genus_species = sp_workflow_param.genus_species current_sp_strain_sex = sp_workflow_param.strain_sex @@ -1021,7 +982,7 @@ if __name__ == "__main__": sp_workflow_param = strains[strain_sex] # Set workflow path (1 organism) - workflow_path = os.path.join(os.path.abspath(script_dir), phaoexplorer_constants.WORKFLOWS_PATH, phaoexplorer_constants.WORKFLOW_LOAD_FASTA_GFF_JBROWSE_FILE_1_ORG) + workflow_path = os.path.join(os.path.abspath(script_dir), phaeoexplorer_constants.WORKFLOWS_PATH, phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_FILE) # Check if the versions of tools specified in the workflow are installed in galaxy install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=sp_workflow_param.instance) @@ -1039,46 +1000,37 @@ if __name__ == "__main__": org_org_id = sp_workflow_param.org_id org_genome_analysis_id = sp_workflow_param.genome_analysis_id org_ogs_analysis_id = sp_workflow_param.ogs_analysis_id - org_genome_hda_id = sp_workflow_param.hda_ids["genome_hda_id"] - org_transcripts_hda_id = sp_workflow_param.hda_ids["transcripts_hda_id"] - org_proteins_hda_id = sp_workflow_param.hda_ids["proteins_hda_id"] - org_gff_hda_id = sp_workflow_param.hda_ids["gff_hda_id"] + org_genome_hda_id = sp_workflow_param.genome_hda_id + org_transcripts_hda_id = sp_workflow_param.transcripts_hda_id + org_proteins_hda_id = sp_workflow_param.proteins_hda_id + org_gff_hda_id = sp_workflow_param.gff_hda_id # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} - GENOME_FASTA_FILE_ORG = "0" - GFF_FILE_ORG = "1" - PROTEINS_FASTA_FILE_ORG = "2" - LOAD_FASTA_ORG = "3" - JBROWSE_ORG = "4" - LOAD_GFF_ORG = "5" - JBROWSE_CONTAINER = "6" - SYNC_FEATURES_ORG = "7" - POPULATE_MAT_VIEWS = "8" - INDEX_TRIPAL_DATA = "9" - # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) - workflow_parameters[GENOME_FASTA_FILE_ORG] = {} - workflow_parameters[GFF_FILE_ORG] = {} - workflow_parameters[PROTEINS_FASTA_FILE_ORG] = {} - workflow_parameters[LOAD_FASTA_ORG] = {"organism": org_org_id, - "analysis_id": org_genome_analysis_id, - "do_update": "true"} - workflow_parameters[JBROWSE_ORG] = {} - workflow_parameters[LOAD_GFF_ORG] = {"organism": org_org_id, "analysis_id": org_ogs_analysis_id} - workflow_parameters[SYNC_FEATURES_ORG] = {"organism_id": org_org_id} - # POPULATE + INDEX DATA - workflow_parameters[POPULATE_MAT_VIEWS] = {} - workflow_parameters[INDEX_TRIPAL_DATA] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = { + "organism": org_org_id, + "analysis_id": org_genome_analysis_id, + "do_update": "true"} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = { + "organism": org_org_id, + "analysis_id": org_ogs_analysis_id} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_FEATURE_SYNC] = { + "organism_id": org_org_id} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {} + workflow_parameters[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {} # Set datamap (mapping of input files in the workflow) datamap = {} - datamap[GENOME_FASTA_FILE_ORG] = {"src": "hda", "id": org_genome_hda_id} - datamap[GFF_FILE_ORG] = {"src": "hda", "id": org_gff_hda_id} - datamap[PROTEINS_FASTA_FILE_ORG] = {"src": "hda", "id": org_proteins_hda_id} - + datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GENOME] = {"src": "hda", "id": org_genome_hda_id} + datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_GFF] = {"src": "hda", "id": org_gff_hda_id} + datamap[phaeoexplorer_constants.WF_LOAD_GFF_JB_1_ORG_INPUT_PROTEINS] = {"src": "hda", "id": org_proteins_hda_id} with open(workflow_path, 'r') as ga_in_file: -- GitLab