From d66753e769a1045e0afc6752be96c69bd2b80b20 Mon Sep 17 00:00:00 2001 From: Loraine Gueguen <loraine.gueguen@sb-roscoff.fr> Date: Mon, 31 May 2021 17:25:44 +0200 Subject: [PATCH] Update to wf_v7: sync in wf --- constants_phaeo.py | 14 ++-- gga_run_workflow_phaeo.py | 2 +- gga_run_workflow_phaeo_jbrowse.py | 132 ++++++++++++++++++------------ 3 files changed, 88 insertions(+), 60 deletions(-) diff --git a/constants_phaeo.py b/constants_phaeo.py index 3bf4b2f..fd5ff9e 100644 --- a/constants_phaeo.py +++ b/constants_phaeo.py @@ -32,19 +32,19 @@ WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "4" WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2 = "5" WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "6" WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "7" -WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2 = "8" +WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2 = "8" WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1 = "9" WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER = "10" WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1 = "11" WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2 = "12" WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2 = "13" WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2 = "14" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG1 = "15" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG2 = "16" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1 = "17" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1 = "18" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2 = "19" -WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2 = "20" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG1 = "15" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG2 = "16" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1 = "17" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1 = "18" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2 = "19" +WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2 = "20" WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "21" WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "22" diff --git a/gga_run_workflow_phaeo.py b/gga_run_workflow_phaeo.py index 3ddfb18..c9c873a 100644 --- a/gga_run_workflow_phaeo.py +++ b/gga_run_workflow_phaeo.py @@ -136,7 +136,7 @@ class RunWorkflow(species_data.SpeciesData): :return: """ - self.add_analysis_if_needed(analyses_dict_list, analysis_name, analysis_programversion, analysis_sourcename) + analysis_id = self.add_analysis_if_needed(analyses_dict_list, analysis_name, analysis_programversion, analysis_sourcename) # Synchronize analysis in Tripal logging.info("Synchronizing analysis %s in Tripal" % analysis_name) diff --git a/gga_run_workflow_phaeo_jbrowse.py b/gga_run_workflow_phaeo_jbrowse.py index 741ede2..8d22c22 100644 --- a/gga_run_workflow_phaeo_jbrowse.py +++ b/gga_run_workflow_phaeo_jbrowse.py @@ -131,7 +131,7 @@ class RunWorkflowJbrowse(gga_run_workflow_phaeo.RunWorkflow): logging.info("Success: individual tools versions and changesets validated") - def add_organism_and_sync(self): + def add_organism_if_needed(self): get_organisms_tool_dataset = utilities_bioblend.run_tool_and_download_single_output_dataset( instance=self.instance, @@ -283,7 +283,7 @@ def prepare_history_and_get_wf_param(sp_dict_list, main_dir, config): analyses_dict_list = run_workflow_for_current_organism.get_analyses() - org_id = run_workflow_for_current_organism.add_organism_and_sync() + org_id = run_workflow_for_current_organism.add_organism_if_needed() genome_analysis_id = run_workflow_for_current_organism.add_analysis_if_needed( analyses_dict_list=analyses_dict_list, analysis_name=run_workflow_for_current_organism.genome_analysis_name, @@ -405,17 +405,28 @@ if __name__ == "__main__": # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} - # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = { "organism": org_wf_param.org_id, "analysis_id": org_wf_param.genome_analysis_id, - "do_update": "true"} + "do_update": "true" + } workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = { "organism": org_wf_param.org_id, - "analysis_id": org_wf_param.ogs_analysis_id} + "analysis_id": org_wf_param.ogs_analysis_id + } workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_FEATURE] = { - "organism_id": org_wf_param.org_id} + "organism_id": org_wf_param.org_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG] = { + "organism_id": org_wf_param.org_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS] = { + "analysis_id": org_wf_param.genome_analysis_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS] = { + "analysis_id": org_wf_param.ogs_analysis_id + } workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {} @@ -485,50 +496,67 @@ if __name__ == "__main__": logging.info("Input organism %s: 2 species detected in input dictionary" % genus_species) strain_sex_org1 = strains_list[0] strain_sex_org2 = strains_list[1] - sp_wf_param_org1 = strains[strain_sex_org1] - sp_wf_param_org2 = strains[strain_sex_org2] + org_wf_param_org1 = strains[strain_sex_org1] + org_wf_param_org2 = strains[strain_sex_org2] # Set workflow path (2 organisms) workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH, constants_phaeo.WF_LOAD_GFF_JB_2_ORG_FILE) # Check if the versions of tools specified in the workflow are installed in galaxy - utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=sp_wf_param_org1.instance) + utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=org_wf_param_org1.instance) # Set the workflow parameters (individual tools runtime parameters in the workflow) workflow_parameters = {} - # Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method) - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {} # Organism 1 workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1] = { - "organism": sp_wf_param_org1.org_id, - "analysis_id": sp_wf_param_org1.genome_analysis_id, - "do_update": "true"} + "organism": org_wf_param_org1.org_id, + "analysis_id": org_wf_param_org1.genome_analysis_id, + "do_update": "true" + } # workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1} + # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1] = {} + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2] = {} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1] = { - "organism": sp_wf_param_org1.org_id, - "analysis_id": sp_wf_param_org1.ogs_analysis_id} + "organism": org_wf_param_org1.org_id, + "analysis_id": org_wf_param_org1.ogs_analysis_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1] = { - "organism_id": sp_wf_param_org1.org_id} + "organism_id": org_wf_param_org1.org_id + } # workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {} # Organism 2 workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2] = { - "organism": sp_wf_param_org2.org_id, - "analysis_id": sp_wf_param_org2.genome_analysis_id, - "do_update": "true"} + "organism": org_wf_param_org2.org_id, + "analysis_id": org_wf_param_org2.genome_analysis_id, + "do_update": "true" + } workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2] = { - "organism": sp_wf_param_org2.org_id, - "analysis_id": sp_wf_param_org2.ogs_analysis_id} - # workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2} - workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2] = {} + "organism": org_wf_param_org2.org_id, + "analysis_id": org_wf_param_org2.ogs_analysis_id + } workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2] = { - "organism_id": sp_wf_param_org2.org_id} + "organism_id": org_wf_param_org2.org_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG1] = { + "organism_id": org_wf_param_org1.org_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG2] = { + "organism_id": org_wf_param_org2.org_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1] = { + "analysis_id": org_wf_param_org1.genome_analysis_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1] = { + "analysis_id": org_wf_param_org1.ogs_analysis_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2] = { + "analysis_id": org_wf_param_org2.genome_analysis_id + } + workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2] = { + "analysis_id": org_wf_param_org2.ogs_analysis_id + } # POPULATE + INDEX DATA workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {} workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {} @@ -536,13 +564,13 @@ if __name__ == "__main__": # Set datamap (mapping of input files in the workflow) datamap = {} # Organism 1 - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": sp_wf_param_org1.genome_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": sp_wf_param_org1.gff_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": sp_wf_param_org1.proteins_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": org_wf_param_org1.genome_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": org_wf_param_org1.gff_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": org_wf_param_org1.proteins_hda_id} # Organism 2 - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": sp_wf_param_org2.genome_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": sp_wf_param_org2.gff_hda_id} - datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": sp_wf_param_org2.proteins_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": org_wf_param_org2.genome_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": org_wf_param_org2.gff_hda_id} + datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": org_wf_param_org2.proteins_hda_id} with open(workflow_path, 'r') as ga_in_file: @@ -559,54 +587,54 @@ if __name__ == "__main__": else: root_url = config[constants.CONF_JBROWSE_MENU_URL] # Set "Genus" and "species" as they are given in the add_organism tool (with spaces replaced by "_") - species_strain_sex_org1 = sp_wf_param_org1.chado_species_name.replace(" ", "-") - species_strain_sex_org2 = sp_wf_param_org2.chado_species_name.replace(" ", "-") + species_strain_sex_org1 = org_wf_param_org1.chado_species_name.replace(" ", "-") + species_strain_sex_org2 = org_wf_param_org2.chado_species_name.replace(" ", "-") jbrowse_menu_url_org1 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format( root_url=root_url, genus_sp=genus_species, - Genus=sp_wf_param_org1.genus_uppercase, + Genus=org_wf_param_org1.genus_uppercase, species=species_strain_sex_org1, id="{id}") jbrowse_menu_url_org2 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format( root_url=root_url, genus_sp=genus_species, - Genus=sp_wf_param_org2.genus_uppercase, + Genus=org_wf_param_org2.genus_uppercase, species=species_strain_sex_org2, id="{id}") # Replace values in the workflow dictionary workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"] = \ workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"]\ .replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1) - workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"] = \ - workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"]\ + workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2]["tool_state"] = \ + workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2]["tool_state"]\ .replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2) # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow # in galaxy --> define a naming method for these workflows workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"] = \ workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"]\ - .replace("__DISPLAY_NAME_ORG1__", sp_wf_param_org1.full_name)\ - .replace("__UNIQUE_ID_ORG1__", sp_wf_param_org1.species_folder_name)\ - .replace("__DISPLAY_NAME_ORG2__", sp_wf_param_org2.full_name)\ - .replace("__UNIQUE_ID_ORG2__", sp_wf_param_org2.species_folder_name) + .replace("__DISPLAY_NAME_ORG1__", org_wf_param_org1.full_name)\ + .replace("__UNIQUE_ID_ORG1__", org_wf_param_org1.species_folder_name)\ + .replace("__DISPLAY_NAME_ORG2__", org_wf_param_org2.full_name)\ + .replace("__UNIQUE_ID_ORG2__", org_wf_param_org2.species_folder_name) # Import the workflow in galaxy as a dict - sp_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) + org_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict) # Get its attributes - workflow_dict_list = sp_wf_param_org1.instance.workflows.get_workflows(name=workflow_name) + workflow_dict_list = org_wf_param_org1.instance.workflows.get_workflows(name=workflow_name) # Then get its ID (required to invoke the workflow) workflow_id = workflow_dict_list[0]["id"] # Index 0 is the most recently imported workflow (the one we want) logging.debug("Workflow ID: %s" % workflow_id) # Check if the workflow is found try: - show_workflow = sp_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id) + show_workflow = org_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id) except bioblend.ConnectionError: logging.warning("Error finding workflow %s" % workflow_name) # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it - sp_wf_param_org1.instance.workflows.invoke_workflow( + org_wf_param_org1.instance.workflows.invoke_workflow( workflow_id=workflow_id, - history_id=sp_wf_param_org1.history_id, + history_id=org_wf_param_org1.history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True) -- GitLab