Skip to content
Snippets Groups Projects
Commit d66753e7 authored by Loraine Gueguen's avatar Loraine Gueguen
Browse files

Update to wf_v7: sync in wf

parent d396190f
No related branches found
No related tags found
2 merge requests!25Sync in wf,!18Release v2.1.0
This commit is part of merge request !25. Comments created here will be created in the context of that merge request.
......@@ -32,19 +32,19 @@ WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2 = "4"
WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2 = "5"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1 = "6"
WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1 = "7"
WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2 = "8"
WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2 = "8"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1 = "9"
WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER = "10"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1 = "11"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2 = "12"
WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2 = "13"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2 = "14"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG1 = "15"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG2 = "16"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1 = "17"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1 = "18"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2 = "19"
WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2 = "20"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG1 = "15"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG2 = "16"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1 = "17"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1 = "18"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2 = "19"
WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2 = "20"
WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS = "21"
WF_LOAD_GFF_JB_2_ORG_STEP_INDEX = "22"
......
......@@ -136,7 +136,7 @@ class RunWorkflow(species_data.SpeciesData):
:return:
"""
self.add_analysis_if_needed(analyses_dict_list, analysis_name, analysis_programversion, analysis_sourcename)
analysis_id = self.add_analysis_if_needed(analyses_dict_list, analysis_name, analysis_programversion, analysis_sourcename)
# Synchronize analysis in Tripal
logging.info("Synchronizing analysis %s in Tripal" % analysis_name)
......
......@@ -131,7 +131,7 @@ class RunWorkflowJbrowse(gga_run_workflow_phaeo.RunWorkflow):
logging.info("Success: individual tools versions and changesets validated")
def add_organism_and_sync(self):
def add_organism_if_needed(self):
get_organisms_tool_dataset = utilities_bioblend.run_tool_and_download_single_output_dataset(
instance=self.instance,
......@@ -283,7 +283,7 @@ def prepare_history_and_get_wf_param(sp_dict_list, main_dir, config):
analyses_dict_list = run_workflow_for_current_organism.get_analyses()
org_id = run_workflow_for_current_organism.add_organism_and_sync()
org_id = run_workflow_for_current_organism.add_organism_if_needed()
genome_analysis_id = run_workflow_for_current_organism.add_analysis_if_needed(
analyses_dict_list=analyses_dict_list,
analysis_name=run_workflow_for_current_organism.genome_analysis_name,
......@@ -405,17 +405,28 @@ if __name__ == "__main__":
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_FASTA] = {
"organism": org_wf_param.org_id,
"analysis_id": org_wf_param.genome_analysis_id,
"do_update": "true"}
"do_update": "true"
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_JBROWSE] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_LOAD_GFF] = {
"organism": org_wf_param.org_id,
"analysis_id": org_wf_param.ogs_analysis_id}
"analysis_id": org_wf_param.ogs_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_FEATURE] = {
"organism_id": org_wf_param.org_id}
"organism_id": org_wf_param.org_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_ORG] = {
"organism_id": org_wf_param.org_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_GENOME_ANALYSIS] = {
"analysis_id": org_wf_param.genome_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_SYNC_OGS_ANALYSIS] = {
"analysis_id": org_wf_param.ogs_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_1_ORG_STEP_INDEX] = {}
......@@ -485,50 +496,67 @@ if __name__ == "__main__":
logging.info("Input organism %s: 2 species detected in input dictionary" % genus_species)
strain_sex_org1 = strains_list[0]
strain_sex_org2 = strains_list[1]
sp_wf_param_org1 = strains[strain_sex_org1]
sp_wf_param_org2 = strains[strain_sex_org2]
org_wf_param_org1 = strains[strain_sex_org1]
org_wf_param_org2 = strains[strain_sex_org2]
# Set workflow path (2 organisms)
workflow_path = os.path.join(os.path.abspath(script_dir), constants_phaeo.WORKFLOWS_PATH, constants_phaeo.WF_LOAD_GFF_JB_2_ORG_FILE)
# Check if the versions of tools specified in the workflow are installed in galaxy
utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=sp_wf_param_org1.instance)
utilities_bioblend.install_workflow_tools(workflow_path=workflow_path, instance=org_wf_param_org1.instance)
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {}
# Organism 1
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG1] = {
"organism": sp_wf_param_org1.org_id,
"analysis_id": sp_wf_param_org1.genome_analysis_id,
"do_update": "true"}
"organism": org_wf_param_org1.org_id,
"analysis_id": org_wf_param_org1.genome_analysis_id,
"do_update": "true"
}
# workflow_parameters[JBROWSE_ORG1] = {"jbrowse_menu_url": jbrowse_menu_url_org1}
# workflow_parameters[JBROWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG1] = {
"organism": sp_wf_param_org1.org_id,
"analysis_id": sp_wf_param_org1.ogs_analysis_id}
"organism": org_wf_param_org1.org_id,
"analysis_id": org_wf_param_org1.ogs_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG1] = {
"organism_id": sp_wf_param_org1.org_id}
"organism_id": org_wf_param_org1.org_id
}
# workflow_parameters[JBROWSE_CONTAINER] = {"organisms": [{"name": org1_full_name, "unique_id": org1_species_folder_name, }, {"name": org2_full_name, "unique_id": org2_species_folder_name}]}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER] = {}
# Organism 2
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_FASTA_ORG2] = {
"organism": sp_wf_param_org2.org_id,
"analysis_id": sp_wf_param_org2.genome_analysis_id,
"do_update": "true"}
"organism": org_wf_param_org2.org_id,
"analysis_id": org_wf_param_org2.genome_analysis_id,
"do_update": "true"
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_LOAD_GFF_ORG2] = {
"organism": sp_wf_param_org2.org_id,
"analysis_id": sp_wf_param_org2.ogs_analysis_id}
# workflow_parameters[JRBOWSE_ORG2] = {"jbrowse_menu_url": jbrowse_menu_url_org2}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2] = {}
"organism": org_wf_param_org2.org_id,
"analysis_id": org_wf_param_org2.ogs_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_FEATURE_ORG2] = {
"organism_id": sp_wf_param_org2.org_id}
"organism_id": org_wf_param_org2.org_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG1] = {
"organism_id": org_wf_param_org1.org_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_ORG2] = {
"organism_id": org_wf_param_org2.org_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG1] = {
"analysis_id": org_wf_param_org1.genome_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG1] = {
"analysis_id": org_wf_param_org1.ogs_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_GENOME_ANALYSIS_ORG2] = {
"analysis_id": org_wf_param_org2.genome_analysis_id
}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_SYNC_OGS_ANALYSIS_ORG2] = {
"analysis_id": org_wf_param_org2.ogs_analysis_id
}
# POPULATE + INDEX DATA
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_POPULATE_VIEWS] = {}
workflow_parameters[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_INDEX] = {}
......@@ -536,13 +564,13 @@ if __name__ == "__main__":
# Set datamap (mapping of input files in the workflow)
datamap = {}
# Organism 1
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": sp_wf_param_org1.genome_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": sp_wf_param_org1.gff_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": sp_wf_param_org1.proteins_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG1] = {"src": "hda", "id": org_wf_param_org1.genome_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG1] = {"src": "hda", "id": org_wf_param_org1.gff_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG1] = {"src": "hda", "id": org_wf_param_org1.proteins_hda_id}
# Organism 2
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": sp_wf_param_org2.genome_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": sp_wf_param_org2.gff_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": sp_wf_param_org2.proteins_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GENOME_ORG2] = {"src": "hda", "id": org_wf_param_org2.genome_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_GFF_ORG2] = {"src": "hda", "id": org_wf_param_org2.gff_hda_id}
datamap[constants_phaeo.WF_LOAD_GFF_JB_2_ORG_INPUT_PROTEINS_ORG2] = {"src": "hda", "id": org_wf_param_org2.proteins_hda_id}
with open(workflow_path, 'r') as ga_in_file:
......@@ -559,54 +587,54 @@ if __name__ == "__main__":
else:
root_url = config[constants.CONF_JBROWSE_MENU_URL]
# Set "Genus" and "species" as they are given in the add_organism tool (with spaces replaced by "-")
species_strain_sex_org1 = sp_wf_param_org1.chado_species_name.replace(" ", "-")
species_strain_sex_org2 = sp_wf_param_org2.chado_species_name.replace(" ", "-")
species_strain_sex_org1 = org_wf_param_org1.chado_species_name.replace(" ", "-")
species_strain_sex_org2 = org_wf_param_org2.chado_species_name.replace(" ", "-")
jbrowse_menu_url_org1 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(
root_url=root_url,
genus_sp=genus_species,
Genus=sp_wf_param_org1.genus_uppercase,
Genus=org_wf_param_org1.genus_uppercase,
species=species_strain_sex_org1,
id="{id}")
jbrowse_menu_url_org2 = "{root_url}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(
root_url=root_url,
genus_sp=genus_species,
Genus=sp_wf_param_org2.genus_uppercase,
Genus=org_wf_param_org2.genus_uppercase,
species=species_strain_sex_org2,
id="{id}")
# Replace values in the workflow dictionary
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"] = \
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG1]["tool_state"]\
.replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"] = \
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JRBOWSE_ORG2]["tool_state"]\
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2]["tool_state"] = \
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JBROWSE_ORG2]["tool_state"]\
.replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
# The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
# in galaxy --> define a naming method for these workflows
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"] = \
workflow_dict["steps"][constants_phaeo.WF_LOAD_GFF_JB_2_ORG_STEP_JB_TO_CONTAINER]["tool_state"]\
.replace("__DISPLAY_NAME_ORG1__", sp_wf_param_org1.full_name)\
.replace("__UNIQUE_ID_ORG1__", sp_wf_param_org1.species_folder_name)\
.replace("__DISPLAY_NAME_ORG2__", sp_wf_param_org2.full_name)\
.replace("__UNIQUE_ID_ORG2__", sp_wf_param_org2.species_folder_name)
.replace("__DISPLAY_NAME_ORG1__", org_wf_param_org1.full_name)\
.replace("__UNIQUE_ID_ORG1__", org_wf_param_org1.species_folder_name)\
.replace("__DISPLAY_NAME_ORG2__", org_wf_param_org2.full_name)\
.replace("__UNIQUE_ID_ORG2__", org_wf_param_org2.species_folder_name)
# Import the workflow in galaxy as a dict
sp_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
org_wf_param_org1.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_dict_list = sp_wf_param_org1.instance.workflows.get_workflows(name=workflow_name)
workflow_dict_list = org_wf_param_org1.instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_dict_list[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
logging.debug("Workflow ID: %s" % workflow_id)
# Check if the workflow is found
try:
show_workflow = sp_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id)
show_workflow = org_wf_param_org1.instance.workflows.show_workflow(workflow_id=workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
sp_wf_param_org1.instance.workflows.invoke_workflow(
org_wf_param_org1.instance.workflows.invoke_workflow(
workflow_id=workflow_id,
history_id=sp_wf_param_org1.history_id,
history_id=org_wf_param_org1.history_id,
params=workflow_parameters,
inputs=datamap,
allow_tool_state_corrections=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment