
Workflow v2

Merged Loraine Gueguen requested to merge workflow_v2 into dev
2 unresolved threads
@@ -287,7 +287,7 @@ class RunWorkflow(speciesData.SpeciesData):
             org_job_out_id = add_org_job["outputs"][0]["id"]
             org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
             org_output = json.loads(org_json_output)
-            org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
         else:
             add_org_job = self.instance.tools.run_tool(
                 tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
@@ -299,7 +299,7 @@ class RunWorkflow(speciesData.SpeciesData):
             org_job_out_id = add_org_job["outputs"][0]["id"]
             org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
             org_output = json.loads(org_json_output)
-            org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
+            org_id = str(org_output["organism_id"])  # id needs to be a str to be recognized by chado tools
         get_analyses = self.instance.tools.run_tool(
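For context, the fix in these two hunks is the same: the tool's JSON output is parsed into org_output, so reading organism_id from the previously undefined organism_output_dict raised a NameError at runtime. A minimal sketch of the pattern, assuming a bioblend GalaxyInstance; the get_org_id helper is hypothetical, not part of the repository:

import json

def get_org_id(instance, add_org_job):
    """Read the organism id from a finished chado_organism_add_organism job."""
    # run_tool() returns a dict whose "outputs" list holds the result datasets
    org_job_out_id = add_org_job["outputs"][0]["id"]
    # Download the JSON the chado tool wrote, then parse it
    org_json_output = instance.datasets.download_dataset(dataset_id=org_job_out_id)
    org_output = json.loads(org_json_output)
    # Chado tools expect the id as a string, hence the cast
    return str(org_output["organism_id"])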
@@ -338,7 +338,7 @@ class RunWorkflow(speciesData.SpeciesData):
             analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
             analysis_output = json.loads(analysis_json_output)
-            ogs_analysis_id = str(analysis_output_dict["analysis_id"])
+            ogs_analysis_id = str(analysis_output["analysis_id"])
         if genome_analysis_id is None:
             add_genome_analysis_job = self.instance.tools.run_tool(
@@ -353,7 +353,7 @@ class RunWorkflow(speciesData.SpeciesData):
             analysis_job_out_id = analysis_outputs[0]["id"]
             analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
             analysis_output = json.loads(analysis_json_output)
-            genome_analysis_id = str(analysis_output_dict["analysis_id"])
+            genome_analysis_id = str(analysis_output["analysis_id"])
         print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
         return({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
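The two analysis hunks fix the same class of bug as the organism hunks: a stale analysis_output_dict name left over from an earlier revision. A hedged sketch of a shared helper that would make this pattern harder to get wrong; parse_json_job_output is hypothetical and not code from the repository:

import json

def parse_json_job_output(instance, job, key):
    """Download the first output dataset of `job` and return str(parsed[key])."""
    out_id = job["outputs"][0]["id"]
    raw = instance.datasets.download_dataset(dataset_id=out_id)
    return str(json.loads(raw)[key])

# ogs_analysis_id = parse_json_job_output(self.instance, add_analysis_job, "analysis_id")
# genome_analysis_id = parse_json_job_output(self.instance, add_genome_analysis_job, "analysis_id")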
@@ -575,11 +575,13 @@ class RunWorkflow(speciesData.SpeciesData):
         genome_hda_id, gff_hda_id, transcripts_hda_id, proteins_hda_id, blast_diamond_hda_id, interproscan_hda_id = None, None, None, None, None, None
         # Finding datasets in history (matches datasets name)
+        # TODO: match version as well
+        # TODO: match whole dataset name (not just prefix and suffix)
         for dataset in history_datasets_li:
             dataset_name = dataset["name"]
             if dataset_shortname in dataset_name:
                 dataset_id = dataset["id"]
-                if dataset_name.endswith(".fasta"):
+                if dataset_name.endswith("%s.fasta" % self.genome_version):
                     genome_hda_id = dataset_id
                 if dataset_name.endswith(".gff"):
                     gff_hda_id = dataset_id
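The endswith() fix narrows the genome match to the expected version, and the two TODOs ask for stricter matching still. A sketch of what whole-name matching could look like, assuming a "<shortname>…<version>.fasta" naming scheme; both the helper and the scheme are assumptions, not code from the repository:

import re

def is_genome_dataset(dataset_name, dataset_shortname, genome_version):
    # Anchored whole-name match instead of separate prefix/suffix tests
    pattern = re.escape(dataset_shortname) + r".*" + re.escape(genome_version) + r"\.fasta"
    return re.fullmatch(pattern, dataset_name) is not None

# is_genome_dataset("Undaria_pinnatifida_v1.0.fasta", "Undaria_pinnatifida", "1.0")  -> True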
@@ -961,7 +963,7 @@ if __name__ == "__main__":
             logging.info("Input organism %s: 1 species detected in input dictionary" % k)
             # Set workflow path (1 organism)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga")
             # Set the galaxy instance variables
             for k2, v2 in v.items():
@@ -986,7 +988,7 @@ if __name__ == "__main__":
             logging.info("Input organism %s: 2 species detected in input dictionary" % k)
             # Set workflow path (2 organisms)
-            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
+            workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga")
             # Instance object required variables
             instance_url, email, password = None, None, None
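Both path hunks repoint the script from workflows/ to the new workflows_phaeoexplorer/ directory. A small sketch of the selection the two branches perform, with the file names taken from the diff (the helper itself is hypothetical):

import os

WORKFLOWS_DIR = "workflows_phaeoexplorer"

WORKFLOW_FILES = {
    1: "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_1org_v2.ga",
    2: "Galaxy-Workflow-chado_load_tripal_synchronize_jbrowse_2org_v3.ga",
}

def workflow_path_for(script_dir, n_organisms):
    # n_organisms is the number of species detected in the input dictionary
    return os.path.join(os.path.abspath(script_dir), WORKFLOWS_DIR, WORKFLOW_FILES[n_organisms])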
@@ -1160,48 +1162,51 @@ if __name__ == "__main__":
             datamap[GFF_FILE_ORG2] = {"src": "hda", "id": org2_gff_hda_id}
             datamap[PROTEINS_FASTA_FILE_ORG2] = {"src": "hda", "id": org2_proteins_hda_id}
-            # with open(workflow_path, 'r') as ga_in_file:
-            #     # Store the decoded json dictionary
-            #     workflow_dict = json.load(ga_in_file)
-            #     workflow_name = workflow_dict["name"]
-            #     # For the Jbrowse tool, we unfortunately have to manually edit the parameters instead of setting them
-            #     # as runtime values, using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
-            #     # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
-            #     if "menu_url" not in config.keys():
-            #         jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
-            #         jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
-            #     else:
-            #         jbrowse_menu_url_org1 = config["menu_url"]
-            #         jbrowse_menu_url_org2 = jbrowse_menu_url_org1
-            #     # Replace values in the workflow dictionary
-            #     workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
-            #     workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
-            #     # The UNIQUE_ID is specific to a combination genus_species_strain_sex so every combination should have its unique workflow
-            #     # in galaxy --> define a naming method for these workflows
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
-            #     workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
-            #     # Import the workflow in galaxy as a dict
-            #     instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
-            #     # Get its attributes
-            #     workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
-            #     # Then get its ID (required to invoke the workflow)
-            #     workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
-            #     show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
-            #     # Check if the workflow is found
-            #     try:
-            #         logging.debug("Workflow ID: %s" % workflow_id)
-            #     except bioblend.ConnectionError:
-            #         logging.warning("Error finding workflow %s" % workflow_name)
-            #     # Finally, invoke the workflow alogn with its datamap, parameters and the history in which to invoke it
-            #     instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
-            #     logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
+            with open(workflow_path, 'r') as ga_in_file:
+                # Store the decoded json dictionary
+                workflow_dict = json.load(ga_in_file)
+                workflow_name = workflow_dict["name"]
+                # For the JBrowse tool, we unfortunately have to edit the parameters manually instead of setting them
+                # as runtime values: using runtime parameters makes the tool throw an internal critical error ("replace not found" error)
+                # Scratchgmod test: need "http" (or "https"), the hostname (+ port)
+                jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
+                jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                if "menu_url" not in config.keys():
+                    jbrowse_menu_url_org1 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org1_genus_species, Genus=org1_genus[0].upper() + org1_genus[1:], species=org1_species, id="\{id\}")
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                else:
+                    jbrowse_menu_url_org1 = config["menu_url"]
+                    jbrowse_menu_url_org2 = "https://{hostname}/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(hostname=config["hostname"], genus_sp=org2_genus_species, Genus=org2_genus[0].upper() + org2_genus[1:], species=org2_species, id="\{id\}")
+                # Replace values in the workflow dictionary
+                workflow_dict["steps"]["7"]["tool_state"] = workflow_dict["steps"]["7"]["tool_state"].replace("__MENU_URL_ORG1__", jbrowse_menu_url_org1)
+                workflow_dict["steps"]["8"]["tool_state"] = workflow_dict["steps"]["8"]["tool_state"].replace("__MENU_URL_ORG2__", jbrowse_menu_url_org2)
+                # The UNIQUE_ID is specific to a combination genus_species_strain_sex, so every combination should have its own workflow
+                # in galaxy --> define a naming method for these workflows
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG1__", org1_full_name).replace("__UNIQUE_ID_ORG1__", org1_species_folder_name)
+                workflow_dict["steps"]["10"]["tool_state"] = workflow_dict["steps"]["10"]["tool_state"].replace("__FULL_NAME_ORG2__", org2_full_name).replace("__UNIQUE_ID_ORG2__", org2_species_folder_name)
+                # Import the workflow in galaxy as a dict
+                instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
+                # Get its attributes
+                workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
+                # Then get its ID (required to invoke the workflow)
+                workflow_id = workflow_attributes[0]["id"]  # Index 0 is the most recently imported workflow (the one we want)
+                show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
+                # Check if the workflow is found
+                try:
+                    logging.debug("Workflow ID: %s" % workflow_id)
+                except bioblend.ConnectionError:
+                    logging.warning("Error finding workflow %s" % workflow_name)
+                # Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
+                instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
+                logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))