@@ -495,20 +495,120 @@ class RunWorkflow(speciesData.SpeciesData):
# print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
return({"org_id": org_id, "blastp_analysis_id": blastp_analysis_id})
def add_interproscan_analysis(self):
def add_organism_interproscan_analysis(self):
"""
Add the organism and its InterProScan analysis to the Chado database, then synchronize both in Tripal
Called outside the workflow for practical reasons (the Chado add tools don't have an input link for analysis or organism)
:return:
"""
# Add Interpro analysis to chado
logging.info("Adding Interproscan analysis to the instance's chado database")
self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.4+galaxy0",
self.connect_to_instance()
self.set_get_history()
tool_version = "2.3.4+galaxy0"
get_organism_tool = self.instance.tools.show_tool("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0")
get_organisms = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"name": "InterproScan on OGS%s" % self.ogs_version,
"program": "InterproScan",
"programversion": "OGS%s" % self.ogs_version,
"sourcename": "Genoscope",
"date_executed": self.date})
tool_inputs={})
time.sleep(10) # Ensure the tool has had time to complete
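# NB: a fixed sleep is a heuristic; assuming a recent bioblend, a sketch of an
# explicit wait on the output dataset (once its id is fetched below) would be:
# self.instance.datasets.wait_for_dataset(dataset_id=org_job_out_id, maxwait=300)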
org_outputs = get_organisms["outputs"] # Outputs from the get_organism tool
org_job_out_id = org_outputs[0]["id"] # ID of the get_organism output dataset (list of dicts)
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id) # Download the dataset
org_output = json.loads(org_json_output) # Turn the dataset into a list for parsing
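# For reference, the decoded dataset is a list of organism dicts, e.g. (hypothetical values):
# [{"organism_id": 21, "genus": "Ectocarpus", "species": "siliculosus male",
#   "abbreviation": "E_siliculosus_m", "common_name": "Ectocarpus"}]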
org_id = None
# Look up list of outputs (dictionaries)
for organism_output_dict in org_output:
if organism_output_dict["genus"] == self.genus and organism_output_dict["species"] == "{0} {1}".format(self.species, self.sex):
org_id = str(organism_output_dict["organism_id"])  # id needs to be a str to be recognized by chado tools
if org_id is None:
if self.common == "" or self.common is None:
add_org_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus_uppercase,
"species": self.chado_species_name,
"common": self.abbreviation})
org_job_out_id = add_org_job["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
org_output = json.loads(org_json_output)
org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools
else:
add_org_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus_uppercase,
"species": self.chado_species_name,
"common": self.common})
org_job_out_id = add_org_job["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out_id)
org_output = json.loads(org_json_output)
org_id = str(org_output["organism_id"]) # id needs to be a str to be recognized by chado tools
# Synchronize newly added organism in Tripal
logging.info("Synchronizing organism %s in Tripal" % self.full_name)
time.sleep(60)
org_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
history_id=self.history_id,
tool_inputs={"organism_id": org_id})
get_analyses = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/%s" % tool_version,
history_id=self.history_id,
tool_inputs={})
time.sleep(10)
analysis_outputs = get_analyses["outputs"]
analysis_job_out_id = analysis_outputs[0]["id"]
analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
analysis_output = json.loads(analysis_json_output)
interpro_analysis_id = None
# Look up list of outputs (dictionaries)
for analysis_output_dict in analysis_output:
if analysis_output_dict["name"] == "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version:
interpro_analysis_id = str(analysis_output_dict["analysis_id"])
if interpro_analysis_id is None:
add_interproscan_analysis_job = self.instance.tools.run_tool(
tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/%s" % tool_version,
history_id=self.history_id,
tool_inputs={"name": "Interproscan on " + self.full_name_lowercase + " OGS" + self.ogs_version,
"program": "Performed by Genoscope",
"programversion": str(self.sex + " OGS" + self.ogs_version),
"sourcename": "Genoscope",
"date_executed": self.date})
analysis_outputs = add_interproscan_analysis_job["outputs"]
analysis_job_out_id = analysis_outputs[0]["id"]
analysis_json_output = self.instance.datasets.download_dataset(dataset_id=analysis_job_out_id)
analysis_output = json.loads(analysis_json_output)
interpro_analysis_id = str(analysis_output["analysis_id"])
# Synchronize InterProScan analysis
logging.info("Synchronizing InterProScan OGS%s analysis in Tripal" % self.ogs_version)
time.sleep(60)
interproscan_analysis_sync = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
history_id=self.history_id,
tool_inputs={"analysis_id": interpro_analysis_id})
# print({"org_id": org_id, "genome_analysis_id": genome_analysis_id, "ogs_analysis_id": ogs_analysis_id})
return({"org_id": org_id, "interpro_analysis_id": interpro_analysis_id})
def get_interpro_analysis_id(self):
@@ -628,6 +728,8 @@ class RunWorkflow(speciesData.SpeciesData):
proteins_hda_id = dataset_id
if dataset_name == "{0}_OGS{1}_blastp.xml".format(self.dataset_prefix, self.ogs_version):
blastp_hda_id = dataset_id
if dataset_name == "{0}_OGS{1}_interproscan.xml".format(self.dataset_prefix, self.ogs_version):
interproscan_hda_id = dataset_id
# Import each dataset into history if it is not imported
@@ -906,6 +1008,45 @@ def create_sp_workflow_dict(sp_dict, main_dir, config, workflow_type):
sp_workflow_dict[genus_species] = {strain_sex: attributes}
if workflow_type == "interpro":
run_workflow_for_current_organism.connect_to_instance()
history_id = run_workflow_for_current_organism.set_get_history()
run_workflow_for_current_organism.install_changesets_revisions_for_individual_tools()
ids = run_workflow_for_current_organism.add_organism_interproscan_analysis()
org_id = ids["org_id"]
interpro_analysis_id = ids["interpro_analysis_id"]
instance_attributes = run_workflow_for_current_organism.get_instance_attributes()
hda_ids = run_workflow_for_current_organism.import_datasets_into_history()
strain_sex = "{0}_{1}".format(run_workflow_for_current_organism.strain, run_workflow_for_current_organism.sex)
genus_species = run_workflow_for_current_organism.genus_species
# Create the dictionary holding all attributes needed to connect to the galaxy instance
attributes = {"genus": run_workflow_for_current_organism.genus,
"species": run_workflow_for_current_organism.species,
"genus_species": run_workflow_for_current_organism.genus_species,
"full_name": run_workflow_for_current_organism.full_name,
"species_folder_name": run_workflow_for_current_organism.species_folder_name,
"sex": run_workflow_for_current_organism.sex,
"strain": run_workflow_for_current_organism.strain,
"org_id": org_id,
"interpro_analysis_id": interpro_analysis_id,
"instance_attributes": instance_attributes,
"hda_ids": hda_ids,
"history_id": history_id,
"instance": run_workflow_for_current_organism.instance,
"instance_url": run_workflow_for_current_organism.instance_url,
"email": config["galaxy_default_admin_email"],
"password": config["galaxy_default_admin_password"]}
sp_workflow_dict[genus_species] = {strain_sex: attributes}
else:
logging.critical("The galaxy container for %s is not ready yet!" % run_workflow_for_current_organism.full_name)
sys.exit()
@@ -987,12 +1128,13 @@ if __name__ == "__main__":
args = parser.parse_args()
bioblend_logger = logging.getLogger("bioblend")
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
bioblend_logger.setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger("urllib3").setLevel(logging.INFO)
logging.getLogger("bioblend").setLevel(logging.INFO)
bioblend_logger.setLevel(logging.INFO)
# Parsing the config file if provided, using the default config otherwise
if not args.config:
@@ -1000,6 +1142,12 @@ if __name__ == "__main__":
else:
args.config = os.path.abspath(args.config)
if args.config:
config_file = os.path.abspath(args.config)
else:
config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
config = utilities.parse_config(config_file)
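# config is a plain dict of deployment settings; the keys used further down include e.g.
# config["galaxy_default_admin_email"], config["galaxy_default_admin_password"] and config["jbrowse_menu_url"]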
if not args.main_directory:
args.main_directory = os.getcwd()
else:
@@ -1164,13 +1312,6 @@ if __name__ == "__main__":
else:
jbrowse_menu_url_org = config["jbrowse_menu_url"] + "/sp/{genus_sp}/feature/{Genus}/{species}/mRNA/{id}".format(genus_sp=org_genus_species, Genus=org_genus[0].upper() + org_genus[1:], species=org_species, id="{id}")
# show_tool_add_organism = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.4+galaxy0", io_details=True)
# print(show_tool_add_organism)
# show_jbrowse_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy0", io_details=True)
# print(show_jbrowse_tool)
# show_jbrowse_container_tool = instance.tools.show_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", io_details=True)
# print(show_jbrowse_container_tool)
# Replace values in the workflow dictionary
workflow_dict["steps"]["4"]["tool_state"] = workflow_dict["steps"]["4"]["tool_state"].replace("__MENU_URL_ORG__", jbrowse_menu_url_org)
workflow_dict["steps"]["6"]["tool_state"] = workflow_dict["steps"]["6"]["tool_state"].replace("__DISPLAY_NAME_ORG__", org_full_name).replace("__UNIQUE_ID_ORG__", org_species_folder_name)
@@ -1190,7 +1331,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1426,7 +1567,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1541,7 +1682,7 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
@@ -1695,7 +1836,276 @@ if __name__ == "__main__":
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
# instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
if workflow_type == "interpro":
for sp_dict in sp_dict_list:
# Add and retrieve all analyses/organisms for the current input species and add their IDs to the input dictionary
current_sp_workflow_dict = create_sp_workflow_dict(sp_dict, main_dir=args.main_directory, config=config, workflow_type="interpro")
current_sp_key = list(current_sp_workflow_dict.keys())[0]
current_sp_value = list(current_sp_workflow_dict.values())[0]
current_sp_strain_sex_key = list(current_sp_value.keys())[0]
current_sp_strain_sex_value = list(current_sp_value.values())[0]
# Add the species dictionary to the complete dictionary
# This dictionary contains every organism present in the input file
# Its structure is the following:
# {genus species: {strain1_sex1: {variables_key: variables_values}, strain1_sex2: {variables_key: variables_values}}}
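# e.g. (hypothetical values):
# {"genus_species": {"strain1_male": {"org_id": "21", "history_id": "f2db41e1fa331b3e", ...},
#                    "strain1_female": {...}}}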
if current_sp_key not in all_sp_workflow_dict:
all_sp_workflow_dict[current_sp_key] = current_sp_value
else:
all_sp_workflow_dict[current_sp_key][current_sp_strain_sex_key] = current_sp_strain_sex_value
for k, v in all_sp_workflow_dict.items():
if len(list(v.keys())) == 1:
logging.info("Input organism %s: 1 species detected in input dictionary" % k)
# Set workflow path (1 organism)
workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_interproscan_1org_v1.ga")
# Instance object required variables
instance_url, email, password = None, None, None
# Set the galaxy instance variables
for k2, v2 in v.items():
instance_url = v2["instance_url"]
email = v2["email"]
password = v2["password"]
instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
organisms_key_names = list(v.keys())
org_dict = v[organisms_key_names[0]]
history_id = org_dict["history_id"]
# Organism attributes
org_genus = org_dict["genus"]
org_species = org_dict["species"]
org_genus_species = org_dict["genus_species"]
org_species_folder_name = org_dict["species_folder_name"]
org_full_name = org_dict["full_name"]
org_strain = org_dict["strain"]
org_sex = org_dict["sex"]
org_org_id = org_dict["org_id"]
org_interproscan_analysis_id = org_dict["interpro_analysis_id"]
org_interproscan_hda_id = org_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org_parameters_dict = {
"org_genus": org_genus,
"org_species": org_species,
"org_genus_species": org_genus_species,
"org_species_folder_name": org_species_folder_name,
"org_full_name": org_full_name,
"org_strain": org_strain,
"org_sex": org_sex,
"org_org_id": org_org_id,
"org_inteproscan_analysis_id": org_inteproscan_analysis_id,
"org_interproscan_hda_id": org_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org_full_name, param_name, param_value))
sys.exit()
INTEPRO_FILE = "0"
LOAD_INTERPRO_FILE = "1"
POPULATE_MAT_VIEWS = "2"
INDEX_TRIPAL_DATA = "3"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
workflow_parameters[INTERPRO_FILE] = {}
workflow_parameters[LOAD_INTERPRO_FILE] = {"analysis_id": org_interproscan_analysis_id, "organism_id": org_org_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
datamap = {}
datamap[INTERPRO_FILE] = {"src": "hda", "id": org_interproscan_hda_id}
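# At this point the two mappings look like, e.g. (hypothetical ids):
# workflow_parameters == {"0": {}, "1": {"analysis_id": "5", "organism_id": "21"}, "2": {}, "3": {}}
# datamap == {"0": {"src": "hda", "id": "f2db41e1fa331b3e"}}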
with open(workflow_path, 'r') as ga_in_file:
# Store the decoded json dictionary
workflow_dict = json.load(ga_in_file)
workflow_name = workflow_dict["name"]
# Import the workflow in galaxy as a dict
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
# Check if the workflow is found
try:
logging.debug("Workflow ID: %s" % workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
if len(list(v.keys())) == 2:
logging.info("Input organism %s: 2 species detected in input dictionary" % k)
# Set workflow path (2 organisms)
workflow_path = os.path.join(os.path.abspath(script_dir), "workflows_phaeoexplorer/Galaxy-Workflow-load_interproscan_2org_v1.ga")
# Instance object required variables
instance_url, email, password = None, None, None
# Set the galaxy instance variables
for k2, v2 in v.items():
instance_url = v2["instance_url"]
email = v2["email"]
password = v2["password"]
instance = galaxy.GalaxyInstance(url=instance_url, email=email, password=password)
# Check if the versions of tools specified in the workflow are installed in galaxy
install_changesets_revisions_from_workflow(workflow_path=workflow_path, instance=instance)
organisms_key_names = list(v.keys())
org1_dict = v[organisms_key_names[0]]
org2_dict = v[organisms_key_names[1]]
history_id = org1_dict["history_id"]
# Organism 1 attributes
org1_genus = org1_dict["genus"]
org1_species = org1_dict["species"]
org1_genus_species = org1_dict["genus_species"]
org1_species_folder_name = org1_dict["species_folder_name"]
org1_full_name = org1_dict["full_name"]
org1_strain = org1_dict["strain"]
org1_sex = org1_dict["sex"]
org1_org_id = org1_dict["org_id"]
org1_interproscan_analysis_id = org1_dict["interpro_analysis_id"]
org1_interproscan_hda_id = org1_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org1_parameters_dict = {
"org1_genus": org1_genus,
"org1_species": org1_species,
"org1_genus_species": org1_genus_species,
"org1_species_folder_name": org1_species_folder_name,
"org1_full_name": org1_full_name,
"org1_strain": org1_strain,
"org1_sex": org1_sex,
"org1_org_id": org1_org_id,
"org1_interproscan_analysis_id": org1_interproscan_analysis_id,
"org1_interproscan_hda_id": org1_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org1_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org1_full_name, param_name, param_value))
sys.exit()
# Organism 2 attributes
org2_genus = org2_dict["genus"]
org2_species = org2_dict["species"]
org2_genus_species = org2_dict["genus_species"]
org2_species_folder_name = org2_dict["species_folder_name"]
org2_full_name = org2_dict["full_name"]
org2_strain = org2_dict["strain"]
org2_sex = org2_dict["sex"]
org2_org_id = org2_dict["org_id"]
org2_interproscan_analysis_id = org2_dict["interpro_analysis_id"]
org2_interproscan_hda_id = org2_dict["hda_ids"]["interproscan_hda_id"]
# Store these values into a dict for parameters logging/validation
org2_parameters_dict = {
"org2_genus": org2_genus,
"org2_species": org2_species,
"org2_genus_species": org2_genus_species,
"org2_species_folder_name": orgé_species_folder_name,
"org2_full_name": org2_full_name,
"org2_strain": org2_strain,
"org2_sex": org2_sex,
"org2_org_id": org2_org_id,
"org2_interproscan_analysis_id": org2_interproscan_analysis_id,
"org2_interproscan_hda_id": org2_interproscan_hda_id,
}
# Look for empty parameter values, throw a critical error if a parameter value is invalid
for param_name, param_value in org2_parameters_dict.items():
if param_value is None or param_value == "":
logging.critical("Empty parameter value found for organism {0} (parameter: {1}, parameter value: {2})".format(org2_full_name, param_name, param_value))
sys.exit()
# Source files association (ordered by their IDs in the workflow)
# WARNING: Be very careful about how the workflow is "organized" (i.e. the order of the steps/datasets; check the .ga file if there is any error)
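# A quick sanity check of the step order (sketch, reads the same .ga file as below):
# with open(workflow_path) as ga_file:
#     for step_id, step in sorted(json.load(ga_file)["steps"].items(), key=lambda s: int(s[0])):
#         print(step_id, step["name"], step.get("label"))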
INTERPRO_FILE_ORG1 = "0"
INTERPRO_FILE_ORG2 = "1"
LOAD_INTERPRO_FILE_ORG1 = "2"
LOAD_INTERPRO_FILE_ORG2 = "3"
POPULATE_MAT_VIEWS = "4"
INDEX_TRIPAL_DATA = "5"
# Set the workflow parameters (individual tools runtime parameters in the workflow)
workflow_parameters = {}
# Input files have no parameters (they are set via assigning the hda IDs in the datamap parameter of the bioblend method)
workflow_parameters[INTERPRO_FILE_ORG1] = {}
workflow_parameters[INTERPRO_FILE_ORG2] = {}
# Organism 1
workflow_parameters[LOAD_INTERPRO_FILE_ORG1] = {"organism_id": org1_org_id,
"analysis_id": org1_interproscan_analysis_id}
# Organism 2
workflow_parameters[LOAD_INTERPRO_FILE_ORG2] = {"organism_id": org2_org_id,
"analysis_id": org2_interproscan_analysis_id}
workflow_parameters[POPULATE_MAT_VIEWS] = {}
workflow_parameters[INDEX_TRIPAL_DATA] = {}
# Set datamap (mapping of input files in the workflow)
datamap = {}
# Organism 1
datamap[INTERPRO_FILE_ORG1] = {"src": "hda", "id": org1_interproscan_hda_id}
# Organism 2
datamap[INTERPRO_FILE_ORG2] = {"src": "hda", "id": org2_interproscan_hda_id}
with open(workflow_path, 'r') as ga_in_file:
# Store the decoded json dictionary
workflow_dict = json.load(ga_in_file)
workflow_name = workflow_dict["name"]
# Import the workflow in galaxy as a dict
instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
# Get its attributes
workflow_attributes = instance.workflows.get_workflows(name=workflow_name)
# Then get its ID (required to invoke the workflow)
workflow_id = workflow_attributes[0]["id"] # Index 0 is the most recently imported workflow (the one we want)
show_workflow = instance.workflows.show_workflow(workflow_id=workflow_id)
# Check if the workflow is found
try:
logging.debug("Workflow ID: %s" % workflow_id)
except bioblend.ConnectionError:
logging.warning("Error finding workflow %s" % workflow_name)
# Finally, invoke the workflow along with its datamap, parameters and the history in which to invoke it
instance.workflows.invoke_workflow(workflow_id=workflow_id, history_id=history_id, params=workflow_parameters, inputs=datamap, allow_tool_state_corrections=True)
logging.info("Successfully imported and invoked workflow {0}, check the galaxy instance ({1}) for the jobs state".format(workflow_name, instance_url))
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "load_inteproscan_1org1_v1",
"steps": {
"0": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "inteproscan file org1"
}
],
"label": "inteproscan file org1",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 319.1999969482422,
"height": 82.19999694824219,
"left": 287,
"right": 487,
"top": 237,
"width": 200,
"x": 287,
"y": 237
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "3b13466d-1b81-475a-b652-183ed9d24bfa",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "aff82f29-1971-4283-bfe8-cdab4857a215"
}
]
},
"1": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 1,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "interproscan load org 1",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 371.3999938964844,
"height": 164.39999389648438,
"left": 595,
"right": 795,
"top": 207,
"width": 200,
"x": 595,
"y": 207
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "a084abdc-a0f4-4670-a486-4aed4e0f61fa",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "dbaab44d-1494-4fbe-bd15-455f7cbf7307"
}
]
},
"2": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"errors": null,
"id": 2,
"input_connections": {
"wait_for": {
"id": 1,
"output_name": "results"
}
},
"inputs": [],
"label": null,
"name": "Populate materialized views",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 360.3999938964844,
"height": 154.39999389648438,
"left": 876,
"right": 1076,
"top": 206,
"width": 200,
"x": 876,
"y": 206
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"tool_shed_repository": {
"changeset_revision": "3c08f32a3dc1",
"name": "tripal_db_populate_mviews",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.0",
"type": "tool",
"uuid": "2acbe412-1318-46e8-a178-72f50df36a07",
"workflow_outputs": [
{
"label": "Populate Tripal materialized view(s)",
"output_name": "results",
"uuid": "08a4eec2-b95a-4c75-8116-b0fc9477812f"
}
]
},
"3": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"errors": null,
"id": 3,
"input_connections": {
"wait_for": {
"id": 2,
"output_name": "results"
}
},
"inputs": [],
"label": null,
"name": "Index Tripal data",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 340.6000061035156,
"height": 113.60000610351562,
"left": 1150,
"right": 1350,
"top": 227,
"width": 200,
"x": 1150,
"y": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"tool_shed_repository": {
"changeset_revision": "d55a39f12dda",
"name": "tripal_db_index",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.1",
"type": "tool",
"uuid": "42d782ff-9e6b-49d8-bd17-37dc67f31a18",
"workflow_outputs": [
{
"label": "Index Tripal data",
"output_name": "results",
"uuid": "8b38b48f-5d4e-49e6-ad70-748ad64dc901"
}
]
}
},
"tags": [],
"uuid": "9912daac-c00e-4d5e-a93e-a747bd1b3499",
"version": 3
}
\ No newline at end of file
{
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "load_interproscan_2org_v1",
"steps": {
"0": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 0,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "interproscan file org 1"
}
],
"label": "interproscan file org 1",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 350.1999969482422,
"height": 82.19999694824219,
"left": 414,
"right": 614,
"top": 268,
"width": 200,
"x": 414,
"y": 268
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "bfce9a38-df8a-46ef-af2e-390a4982ebfc",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "d5d96afa-8dcc-4322-89fa-df56fd503d89"
}
]
},
"1": {
"annotation": "",
"content_id": null,
"errors": null,
"id": 1,
"input_connections": {},
"inputs": [
{
"description": "",
"name": "interproscan file org 2"
}
],
"label": "interproscan file org 2",
"name": "Input dataset",
"outputs": [],
"position": {
"bottom": 451.1999969482422,
"height": 82.19999694824219,
"left": 437,
"right": 637,
"top": 369,
"width": 200,
"x": 437,
"y": 369
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
"tool_version": null,
"type": "data_input",
"uuid": "45f57c7f-f08c-4b3b-879d-1acd246868fb",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "f5e4ae10-fdbf-47e9-99d8-0644bf8bb46f"
}
]
},
"2": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 2,
"input_connections": {
"input": {
"id": 0,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "inteproscan load org 1",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 365.3999938964844,
"height": 164.39999389648438,
"left": 790,
"right": 990,
"top": 201,
"width": 200,
"x": 790,
"y": 201
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "e6c509b4-ea16-4fda-b171-3dc781a54759",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "bfff8893-5c92-45a0-88ca-594b33a8b4e2"
}
]
},
"3": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"errors": null,
"id": 3,
"input_connections": {
"input": {
"id": 1,
"output_name": "output"
},
"wait_for": {
"id": 2,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "input"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "organism_id"
},
{
"description": "runtime parameter for tool Chado load InterProScan results",
"name": "wait_for"
}
],
"label": "interproscan load org 2",
"name": "Chado load InterProScan results",
"outputs": [
{
"name": "results",
"type": "json"
}
],
"position": {
"bottom": 555.3999938964844,
"height": 164.39999389648438,
"left": 819,
"right": 1019,
"top": 391,
"width": 200,
"x": 819,
"y": 391
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_load_interpro/load_interpro/2.3.6+galaxy0",
"tool_shed_repository": {
"changeset_revision": "1e54f2717e74",
"name": "chado_load_interpro",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"match_on_name\": \"false\", \"organism_id\": {\"__class__\": \"RuntimeValue\"}, \"parse_go\": \"false\", \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"query_type\": \"polypeptide\", \"re_name\": \"\", \"skip_missing\": \"false\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.6+galaxy0",
"type": "tool",
"uuid": "1832aceb-851c-4020-9c07-8193e8b2299a",
"workflow_outputs": [
{
"label": null,
"output_name": "results",
"uuid": "be9f442f-7f19-4b84-94cf-9c2c28d9a6a5"
}
]
},
"4": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"errors": null,
"id": 4,
"input_connections": {
"wait_for": {
"id": 3,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Populate materialized views",
"name": "wait_for"
}
],
"label": null,
"name": "Populate materialized views",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 459.3999938964844,
"height": 154.39999389648438,
"left": 1118,
"right": 1318,
"top": 305,
"width": 200,
"x": 1118,
"y": 305
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_populate_mviews/db_populate_mviews/3.2.1.0",
"tool_shed_repository": {
"changeset_revision": "3c08f32a3dc1",
"name": "tripal_db_populate_mviews",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"mview\": \"\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.0",
"type": "tool",
"uuid": "a819f33b-e566-43e5-a467-e3410ce431ec",
"workflow_outputs": [
{
"label": "Populate Tripal materialized view(s)",
"output_name": "results",
"uuid": "96483655-995e-4d4e-b8c6-7f602c974ee3"
}
]
},
"5": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"errors": null,
"id": 5,
"input_connections": {
"wait_for": {
"id": 4,
"output_name": "results"
}
},
"inputs": [
{
"description": "runtime parameter for tool Index Tripal data",
"name": "wait_for"
}
],
"label": null,
"name": "Index Tripal data",
"outputs": [
{
"name": "results",
"type": "txt"
}
],
"position": {
"bottom": 442.6000061035156,
"height": 113.60000610351562,
"left": 1382,
"right": 1582,
"top": 329,
"width": 200,
"x": 1382,
"y": 329
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_db_index/db_index/3.2.1.1",
"tool_shed_repository": {
"changeset_revision": "d55a39f12dda",
"name": "tripal_db_index",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"expose\": {\"do_expose\": \"no\", \"__current_case__\": 0}, \"queues\": \"10\", \"table\": {\"mode\": \"website\", \"__current_case__\": 0}, \"tokenizer\": \"standard\", \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "3.2.1.1",
"type": "tool",
"uuid": "489483da-c7c7-49a5-b5d4-353db41b1240",
"workflow_outputs": [
{
"label": "Index Tripal data",
"output_name": "results",
"uuid": "3df4fd46-5dd9-49d6-b0d2-24425d9ba91e"
}
]
}
},
"tags": [],
"uuid": "ac96d70e-60b6-4455-8d14-9219d0674950",
"version": 1
}
\ No newline at end of file