# Skip to content
# Snippets Groups Projects
# Commit 3e598ebb authored by Arthur Le Bars's avatar Arthur Le Bars
# Browse files
#
# Delete workflow.py
#
# parent cea3f321
# No related branches found
# No related tags found
# 1 merge request!1Release 1.0
import os
from bioblend.galaxy import GalaxyInstance
from toolrunner import ToolRunner
import json
import logging
"""
Workflow creation for generation and visualization of data and analyses output
"""
class Workflow:
    """
    Customize a preset Galaxy workflow (.ga file) for one organism and import it
    into a Galaxy instance.

    The organism description comes from ``parameters_dict``; the Galaxy connection
    is the ``instance`` object, used here through its ``workflows`` and
    ``datasets`` attributes.
    """

    # Runs once, when the class body is executed: configures the root logger so
    # the INFO messages emitted by the methods below are displayed.
    logging.basicConfig(level=logging.INFO)

    def __init__(self, parameters_dict, instance, history_id):
        """
        Store the organism parameters and derive the names used by the workflow

        :param parameters_dict: dict with the keys "genus", "species", "strain",
            "common", "performed by", "genome version", "ogs version", "sex"
            and "date"
        :param instance: galaxy instance object used for every API call
        :param history_id: id of the galaxy history, handed to the ToolRunner
        """
        self.history_id = history_id
        self.instance = instance
        self.parameters_dict = parameters_dict
        self.genus = parameters_dict["genus"]
        self.species = parameters_dict["species"]
        self.strain = parameters_dict["strain"]
        self.common = parameters_dict["common"]
        self.performed = parameters_dict["performed by"]
        self.genome_version = parameters_dict["genome version"]
        self.ogs_version = parameters_dict["ogs version"]
        self.sex = parameters_dict["sex"]
        self.date = parameters_dict["date"]

        # Filled in by generate()
        self.custom_ga_file = None
        self.custom_ga_file_path = None
        self.preset_ga_file = None
        self.workflow = None

        # "Genus" + "species" -> "genus_species" (only the first letter is lowercased)
        self.sp_url = self.genus[0].lower() + self.genus[1:] + "_" + self.species

        # Abbreviation: first letter of the genus, species, and strain when there is one
        abbr_parts = [self.genus[0].lower(), self.species]
        if self.strain != "":
            abbr_parts.append(self.strain)
        self.abbr = "_".join(abbr_parts)

        # NOTE(review): the strain is always part of the join, so an empty strain
        # yields a double underscore ("Genus_species__sex"). Kept as-is because
        # other modules may rely on this exact value -- confirm.
        self.full = "_".join([self.genus, self.species, self.strain, self.sex])

    def generate(self, working_directory, main_directory, workflow_name):
        """
        Generation of a galaxy workflow using the defined parameters in the .json input file

        Reads the preset workflow "Galaxy-Workflow-<workflow_name>.ga" from
        main_directory and substitutes the UNIQUE_ID, NAME and feature URL
        placeholders with the values of the current organism. The result is
        stored in self.workflow and returned.

        Side effect: the current working directory of the process is changed to
        working_directory; self.custom_ga_file_path is resolved relative to it.

        :param working_directory: directory to change into
        :param main_directory: directory containing the preset .ga file (with or
            without a trailing path separator)
        :param workflow_name: name part of the preset workflow file
        :return: the customized workflow, as a json string
        """
        # TODO: store workflow as a var instead of file (once it runs smoothly)
        # (the commented-out draft of the template string that used to live here
        # was removed, the preset .ga file is the only template in use)
        os.chdir(path=working_directory)
        self.preset_ga_file = os.path.join(main_directory, "Galaxy-Workflow-" + workflow_name + ".ga")

        name_parts = [self.genus, self.species]
        if self.strain != "":
            name_parts.append(self.strain)
        self.custom_ga_file = "_".join(name_parts) + "_workflow.ga"
        self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)

        # Read the raw text of the file. The previous implementation used
        # str(readlines()), i.e. the repr of a list, and then had to undo the
        # doubled backslashes and strip the surrounding "['" / "']", which only
        # worked for a one-line file without trailing newline or single quotes.
        with open(self.preset_ga_file, "r", encoding="utf-8") as ga_in_file:
            ga_in = ga_in_file.read()
        logging.debug("Preset workflow read from %s", self.preset_ga_file)

        # The placeholders sit inside a json string that is itself nested in the
        # "tool_state" json string, so each of their quotes appears in the file
        # as three backslashes followed by a double quote.
        quote = '\\\\\\"'
        ga_in = ga_in.replace(
            "{" + quote + "unique_id" + quote + ": " + quote + "UNIQUE_ID" + quote + "}",
            # the closing brace is part of the matched text and must be put back
            "{" + quote + "unique_id" + quote + ": " + quote + self.genus + " " + self.species + quote + "}",
        )
        ga_in = ga_in.replace(
            quote + "name" + quote + ": " + quote + "NAME" + quote,
            quote + "name" + quote + ": " + quote + self.genus.lower()[0] + self.species + quote,
        )
        # Same layout as the template URL: .../feature/<Genus>/<species>/mRNA/{id}
        ga_in = ga_in.replace(
            "http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}",
            "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"]),
        )

        self.workflow = ga_in
        return ga_in

    def _first_record_id(self, job, id_key, not_found_message):
        """
        Download the first output dataset of a tool run and return one id of its first record

        :param job: tool run description, read as job["outputs"][0]["id"]
        :param id_key: key to read in the first record of the json output
        :param not_found_message: message logged when the json output has no record
        :return: the id as a str, or None when the output has no record
        """
        dataset_id = job["outputs"][0]["id"]
        raw_output = self.instance.datasets.download_dataset(dataset_id=dataset_id)
        try:
            # needs to be str to be recognized by the chado tool
            return str(json.loads(raw_output)[0][id_key])
        except IndexError:
            logging.info(not_found_message)
            return None

    def set_main_workflow_parameters(self, datasets):
        """
        Test function

        Adds the organism, the OGS analysis and the genome analysis through a
        ToolRunner, reads their ids back from the tool outputs and builds the
        parameters of the main workflow, keyed by step number.

        :param datasets: not used by this method
        :return: dict mapping the step number (as str) to the parameters of the step
        """
        # TODO: move tool calls to main/autoload
        toolrunner = ToolRunner(parameters_dict=self.parameters_dict, instance=self.instance, history=self.history_id)
        toolrunner.add_organism()
        toolrunner.add_ogs()
        toolrunner.add_genome()

        org_id = self._first_record_id(
            toolrunner.get_organism(),
            "organism_id",
            "No organism matching " + self.full + " exists in the Chado database",
        )
        ogs_analysis_id = self._first_record_id(
            toolrunner.get_ogs_analysis(),
            "analysis_id",
            "No matching OGS analysis exists in the Chado database",
        )
        genome_analysis_id = self._first_record_id(
            toolrunner.get_genome_analysis(),
            "analysis_id",
            "No matching genome analysis exists in the Chado database",
        )

        return {
            "0": {},
            "1": {},
            "2": {},
            "3": {},
            # the do_update parameter is to prevent assertion errors when loading
            # the file, should always be set to "true"
            "4": {"organism": org_id, "analysis_id": genome_analysis_id, "do_update": "true"},
            "5": {"organism": org_id, "analysis_id": ogs_analysis_id},
            "6": {"organism_id": org_id},
            "7": {"analysis_id": ogs_analysis_id},
            "8": {"analysis_id": genome_analysis_id},
            "9": {"organism_id": org_id},
        }

    def set_jbrowse_workflow_parameters(self):
        """
        Build the parameters of the jbrowse workflow, keyed by step number

        All steps are currently left empty: jbrowse tools parameters are inside
        nested dictionaries, read tool inputs before adding or modifying anything
        (the problem is with Jbrowse add organism, whose parameters are nested).

        :return: dict mapping the step number (as str) to the parameters of the step
        """
        # The dict used to be built and then dropped (no return statement)
        return {"0": {}, "1": {}, "2": {}, "3": {}}

    def set_datamap(self):
        """
        Placeholder, not implemented yet

        :return: None
        """
        return None

    def dict_port(self):
        """
        Import workflow into a galaxy instance from a json dict

        Sends self.workflow (set by generate()) to the galaxy instance.
        NOTE(review): the exception caught is the builtin ConnectionError --
        confirm it is the one the galaxy client actually raises.

        :return: False if a ConnectionError was raised, True otherwise
        """
        try:
            self.instance.workflows.import_workflow_dict(workflow_dict=self.workflow)
        except ConnectionError:
            return False
        return True

    def port(self):
        """
        Import workflow into a galaxy instance from a local file

        The file is self.custom_ga_file_path (set by generate()).
        NOTE(review): nothing in this class writes that file -- confirm it is
        created elsewhere before calling this method.

        :return: False if a ConnectionError was raised, True otherwise
        """
        try:
            self.instance.workflows.import_workflow_from_local_path(self.custom_ga_file_path)
        except ConnectionError:
            return False
        return True

    def get_workflow_name(self):
        """
        Name of the imported workflow

        :return: the constant "preset_workflow"
        """
        return "preset_workflow"

    def show(self):
        """
        Log (INFO level) the first workflow listed by the instance (dict form)

        :return: None
        """
        workflow_id = self.instance.workflows.get_workflows()[0]['id']
        logging.info(self.instance.workflows.show_workflow(workflow_id=workflow_id))

    def store(self):
        """
        Id of the first workflow listed by the instance

        :return: the workflow id
        """
        return self.instance.workflows.get_workflows()[0]['id']

    def delete(self):
        """
        Delete custom workflow (not implemented yet)

        :return: None
        """
        return None

    def run(self, datamap, params):
        """
        Run the custom workflow into a galaxy instance (not implemented yet)

        :param datamap: input datasets -- not used yet
        :param params: workflow parameters -- not used yet
        :return: None
        """
        return None
# def add_step(self, step_position, description, name):
# """
# TODO: add a step to the workflow (data loading into chado for example)
#
# :param workflow:
# :return:
# """
# return None
# 0% Loading or .
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or to comment