From 0b4c80b710946479d339eaf5c88c07faab262688 Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Thu, 5 Mar 2020 16:06:12 +0100
Subject: [PATCH] Functional version 1.0: Chado loading + Tripal sync + jbrowse

---
 main.py                     | 122 ++++++++++++++-----------------
 toolrunner.py               | 123 ++++++++++++++++++++++++++++---
 workflow.py                 | 139 ++++++++++++++++++++++--------------
 workflows/tool_example.json |   0
 4 files changed, 252 insertions(+), 132 deletions(-)
 create mode 100644 workflows/tool_example.json

diff --git a/main.py b/main.py
index 3d0403f..7cd904d 100644
--- a/main.py
+++ b/main.py
@@ -1,22 +1,29 @@
 from bioblend import galaxy
+import bioblend.galaxy.objects as bbo
 import bioblend as bb
 import argparse
 import os
 import sys
 import subprocess
 import json
+import urllib3 as ul
+from chado import ChadoInstance
 from workflow import Workflow
 from filetransfer import FileTransfer
 from toolrunner import ToolRunner
-
-# TODO: script description
+from webscrap import WebScrap
 
 """
+TODO: script description
+python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json
+
 """
 
 
 class Autoload:
-
+    """
+    TODO: turn main into an object
+    """
     def __init__(self, json_in):
         self.json_in = json_in
@@ -83,7 +90,7 @@ def main():
     # Connect to the galaxy instance of the current species TODO: API key connection issues
     gi = galaxy.GalaxyInstance(url=instance_url,
-                               key="0e993414b2f876515e74dd890f16ffc7",
+                               key="3b36455cb16b4d0e4348e2c42f4bb934",
                                email="alebars@sb-roscoff.fr",
                                password="pouet",
                                verify=True)
@@ -107,7 +114,7 @@ def main():
             print("Cannot access " + sp_dir + ", run with higher privileges")
             break
 
-    # Production instance example TODO: secure pswd and API key? + manage API keys (if needed)
+    # Production instance example TODO: secure pswd and API key + manage API keys
     # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
     #                            key="84dfbee3c0efa9155518f01fbeff57c8",
     #                            email="gga@sb-roscoff.fr",
@@ -145,23 +152,13 @@ def main():
     # print("Data successfully loaded into docker container for " + genus_species_strain)
 
     # generate workflow file and run it in the galaxy instance
-    workflow = Workflow(parameters_dict=sp_dict, instance=gi)
-    print("Generating custom workflow for " + genus_species_strain)
-    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir)
     gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
     hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
     hi_id = hi[0]["id"]
-    # print(hi)
     li = gi.libraries.get_libraries()  # only one library
-    # print(li)
     li_id = gi.libraries.get_libraries()[0]["id"]  # project data folder/library
-    # print(li_id)
     fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
-    # print(fo_gi)
     fo_id = {}
     current_fo_name = ""
@@ -174,16 +171,14 @@ def main():
             if k == "id":
                 fo_id[current_fo_name] = v
 
+    # TODO: turn data id parsing into a function
     print("Folders and datasets IDs: ")
     datasets = dict()
     for k, v in fo_id.items():
         print("\t" + k + ": " + v)
         if k == "/genome":
             sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-            # print(sub_folder_content)
             for k2, v2 in sub_folder_content.items():
-                # print(k2)
-                # print(v2)
                 for e in v2:
                     if type(e) == dict:
                         if e["name"].endswith(".fa"):
                             datasets["genome_file"] = e["ldda_id"]
                             print("\t\t" + e["name"] + ": " + e["ldda_id"])
         elif k == "/annotation/" + genus_species:
             sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
-            # print(sub_folder_content)
             for k2, v2 in sub_folder_content.items():
-                # print(k2)
-                # print(v2)
                 for e in v2:
                     if type(e) == dict:
                         # TODO: manage several files of the same type
@@ -215,74 +207,66 @@ def main():
     gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
     gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
 
+    # Workflow generation
+    workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id=current_hi_id)
+    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")
+
     tools = gi.tools.get_tool_panel()  # tools panel -> alternative to wf
     # print(tools)
 
     wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
-    # print(wf_dict)
-    # print(wf_dict_json.count("id"))
-    # TODO: fill in workflow inputs and attributes (doable?)
+    gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
-    print(gi.workflows.import_workflow_dict(workflow_dict=wf_dict))
     wf_name = workflow.get_workflow_name()
     wf_attr = gi.workflows.get_workflows(name=wf_name)
-    # print(wf_attr)
     wf_id = wf_attr[0]["id"]
     wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
     print("Workflow ID: " + wf_id)
-    print(wf_show["inputs"])
 
-    # toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
+    toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
     # toolrunner.purge_organisms()
 
-    wf_params = workflow.set_params(datasets=datasets)
+    # wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)
+
+    wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
+    print("Inputs:")
+    print(wf_show["inputs"])
 
     datamap = dict()
     datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
     datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
-    datamap["2"] = {"src": "hda", "id": datasets["transcripts_file"]}
-    datamap["4"] = {"src": "hda", "id": datasets["proteins_file"]}
+    datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
+    datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
+
+    #
+    # gi.workflows.invoke_workflow(workflow_id=wf_id,
+    #                              history_id=current_hi_id,
+    #                              params=wf_params,
+    #                              inputs=datamap)
+    #
+    gi.workflows.delete_workflow(workflow_id=wf_id)
+
+    datamap = dict()
+    datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
+    datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
+
+    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
+    wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
+
+    gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
+    wf_attr = gi.workflows.get_workflows(name="jbrowse")
+    wf_id = wf_attr[0]["id"]
+    wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
+    print("Jbrowse workflow ID: " + wf_id)
+    wf_params = workflow.set_jbrowse_workflow_parameters()
 
-    # print(gi.workflows.show_workflow(wf_id))
-    gi.workflows.run_workflow(workflow_id=wf_id,
-                              history_id=current_hi_id,
-                              params=wf_params,
-                              dataset_map=datamap)
+    gi.workflows.invoke_workflow(workflow_id=wf_id,
+                                 history_id=current_hi_id,
+                                 params=wf_params,
+                                 inputs=datamap)
+    gi.workflows.delete_workflow(workflow_id=wf_id)
 
-    # for testing, purge configured @ ~/config/galaxy.yml.docker_sample
+    # remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
     # gi.histories.delete_history(history_id=current_hi_id, purge=True)
-    # gi.workflows.delete_workflow(workflow_id=wf_id)
-
-    # # PRE FILLED METHOD
-    # wf_id = workflow.store()
-    # hi_id = gi.histories.get_histories()[0]["id"]
-    # print("Workflow id: " + wf_id)
-    # print("History id: " + hi_id)
-    # wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
-    # # print(wf_show["inputs"])  # -> no inputs
-    # # workflow.port()
-    # li_id = gi.libraries.get_libraries()[0]["id"]
-    # # gi.folders.update_folder()  # TODO: add method to enable data updates
-    # tsi = gi.toolshed.get_repositories()  # tool shed repo access point
-    # # print(gi.users.get_users())  # TODO: users management
-    # # print(gi.libraries.show_library(li_id))
-    # fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location
-    # fo_id = {}
-    # current_fo_name = ""
-    # # print(fo_gi)
-    # # folders ids: access to data to run the first tools
-    # for i in fo_gi:
-    #     for k, v in i.items():
-    #         if k == "name":
-    #             fo_id[v] = 0
-    #             current_fo_name = v
-    #         if k == "id":
-    #             fo_id[current_fo_name] = v
-    # print("Folders id: ")
-    # for k, v in fo_id.items():
-    #     print("\t" + k + ": " + v)
-    # workflow.show()
-    # # gi.workflows.run_workflow(workflow_id=wf_id)  # pre filled workflow, use the set on runtime approach instead
 
     os.chdir(main_dir)
     print("\n")
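
Note on the main.py hunks above: the commit flags the folder/dataset ID parsing with "TODO: turn data id parsing into a function". A minimal sketch of such a helper, against the same bioblend GalaxyInstance calls the patch already uses (the helper name and the extension-to-key mapping for the annotation files are illustrative assumptions, not part of the commit):

    def parse_folder_datasets(gi, fo_id, genus_species):
        """Map a dataset role (e.g. "genome_file") to its library dataset (ldda) ID."""
        # Extension mapping is assumed for illustration; the commit only shows ".fa" -> genome_file
        ext_to_key = {".fa": "genome_file", ".gff": "gff_file"}
        datasets = {}
        for name, folder_id in fo_id.items():
            if name not in ("/genome", "/annotation/" + genus_species):
                continue
            contents = gi.folders.show_folder(folder_id=folder_id, contents=True)
            for entries in contents.values():
                if not isinstance(entries, list):
                    continue  # skip scalar folder metadata
                for entry in entries:
                    if isinstance(entry, dict):
                        for ext, key in ext_to_key.items():
                            if entry["name"].endswith(ext):
                                datasets[key] = entry["ldda_id"]
        return datasets
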
"species": self.species}) + + def get_ogs_analysis(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version}) + + def get_genome_analysis(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version}) + + def add_organism(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", + history_id=self.history, + tool_inputs={"abbr": self.abbr, + "genus": self.genus, + "species": self.species, + "common": self.common + }) + + def add_ogs(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version, + "program": "Performed by Genoscope", + "programversion": str("OGS" + self.genome_version), + "sourcename": "Genoscope", + "date_executed": self.date + }) + def add_genome(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", + history_id=self.history, + tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version, + "program": "Performed by Genoscope", + "programversion": str("genome v" + self.genome_version), + "sourcename": "Genoscope", + "date_executed": self.date + }) + + def add_jbrowse(self, datasets): + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy7", + history_id=self.history, + tool_inputs={"genomes": datasets["genome_file"]}) + + def add_organism_jbrowse(self): + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1", + history_id=self.history, + tool_inputs={"multiple": "false", + "name": "foo", + "unique_id": "bar"}) + + # Homo sapiens deletion TODO: concat the 2 calls + def get_sapiens_id(self): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2", + history_id=self.history, + tool_inputs={"genus": "Homo", "species": "species"}) + + def delete_sapiens(self, hs_id): + """ + Tool invocation must be as return (or else it executes but returns nothing when called) + :return: + """ + return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", + 
history_id=self.history, + tool_inputs={"organism": hs_id}) + + # def load_fasta(self, inputs_dict): + # """ + # + # :return: + # """ + # return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", + # history_id=self.history, + # tool_inputs=) def show_pannel(self): print(self.instance.tools.get_tool_panel()) diff --git a/workflow.py b/workflow.py index 64be187..16fff71 100644 --- a/workflow.py +++ b/workflow.py @@ -1,6 +1,7 @@ import os from bioblend.galaxy import GalaxyInstance - +from toolrunner import ToolRunner +import json """ Workflow creation for generation and visualization of data and analyses output """ @@ -8,7 +9,8 @@ Workflow creation for generation and visualization of data and analyses output class Workflow: - def __init__(self, parameters_dict, instance): + def __init__(self, parameters_dict, instance, history_id): + self.history_id = history_id self.instance = instance self.parameters_dict = parameters_dict self.genus = parameters_dict["genus"] @@ -23,6 +25,7 @@ class Workflow: self.custom_ga_file = None self.custom_ga_file_path = None self.preset_ga_file = None + self.sp_url = self.genus[0].lower() + self.genus[1:] + "_" + self.species if self.strain != "": self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain self.full = "_".join([self.genus, self.species, self.strain, self.sex]) @@ -31,7 +34,7 @@ class Workflow: self.full = "_".join([self.genus, self.species, self.strain, self.sex]) self.workflow = None - def generate(self, working_directory, main_directory): + def generate(self, working_directory, main_directory, workflow_name): """ Generation of a galaxy workflow using the defined parameters in the .json input file Output format is a json dict @@ -42,10 +45,10 @@ class Workflow: """ # template workflow as a string # template_workflow_str = '"{"uuid": "ea9c3050-416f-4098-a7ff-b05c952bcd73", "tags": [], "format-version": "0.1", "name": "test", "version": 2, "steps": {"0": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "txt", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "d9b75b03-49e7-4a81-a67c-eaf6a9671905", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"organism\": \"\\\"2\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\"}", "id": 0, "tool_shed_repository": {"owner": "gga", "changeset_revision": "13da56fdaeb1", "name": "chado_organism_delete_organisms", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "f70569bc-9ac0-441a-a2d8-b547086a5bdf", "errors": null, "name": "Chado organism delete", "post_job_actions": {}, "label": "$ORGADELETE", "inputs": [], "position": {"top": 362, "left": 200}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "type": "tool"}, "1": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8ca0b891-0f01-4787-9b7f-57105dc303b0", "label": null}], "input_connections": {}, "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", 
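
The docstrings above stress that each run_tool() call must be returned: bioblend's run_tool() gives back a dict describing the created job, and the caller needs it to retrieve the tool's JSON result. A condensed sketch of the consuming side, mirroring what workflow.py does below (the helper name is illustrative, and the job wait/polling step the patch also omits is left out):

    import json

    def fetch_tool_json(gi, tool_output):
        # run_tool() returns a dict whose "outputs" list carries the dataset
        # id of each tool output; the Chado tools emit a single JSON dataset.
        dataset_id = tool_output["outputs"][0]["id"]
        raw = gi.datasets.download_dataset(dataset_id=dataset_id)
        return json.loads(raw)

    # e.g.: org_id = str(fetch_tool_json(gi, toolrunner.get_organism())[0]["organism_id"])
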
\"common\": \"\\\"$COMMON\\\"\", \"genus\": \"\\\"$GENUS\\\"\", \"species\": \"\\\"$SPECIES\\\"\", \"abbr\": \"\\\"$ABBR\\\"\"}", "id": 1, "tool_shed_repository": {"owner": "gga", "changeset_revision": "0f4956cec445", "name": "chado_organism_add_organism", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "24f0e175-f932-4e48-8b42-a53d9a432d5e", "errors": null, "name": "Chado organism add", "post_job_actions": {}, "label": "$ORGADD", "inputs": [], "position": {"top": 361, "left": 467.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "type": "tool"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8fa0e728-8803-4800-93b4-70f906f95f87", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$GENOME\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 2, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "76cbbd55-f1ac-4e48-be3c-c7bbda5add4c", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDGENOME", "inputs": [], "position": {"top": 307, "left": 690}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "5e7da027-0723-4077-8885-2dbe51cb5dda", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$OGS\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "4d1ffee4-00b2-445d-b630-b7b774c17873", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDOGS", "inputs": [], "position": {"top": 395, "left": 697}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "737dddc9-ae1b-463d-99fa-d9176053594d", "label": null}], "input_connections": {}, "tool_state": "{\"do_update\": \"\\\"false\\\"\", \"relationships\": 
\"{\\\"__current_case__\\\": 0, \\\"rel_type\\\": \\\"none\\\"}\", \"ext_db\": \"{\\\"db\\\": \\\"\\\", \\\"re_db_accession\\\": \\\"\\\"}\", \"analysis_id\": \"\\\"4\\\"\", \"re_uniquename\": \"\\\"\\\"\", \"match_on_name\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"re_name\": \"\\\"\\\"\", \"fasta\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"wait_for\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"organism\": \"\\\"2\\\"\", \"sequence_type\": \"\\\"contig\\\"\"}", "id": 4, "tool_shed_repository": {"owner": "gga", "changeset_revision": "1421dbc33a92", "name": "chado_feature_load_fasta", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "3d417ced-fc48-4c04-8a92-fdb7b9fecafc", "errors": null, "name": "Chado load fasta", "post_job_actions": {}, "label": "$LOADFASTA", "inputs": [{"name": "fasta", "description": "runtime parameter for tool Chado load fasta"}, {"name": "wait_for", "description": "runtime parameter for tool Chado load fasta"}], "position": {"top": 306, "left": 933.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}"' - # TODO: store workflow as a var instead of file + # TODO: store workflow as a var instead of file (once it runs smoothly) os.chdir(path=working_directory) - self.preset_ga_file = main_directory + "Galaxy-Workflow-preset_workflow.ga" + self.preset_ga_file = main_directory + "Galaxy-Workflow-" + workflow_name + ".ga" if self.strain != "": self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga" self.custom_ga_file_path = os.path.abspath(self.custom_ga_file) @@ -55,67 +58,99 @@ class Workflow: self.custom_ga_file_path = os.path.abspath(self.custom_ga_file) # print("Workflow file @ " + self.custom_ga_file_path) with open(self.preset_ga_file, 'r') as ga_in_file: - ga_in: str = str(ga_in_file.readlines()) - # Pre-filled workflow, useless for parameters and data set on runtime - # ga_in = ga_in.replace("$OGS", "OGS") - # ga_in = ga_in.replace("$VERSION", self.ogs_version) - # ga_in = ga_in.replace("$GENUS", self.genus) - # ga_in = ga_in.replace("$SPECIES", self.species) - # ga_in = ga_in.replace("$ABBR", self.abbr) - # ga_in = ga_in.replace("$STRAIN", self.strain) - # ga_in = ga_in.replace("$PERFORMEDBY", self.performed) - # ga_in = ga_in.replace("$COMMON", self.common) - # ga_in = ga_in.replace("$ORGA", self.full) - # ga_in = ga_in.replace("$ADDAN", "Add analysis") + ga_in = str(ga_in_file.readlines()) ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import - workflow_name = '"name": "' + self.full + '"' + ga_in = ga_in.replace('"name": "NAME"', str('"name": "' + self.genus.lower()[0] + self.species) + '"') + ga_in = ga_in.replace('{"unique_id": "UNIQUEID"}', str('{"unique_id": "' + self.genus + " " + self.species) + '"') + ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"', + "http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + " " + self.species + "/feature/" + self.genus + "/mRNA/{id}") + # ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true') + # workflow_name = '"name": "' + self.full + '"' # ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"') # print(workflow_name) ga_in = ga_in[2:-2] - # ga_in = 
ga_in[:-2] + # ga_in = ga_in[:-2] # if the line above doesn't outputs a correct json self.workflow = ga_in - print(str(main_directory + "Galaxy-Workflow-preset_workflow.ga")) return ga_in - def set_params(self, datasets): + def set_main_workflow_parameters(self, datasets): """ - + Test function :return: """ + # TODO: move tool calls to main/autoload + + toolrunner = ToolRunner(parameters_dict=self.parameters_dict, instance=self.instance, history=self.history_id) + + toolrunner.add_organism() + toolrunner.add_ogs() + toolrunner.add_genome() + + org_id: str = None + genome_analysis_id: str = None + ogs_analysis_id: str = None + + org = toolrunner.get_organism() + org_job_out = org["outputs"][0]["id"] + org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out) + try: + org_output = json.loads(org_json_output)[0] + org_id = str(org_output["organism_id"]) # needs to be str to be recognized by the chado tool + global_org_id = org_id + except IndexError: + print("No organism matching " + self.full + " exists in the Chado database") + + ogs_analysis = toolrunner.get_ogs_analysis() + ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"] + ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out) + try: + ogs_analysis_output = json.loads(ogs_analysis_json_output)[0] + ogs_analysis_id = str(ogs_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool + global_ogs_id = ogs_analysis_id + except IndexError: + print("No matching OGS analysis exists in the Chado database") + + genome_analysis = toolrunner.get_genome_analysis() + genome_analysis_job_out = genome_analysis["outputs"][0]["id"] + genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out) + try: + genome_analysis_output = json.loads(genome_analysis_json_output)[0] + genome_analysis_id = str(genome_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool + global_genome_id = genome_analysis_id + except IndexError: + print("No matching genome analysis exists in the Chado database") + params = dict() - # params["0"] = {} - # params["1"] = {} - # params["2"] = {} - # params["3"] = {} - params["4"] = {"confirm": "True"} - params["5"] = {"abbr": self.abbr, "genus": self.genus, "species": self.species, "common": self.common} - # params["2"] = {"analysis": str(self.genus + " " + self.species + " genome v" + self.genome_version)} - # params["3"] = {"analysis": str(self.genus + " " + self.species + " OGS" + self.ogs_version)} - params["6"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version, - "program": "Performed by Genoscope", - "programversion": str("genome v" + self.genome_version), - "sourcename": "Genoscope", - "date_executed": self.date - } - params["7"] = {"name": self.genus + " " + self.species + " OGS" + self.ogs_version, - "program": "Performed by Genoscope", - "programversion": str("OGS" + self.genome_version), - "sourcename": "Genoscope", - "date_executed": self.date - } - params["8"] = {"genus": self.genus, - "species": self.species, - "common": self.common, - "abbr": self.abbr + params["0"] = {} + params["1"] = {} + params["2"] = {} + params["3"] = {} + params["4"] = {"organism": org_id, + "analysis_id": genome_analysis_id, + "do_update": "true"} # the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true" + params["5"] = {"organism": org_id, + "analysis_id": ogs_analysis_id, } - params["9"] = 
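
A remark on generate() above: str(ga_in_file.readlines()) builds the Python repr of a list of lines, which is why the code then strips the enclosing ['...'] with ga_in[2:-2] and repairs the doubled backslashes before json.loads() will accept the string. Assuming the .ga file is valid JSON (Galaxy workflow files are) and the name substitution is done on the parsed structure instead of on the raw string, the round-trip needs no slicing at all (a sketch, not the commit's code; the helper name is illustrative):

    import json

    def load_workflow(preset_ga_file, name):
        with open(preset_ga_file, "r") as ga_in_file:
            wf_dict = json.load(ga_in_file)  # parse the .ga JSON directly
        wf_dict["name"] = name               # e.g. "upinnatifida"
        return wf_dict                       # ready for import_workflow_dict()
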
{"name": self.genus + " " + self.species + " genome v" + self.genome_version, - "program": "Performed by Genoscope", - "programversion": str("genome v" + self.genome_version), - "sourcename": "Genoscope", - } - # params["10"] = {"organism": "13", "analysis": "2", "sequence": "contig"} + params["6"] = {"organism_id": org_id} + params["7"] = {"analysis_id": ogs_analysis_id} + params["8"] = {"analysis_id": genome_analysis_id} + params["9"] = {"organism_id": org_id} + return params + def set_jbrowse_workflow_parameters(self): + params = dict() + params["0"] = {} + params["1"] = {} + # jbrowse tools parameters are inside nested dictionaries, read tool inputs before adding or modifying anything + # problem is with Jbrowse add organism --> the parameters are nested + params["2"] = {} # {"jbmenu": {"menu_url": "http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"}} # {"menu_url": "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"])}} + params["3"] = {} + # params["3"] = {"name": " ".join([self.genus, self.species + self.strain + "male"]), + # "unique_id": self.genus.lower()[0] + self.species} + # params["3"] = {"name": [{"name": str(self.genus + " " + self.species), + # "unique_id": str(self.genus.lower()[0] + self.species)}]} + def set_datamap(self): gi = self.instance diff --git a/workflows/tool_example.json b/workflows/tool_example.json new file mode 100644 index 0000000..e69de29 -- GitLab