Commit 0b4c80b7 authored by Arthur Le Bars

Functional version 1.0: Chado loading + Tripal sync + jbrowse

parent c9b3ff8f
Merge request: !1 Release 1.0

main.py
from bioblend import galaxy
import bioblend.galaxy.objects as bbo
import bioblend as bb
import argparse
import os
import sys
import subprocess
import json
import urllib3 as ul
from chado import ChadoInstance
from workflow import Workflow
from filetransfer import FileTransfer
from toolrunner import ToolRunner
from webscrap import WebScrap
"""
TODO: script description
python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json
"""
class Autoload:
"""
TODO: turn main into an object
"""
def __init__(self, json_in):
self.json_in = json_in
@@ -83,7 +90,7 @@ def main():
# Connect to the galaxy instance of the current species TODO: API key connection issues
gi = galaxy.GalaxyInstance(url=instance_url,
key="0e993414b2f876515e74dd890f16ffc7",
key="3b36455cb16b4d0e4348e2c42f4bb934",
email="alebars@sb-roscoff.fr",
password="pouet",
verify=True)
@@ -107,7 +114,7 @@ def main():
print("Cannot access " + sp_dir + ", run with higher privileges")
break
# Production instance example TODO: secure pswd and API key + manage API keys
# gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
# key="84dfbee3c0efa9155518f01fbeff57c8",
# email="gga@sb-roscoff.fr",
@@ -145,23 +152,13 @@ def main():
# print("Data successfully loaded into docker container for " + genus_species_strain)
gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
hi_id = hi[0]["id"]
li_id = gi.libraries.get_libraries()[0]["id"]  # only one library: the project data folder/library
fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
fo_id = {}
current_fo_name = ""
@@ -174,16 +171,14 @@ def main():
if k == "id":
fo_id[current_fo_name] = v
# TODO: turn data id parsing into a function
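# A possible shape for that function (hypothetical sketch, not part of this commit;
# it relies only on the "name" and "id" keys read by the loop above):
# def parse_folder_ids(folders):
#     """Map folder names to folder IDs from gi.libraries.get_folders() output."""
#     folder_ids = {}
#     for folder in folders:
#         folder_ids[folder["name"]] = folder["id"]
#     return folder_ids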
print("Folders and datasets IDs: ")
datasets = dict()
for k, v in fo_id.items():
print("\t" + k + ": " + v)
if k == "/genome":
sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
for k2, v2 in sub_folder_content.items():
for e in v2:
if isinstance(e, dict):
if e["name"].endswith(".fa"):
@@ -191,10 +186,7 @@
print("\t\t" + e["name"] + ": " + e["ldda_id"])
elif k == "/annotation/" + genus_species:
sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
for k2, v2 in sub_folder_content.items():
for e in v2:
if isinstance(e, dict):
# TODO: manage several files of the same type
@@ -215,74 +207,66 @@ def main():
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
# Workflow generation
workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id=current_hi_id)
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")
tools = gi.tools.get_tool_panel() # tools panel -> alternative to wf
# print(tools)
wf_dict = json.loads(wf_dict_json) # doesn't work with eval()
# print(wf_dict)
# print(wf_dict_json.count("id"))
# TODO: fill in workflow inputs and attributes (doable?)
print(gi.workflows.import_workflow_dict(workflow_dict=wf_dict))
wf_name = workflow.get_workflow_name()
wf_attr = gi.workflows.get_workflows(name=wf_name)
# print(wf_attr)
wf_id = wf_attr[0]["id"]
wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
print("Workflow ID: " + wf_id)
print(wf_show["inputs"])
toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
# toolrunner.purge_organisms()
# wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)
wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
print("Inputs:")
print(wf_show["inputs"])
datamap = dict()  # maps workflow input step IDs ("0", "1", ...) to history datasets ("hda" = HistoryDatasetAssociation)
datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
datamap["2"] = {"src": "hda", "id": datasets["transcripts_file"]}
datamap["4"] = {"src": "hda", "id": datasets["proteins_file"]}
datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
#
# gi.workflows.invoke_workflow(workflow_id=wf_id,
# history_id=current_hi_id,
# params=wf_params,
# inputs=datamap)
#
gi.workflows.delete_workflow(workflow_id=wf_id)
datamap = dict()
datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
wf_dict = json.loads(wf_dict_json) # doesn't work with eval()
gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
wf_attr = gi.workflows.get_workflows(name="jbrowse")
wf_id = wf_attr[0]["id"]
wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
print("Jbrowse workflow ID: " + wf_id)
wf_params = workflow.set_jbrowse_workflow_parameters()
# print(gi.workflows.show_workflow(wf_id))
gi.workflows.invoke_workflow(workflow_id=wf_id,
history_id=current_hi_id,
params=wf_params,
inputs=datamap)
gi.workflows.delete_workflow(workflow_id=wf_id)
# remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
# gi.histories.delete_history(history_id=current_hi_id, purge=True)
# gi.workflows.delete_workflow(workflow_id=wf_id)
# # PRE FILLED METHOD
# wf_id = workflow.store()
# hi_id = gi.histories.get_histories()[0]["id"]
# print("Workflow id: " + wf_id)
# print("History id: " + hi_id)
# wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
# # print(wf_show["inputs"]) # ->no inputs
# # workflow.port()
# li_id = gi.libraries.get_libraries()[0]["id"]
# # gi.folders.update_folder() # TODO: add method to enable data updates
# tsi = gi.toolshed.get_repositories() # tool shed repo access point
# # print(gi.users.get_users()) # TODO: users management
# # print(gi.libraries.show_library(li_id))
# fo_gi = gi.libraries.get_folders(library_id=li_id) # data location
# fo_id = {}
# current_fo_name = ""
# # print(fo_gi)
# # folders ids: access to data to run the first tools
# for i in fo_gi:
# for k, v in i.items():
# if k == "name":
# fo_id[v] = 0
# current_fo_name = v
# if k == "id":
# fo_id[current_fo_name] = v
# print("Folders id: ")
# for k, v in fo_id.items():
# print("\t" + k + ": " + v)
# workflow.show()
# # gi.workflows.run_workflow(workflow_id=wf_id) # pre filled workflow, use the set on runtime approach instead
os.chdir(main_dir)
print("\n")
toolrunner.py

@@ -2,7 +2,7 @@ from bioblend import galaxy
from bioblend.galaxy import GalaxyInstance
import os
"""
Methods to run all Chado and Tripal tools on galaxy
"""
@@ -20,20 +20,21 @@ class ToolRunner:
self.genome_version = parameters_dict["genome version"]
self.ogs_version = parameters_dict["ogs version"]
self.sex = parameters_dict["sex"]
self.date = parameters_dict["date"]
self.custom_ga_file = None
self.custom_ga_file_path = None
self.preset_ga_file = None
self.analysis = None
self.organism = None
self.program = None
self.ogs = str("OGS"+ self.ogs_version)
self.genome = str(self.full + " genome v" + self.genome_version)
if self.strain != "":
self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain
self.full = "_".join([self.genus, self.species, self.strain, self.sex])
else:
self.abbr = self.genus[0].lower() + "_" + self.species
self.full = "_".join([self.genus, self.species, self.strain, self.sex])
self.ogs = str("OGS"+ self.ogs_version)
self.genome = str(self.full + " genome v" + self.genome_version)
def show_tool(self, tool_id):
print(self.instance.tools.show_tool(tool_id=tool_id, io_details=True))
@@ -44,15 +45,115 @@
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
history_id=self.history,
tool_inputs={"name": self.organism})
def purge_analyses(self):
return None
def purge_genome(self):
# NOTE: this currently calls the analysis *add* tool, not a delete
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
history_id=self.history,
tool_inputs={"name": self.genome,
"program": "Performed by Genoscope",
"programversion": str("genome v" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": "2020-02-02"})
def get_organism(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
history_id=self.history,
tool_inputs={"genus": self.genus, "species": self.species})
def get_ogs_analysis(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
history_id=self.history,
tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
def get_genome_analysis(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
history_id=self.history,
tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
def add_organism(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
history_id=self.history,
tool_inputs={"abbr": self.abbr,
"genus": self.genus,
"species": self.species,
"common": self.common
})
def add_ogs(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
history_id=self.history,
tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
"program": "Performed by Genoscope",
"programversion": str("OGS" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": self.date
})
def add_genome(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
history_id=self.history,
tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
"program": "Performed by Genoscope",
"programversion": str("genome v" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": self.date
})
def add_jbrowse(self, datasets):
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy7",
history_id=self.history,
tool_inputs={"genomes": datasets["genome_file"]})
def add_organism_jbrowse(self):
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
history_id=self.history,
tool_inputs={"multiple": "false",
"name": "foo",
"unique_id": "bar"})
# Homo sapiens deletion TODO: concat the 2 calls (see the sketch after delete_sapiens below)
def get_sapiens_id(self):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
history_id=self.history,
tool_inputs={"genus": "Homo", "species": "species"})
def delete_sapiens(self, hs_id):
"""
The tool invocation must be returned (otherwise the tool executes but the method returns nothing).
:return:
"""
return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
history_id=self.history,
tool_inputs={"organism": hs_id})
# def load_fasta(self, inputs_dict):
# """
#
# :return:
# """
# return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2",
# history_id=self.history,
# tool_inputs=)
def show_panel(self):
print(self.instance.tools.get_tool_panel())
workflow.py
import os
from bioblend.galaxy import GalaxyInstance
from toolrunner import ToolRunner
import json
"""
Workflow creation for the generation and visualization of data and analysis outputs
"""
@@ -8,7 +9,8 @@ Workflow creation for generation and visualization of data and analyses output
class Workflow:
def __init__(self, parameters_dict, instance, history_id):
self.history_id = history_id
self.instance = instance
self.parameters_dict = parameters_dict
self.genus = parameters_dict["genus"]
@@ -23,6 +25,7 @@ class Workflow:
self.custom_ga_file = None
self.custom_ga_file_path = None
self.preset_ga_file = None
self.sp_url = self.genus[0].lower() + self.genus[1:] + "_" + self.species
if self.strain != "":
self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain
self.full = "_".join([self.genus, self.species, self.strain, self.sex])
@@ -31,7 +34,7 @@ class Workflow:
self.full = "_".join([self.genus, self.species, self.sex])
self.workflow = None
def generate(self, working_directory, main_directory, workflow_name):
"""
Generate a Galaxy workflow from the parameters defined in the .json input file.
Returns the workflow as a JSON string.
@@ -42,10 +45,10 @@
"""
# template workflow as a string
# template_workflow_str = '"{"uuid": "ea9c3050-416f-4098-a7ff-b05c952bcd73", "tags": [], "format-version": "0.1", "name": "test", "version": 2, "steps": {"0": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "txt", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "d9b75b03-49e7-4a81-a67c-eaf6a9671905", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"organism\": \"\\\"2\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\"}", "id": 0, "tool_shed_repository": {"owner": "gga", "changeset_revision": "13da56fdaeb1", "name": "chado_organism_delete_organisms", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "f70569bc-9ac0-441a-a2d8-b547086a5bdf", "errors": null, "name": "Chado organism delete", "post_job_actions": {}, "label": "$ORGADELETE", "inputs": [], "position": {"top": 362, "left": 200}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "type": "tool"}, "1": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8ca0b891-0f01-4787-9b7f-57105dc303b0", "label": null}], "input_connections": {}, "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"common\": \"\\\"$COMMON\\\"\", \"genus\": \"\\\"$GENUS\\\"\", \"species\": \"\\\"$SPECIES\\\"\", \"abbr\": \"\\\"$ABBR\\\"\"}", "id": 1, "tool_shed_repository": {"owner": "gga", "changeset_revision": "0f4956cec445", "name": "chado_organism_add_organism", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "24f0e175-f932-4e48-8b42-a53d9a432d5e", "errors": null, "name": "Chado organism add", "post_job_actions": {}, "label": "$ORGADD", "inputs": [], "position": {"top": 361, "left": 467.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "type": "tool"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8fa0e728-8803-4800-93b4-70f906f95f87", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$GENOME\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 2, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "76cbbd55-f1ac-4e48-be3c-c7bbda5add4c", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDGENOME", "inputs": [], "position": {"top": 307, "left": 690}, "annotation": "", "content_id": 
"toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "5e7da027-0723-4077-8885-2dbe51cb5dda", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$OGS\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "4d1ffee4-00b2-445d-b630-b7b774c17873", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDOGS", "inputs": [], "position": {"top": 395, "left": 697}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "737dddc9-ae1b-463d-99fa-d9176053594d", "label": null}], "input_connections": {}, "tool_state": "{\"do_update\": \"\\\"false\\\"\", \"relationships\": \"{\\\"__current_case__\\\": 0, \\\"rel_type\\\": \\\"none\\\"}\", \"ext_db\": \"{\\\"db\\\": \\\"\\\", \\\"re_db_accession\\\": \\\"\\\"}\", \"analysis_id\": \"\\\"4\\\"\", \"re_uniquename\": \"\\\"\\\"\", \"match_on_name\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"re_name\": \"\\\"\\\"\", \"fasta\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"wait_for\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"organism\": \"\\\"2\\\"\", \"sequence_type\": \"\\\"contig\\\"\"}", "id": 4, "tool_shed_repository": {"owner": "gga", "changeset_revision": "1421dbc33a92", "name": "chado_feature_load_fasta", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "3d417ced-fc48-4c04-8a92-fdb7b9fecafc", "errors": null, "name": "Chado load fasta", "post_job_actions": {}, "label": "$LOADFASTA", "inputs": [{"name": "fasta", "description": "runtime parameter for tool Chado load fasta"}, {"name": "wait_for", "description": "runtime parameter for tool Chado load fasta"}], "position": {"top": 306, "left": 933.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}"'
# TODO: store workflow as a var instead of file (once it runs smoothly)
os.chdir(path=working_directory)
self.preset_ga_file = main_directory + "Galaxy-Workflow-preset_workflow.ga"
self.preset_ga_file = main_directory + "Galaxy-Workflow-" + workflow_name + ".ga"
if self.strain != "":
self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
@@ -55,67 +58,99 @@ class Workflow:
self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
# print("Workflow file @ " + self.custom_ga_file_path)
with open(self.preset_ga_file, 'r') as ga_in_file:
# Pre-filled workflow, useless for parameters and data set on runtime
# ga_in = ga_in.replace("$OGS", "OGS")
# ga_in = ga_in.replace("$VERSION", self.ogs_version)
# ga_in = ga_in.replace("$GENUS", self.genus)
# ga_in = ga_in.replace("$SPECIES", self.species)
# ga_in = ga_in.replace("$ABBR", self.abbr)
# ga_in = ga_in.replace("$STRAIN", self.strain)
# ga_in = ga_in.replace("$PERFORMEDBY", self.performed)
# ga_in = ga_in.replace("$COMMON", self.common)
# ga_in = ga_in.replace("$ORGA", self.full)
# ga_in = ga_in.replace("$ADDAN", "Add analysis")
ga_in = str(ga_in_file.readlines())
ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import
workflow_name = '"name": "' + self.full + '"'
ga_in = ga_in.replace('"name": "NAME"', str('"name": "' + self.genus.lower()[0] + self.species) + '"')
ga_in = ga_in.replace('{"unique_id": "UNIQUEID"}', str('{"unique_id": "' + self.genus + " " + self.species) + '"')
ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"',
"http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + " " + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true')
# workflow_name = '"name": "' + self.full + '"'
# ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
# print(workflow_name)
ga_in = ga_in[2:-2]  # strip the "['" and "']" wrapping left by str(readlines())
# ga_in = ga_in[:-2]  # if the line above doesn't output a correct json
self.workflow = ga_in
print(self.preset_ga_file)
return ga_in
def set_main_workflow_parameters(self, datasets):
"""
Set the runtime parameters of the main (Chado) workflow, fetching organism and analysis IDs from the Chado database
:return:
"""
# TODO: move tool calls to main/autoload
toolrunner = ToolRunner(parameters_dict=self.parameters_dict, instance=self.instance, history=self.history_id)
toolrunner.add_organism()
toolrunner.add_ogs()
toolrunner.add_genome()
org_id = None
genome_analysis_id = None
ogs_analysis_id = None
org = toolrunner.get_organism()
org_job_out = org["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
try:
org_output = json.loads(org_json_output)[0]
org_id = str(org_output["organism_id"]) # needs to be str to be recognized by the chado tool
global_org_id = org_id
except IndexError:
print("No organism matching " + self.full + " exists in the Chado database")
ogs_analysis = toolrunner.get_ogs_analysis()
ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
try:
ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
ogs_analysis_id = str(ogs_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool
global_ogs_id = ogs_analysis_id
except IndexError:
print("No matching OGS analysis exists in the Chado database")
genome_analysis = toolrunner.get_genome_analysis()
genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
try:
genome_analysis_output = json.loads(genome_analysis_json_output)[0]
genome_analysis_id = str(genome_analysis_output["analysis_id"]) # needs to be str to be recognized by the chado tool
global_genome_id = genome_analysis_id
except IndexError:
print("No matching genome analysis exists in the Chado database")
params = dict()
# params["0"] = {}
# params["1"] = {}
# params["2"] = {}
# params["3"] = {}
params["4"] = {"confirm": "True"}
params["5"] = {"abbr": self.abbr, "genus": self.genus, "species": self.species, "common": self.common}
# params["2"] = {"analysis": str(self.genus + " " + self.species + " genome v" + self.genome_version)}
# params["3"] = {"analysis": str(self.genus + " " + self.species + " OGS" + self.ogs_version)}
params["6"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version,
"program": "Performed by Genoscope",
"programversion": str("genome v" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": self.date
}
params["7"] = {"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
"program": "Performed by Genoscope",
"programversion": str("OGS" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": self.date
}
params["8"] = {"genus": self.genus,
"species": self.species,
"common": self.common,
"abbr": self.abbr
params["0"] = {}
params["1"] = {}
params["2"] = {}
params["3"] = {}
params["4"] = {"organism": org_id,
"analysis_id": genome_analysis_id,
"do_update": "true"} # the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true"
params["5"] = {"organism": org_id,
"analysis_id": ogs_analysis_id,
}
params["9"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version,
"program": "Performed by Genoscope",
"programversion": str("genome v" + self.genome_version),
"sourcename": "Genoscope",
}
# params["10"] = {"organism": "13", "analysis": "2", "sequence": "contig"}
params["6"] = {"organism_id": org_id}
params["7"] = {"analysis_id": ogs_analysis_id}
params["8"] = {"analysis_id": genome_analysis_id}
params["9"] = {"organism_id": org_id}
return params
def set_jbrowse_workflow_parameters(self):
params = dict()
params["0"] = {}
params["1"] = {}
# jbrowse tool parameters are nested dictionaries; read the tool inputs before adding or modifying anything
# the tricky step is "JBrowse add organism": its parameters are nested
params["2"] = {} # {"jbmenu": {"menu_url": "http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"}} # {"menu_url": "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"])}}
params["3"] = {}
# params["3"] = {"name": " ".join([self.genus, self.species + self.strain + "male"]),
# "unique_id": self.genus.lower()[0] + self.species}
# params["3"] = {"name": [{"name": str(self.genus + " " + self.species),
# "unique_id": str(self.genus.lower()[0] + self.species)}]}
def set_datamap(self):
gi = self.instance