Skip to content
Snippets Groups Projects
Commit c9b3ff8f authored by Arthur Le Bars's avatar Arthur Le Bars
Browse files

Load data + chado fasta

parent 16c13da8
No related branches found
No related tags found
1 merge request!1Release 1.0
......@@ -9,6 +9,6 @@
"performed by" : "Genoscope",
"genome version" : "1.0",
"ogs version" : "1.0",
"date" : "2020-20-20"
"date" : "2020-01-01"
}
]
......@@ -12,8 +12,5 @@ class FileTransfer:
user_host = [user, host]
subprocess.Popen(user_host)
def file(self):
return 0
......@@ -7,6 +7,7 @@ import subprocess
import json
from workflow import Workflow
from filetransfer import FileTransfer
from toolrunner import ToolRunner
# TODO: script description
......@@ -85,7 +86,7 @@ def main():
key="0e993414b2f876515e74dd890f16ffc7",
email="alebars@sb-roscoff.fr",
password="pouet",
verify=False)
verify=True)
"""
This part creates the current species directory and go to it
......@@ -143,24 +144,25 @@ def main():
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
# TODO: datamapping to run workflow (get datasets from instance)
datamap = {}
# generate workflow file and run it in the galaxy instance
workflow = Workflow(parameters_dict=sp_dict, instance=gi)
print("Generating custom workflow for " + genus_species_strain)
wf_dict = workflow.generate(working_directory=wd, main_directory=main_dir)
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir)
gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
hi_id = hi[0]["id"]
# print(hi)
li = gi.libraries.get_libraries() # only one library
# print(li)
li_id = gi.libraries.get_libraries()[0]["id"] # project data folder/library
# print(li_id)
fo_gi = gi.libraries.get_folders(library_id=li_id) # data location (project data)
# print(fo_gi)
fo_id = {}
current_fo_name = ""
# folders ids: access to data to run the first tools
......@@ -171,8 +173,9 @@ def main():
current_fo_name = v
if k == "id":
fo_id[current_fo_name] = v
print("IDs: ")
datasets = {}
print("Folders and datasets IDs: ")
datasets = dict()
for k, v in fo_id.items():
print("\t" + k + ": " + v)
if k == "/genome":
......@@ -205,18 +208,50 @@ def main():
datasets["gff_file"] = e["ldda_id"]
print("\t\t" + e["name"] + ": " + e["ldda_id"])
current_hi = gi.histories.get_current_history()["id"]
print("History ID:" + current_hi)
gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["genome_file"])
gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["gff_file"])
gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["transcripts_file"])
gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["proteins_file"])
# print(gi.tools.get_tool_panel())
current_hi_id = gi.histories.get_current_history()["id"]
print("History ID: " + current_hi_id)
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["genome_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["gff_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
tools = gi.tools.get_tool_panel() # tools panel -> alternative to wf
# print(tools)
wf_dict = json.loads(wf_dict_json) # doesn't work with eval()
# print(wf_dict)
wf_dict_2 = json.loads(wf_dict) # add to workflow class
gi.workflows.import_workflow_dict(workflow_dict=wf_dict_2) # doesn't work with eval()
# print(wf_dict_json.count("id"))
# TODO: fill in workflow inputs and attributes (doable?)
gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
print(gi.workflows.import_workflow_dict(workflow_dict=wf_dict))
wf_name = workflow.get_workflow_name()
wf_attr = gi.workflows.get_workflows(name=wf_name)
# print(wf_attr)
wf_id = wf_attr[0]["id"]
wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
print("Workflow ID: " + wf_id)
print(wf_show["inputs"])
# toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
# toolrunner.purge_organisms()
wf_params = workflow.set_params(datasets=datasets)
datamap = dict()
datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
datamap["2"] = {"src": "hda", "id": datasets["transcripts_file"]}
datamap["4"] = {"src": "hda", "id": datasets["proteins_file"]}
# print(gi.workflows.show_workflow(wf_id))
gi.workflows.run_workflow(workflow_id=wf_id,
history_id=current_hi_id,
params=wf_params,
dataset_map=datamap)
# for testing, purge configured @ ~/config/galaxy.yml.docker_sample
# gi.histories.delete_history(history_id=current_hi_id, purge=True)
# gi.workflows.delete_workflow(workflow_id=wf_id)
# # PRE FILLED METHOD
# wf_id = workflow.store()
......@@ -231,7 +266,6 @@ def main():
# tsi = gi.toolshed.get_repositories() # tool shed repo access point
# # print(gi.users.get_users()) # TODO: users management
# # print(gi.libraries.show_library(li_id))
# # TODO: create a NEW history, import data to galaxy from library (src_data)
# fo_gi = gi.libraries.get_folders(library_id=li_id) # data location
# fo_id = {}
# current_fo_name = ""
......@@ -250,8 +284,6 @@ def main():
# workflow.show()
# # gi.workflows.run_workflow(workflow_id=wf_id) # pre filled workflow, use the set on runtime approach instead
# for testing, purge configured @ ~/config/galaxy.yml.docker_sample
# gi.histories.delete_history(history_id=hi_id, purge=True)
os.chdir(main_dir)
print("\n")
......
from bioblend import galaxy
from bioblend.galaxy import GalaxyInstance
import os
"""
Methods to run Chado and Tripal tools on galaxy
"""
class ToolRunner:
    """
    Run individual Chado/Tripal tools on a galaxy instance, outside of a workflow.

    The species description (genus, species, strain, ...) is read from the
    parameters dict, the tools are run through the given galaxy instance
    inside the given history.
    """

    # Tool shed identifiers of the tools run by this class
    ORGANISM_DELETE_TOOL_ID = ("toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/"
                               "organism_delete_organisms/2.3.2")
    ANALYSIS_ADD_TOOL_ID = ("toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/"
                            "analysis_add_analysis/2.3.2")

    def __init__(self, parameters_dict, instance, history):
        """
        :param parameters_dict: species description; must contain the keys "genus", "species",
            "strain", "common", "performed by", "genome version", "ogs version" and "sex"
        :param instance: galaxy instance object used to reach the tools API
        :param history: id of the history the tools are run in
        :raises KeyError: if one of the expected keys is missing from parameters_dict
        """
        self.history = history
        self.instance = instance
        self.parameters_dict = parameters_dict
        self.genus = parameters_dict["genus"]
        self.species = parameters_dict["species"]
        self.strain = parameters_dict["strain"]
        self.common = parameters_dict["common"]
        self.performed = parameters_dict["performed by"]
        self.genome_version = parameters_dict["genome version"]
        self.ogs_version = parameters_dict["ogs version"]
        self.sex = parameters_dict["sex"]
        self.custom_ga_file = None
        self.custom_ga_file_path = None
        self.preset_ga_file = None
        self.analysis = None
        self.organism = None
        self.program = None
        self.ogs = "OGS" + self.ogs_version

        # The abbreviation and the full name only include the strain when one is given,
        # so that an empty strain does not leave a dangling "_" in the names.
        abbr_parts = [self.genus[0].lower(), self.species]
        full_parts = [self.genus, self.species]
        if self.strain != "":
            abbr_parts.append(self.strain)
            full_parts.append(self.strain)
        full_parts.append(self.sex)
        self.abbr = "_".join(abbr_parts)
        self.full = "_".join(full_parts)

        # Must be computed after self.full: it is derived from it
        self.genome = self.full + " genome v" + self.genome_version

    def show_tool(self, tool_id):
        """
        Print and return the description of a tool (inputs/outputs details included)

        :param tool_id: id of the tool to describe
        :return: what the instance's tools.show_tool() returned
        """
        # Query the instance once and reuse the result for both the print and the return
        tool_description = self.instance.tools.show_tool(tool_id=tool_id, io_details=True)
        print(tool_description)
        return tool_description

    def purge_organism(self):
        """
        Print the description of the "organism delete" tool, then run it in the history

        The tool is given self.organism as the name to delete.
        NOTE(review): self.organism is only ever set to None in this class, it presumably
        has to be set by the caller before calling this method -- confirm.
        :return:
        """
        self.show_tool(tool_id=self.ORGANISM_DELETE_TOOL_ID)
        self.instance.tools.run_tool(tool_id=self.ORGANISM_DELETE_TOOL_ID,
                                     history_id=self.history,
                                     tool_inputs={"name": self.organism})

    def purge_genome(self):
        """
        Run the "analysis add" tool for the genome analysis in the history

        NOTE(review): despite its name, this method runs the analysis_add_analysis tool
        (it adds an analysis, it does not delete one) with a hard-coded execution date --
        confirm this is the intended behavior.
        :return:
        """
        self.instance.tools.run_tool(tool_id=self.ANALYSIS_ADD_TOOL_ID,
                                     history_id=self.history,
                                     tool_inputs={"name": self.genome,
                                                  "program": "Performed by Genoscope",
                                                  "programversion": "genome v" + self.genome_version,
                                                  "sourcename": "Genoscope",
                                                  "date_executed": "2020-02-02"})

    def show_pannel(self):
        """
        Print and return the tool panel of the galaxy instance

        :return: what the instance's tools.get_tool_panel() returned
        """
        # Query the instance once and reuse the result for both the print and the return
        tool_panel = self.instance.tools.get_tool_panel()
        print(tool_panel)
        return tool_panel
......@@ -19,6 +19,7 @@ class Workflow:
self.genome_version = parameters_dict["genome version"]
self.ogs_version = parameters_dict["ogs version"]
self.sex = parameters_dict["sex"]
self.date = parameters_dict["date"]
self.custom_ga_file = None
self.custom_ga_file_path = None
self.preset_ga_file = None
......@@ -33,7 +34,7 @@ class Workflow:
def generate(self, working_directory, main_directory):
"""
Generation of a galaxy workflow using the defined parameters in the .json input file
Output format is .ga (basically a .json for galaxy)
Output format is a json dict
:param working_directory:
:param main_directory:
......@@ -41,43 +42,89 @@ class Workflow:
"""
# template workflow as a string
# template_workflow_str = '"{"uuid": "ea9c3050-416f-4098-a7ff-b05c952bcd73", "tags": [], "format-version": "0.1", "name": "test", "version": 2, "steps": {"0": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "txt", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "d9b75b03-49e7-4a81-a67c-eaf6a9671905", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"organism\": \"\\\"2\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\"}", "id": 0, "tool_shed_repository": {"owner": "gga", "changeset_revision": "13da56fdaeb1", "name": "chado_organism_delete_organisms", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "f70569bc-9ac0-441a-a2d8-b547086a5bdf", "errors": null, "name": "Chado organism delete", "post_job_actions": {}, "label": "$ORGADELETE", "inputs": [], "position": {"top": 362, "left": 200}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "type": "tool"}, "1": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8ca0b891-0f01-4787-9b7f-57105dc303b0", "label": null}], "input_connections": {}, "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"common\": \"\\\"$COMMON\\\"\", \"genus\": \"\\\"$GENUS\\\"\", \"species\": \"\\\"$SPECIES\\\"\", \"abbr\": \"\\\"$ABBR\\\"\"}", "id": 1, "tool_shed_repository": {"owner": "gga", "changeset_revision": "0f4956cec445", "name": "chado_organism_add_organism", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": 
"24f0e175-f932-4e48-8b42-a53d9a432d5e", "errors": null, "name": "Chado organism add", "post_job_actions": {}, "label": "$ORGADD", "inputs": [], "position": {"top": 361, "left": 467.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "type": "tool"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8fa0e728-8803-4800-93b4-70f906f95f87", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$GENOME\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 2, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "76cbbd55-f1ac-4e48-be3c-c7bbda5add4c", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDGENOME", "inputs": [], "position": {"top": 307, "left": 690}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "5e7da027-0723-4077-8885-2dbe51cb5dda", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$OGS\\\"\", \"sourceuri\": 
\"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "4d1ffee4-00b2-445d-b630-b7b774c17873", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDOGS", "inputs": [], "position": {"top": 395, "left": 697}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "737dddc9-ae1b-463d-99fa-d9176053594d", "label": null}], "input_connections": {}, "tool_state": "{\"do_update\": \"\\\"false\\\"\", \"relationships\": \"{\\\"__current_case__\\\": 0, \\\"rel_type\\\": \\\"none\\\"}\", \"ext_db\": \"{\\\"db\\\": \\\"\\\", \\\"re_db_accession\\\": \\\"\\\"}\", \"analysis_id\": \"\\\"4\\\"\", \"re_uniquename\": \"\\\"\\\"\", \"match_on_name\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"re_name\": \"\\\"\\\"\", \"fasta\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"wait_for\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"organism\": \"\\\"2\\\"\", \"sequence_type\": \"\\\"contig\\\"\"}", "id": 4, "tool_shed_repository": {"owner": "gga", "changeset_revision": "1421dbc33a92", "name": "chado_feature_load_fasta", "tool_shed": 
"toolshed.g2.bx.psu.edu"}, "uuid": "3d417ced-fc48-4c04-8a92-fdb7b9fecafc", "errors": null, "name": "Chado load fasta", "post_job_actions": {}, "label": "$LOADFASTA", "inputs": [{"name": "fasta", "description": "runtime parameter for tool Chado load fasta"}, {"name": "wait_for", "description": "runtime parameter for tool Chado load fasta"}], "position": {"top": 306, "left": 933.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}"'
# TODO: store workflow as a var instead of file
os.chdir(path=working_directory)
self.preset_ga_file = main_directory + "preset_workflow.ga"
self.preset_ga_file = main_directory + "Galaxy-Workflow-preset_workflow.ga"
if self.strain != "":
self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
print("Workflow file @ " + self.custom_ga_file_path)
# print("Workflow file @ " + self.custom_ga_file_path)
else:
self.custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
print("Workflow file @ " + self.custom_ga_file_path)
# print("Workflow file @ " + self.custom_ga_file_path)
with open(self.preset_ga_file, 'r') as ga_in_file:
ga_in = str(ga_in_file.readlines())
ga_in = ga_in.replace("$OGS", "OGS")
ga_in = ga_in.replace("$VERSION", self.ogs_version)
ga_in = ga_in.replace("$GENUS", self.genus)
ga_in = ga_in.replace("$SPECIES", self.species)
ga_in = ga_in.replace("$ABBR", self.abbr)
ga_in = ga_in.replace("$STRAIN", self.strain)
ga_in = ga_in.replace("$PERFORMEDBY", self.performed)
ga_in = ga_in.replace("$COMMON", self.common)
ga_in = ga_in.replace("$ORGA", self.full)
ga_in = ga_in.replace("$ADDAN", "Add analysis")
ga_in = ga_in.replace("\\\\", "\\") # restore the correct amount of backslashes in the ga file
ga_in: str = str(ga_in_file.readlines())
# Pre-filled workflow, useless for parameters and data set on runtime
# ga_in = ga_in.replace("$OGS", "OGS")
# ga_in = ga_in.replace("$VERSION", self.ogs_version)
# ga_in = ga_in.replace("$GENUS", self.genus)
# ga_in = ga_in.replace("$SPECIES", self.species)
# ga_in = ga_in.replace("$ABBR", self.abbr)
# ga_in = ga_in.replace("$STRAIN", self.strain)
# ga_in = ga_in.replace("$PERFORMEDBY", self.performed)
# ga_in = ga_in.replace("$COMMON", self.common)
# ga_in = ga_in.replace("$ORGA", self.full)
# ga_in = ga_in.replace("$ADDAN", "Add analysis")
ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import
workflow_name = '"name": "' + self.full + '"'
ga_in = ga_in.replace('"name": "preset"', workflow_name)
ga_in = ga_in[2:]
ga_in = ga_in[:-4]
# ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
# print(workflow_name)
ga_in = ga_in[2:-2]
# ga_in = ga_in[:-2]
self.workflow = ga_in
print(str(main_directory + "Galaxy-Workflow-preset_workflow.ga"))
return ga_in
def dict_port(self):
def set_params(self, datasets):
    """
    Build the parameters to set at runtime for the tools of the workflow

    The returned dict is keyed by workflow step (as a string), each value being
    the dict of tool parameters for that step. Steps "0" to "3" and "10" get no
    parameters here.
    NOTE(review): the step numbers presumably match the step ids of the preset
    workflow file -- confirm against the .ga file.

    :param datasets: datasets ids (not used yet to build the parameters)
    :return: dict of the tools parameters, keyed by step
    """
    genome_analysis_name = self.genus + " " + self.species + " genome v" + self.genome_version
    ogs_analysis_name = self.genus + " " + self.species + " OGS" + self.ogs_version

    params = dict()
    params["4"] = {"confirm": "True"}
    params["5"] = {"abbr": self.abbr, "genus": self.genus, "species": self.species, "common": self.common}
    params["6"] = {"name": genome_analysis_name,
                   "program": "Performed by Genoscope",
                   "programversion": "genome v" + self.genome_version,
                   "sourcename": "Genoscope",
                   "date_executed": self.date
                   }
    params["7"] = {"name": ogs_analysis_name,
                   "program": "Performed by Genoscope",
                   # The OGS analysis is versioned with the OGS version (not the genome one),
                   # consistently with its name
                   "programversion": "OGS" + self.ogs_version,
                   "sourcename": "Genoscope",
                   "date_executed": self.date
                   }
    params["8"] = {"genus": self.genus,
                   "species": self.species,
                   "common": self.common,
                   "abbr": self.abbr
                   }
    # No execution date is passed to this step
    params["9"] = {"name": genome_analysis_name,
                   "program": "Performed by Genoscope",
                   "programversion": "genome v" + self.genome_version,
                   "sourcename": "Genoscope",
                   }
    return params
def set_datamap(self):
    """
    Placeholder for the datasets mapping of the workflow inputs (not implemented yet)

    Only reads the galaxy instance for now.
    :return:
    """
    galaxy_instance = self.instance
def dict_port(self):
"""
Import workflow into a galaxy instance from a json dict
:return:
"""
try:
self.instance.workflows.import_workflow_dict(workflow_dict=self.workflow)
except ConnectionError:
......@@ -86,8 +133,8 @@ class Workflow:
def port(self):
"""
Import workflow into a galaxy instance
Importing from string doesnt work (MUST be dict) -> TODO: handle dict import
Import workflow into a galaxy instance from a local file
:return:
"""
try:
......@@ -97,9 +144,18 @@ class Workflow:
else:
return True
def get_workflow_name(self):
    """
    Name of the imported workflow

    :return: the fixed name "preset_workflow"
    """
    workflow_name = "preset_workflow"
    return workflow_name
def show(self):
"""
Print the instance's main workflow to stdout (dict form)
:return:
"""
workflow_id = self.instance.workflows.get_workflows()[0]['id']
......@@ -108,33 +164,29 @@ class Workflow:
def store(self):
    """
    Get the id of the instance's workflow

    Only the first workflow listed by the galaxy instance is considered.
    :return: id of the first workflow of the instance
    """
    instance_workflows = self.instance.workflows.get_workflows()
    first_workflow = instance_workflows[0]
    return first_workflow['id']
def delete(self):
    """
    Delete custom workflow (placeholder, nothing is deleted yet)

    :return: None
    """
    result = None
    return result
def run(self):
def run(self, datamap, params):
"""
Run the custom workflow into a galaxy instance
Input datasets in the form of a list
Params
:return:
"""
wf_id = self.show()
datamap = {"genus": self.genus, "species": self.species, "strain": self.strain, "abbr": self.abbr,
"full": self.full, "common": self.common, "ogs_version": self.ogs_version,
"genome_version": self.genome_version, "sex": self.sex, "performed": self.performed}
return None
# def add_step(self, step_position, description, name):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment