Commit 0b4c80b7
Authored 5 years ago by Arthur Le Bars

Functional version 1.0: Chado loading + Tripal sync + jbrowse
Parent: c9b3ff8f
1 merge request: !1 Release 1.0

Showing 4 changed files
main.py                       +53 −69
toolrunner.py                 +112 −11
workflow.py                   +87 −52
workflows/tool_example.json   +0 −0

with 252 additions and 132 deletions
main.py +53 −69
from bioblend import galaxy
import bioblend.galaxy.objects as bbo
import bioblend as bb
import argparse
import os
import sys
import subprocess
import json
import urllib3 as ul
from chado import ChadoInstance
from workflow import Workflow
from filetransfer import FileTransfer
from toolrunner import ToolRunner
# TODO: script description
from webscrap import WebScrap

"""
TODO: script description
python3 ~/PycharmProjects/ggauto/gga_load_data/main.py ~/PycharmProjects/ggauto/gga_load_data/dataloader.json
"""


class Autoload:
    """
    TODO: turn main into an object
    """
    def __init__(self, json_in):
        self.json_in = json_in
...
...
@@ -83,7 +90,7 @@ def main():
    # Connect to the galaxy instance of the current species TODO: API key connection issues
    gi = galaxy.GalaxyInstance(url=instance_url,
-                              key="0e993414b2f876515e74dd890f16ffc7",
+                              key="3b36455cb16b4d0e4348e2c42f4bb934",
                               email="alebars@sb-roscoff.fr",
                               password="pouet",
                               verify=True)
...
...
@@ -107,7 +114,7 @@ def main():
            print("Cannot access " + sp_dir + ", run with higher privileges")
            break

-   # Production instance example TODO: secure pswd and API key (if needed)
+   # Production instance example TODO: secure pswd and API key + manage API keys
    # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
    #                            key="84dfbee3c0efa9155518f01fbeff57c8",
    #                            email="gga@sb-roscoff.fr",
...
...
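A side note on the "secure pswd and API key" TODO above: one conventional fix is to read the key from the environment instead of hardcoding it. A minimal sketch, assuming an environment variable named GALAXY_API_KEY (a name chosen for this sketch, not used by the commit):

import os
from bioblend import galaxy

def connect(instance_url):
    # hypothetical variable name; any secret store would do
    api_key = os.environ.get("GALAXY_API_KEY")
    if not api_key:
        raise RuntimeError("GALAXY_API_KEY is not set")
    return galaxy.GalaxyInstance(url=instance_url, key=api_key)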
@@ -145,23 +152,13 @@ def main():
    # print("Data successfully loaded into docker container for " + genus_species_strain)
    # generate workflow file and run it in the galaxy instance
-   workflow = Workflow(parameters_dict=sp_dict, instance=gi)
-   print("Generating custom workflow for " + genus_species_strain)
-   wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir)
    gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
    hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
    hi_id = hi[0]["id"]
    # print(hi)
    li = gi.libraries.get_libraries()  # only one library
    # print(li)
    li_id = gi.libraries.get_libraries()[0]["id"]  # project data folder/library
    # print(li_id)
    fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
    # print(fo_gi)
    fo_id = {}
    current_fo_name = ""
...
...
@@ -174,16 +171,14 @@ def main():
            if k == "id":
                fo_id[current_fo_name] = v
    # TODO: turn data id parsing into a function
    print("Folders and datasets IDs: ")
    datasets = dict()
    for k, v in fo_id.items():
        print("\t" + k + ": " + v)
        if k == "/genome":
            sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
            # print(sub_folder_content)
            for k2, v2 in sub_folder_content.items():
                # print(k2)
                # print(v2)
                for e in v2:
                    if type(e) == dict:
                        if e["name"].endswith(".fa"):
...
...
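The folder/dataset ID walk above is what the "turn data id parsing into a function" TODO asks to factor out. A hedged sketch of that extraction, assuming gi.libraries.get_folders() keeps returning a list of {"name": ..., "id": ...} dicts as the loop implies:

def map_folder_ids(folders):
    # build {folder name: folder id} from gi.libraries.get_folders() output
    fo_id = {}
    for folder in folders:
        if "name" in folder and "id" in folder:
            fo_id[folder["name"]] = folder["id"]
    return fo_id

# usage: fo_id = map_folder_ids(gi.libraries.get_folders(library_id=li_id))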
@@ -191,10 +186,7 @@ def main():
                            print("\t\t" + e["name"] + ": " + e["ldda_id"])
        elif k == "/annotation/" + genus_species:
            sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
            # print(sub_folder_content)
            for k2, v2 in sub_folder_content.items():
                # print(k2)
                # print(v2)
                for e in v2:
                    if type(e) == dict:
                        # TODO: manage several files of the same type
...
...
@@ -215,74 +207,66 @@ def main():
    gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
    gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])

    # Workflow generation
    workflow = Workflow(parameters_dict=sp_dict, instance=gi, history_id=current_hi_id)
    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="preset_workflow")
    tools = gi.tools.get_tool_panel()  # tools panel -> alternative to wf
    # print(tools)
    wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
    # print(wf_dict)
    # print(wf_dict_json.count("id"))
    # TODO: fill in workflow inputs and attributes (doable?)
-   gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
+   print(gi.workflows.import_workflow_dict(workflow_dict=wf_dict))
    wf_name = workflow.get_workflow_name()
    wf_attr = gi.workflows.get_workflows(name=wf_name)
    # print(wf_attr)
    wf_id = wf_attr[0]["id"]
    wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
    print("Workflow ID: " + wf_id)
    print(wf_show["inputs"])
-   # toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
+   toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
    # toolrunner.purge_organisms()
-   wf_params = workflow.set_params(datasets=datasets)
    # wf_o = bbo.Workflow(wf_dict=wf_dict, gi=gi)
+   wf_params = workflow.set_main_workflow_parameters(datasets=datasets)
    print("Inputs:")
    print(wf_show["inputs"])
    datamap = dict()
    datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
    datamap["1"] = {"src": "hda", "id": datasets["gff_file"]}
-   datamap["2"] = {"src": "hda", "id": datasets["transcripts_file"]}
-   datamap["4"] = {"src": "hda", "id": datasets["proteins_file"]}
+   datamap["2"] = {"src": "hda", "id": datasets["proteins_file"]}
+   datamap["3"] = {"src": "hda", "id": datasets["transcripts_file"]}
    #
    # gi.workflows.invoke_workflow(workflow_id=wf_id,
    #                              history_id=current_hi_id,
    #                              params=wf_params,
    #                              inputs=datamap)
    #
    gi.workflows.delete_workflow(workflow_id=wf_id)
    datamap = dict()
    datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
    datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
    wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
    wf_dict = json.loads(wf_dict_json)  # doesn't work with eval()
    gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
    wf_attr = gi.workflows.get_workflows(name="jbrowse")
    wf_id = wf_attr[0]["id"]
    wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
    print("Jbrowse workflow ID: " + wf_id)
    wf_params = workflow.set_jbrowse_workflow_parameters()
    # print(gi.workflows.show_workflow(wf_id))
-   gi.workflows.run_workflow(workflow_id=wf_id, history_id=current_hi_id, params=wf_params, dataset_map=datamap)
+   gi.workflows.invoke_workflow(workflow_id=wf_id, history_id=current_hi_id, params=wf_params, inputs=datamap)
    gi.workflows.delete_workflow(workflow_id=wf_id)
    # remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
    # gi.histories.delete_history(history_id=current_hi_id, purge=True)
    # gi.workflows.delete_workflow(workflow_id=wf_id)
# # PRE FILLED METHOD
# wf_id = workflow.store()
# hi_id = gi.histories.get_histories()[0]["id"]
# print("Workflow id: " + wf_id)
# print("History id: " + hi_id)
# wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
# # print(wf_show["inputs"]) # ->no inputs
# # workflow.port()
# li_id = gi.libraries.get_libraries()[0]["id"]
# # gi.folders.update_folder() # TODO: add method to enable data updates
# tsi = gi.toolshed.get_repositories() # tool shed repo access point
# # print(gi.users.get_users()) # TODO: users management
# # print(gi.libraries.show_library(li_id))
# fo_gi = gi.libraries.get_folders(library_id=li_id) # data location
# fo_id = {}
# current_fo_name = ""
# # print(fo_gi)
# # folders ids: access to data to run the first tools
# for i in fo_gi:
# for k, v in i.items():
# if k == "name":
# fo_id[v] = 0
# current_fo_name = v
# if k == "id":
# fo_id[current_fo_name] = v
# print("Folders id: ")
# for k, v in fo_id.items():
# print("\t" + k + ": " + v)
# workflow.show()
# # gi.workflows.run_workflow(workflow_id=wf_id) # pre filled workflow, use the set on runtime approach instead
    os.chdir(main_dir)
    print("\n")
...
...
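main() runs the same bioblend lifecycle twice (for "preset_workflow" and "jbrowse"): import the generated dict, look the workflow up by name, invoke it, then delete the imported copy. A condensed sketch of that pattern; the helper name is illustrative only:

import json

def run_generated_workflow(gi, wf_json, wf_name, history_id, datamap, params):
    # import the .ga dict produced by Workflow.generate() (json.loads, not eval)
    gi.workflows.import_workflow_dict(workflow_dict=json.loads(wf_json))
    wf_id = gi.workflows.get_workflows(name=wf_name)[0]["id"]
    # invoke with runtime datasets (datamap) and per-step tool parameters (params)
    invocation = gi.workflows.invoke_workflow(workflow_id=wf_id,
                                              history_id=history_id,
                                              inputs=datamap,
                                              params=params)
    # the imported copy is disposable once invoked
    gi.workflows.delete_workflow(workflow_id=wf_id)
    return invocation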
toolrunner.py +112 −11
...
...
@@ -2,7 +2,7 @@ from bioblend import galaxy
from bioblend.galaxy import GalaxyInstance
import os

"""
-Methods to run Chado and Tripal tools on galaxy
+Methods to run all Chado and Tripal tools on galaxy
"""
...
...
@@ -20,20 +20,21 @@ class ToolRunner:
        self.genome_version = parameters_dict["genome version"]
        self.ogs_version = parameters_dict["ogs version"]
        self.sex = parameters_dict["sex"]
        self.date = parameters_dict["date"]
        self.custom_ga_file = None
        self.custom_ga_file_path = None
        self.preset_ga_file = None
        self.analysis = None
        self.organism = None
        self.program = None
-       self.ogs = str("OGS" + self.ogs_version)
-       self.genome = str(self.full + " genome v" + self.genome_version)
        if self.strain != "":
            self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain
            self.full = "_".join([self.genus, self.species, self.strain, self.sex])
        else:
            self.abbr = self.genus[0].lower() + "_" + self.species
            self.full = "_".join([self.genus, self.species, self.strain, self.sex])
+       self.ogs = str("OGS" + self.ogs_version)
+       self.genome = str(self.full + " genome v" + self.genome_version)

    def show_tool(self, tool_id):
        print(self.instance.tools.show_tool(tool_id=tool_id, io_details=True))
...
...
@@ -44,15 +45,115 @@ class ToolRunner:
        self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
                                     history_id=self.history,
                                     tool_inputs={"name": self.organism})

    def purge_analyses(self):
        return None

    def purge_genome(self):
        self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
                                     history_id=self.history,
                                     tool_inputs={"name": self.genome,
                                                  "program": "Performed by Genoscope",
                                                  "programversion": str("genome v" + self.genome_version),
                                                  "sourcename": "Genoscope",
                                                  "date_executed": "2020-02-02"})

    def get_organism(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"genus": self.genus, "species": self.species})

    def get_ogs_analysis(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})

    def get_genome_analysis(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})

    def add_organism(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"abbr": self.abbr,
                                                         "genus": self.genus,
                                                         "species": self.species,
                                                         "common": self.common})

    def add_ogs(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
                                                         "program": "Performed by Genoscope",
                                                         "programversion": str("OGS" + self.genome_version),
                                                         "sourcename": "Genoscope",
                                                         "date_executed": self.date})

    def add_genome(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
                                                         "program": "Performed by Genoscope",
                                                         "programversion": str("genome v" + self.genome_version),
                                                         "sourcename": "Genoscope",
                                                         "date_executed": self.date})

    def add_jbrowse(self, datasets):
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy7",
                                            history_id=self.history,
                                            tool_inputs={"genomes": datasets["genome_file"]})

    def add_organism_jbrowse(self):
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
                                            history_id=self.history,
                                            tool_inputs={"multiple": "false",
                                                         "name": "foo",
                                                         "unique_id": "bar"})

    # Homo sapiens deletion TODO: concat the 2 calls
    def get_sapiens_id(self):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"genus": "Homo", "species": "species"})

    def delete_sapiens(self, hs_id):
        """
        Tool invocation must be as return (or else it executes but returns nothing when called)
        :return:
        """
        return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
                                            history_id=self.history,
                                            tool_inputs={"organism": hs_id})
# def load_fasta(self, inputs_dict):
# """
#
# :return:
# """
# return self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2",
# history_id=self.history,
# tool_inputs=)
    def show_pannel(self):
        print(self.instance.tools.get_tool_panel())
...
...
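The docstring repeated through ToolRunner ("Tool invocation must be as return...") matters because run_tool() returns the job description the caller needs in order to reach the tool's JSON result, which is how workflow.py consumes these methods. A hedged sketch of that read-back pattern (helper name illustrative):

import json

def first_record(gi, job):
    # job is the dict returned by gi.tools.run_tool(); its first output is the
    # Chado tool's JSON result dataset
    dataset_id = job["outputs"][0]["id"]
    raw = gi.datasets.download_dataset(dataset_id=dataset_id)
    records = json.loads(raw)
    return records[0] if records else None

# usage: org = first_record(gi, toolrunner.get_organism())
#        org_id = str(org["organism_id"]) if org else None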
workflow.py +87 −52
import os
from bioblend.galaxy import GalaxyInstance
from toolrunner import ToolRunner
import json

"""
Workflow creation for generation and visualization of data and analyses output
"""
...
...
@@ -8,7 +9,8 @@ Workflow creation for generation and visualization of data and analyses output
class Workflow:

-   def __init__(self, parameters_dict, instance):
+   def __init__(self, parameters_dict, instance, history_id):
+       self.history_id = history_id
        self.instance = instance
        self.parameters_dict = parameters_dict
        self.genus = parameters_dict["genus"]
...
...
@@ -23,6 +25,7 @@ class Workflow:
        self.custom_ga_file = None
        self.custom_ga_file_path = None
        self.preset_ga_file = None
+       self.sp_url = self.genus[0].lower() + self.genus[1:] + "_" + self.species
        if self.strain != "":
            self.abbr = self.genus[0].lower() + "_" + self.species + "_" + self.strain
            self.full = "_".join([self.genus, self.species, self.strain, self.sex])
...
...
@@ -31,7 +34,7 @@ class Workflow:
            self.full = "_".join([self.genus, self.species, self.strain, self.sex])
        self.workflow = None

-   def generate(self, working_directory, main_directory):
+   def generate(self, working_directory, main_directory, workflow_name):
        """
        Generation of a galaxy workflow using the defined parameters in the .json input file
        Output format is a json dict
...
...
@@ -42,10 +45,10 @@ class Workflow:
"""
# template workflow as a string
# template_workflow_str = '"{"uuid": "ea9c3050-416f-4098-a7ff-b05c952bcd73", "tags": [], "format-version": "0.1", "name": "test", "version": 2, "steps": {"0": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "txt", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "d9b75b03-49e7-4a81-a67c-eaf6a9671905", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"organism\": \"\\\"2\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\"}", "id": 0, "tool_shed_repository": {"owner": "gga", "changeset_revision": "13da56fdaeb1", "name": "chado_organism_delete_organisms", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "f70569bc-9ac0-441a-a2d8-b547086a5bdf", "errors": null, "name": "Chado organism delete", "post_job_actions": {}, "label": "$ORGADELETE", "inputs": [], "position": {"top": 362, "left": 200}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2", "type": "tool"}, "1": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8ca0b891-0f01-4787-9b7f-57105dc303b0", "label": null}], "input_connections": {}, "tool_state": "{\"comment\": \"\\\"\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"common\": \"\\\"$COMMON\\\"\", \"genus\": \"\\\"$GENUS\\\"\", \"species\": \"\\\"$SPECIES\\\"\", \"abbr\": \"\\\"$ABBR\\\"\"}", "id": 1, "tool_shed_repository": {"owner": "gga", "changeset_revision": "0f4956cec445", "name": "chado_organism_add_organism", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "24f0e175-f932-4e48-8b42-a53d9a432d5e", "errors": null, "name": "Chado organism add", "post_job_actions": {}, "label": "$ORGADD", "inputs": [], "position": {"top": 361, "left": 467.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2", "type": "tool"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "8fa0e728-8803-4800-93b4-70f906f95f87", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$GENOME\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 2, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "76cbbd55-f1ac-4e48-be3c-c7bbda5add4c", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDGENOME", "inputs": [], "position": {"top": 307, "left": 690}, "annotation": "", "content_id": 
"toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "5e7da027-0723-4077-8885-2dbe51cb5dda", "label": null}], "input_connections": {}, "tool_state": "{\"__page__\": null, \"name\": \"\\\"$OGS\\\"\", \"sourceuri\": \"\\\"\\\"\", \"sourcename\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"programversion\": \"\\\"\\\"\", \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"sourceversion\": \"\\\"\\\"\", \"program\": \"\\\"$PERFORMEDBY\\\"\", \"algorithm\": \"\\\"\\\"\", \"date_executed\": \"\\\"\\\"\", \"description\": \"\\\"\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "gga", "changeset_revision": "3a1f3c9b755b", "name": "chado_analysis_add_analysis", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "4d1ffee4-00b2-445d-b630-b7b774c17873", "errors": null, "name": "Chado analysis add", "post_job_actions": {}, "label": "$ADDOGS", "inputs": [], "position": {"top": 395, "left": 697}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "tool_version": "2.3.2", "outputs": [{"type": "json", "name": "results"}], "workflow_outputs": [{"output_name": "results", "uuid": "737dddc9-ae1b-463d-99fa-d9176053594d", "label": null}], "input_connections": {}, "tool_state": "{\"do_update\": \"\\\"false\\\"\", \"relationships\": \"{\\\"__current_case__\\\": 0, \\\"rel_type\\\": \\\"none\\\"}\", \"ext_db\": \"{\\\"db\\\": \\\"\\\", \\\"re_db_accession\\\": \\\"\\\"}\", \"analysis_id\": \"\\\"4\\\"\", \"re_uniquename\": \"\\\"\\\"\", \"match_on_name\": \"\\\"false\\\"\", \"__page__\": null, \"__rerun_remap_job_id__\": null, \"psql_target\": \"{\\\"__current_case__\\\": 0, \\\"method\\\": \\\"remote\\\"}\", \"re_name\": \"\\\"\\\"\", \"fasta\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"wait_for\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"organism\": \"\\\"2\\\"\", \"sequence_type\": \"\\\"contig\\\"\"}", "id": 4, "tool_shed_repository": {"owner": "gga", "changeset_revision": "1421dbc33a92", "name": "chado_feature_load_fasta", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "3d417ced-fc48-4c04-8a92-fdb7b9fecafc", "errors": null, "name": "Chado load fasta", "post_job_actions": {}, "label": "$LOADFASTA", "inputs": [{"name": "fasta", "description": "runtime parameter for tool Chado load fasta"}, {"name": "wait_for", "description": "runtime parameter for tool Chado load fasta"}], "position": {"top": 306, "left": 933.5}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.2", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}"'
-       # TODO: store workflow as a var instead of file
+       # TODO: store workflow as a var instead of file (once it runs smoothly)
        os.chdir(path=working_directory)
-       self.preset_ga_file = main_directory + "Galaxy-Workflow-preset_workflow.ga"
+       self.preset_ga_file = main_directory + "Galaxy-Workflow-" + workflow_name + ".ga"
        if self.strain != "":
            self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
            self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
...
...
@@ -55,67 +58,99 @@ class Workflow:
            self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
        # print("Workflow file @ " + self.custom_ga_file_path)
        with open(self.preset_ga_file, 'r') as ga_in_file:
-           ga_in: str = str(ga_in_file.readlines())
            # Pre-filled workflow, useless for parameters and data set on runtime
            # ga_in = ga_in.replace("$OGS", "OGS")
            # ga_in = ga_in.replace("$VERSION", self.ogs_version)
            # ga_in = ga_in.replace("$GENUS", self.genus)
            # ga_in = ga_in.replace("$SPECIES", self.species)
            # ga_in = ga_in.replace("$ABBR", self.abbr)
            # ga_in = ga_in.replace("$STRAIN", self.strain)
            # ga_in = ga_in.replace("$PERFORMEDBY", self.performed)
            # ga_in = ga_in.replace("$COMMON", self.common)
            # ga_in = ga_in.replace("$ORGA", self.full)
            # ga_in = ga_in.replace("$ADDAN", "Add analysis")
+           ga_in = str(ga_in_file.readlines())
            ga_in = ga_in.replace("\\\\", "\\")  # to restore the correct amount of backslashes in the workflow string before import
            workflow_name = '"name": "' + self.full + '"'
            ga_in = ga_in.replace('"name": "NAME"', str('"name": "' + self.genus.lower()[0] + self.species) + '"')
            ga_in = ga_in.replace('{"unique_id": "UNIQUEID"}', str('{"unique_id": "' + self.genus + " " + self.species) + '"')
            ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"', "http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
            # ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true')
            # workflow_name = '"name": "' + self.full + '"'
            # ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
            # print(workflow_name)
            ga_in = ga_in[2:-2]
            # ga_in = ga_in[:-2]  # if the line above doesn't output a correct json
            self.workflow = ga_in
            print(str(main_directory + "Galaxy-Workflow-preset_workflow.ga"))
            return ga_in
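The ga_in[2:-2] slice above is needed because str(ga_in_file.readlines()) wraps the one-line .ga JSON in a Python list literal; the earlier replace("\\\\", "\\") likewise undoes the backslash doubling that repr() introduces. A small illustration with hypothetical file content:

line = '{"a_galaxy_workflow": "true"}'  # stand-in for the one-line .ga JSON
wrapped = str([line])                   # a list-literal string: ['{"a_galaxy_workflow": "true"}']
assert wrapped[2:-2] == line            # [2:-2] strips the "['" and "']" delimiters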
-   def set_params(self, datasets):
+   def set_main_workflow_parameters(self, datasets):
        """
        Test function
        :return:
        """
        # TODO: move tool calls to main/autoload
        toolrunner = ToolRunner(parameters_dict=self.parameters_dict, instance=self.instance, history=self.history_id)
        toolrunner.add_organism()
        toolrunner.add_ogs()
        toolrunner.add_genome()
        org_id: str = None
        genome_analysis_id: str = None
        ogs_analysis_id: str = None
        org = toolrunner.get_organism()
        org_job_out = org["outputs"][0]["id"]
        org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
        try:
            org_output = json.loads(org_json_output)[0]
            org_id = str(org_output["organism_id"])  # needs to be str to be recognized by the chado tool
            global_org_id = org_id
        except IndexError:
            print("No organism matching " + self.full + " exists in the Chado database")
        ogs_analysis = toolrunner.get_ogs_analysis()
        ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
        ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
        try:
            ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
            ogs_analysis_id = str(ogs_analysis_output["analysis_id"])  # needs to be str to be recognized by the chado tool
            global_ogs_id = ogs_analysis_id
        except IndexError:
            print("No matching OGS analysis exists in the Chado database")
        genome_analysis = toolrunner.get_genome_analysis()
        genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
        genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
        try:
            genome_analysis_output = json.loads(genome_analysis_json_output)[0]
            genome_analysis_id = str(genome_analysis_output["analysis_id"])  # needs to be str to be recognized by the chado tool
            global_genome_id = genome_analysis_id
        except IndexError:
            print("No matching genome analysis exists in the Chado database")
        params = dict()
# params["0"] = {}
# params["1"] = {}
# params["2"] = {}
# params["3"] = {}
        params["4"] = {"confirm": "True"}
        params["5"] = {"abbr": self.abbr,
                       "genus": self.genus,
                       "species": self.species,
                       "common": self.common}
# params["2"] = {"analysis": str(self.genus + " " + self.species + " genome v" + self.genome_version)}
# params["3"] = {"analysis": str(self.genus + " " + self.species + " OGS" + self.ogs_version)}
        params["6"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version,
                       "program": "Performed by Genoscope",
                       "programversion": str("genome v" + self.genome_version),
                       "sourcename": "Genoscope",
                       "date_executed": self.date}
        params["7"] = {"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
                       "program": "Performed by Genoscope",
                       "programversion": str("OGS" + self.genome_version),
                       "sourcename": "Genoscope",
                       "date_executed": self.date}
        params["8"] = {"genus": self.genus,
                       "species": self.species,
                       "common": self.common,
                       "abbr": self.abbr}
        params["0"] = {}
        params["1"] = {}
        params["2"] = {}
        params["3"] = {}
        params["4"] = {"organism": org_id,
                       "analysis_id": genome_analysis_id,
                       "do_update": "true"}
# the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true"
        params["5"] = {"organism": org_id,
                       "analysis_id": ogs_analysis_id}
        params["9"] = {"name": self.genus + " " + self.species + " genome v" + self.genome_version,
                       "program": "Performed by Genoscope",
                       "programversion": str("genome v" + self.genome_version),
                       "sourcename": "Genoscope"}
# params["10"] = {"organism": "13", "analysis": "2", "sequence": "contig"}
        params["6"] = {"organism_id": org_id}
        params["7"] = {"analysis_id": ogs_analysis_id}
        params["8"] = {"analysis_id": genome_analysis_id}
        params["9"] = {"organism_id": org_id}
        return params
    def set_jbrowse_workflow_parameters(self):
        params = dict()
        params["0"] = {}
        params["1"] = {}
        # jbrowse tools parameters are inside nested dictionaries, read tool inputs before adding or modifying anything
        # problem is with Jbrowse add organism --> the parameters are nested
        params["2"] = {}
        # {"jbmenu": {"menu_url": "http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"}}  # {"menu_url": "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"])}}
        params["3"] = {}
# params["3"] = {"name": " ".join([self.genus, self.species + self.strain + "male"]),
# "unique_id": self.genus.lower()[0] + self.species}
# params["3"] = {"name": [{"name": str(self.genus + " " + self.species),
# "unique_id": str(self.genus.lower()[0] + self.species)}]}
    def set_datamap(self):
        gi = self.instance
...
...
workflows/tool_example.json (new file, 0 → 100644) +0 −0