Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
gga_load_data
Manage
Activity
Members
Labels
Plan
Issues
12
Issue boards
Milestones
Wiki
Code
Merge requests
5
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
abims
e-infra
gga_load_data
Commits
3e598ebb
Commit
3e598ebb
authored
4 years ago
by
Arthur Le Bars
Browse files
Options
Downloads
Patches
Plain Diff
Delete workflow.py
parent
cea3f321
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!1
Release 1.0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
workflow.py
+0
-242
0 additions, 242 deletions
workflow.py
with
0 additions
and
242 deletions
workflow.py
deleted
100644 → 0
+
0
−
242
View file @
cea3f321
import
os
from
bioblend.galaxy
import
GalaxyInstance
from
toolrunner
import
ToolRunner
import
json
import
logging
"""
Workflow creation for generation and visualization of data and analyses output
"""
class Workflow:
    """
    Workflow creation for generation and visualization of data and analyses output.

    Holds a (bioblend) Galaxy instance, a target history id and a parameter
    dict describing one organism, and builds/imports Galaxy workflows
    customised for that organism.
    """

    # Runs at class-definition (import) time, configuring the root logger.
    # NOTE(review): global side effect on import; a module-level
    # logging.getLogger(__name__) would be the conventional alternative.
    logging.basicConfig(level=logging.INFO)
def
__init__
(
self
,
parameters_dict
,
instance
,
history_id
):
self
.
history_id
=
history_id
self
.
instance
=
instance
self
.
parameters_dict
=
parameters_dict
self
.
genus
=
parameters_dict
[
"
genus
"
]
self
.
species
=
parameters_dict
[
"
species
"
]
self
.
strain
=
parameters_dict
[
"
strain
"
]
self
.
common
=
parameters_dict
[
"
common
"
]
self
.
performed
=
parameters_dict
[
"
performed by
"
]
self
.
genome_version
=
parameters_dict
[
"
genome version
"
]
self
.
ogs_version
=
parameters_dict
[
"
ogs version
"
]
self
.
sex
=
parameters_dict
[
"
sex
"
]
self
.
date
=
parameters_dict
[
"
date
"
]
self
.
custom_ga_file
=
None
self
.
custom_ga_file_path
=
None
self
.
preset_ga_file
=
None
self
.
sp_url
=
self
.
genus
[
0
].
lower
()
+
self
.
genus
[
1
:]
+
"
_
"
+
self
.
species
if
self
.
strain
!=
""
:
self
.
abbr
=
self
.
genus
[
0
].
lower
()
+
"
_
"
+
self
.
species
+
"
_
"
+
self
.
strain
self
.
full
=
"
_
"
.
join
([
self
.
genus
,
self
.
species
,
self
.
strain
,
self
.
sex
])
else
:
self
.
abbr
=
self
.
genus
[
0
].
lower
()
+
"
_
"
+
self
.
species
self
.
full
=
"
_
"
.
join
([
self
.
genus
,
self
.
species
,
self
.
strain
,
self
.
sex
])
self
.
workflow
=
None
    def generate(self, working_directory, main_directory, workflow_name):
        """
        Generation of a galaxy workflow using the defined parameters in the .json input file
        Output format is a json dict

        :param working_directory: directory to chdir into (changes the process CWD)
        :param main_directory: directory holding the preset "Galaxy-Workflow-<name>.ga" file
        :param workflow_name: basename part of the preset workflow file
        :return: the customised workflow as one big string
        """
        # NOTE(review): a very long commented-out template workflow JSON string
        # ("template_workflow_str = ...") lived here in the original; dropped
        # from this view — recover it from VCS history if needed.
        # TODO: store workflow as a var instead of file (once it runs smoothly)
        os.chdir(path=working_directory)
        self.preset_ga_file = main_directory + "Galaxy-Workflow-" + workflow_name + ".ga"
        # Output filename includes the strain only when one is given.
        if self.strain != "":
            self.custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
            self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
            # print("Workflow file @ " + self.custom_ga_file_path)
        else:
            self.custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
            self.custom_ga_file_path = os.path.abspath(self.custom_ga_file)
            # print("Workflow file @ " + self.custom_ga_file_path)
        with open(self.preset_ga_file, 'r') as ga_in_file:
            # readlines() -> list of lines, then str() of that list: the whole
            # workflow is handled as the repr of a list, hence the [2:-2] trim
            # at the end to strip the "['" / "']" wrapping.
            ga_in = str(ga_in_file.readlines())
            print(ga_in)
            # Substitute the organism unique id into the (heavily escaped) JSON.
            # NOTE(review): exact whitespace/escaping inside these literals is
            # reconstructed — verify against VCS before relying on a match.
            # NOTE(review): the replacement string has no closing "}" — looks
            # suspicious, confirm intent.
            ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
                                  str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
            # Substitute the organism display name.
            ga_in = ga_in.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
                                  str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
            ga_in = ga_in.replace("\\\\", "\\")
            # to restore the correct amount of backslashes in the workflow string before import
            # ga_in = ga_in.replace("\\\\\\\\\\\\", "\\\\\\")
            # Rewrite the jbrowse menu URL for this organism.
            # NOTE(review): the replacement keeps only the genus segment where
            # the template URL had Genus/species — confirm that is intended.
            ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
                                  "http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
            # ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true')
            # workflow_name = '"name": "' + self.full + '"'
            # ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
            # print(workflow_name)
            ga_in = ga_in[2:-2]  # if the line under doesn't outputs a correct json
            # ga_in = ga_in[:-2] # if the line above doesn't outputs a correct json
            self.workflow = ga_in
            # print(ga_in)
            return ga_in
def
set_main_workflow_parameters
(
self
,
datasets
):
"""
Test function
:return:
"""
# TODO: move tool calls to main/autoload
toolrunner
=
ToolRunner
(
parameters_dict
=
self
.
parameters_dict
,
instance
=
self
.
instance
,
history
=
self
.
history_id
)
toolrunner
.
add_organism
()
toolrunner
.
add_ogs
()
toolrunner
.
add_genome
()
org_id
:
str
=
None
genome_analysis_id
:
str
=
None
ogs_analysis_id
:
str
=
None
org
=
toolrunner
.
get_organism
()
org_job_out
=
org
[
"
outputs
"
][
0
][
"
id
"
]
org_json_output
=
self
.
instance
.
datasets
.
download_dataset
(
dataset_id
=
org_job_out
)
try
:
org_output
=
json
.
loads
(
org_json_output
)[
0
]
org_id
=
str
(
org_output
[
"
organism_id
"
])
# needs to be str to be recognized by the chado tool
global_org_id
=
org_id
except
IndexError
:
logging
.
info
(
"
No organism matching
"
+
self
.
full
+
"
exists in the Chado database
"
)
ogs_analysis
=
toolrunner
.
get_ogs_analysis
()
ogs_analysis_job_out
=
ogs_analysis
[
"
outputs
"
][
0
][
"
id
"
]
ogs_analysis_json_output
=
self
.
instance
.
datasets
.
download_dataset
(
dataset_id
=
ogs_analysis_job_out
)
try
:
ogs_analysis_output
=
json
.
loads
(
ogs_analysis_json_output
)[
0
]
ogs_analysis_id
=
str
(
ogs_analysis_output
[
"
analysis_id
"
])
# needs to be str to be recognized by the chado tool
global_ogs_id
=
ogs_analysis_id
except
IndexError
:
logging
.
info
(
"
No matching OGS analysis exists in the Chado database
"
)
genome_analysis
=
toolrunner
.
get_genome_analysis
()
genome_analysis_job_out
=
genome_analysis
[
"
outputs
"
][
0
][
"
id
"
]
genome_analysis_json_output
=
self
.
instance
.
datasets
.
download_dataset
(
dataset_id
=
genome_analysis_job_out
)
try
:
genome_analysis_output
=
json
.
loads
(
genome_analysis_json_output
)[
0
]
genome_analysis_id
=
str
(
genome_analysis_output
[
"
analysis_id
"
])
# needs to be str to be recognized by the chado tool
global_genome_id
=
genome_analysis_id
except
IndexError
:
logging
.
info
(
"
No matching genome analysis exists in the Chado database
"
)
params
=
dict
()
params
[
"
0
"
]
=
{}
params
[
"
1
"
]
=
{}
params
[
"
2
"
]
=
{}
params
[
"
3
"
]
=
{}
params
[
"
4
"
]
=
{
"
organism
"
:
org_id
,
"
analysis_id
"
:
genome_analysis_id
,
"
do_update
"
:
"
true
"
}
# the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true"
params
[
"
5
"
]
=
{
"
organism
"
:
org_id
,
"
analysis_id
"
:
ogs_analysis_id
,
}
params
[
"
6
"
]
=
{
"
organism_id
"
:
org_id
}
params
[
"
7
"
]
=
{
"
analysis_id
"
:
ogs_analysis_id
}
params
[
"
8
"
]
=
{
"
analysis_id
"
:
genome_analysis_id
}
params
[
"
9
"
]
=
{
"
organism_id
"
:
org_id
}
return
params
def
set_jbrowse_workflow_parameters
(
self
):
params
=
dict
()
params
[
"
0
"
]
=
{}
params
[
"
1
"
]
=
{}
# jbrowse tools parameters are inside nested dictionaries, read tool inputs before adding or modifying anything
# problem is with Jbrowse add organism --> the parameters are nested
params
[
"
2
"
]
=
{}
# {"jbmenu": {"menu_url": "http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"}} # {"menu_url": "/".join(["http://localhost/sp", self.sp_url, "feature", self.genus, self.species, "mRNA", "{id}"])}}
params
[
"
3
"
]
=
{}
# params["3"] = {"name": " ".join([self.genus, self.species + self.strain + "male"]),
# "unique_id": self.genus.lower()[0] + self.species}
# params["3"] = {"name": [{"name": str(self.genus + " " + self.species),
# "unique_id": str(self.genus.lower()[0] + self.species)}]}
def
set_datamap
(
self
):
gi
=
self
.
instance
def
dict_port
(
self
):
"""
Import workflow into a galaxy instance from a json dict
:return:
"""
try
:
self
.
instance
.
workflows
.
import_workflow_dict
(
workflow_dict
=
self
.
workflow
)
except
ConnectionError
:
return
False
return
True
def
port
(
self
):
"""
Import workflow into a galaxy instance from a local file
:return:
"""
try
:
self
.
instance
.
workflows
.
import_workflow_from_local_path
(
self
.
custom_ga_file_path
)
except
ConnectionError
:
return
False
else
:
return
True
def
get_workflow_name
(
self
):
"""
Name of the imported workflow
:return:
"""
return
str
(
"
preset_workflow
"
)
def
show
(
self
):
"""
Print the instance
'
s main workflow to stdout (dict form)
:return:
"""
workflow_id
=
self
.
instance
.
workflows
.
get_workflows
()[
0
][
'
id
'
]
return
logging
.
info
(
self
.
instance
.
workflows
.
show_workflow
(
workflow_id
=
workflow_id
))
def
store
(
self
):
"""
Store the instance
'
s workflow
:return:
"""
workflow_id
=
self
.
instance
.
workflows
.
get_workflows
()[
0
][
'
id
'
]
return
workflow_id
def
delete
(
self
):
"""
Delete custom workflow
:return:
"""
return
None
def
run
(
self
,
datamap
,
params
):
"""
Run the custom workflow into a galaxy instance
Input datasets in the form of a list
Params
:return:
"""
return
None
# def add_step(self, step_position, description, name):
# """
# TODO: add a step to the workflow (data loading into chado for example)
#
# :param workflow:
# :return:
# """
# return None
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment