Commit adf8df17 authored by Arthur Le Bars


autoload.py: script implementation, easier to understand and use. small fixes to workflow.py and main.py
parent a9daa4b9
1 merge request: !1 Release 1.0
from bioblend import galaxy
import bioblend
import argparse
import os
import subprocess
import sys
import json
import yaml
import numpy
import pandas
import logging
import re
class Autoload:
"""
Cleaner version of gga_auto_load (for use in production).
This class exposes the most useful parameters for interacting with GGA as attributes (defined in __init__), so new
methods can easily be implemented by copying existing ones (e.g. add a new analysis, run a workflow, ...)
To run the workflows, place them in the same directory as this script, and add the method + the workflow
parameters in the main invocation (at the end of the file)
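A minimal usage sketch (hypothetical values; the expected keys are the ones
read from the input JSON in __init__):
    sp_dict = {"genus": "Undaria", "species": "pinnatifida", "strain": "",
               "sex": "male", "common": "wakame", "date": "2020-01-01",
               "performed by": "Genoscope", "genome version": "1.0",
               "ogs version": "1.0"}
    al = Autoload(species_parameters_dictionary=sp_dict)
    al.load_data_in_galaxy(method="load")
    al.init_instance()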
"""
def __init__(self, species_parameters_dictionary: dict):
self.species_parameters_dictionary = species_parameters_dictionary
self.species = species_parameters_dictionary["species"]
self.genus = species_parameters_dictionary["genus"]
self.strain = species_parameters_dictionary["strain"]
self.sex = species_parameters_dictionary["sex"]
self.common = species_parameters_dictionary["common"]
self.date = species_parameters_dictionary["date"]
self.performed = species_parameters_dictionary["performed by"]
self.genome_version = species_parameters_dictionary["genome version"]
self.ogs_version = species_parameters_dictionary["ogs version"]
self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
self.full_name = " ".join([self.genus_lowercase, self.species, self.strain, self.sex])
self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
self.genus_species = self.genus_lowercase + "_" + self.species
self.instance_url = "http://localhost/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
self.instance: galaxy.GalaxyInstance = None
self.history_id = None
self.library_id = None
self.main_dir = None
self.species_dir = None
self.org_id = None
self.genome_analysis_id = None
self.ogs_analysis_id = None
self.tool_panel = None
# Test the connection to the galaxy instance for the current species
# Additionally set some class attributes
# TODO: auth issues with nginx
self.instance = galaxy.GalaxyInstance(url=self.instance_url,
key="3b36455cb16b4d0e4348e2c42f4bb934",
email="alebars@sb-roscoff.fr",
password="pouet",
verify=True)
logging.info("testing connection to the galaxy instance ...")
try:
self.instance.histories.get_histories()
self.tool_panel = self.instance.tools.get_tool_panel()
except bioblend.ConnectionError:
logging.info("cannot connect to galaxy instance @ " + self.instance_url)
sys.exit()
else:
logging.info("successfully connected to galaxy instance @ " + self.instance_url)
self.main_dir = os.getcwd() + "/"
self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"
def load_data_in_galaxy(self, method):
"""
- create the src_data directory tree for the species
- change headers in the pep file
- load data into the galaxy container with the setup_data_libraries.py script
:param method: currently unused
:return:
"""
os.chdir(self.main_dir)
try:
os.mkdir(self.species_dir)
except FileExistsError:
logging.debug("directory " + self.species_dir + " already exists")
try:
os.chdir(self.species_dir)
working_dir = os.getcwd()
except OSError:
logging.info("cannot access " + self.species_dir + ", run with higher privileges")
sys.exit()
src_data_folders = ["annotation", "genome"]
species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
try:
os.mkdir("./src_data")
os.mkdir("./src_data/annotation")
os.mkdir("./src_data/genome")
os.mkdir("./src_data/annotation/" + species_folder_name)
os.mkdir("./src_data/genome/" + species_folder_name)
except FileExistsError:
logging.debug("src_data directory tree already exists")
except PermissionError:
logging.debug("insufficient permission to create src_data directory tree")
# Data import into galaxy
source_files = dict()
annotation_dir, genome_dir = None, None
for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
if "annotation/" in d:
annotation_dir = d
for f in os.listdir(d):
if f.endswith("proteins.fasta"):
source_files["proteins_file"] = os.path.join(d, f)
elif f.endswith("transcripts-gff.fa"):
source_files["transcripts_file"] = os.path.join(d, f)
elif f.endswith(".gff"):
source_files["gff_file"] = os.path.join(d, f)
elif "genome/" in d:
genome_dir = d
for f in os.listdir(d):
if f.endswith(".fa"):
source_files["genome_file"] = os.path.join(d, f)
logging.debug("source files found:")
for k, v in source_files.items():
logging.debug("\t" + k + "\t" + v)
# Changing headers in the *proteins.fasta file from >mRNA* to >protein*
# production version
modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
# test version
modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
logging.info("changing fasta headers in " + source_files["proteins_file"])
subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
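# A rough sketch of what the header script is assumed to do (the script itself
# lives outside this repository and writes its output to "outfile"):
#   sed 's/^>mRNA/>protein/' proteins.fasta > outfile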
# src_data cleaning
if os.path.exists(annotation_dir + "outfile"):
subprocess.run(["mv", annotation_dir + "/outfile", source_files["proteins_file"]],
stdout=subprocess.PIPE,
cwd=annotation_dir)
if os.path.exists(annotation_dir + "gmon.out"):
subprocess.run(["rm", annotation_dir + "/gmon.out"],
stdout=subprocess.PIPE,
cwd=annotation_dir)
setup_data_libraries = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
try:
logging.info("loading data into the galaxy container")
subprocess.run(setup_data_libraries,
stdout=subprocess.PIPE,
shell=True)
except subprocess.CalledProcessError:
logging.info("cannot load data into container for " + self.full_name)
pass
else:
logging.info("data successfully loaded into docker container for " + self.full_name)
# gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
histories = self.instance.histories.get_histories(name=str(self.full_name + "_" + self.genome_version))
self.history_id = histories[0]["id"]
libraries = self.instance.libraries.get_libraries() # normally only one library
self.library_id = libraries[0]["id"] # project data folder/library
instance_source_data_folders = self.instance.libraries.get_folders(library_id=self.library_id)
folders_ids = {}
current_fo_name = ""
# Map folder names to folder IDs to access the datasets needed by the first tools
for i in instance_source_data_folders:
for k, v in i.items():
if k == "name":
folders_ids[v] = 0
current_fo_name = v
if k == "id":
folders_ids[current_fo_name] = v
logging.info("folders and datasets IDs: ")
datasets = dict()
for k, v in folders_ids.items():
logging.info("\t" + k + ": " + v)
if k == "/genome":
sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
for k2, v2 in sub_folder_content.items():
for e in v2:
if isinstance(e, dict):
if e["name"].endswith(".fa"):
datasets["genome_file"] = e["ldda_id"]
logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
elif k == "/annotation/" + self.genus_species:
sub_folder_content = self.instance.folders.show_folder(folder_id=v, contents=True)
for k2, v2 in sub_folder_content.items():
for e in v2:
if isinstance(e, dict):
# TODO: manage several files of the same type and manage versions
if e["name"].endswith("transcripts-gff.fa"):
datasets["transcripts_file"] = e["ldda_id"]
logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
elif e["name"].endswith("proteins.fasta"):
datasets["proteins_file"] = e["ldda_id"]
logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
elif e["name"].endswith(".gff"):
datasets["gff_file"] = e["ldda_id"]
logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
elif e["name"].endswith("MALE"):
datasets["gff_file"] = e["ldda_id"]
logging.info("\t\t" + e["name"] + ": " + e["ldda_id"])
self.history_id = self.instance.histories.get_current_history()["id"]
logging.debug("history ID: " + self.history_id)
# import all datasets into current history
self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["genome_file"])
self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["gff_file"])
self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["transcripts_file"])
self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=datasets["proteins_file"])
def run_workflow(self, workflow_name, workflow_parameters):
"""
:param workflow_ga_file:
:param workflow_parameters:
:return:
"""
logging.debug("running workflow: " + str(workflow_name))
workflow_ga_file = self.main_dir + "Galaxy-Workflow-" + workflow_name + ".ga"
if self.strain != "":
custom_ga_file = "_".join([self.genus, self.species, self.strain]) + "_workflow.ga"
custom_ga_file_path = os.path.abspath(custom_ga_file)
else:
custom_ga_file = "_".join([self.genus, self.species]) + "_workflow.ga"
custom_ga_file_path = os.path.abspath(custom_ga_file)
with open(workflow_ga_file, 'r') as ga_in_file:
ga_in = str(ga_in_file.readlines())
ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import
# test
ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
"http://localhost/sp/" + self.genus_lowercase+ "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# production
# ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
# "http://abims--gga.sb-roscoff.fr/sp/" + self.genus_lowercase + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
ga_in = ga_in[2:-2] # if the line below doesn't output correct json
# ga_in = ga_in[:-2] # if the line above doesn't output correct json
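# The workflow import/invocation step is not implemented above. A minimal
# sketch using the bioblend calls referenced elsewhere in this commit
# (import_workflow_dict / invoke_workflow); wiring the input datasets into a
# datamap is omitted here:
workflow_dict = json.loads(ga_in)
workflow = self.instance.workflows.import_workflow_dict(workflow_dict=workflow_dict)
self.instance.workflows.invoke_workflow(workflow_id=workflow["id"],
                                        history_id=self.history_id,
                                        params=workflow_parameters,
                                        allow_tool_state_corrections=True)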
def init_instance(self):
"""
Galaxy instance startup in preparation for running workflows
- remove Homo sapiens from the chado database.
- add organism and analyses into the chado database
- get any other existing organisms IDs (mainly used for testing)
:return:
"""
# Delete Homo sapiens from Chado database
get_sapiens_id_job = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
tool_inputs={"genus": "Homo", "species": "sapiens"},
history_id=self.history_id)
get_sapiens_id_job_output = get_sapiens_id_job["outputs"][0]["id"]
get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output)
try:
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"]) # needs to be str to be recognized by the chado tool
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.2",
history_id=self.history_id,
tool_inputs={"organism": str(sapiens_id)})
except bioblend.ConnectionError:
logging.debug("homo sapiens isn't in the database")
except IndexError:
pass
# Add organism (species) to chado
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_add_organism/organism_add_organism/2.3.2",
history_id=self.history_id,
tool_inputs={"abbr": self.abbreviation,
"genus": self.genus,
"species": self.species,
"common": self.common})
# Add OGS analysis to chado
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
history_id=self.history_id,
tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version,
"program": "Performed by Genoscope",
"programversion": str("OGS" + self.ogs_version),
"sourcename": "Genoscope",
"date_executed": self.date})
# Add genome analysis to chado
self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_add_analysis/analysis_add_analysis/2.3.2",
history_id=self.history_id,
tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version,
"program": "Performed by Genoscope",
"programversion": str("genome v" + self.genome_version),
"sourcename": "Genoscope",
"date_executed": self.date})
# Get the organism and analysis IDs from chado
org = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.2",
history_id=self.history_id,
tool_inputs={"genus": self.genus, "species": self.species})
org_job_out = org["outputs"][0]["id"]
org_json_output = self.instance.datasets.download_dataset(dataset_id=org_job_out)
try:
org_output = json.loads(org_json_output)[0]
self.org_id = str(org_output["organism_id"]) # needs to be str to be recognized by chado tools
except IndexError:
logging.debug("no organism matching " + self.full_name + " exists in the Chado database")
ogs_analysis = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
history_id=self.history_id,
tool_inputs={"name": self.genus + " " + self.species + " OGS" + self.ogs_version})
ogs_analysis_job_out = ogs_analysis["outputs"][0]["id"]
ogs_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=ogs_analysis_job_out)
try:
ogs_analysis_output = json.loads(ogs_analysis_json_output)[0]
self.ogs_analysis_id = str(ogs_analysis_output["analysis_id"]) # needs to be str to be recognized by chado tools
except IndexError:
logging.debug("no matching OGS analysis exists in the Chado database")
genome_analysis = self.instance.tools.run_tool(tool_id="toolshed.g2.bx.psu.edu/repos/gga/chado_analysis_get_analyses/analysis_get_analyses/2.3.2",
history_id=self.history_id,
tool_inputs={"name": self.genus + " " + self.species + " genome v" + self.genome_version})
genome_analysis_job_out = genome_analysis["outputs"][0]["id"]
genome_analysis_json_output = self.instance.datasets.download_dataset(dataset_id=genome_analysis_job_out)
try:
genome_analysis_output = json.loads(genome_analysis_json_output)[0]
self.genome_analysis_id = str(genome_analysis_output["analysis_id"]) # needs to be str to be recognized by chado tools
except IndexError:
logging.debug("no matching genome analysis exists in the Chado database")
logging.info("finished initializing instance")
def clean_instance(self):
"""
TODO: function to purge the instance from analyses and organisms
:return:
"""
return None
if __name__ == "main":
parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
parser.add_argument("json", type=str, help="Input JSON file")
parser.add_argument("-v", "--verbose", help="Increase output verbosity")
parser.add_argument("--load-data", help="Create src_data directory tree and load data into galaxy")
parser.add_argument("--main-workflow", help="Run main workflow (initialize galaxy instance, load data into chado,"
"sync with tripal, create jbrowse and add organism to jbrowse")
args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
sp_dict_list = list()
with open(args.json, 'r') as infile:
json_sp_dict = json.load(infile)
json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
for json_sp in json_sp_dict:
sp_dict_list.append(json_sp)
for sp_dict in sp_dict_list:
al = Autoload(species_parameters_dictionary=sp_dict)
if args.main_workflow:
workflow_parameters = dict()
workflow_parameters["0"] = {}
workflow_parameters["1"] = {}
workflow_parameters["2"] = {}
workflow_parameters["3"] = {}
workflow_parameters["4"] = {"organism": al.org_id,
"analysis_id": al.genome_analysis_id,
"do_update": "true"} # the do_update parameter is to prevent assertion errors when loading the file, should always be set to "true"
workflow_parameters["5"] = {"organism": al.org_id,
"analysis_id": al.ogs_analysis_id}
workflow_parameters["6"] = {"organism_id": al.org_id}
workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
workflow_parameters["9"] = {"organism_id": al.org_id}
al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters)
#!/usr/bin/env bash
@@ -53,6 +53,7 @@ def main():
genus_species = genus_lower + "_" + species
common = sp_dict["common"]
strain = sp_dict["strain"]
sex = sp_dict["sex"]
if strain != "":
genus_species_strain = genus_species + "_" + strain
else:
@@ -123,65 +124,84 @@ def main():
print("Successfully connected to galaxy instance @ " + instance_url)
# TODO: FTP/symlink stuff to retrieve the datasets + change headers in pep.fasta
setup_data_libraries_cl = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
# try:
# os.mkdir("./src_data")
# except FileExistsError:
# print("src_data folder already exists for " + genus_species_strain)
# print("Loading data into galaxy...")
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# print("Output from setup_data_libraries.py")
# print(setup_data_libraries.communicate())
# except bb.ConnectionError:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
# else:
# print("src_data folder created for " + genus_species_strain)
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# print("Output from setup_data_libraries.py")
# print(setup_data_libraries.communicate())
# except bb.ConnectionError:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
genome_dir, annotation_dir = None, None
# ---------------------------------------------------------------------
# src_data directory tree creation
# ---------------------------------------------------------------------
src_data_folders = ["annotation", "genome"]
species_folder_name = "_".join([genus_lower, species, strain, sex])
try:
os.mkdir("./src_data")
os.mkdir("./src_data/annotation")
os.mkdir("./src_data/genome")
os.mkdir("./src_data/annotation/" + species_folder_name)
os.mkdir("./src_data/genome/" + species_folder_name)
except FileExistsError:
print("src_data directory tree already exists")
pass
except PermissionError:
print("Insufficient permission to create src_data directory tree")
# ---------------------------------------------------------------------
# Data import into galaxy
# ---------------------------------------------------------------------
source_files = dict()
annotation_dir, genome_dir = None, None
for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
if "annotation/" in d:
annotation_dir = d
annotation_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("src_data annotation file(s):")
print("\n".join("\t" + f for f in annotation_dir_files))
for f in os.listdir(d):
if f.endswith("proteins.fasta"):
source_files["proteins_file"] = os.path.join(d, f)
elif f.endswith("transcripts-gff.fa"):
source_files["transcripts_file"] = os.path.join(d, f)
elif f.endswith(".gff"):
source_files["gff_file"] = os.path.join(d, f)
# annotation_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
elif "genome/" in d:
genome_dir = d
genome_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("src_data genome file(s):")
print("\n".join("\t" + f for f in genome_dir_files))
modify_pep_headers = ["sh /usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh"]
for f in os.listdir(d):
if f.endswith(".fa"):
source_files["genome_file"] = os.path.join(d, f)
# genome_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("Source files found:")
for k, v in source_files.items():
print("\t" + k + "\t" + v)
# Changing headers in the *proteins.fasta file from >mRNA* to >protein*
# production version
modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
# test version
modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
print("Changing fasta headers in " + source_files["proteins_file"])
subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
# src_data cleaning
if os.path.exists(annotation_dir + "outfile"):
subprocess.run(["mv", annotation_dir + "/outfile", source_files["proteins_file"]],
stdout=subprocess.PIPE,
cwd=annotation_dir)
if os.path.exists(annotation_dir + "gmon.out"):
subprocess.run(["rm", annotation_dir + "/gmon.out"],
stdout=subprocess.PIPE,
cwd=annotation_dir)
# TODO: load the data into the current species directory and load it into galaxy instance
# setup_data_libraries_cl = \
# "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
#
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# # output message from the data loading script
# setup_data_libraries_output = setup_data_libraries.communicate()
# except Exception:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
setup_data_libraries = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
try:
print("Loading data into the galaxy container")
subprocess.run(setup_data_libraries,
stdout=subprocess.PIPE,
shell=True)
except subprocess.CalledProcessError:
print("Cannot load data into container for " + genus_species_strain)
break
else:
print("Data successfully loaded into docker container for " + genus_species_strain)
# generate workflow file and run it in the galaxy instance
@@ -202,8 +222,6 @@ def main():
current_fo_name = v
if k == "id":
fo_id[current_fo_name] = v
# TODO: turn data id parsing into a function
print("Folders and datasets IDs: ")
datasets = dict()
for k, v in fo_id.items():
@@ -242,6 +260,9 @@ def main():
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
# toolrunner.show_pannel() # show tools panel (with tool_id and versions)
# ---------------------------------------------------------------------
# Galaxy instance interaction
# ---------------------------------------------------------------------
@@ -301,7 +322,7 @@ def main():
# datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
# datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
#
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="main")
wf_dict = json.loads(wf_dict_json) # doesn't work with eval()
#
# gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
@@ -311,10 +332,13 @@ def main():
# print("Jbrowse workflow ID: " + wf_id)
# wf_params = workflow.set_jbrowse_workflow_parameters()
#
# allow_tool_state_corrections makes galaxy fill in missing tool states,
# because the workflow was edited outside of galaxy with only some inputs (precautionary parameter)
# gi.workflows.invoke_workflow(workflow_id=wf_id,
# history_id=current_hi_id,
# params=wf_params,
# inputs=datamap)
# inputs=datamap,
# allow_tool_state_corrections=True)
# gi.workflows.delete_workflow(workflow_id=wf_id)
# remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
File moved
@@ -62,14 +62,15 @@ class Workflow:
# print("Workflow file @ " + self.custom_ga_file_path)
with open(self.preset_ga_file, 'r') as ga_in_file:
ga_in = str(ga_in_file.readlines())
ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUEID\\\\\\\\\\\\"}',
print(ga_in)
ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import
ga_in = ga_in.replace("\\\\\\\\\\\\", "\\\\\\")
ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"',
"http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + " " + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# ga_in = ga_in.replace("\\\\\\\\\\\\", "\\\\\\")
ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
"http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true')
# workflow_name = '"name": "' + self.full + '"'
# ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
@@ -77,7 +78,7 @@ class Workflow:
ga_in = ga_in[2:-2] # if the line below doesn't output correct json
# ga_in = ga_in[:-2] # if the line above doesn't output correct json
self.workflow = ga_in
print(ga_in)
# print(ga_in)
return ga_in
def set_main_workflow_parameters(self, datasets):