Skip to content
Snippets Groups Projects
Commit adf8df17 authored by Arthur Le Bars's avatar Arthur Le Bars
Browse files

autoload.py: script implementation, easier to understand and use. small fixes...

autoload.py: script implementation, easier to understand and use. small fixes to workflow.py and main.py
parent a9daa4b9
No related branches found
No related tags found
1 merge request: !1 Release 1.0
This diff is collapsed.
#!/usr/bin/env bash
\ No newline at end of file
......@@ -53,6 +53,7 @@ def main():
genus_species = genus_lower + "_" + species
common = sp_dict["common"]
strain = sp_dict["strain"]
sex = sp_dict["sex"]
if strain != "":
genus_species_strain = genus_species + "_" + strain
else:
......@@ -123,65 +124,84 @@ def main():
print("Successfully connected to galaxy instance @ " + instance_url)
# TODO: FTP/symlink stuff to retrieve the datasets + change headers in pep.fasta
setup_data_libraries_cl = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
# try:
# os.mkdir("./src_data")
# except FileExistsError:
# print("src_data folder already exists for " + genus_species_strain)
# print("Loading data into galaxy...")
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# print("Output from setup_data_libraries.py")
# print(setup_data_libraries.communicate())
# except bb.ConnectionError:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
# else:
# print("src_data folder created for " + genus_species_strain)
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# print("Output from setup_data_libraries.py")
# print(setup_data_libraries.communicate())
# except bb.ConnectionError:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
genome_dir, annotation_dir = None, None
# ---------------------------------------------------------------------
# src_data directory tree creation
# ---------------------------------------------------------------------
src_data_folders = ["annotation", "genome"]
species_folder_name = "_".join([genus_lower, species, strain, sex])
try:
os.mkdir("./src_data")
os.mkdir("./src_data/annotation")
os.mkdir("./src_data/genome")
os.mkdir("./src_data/annotation/" + species_folder_name)
os.mkdir("./src_data/genome/" + species_folder_name)
except FileExistsError:
print("src_data directory tree already exists")
pass
except PermissionError:
print("Insufficient permission to create src_data directory tree")
# ---------------------------------------------------------------------
# Data import into galaxy
# ---------------------------------------------------------------------
source_files = dict()
annotation_dir, genome_dir = None, None
for d in [i[0] for i in os.walk(os.getcwd() + "/src_data")]:
if "annotation/" in d:
annotation_dir = d
annotation_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("src_data annotation file(s):")
print(str('\t' + file) for file in annotation_dir_files)
for f in os.listdir(d):
if f.endswith("proteins.fasta"):
source_files["proteins_file"] = os.path.join(d, f)
elif f.endswith("transcripts-gff.fa"):
source_files["transcripts_file"] = os.path.join(d, f)
elif f.endswith(".gff"):
source_files["gff_file"] = os.path.join(d, f)
# annotation_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
elif "genome/" in d:
genome_dir = d
genome_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("src_data genome file(s):")
print(str('\t' + file) for file in genome_dir_files)
modify_pep_headers = ["sh /usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh"]
for f in os.listdir(d):
if f.endswith(".fa"):
source_files["genome_file"] = os.path.join(d, f)
# genome_dir_files = [f for f in os.listdir(d) if os.path.isfile(os.path.join(d, f))]
print("Source files found:")
for k, v in source_files.items():
print("\t" + k + "\t" + v)
# Changing headers in the *proteins.fasta file from >mRNA* to >protein*
# production version
modify_pep_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
# test version
modify_pep_headers = ["/home/alebars/gga/phaeoexplorer-change_pep_fasta_header.sh",
source_files["proteins_file"]]
print("Changing fasta headers in " + source_files["proteins_file"])
subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
# src_data cleaning
if os.path.exists(annotation_dir + "outfile"):
subprocess.run(["mv", annotation_dir + "/outfile", source_files["proteins_file"]],
stdout=subprocess.PIPE,
cwd=annotation_dir)
if os.path.exists(annotation_dir + "gmon.out"):
subprocess.run(["rm", annotation_dir + "/gmon.out"],
stdout=subprocess.PIPE,
cwd=annotation_dir)
# TODO: load the data into the current species directory and load it into galaxy instance
# setup_data_libraries_cl = \
# "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
#
# try:
# setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
# # output message from the data loading script
# setup_data_libraries_output = setup_data_libraries.communicate()
# except Exception:
# print("Cannot load data into container for " + genus_species_strain)
# break
# else:
# print("Data successfully loaded into docker container for " + genus_species_strain)
setup_data_libraries = "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
try:
print("Loading data into the galaxy container")
subprocess.run(setup_data_libraries,
stdout=subprocess.PIPE,
shell=True)
except subprocess.CalledProcessError:
print("Cannot load data into container for " + genus_species_strain)
break
else:
print("Data successfully loaded into docker container for " + genus_species_strain)
# generate workflow file and run it in the galaxy instance
......@@ -202,8 +222,6 @@ def main():
current_fo_name = v
if k == "id":
fo_id[current_fo_name] = v
# TODO: turn data id parsing into a function
print("Folders and datasets IDs: ")
datasets = dict()
for k, v in fo_id.items():
......@@ -242,6 +260,9 @@ def main():
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["transcripts_file"])
gi.histories.upload_dataset_from_library(history_id=current_hi_id, lib_dataset_id=datasets["proteins_file"])
toolrunner = ToolRunner(parameters_dict=sp_dict, instance=gi, history=current_hi_id)
# toolrunner.show_pannel() # show tools panel (with tool_id and versions)
# ---------------------------------------------------------------------
# Galaxy instance interaction
# ---------------------------------------------------------------------
......@@ -301,7 +322,7 @@ def main():
# datamap["0"] = {"src": "hda", "id": datasets["genome_file"]}
# datamap["1"] = {"src": "hda", "id": datasets["proteins_file"]}
#
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="jbrowse")
wf_dict_json = workflow.generate(working_directory=wd, main_directory=main_dir, workflow_name="main")
wf_dict = json.loads(wf_dict_json) # doesn't work with eval()
#
# gi.workflows.import_workflow_dict(workflow_dict=wf_dict)
......@@ -311,10 +332,13 @@ def main():
# print("Jbrowse workflow ID: " + wf_id)
# wf_params = workflow.set_jbrowse_workflow_parameters()
#
# allow_tool_state_corrections makes Galaxy fill in missing tool states,
# because the workflow was edited outside of Galaxy with only some inputs (precaution parameter)
# gi.workflows.invoke_workflow(workflow_id=wf_id,
# history_id=current_hi_id,
# params=wf_params,
# inputs=datamap)
# inputs=datamap,
# allow_tool_state_corrections=True)
# gi.workflows.delete_workflow(workflow_id=wf_id)
# remove active instance history for testing, purge configured @ ~/config/galaxy.yml.docker_sample
......
File moved
......@@ -62,14 +62,15 @@ class Workflow:
# print("Workflow file @ " + self.custom_ga_file_path)
with open(self.preset_ga_file, 'r') as ga_in_file:
ga_in = str(ga_in_file.readlines())
ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUEID\\\\\\\\\\\\"}',
print(ga_in)
ga_in = ga_in.replace('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"UNIQUE_ID\\\\\\\\\\\\"}',
str('{\\\\\\\\\\\\"unique_id\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus + " " + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"NAME\\\\\\\\\\\\"',
str('\\\\\\\\\\\\"name\\\\\\\\\\\\": \\\\\\\\\\\\"' + self.genus.lower()[0] + self.species) + '\\\\\\\\\\\\"')
ga_in = ga_in.replace("\\\\", "\\") # to restore the correct amount of backslashes in the workflow string before import
ga_in = ga_in.replace("\\\\\\\\\\\\", "\\\\\\")
ga_in = ga_in.replace('http://localhost/sp/undaria_pinnatifida/feature/Undaria/pinnatifida/mRNA/{id}"',
"http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + " " + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# ga_in = ga_in.replace("\\\\\\\\\\\\", "\\\\\\")
ga_in = ga_in.replace('http://localhost/sp/genus_species/feature/Genus/species/mRNA/{id}',
"http://localhost/sp/" + self.genus.lower()[0] + self.genus[1:] + "_" + self.species + "/feature/" + self.genus + "/mRNA/{id}")
# ga_in = ga_in.replace('"index\\\": \\\"false', '"index\\\": \\\"true')
# workflow_name = '"name": "' + self.full + '"'
# ga_in = ga_in.replace('"name": "preset_workflow"', '"name": "preset_workflow"')
......@@ -77,7 +78,7 @@ class Workflow:
ga_in = ga_in[2:-2] # if the line below doesn't output correct JSON
# ga_in = ga_in[:-2] # if the line above doesn't output correct JSON
self.workflow = ga_in
print(ga_in)
# print(ga_in)
return ga_in
def set_main_workflow_parameters(self, datasets):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment