Skip to content
Snippets Groups Projects
Commit 350b0e4f authored by Arthur Le Bars's avatar Arthur Le Bars
Browse files

More templating options for gga_init, cleaned up some deprecated code and added some more comments

parent 10ddfb28
Branches
Tags
1 merge request!1Release 1.0
......@@ -16,11 +16,10 @@ from jinja2 import Template, Environment, FileSystemLoader
import utilities
import speciesData
"""
gga_init.py
Usage: $ python3 gga_init.py -i input_example.yml [OPTIONS]
Usage: $ python3 gga_init.py -i input_example.yml --config config.yml [OPTIONS]
"""
......@@ -30,7 +29,8 @@ class DeploySpeciesStack(speciesData.SpeciesData):
Child of SpeciesData
Contains methods and attributes to deploy a stack of services for a given organism, from creating/updating
the organism's directory tree to create the required docker-compose files
the organism's directory tree to create the required docker-compose files and stack deployment
"""
def make_directory_tree(self):
......@@ -42,88 +42,87 @@ class DeploySpeciesStack(speciesData.SpeciesData):
os.chdir(self.main_dir)
# Create the species main directory (name of the dir: genus_species)
try:
os.mkdir(self.species_dir)
logging.info("Making directory tree for %s" % self.full_name)
except FileExistsError:
logging.info("Updating directory tree for %s" % self.full_name)
logging.info("Updating directory tree of %s" % self.genus_species)
try:
os.chdir(self.species_dir)
except OSError:
logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
sys.exit()
except OSError as exc:
logging.critical("Cannot access %s" % self.genus_species)
sys.exit(exc)
# Copy the custom banner to the species dir (banner used in tripal pages)
# if "custom_banner" not in self.config.keys() or not self.config["custom_banner"] == "/path/to/banner" or not self.config["custom_banner"] == "":
# try:
# logging.debug("Custom banner path: %s" % self.config["custom_banner"])
# if os.path.isfile(os.path.abspath(self.config["custom_banner"])):
# shutil.copy(os.path.abspath(self.config["custom_banner"]), "%s/banner.png" % self.species_dir)
# except FileNotFoundError:
# logging.warning("Specified banner not found (%s), skipping" % self.config["custom_banner"])
# Write nginx conf
# If the path specified is invalid (because it's empty or is still the default demo one),
# use the default banner instead
if "banner_path" in self.config.keys():
if self.config["banner_path"] != "/path/to/banner" or self.config["banner_path"] != "":
try:
logging.debug("Custom banner path: %s" % self.config["banner_path"])
if os.path.isfile(os.path.abspath(self.config["banner_path"])):
shutil.copy(os.path.abspath(self.config["banner_path"]), "%s/banner.png" % self.species_dir)
except FileNotFoundError:
logging.warning("Specified banner not found (%s), using default banner instead" % self.config["banner_path"])
self.config.pop("banner_path", None)
else:
logging.debug("Using default banner for Tripal pages")
self.config.pop("banner_path", None)
# Create nginx dirs and write/re-write nginx conf
self.make_dirs(dir_paths_li=["./nginx", "./nginx/conf"])
try:
os.mkdir("./nginx/")
os.mkdir("./nginx/conf")
with open(os.path.abspath("./nginx/conf/default.conf"), 'w') as conf:
conf.write("server {\n\tlisten 80;\n\tserver_name ~.;\n\tlocation /download/ {\n\t\talias /project_data/; \n\t\tautoindex on;\n\t}\n}") # The species nginx conf
except FileExistsError:
logging.debug("Nginx conf already exists, skipping")
organism_annotation_dir, organism_genome_dir = None, None
except OSError as exc:
logging.critical("Cannot edit NginX conf file")
sys.exit(exc)
# Creation (or updating) of the src_data directory tree
# Depth 0
try:
os.mkdir("./src_data")
except FileExistsError:
logging.debug("src_data folder already exist for %s" % self.full_name)
except PermissionError:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit()
# Depth 1
try:
os.mkdir("./src_data/annotation")
os.mkdir("./src_data/genome")
os.mkdir("./src_data/tracks")
except FileExistsError:
logging.debug("Depth 1 src_data folder(s) already exist for %s" % self.full_name)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit(exc)
# Depth 2
try:
os.mkdir("./src_data/annotation/" + self.species_folder_name)
os.mkdir("./src_data/genome/" + self.species_folder_name)
except FileExistsError:
logging.debug("Depth 2 src_data folder(s) already exist for %s" % self.full_name)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
sys.exit(exc)
# List of all the directories to create in src_data
src_data_dirs_li = ["./src_data", "./src_data/annotation", "./src_data/genome", "./src_data/tracks",
"./src_data/annotation/%s" % self.species_folder_name,
"./src_data/genome/%s" % self.species_folder_name,
"./src_data/annotation/{0}/OGS{1}/".format(self.species_folder_name, self.ogs_version),
"./src_data/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version)]
self.make_dirs(dir_paths_li=src_data_dirs_li)
# Return to main directory
os.chdir(self.main_dir)
# Depth 3
logging.info("Directory tree generated for %s" % self.full_name)
@staticmethod
def make_dirs(dir_paths_li):
"""
Recursively create directories from a list of paths with a try-catch condition
:param dir_paths_li:
:return:
"""
created_dir_paths_li = []
for dir_path in dir_paths_li:
try:
os.mkdir("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
os.mkdir("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
os.mkdir(dir_path)
except FileExistsError:
logging.debug("Depth 3 src_data folder(s) already exist for %s" % self.full_name)
logging.debug("%s directory already exists" % dir_path)
except PermissionError as exc:
logging.critical("Insufficient permission to create src_data directory tree")
logging.critical("Insufficient permission to create %s" % dir_path)
sys.exit(exc)
created_dir_paths_li.append(dir_path)
# Return to main_dir
os.chdir(self.main_dir)
logging.info("Directory tree generated for %s" % self.full_name)
return created_dir_paths_li
def make_compose_files(self):
def make_compose_files(self, force=False):
"""
Create a formatted copy of the template compose file inside a species directory tree
......@@ -133,60 +132,13 @@ class DeploySpeciesStack(speciesData.SpeciesData):
os.chdir(self.main_dir)
try:
os.chdir(self.species_dir)
except OSError:
logging.critical("Cannot access " + self.species_dir)
sys.exit(0)
# Path to the templates used to generate the custom docker-compose files for an input species and the traefik+authelia services
gspecies_template_path = self.script_dir + "/templates/gspecies_compose_template.yml.j2" # Jinja template path
traefik_template_path = self.script_dir + "/templates/traefik_compose_template.yml"
# authelia_config_path = self.script_dir + "/templates/authelia_config_example.yml" # Do not copy the authelia config!
authelia_users_path = self.script_dir + "/templates/authelia_users_template.yml"
except OSError as exc:
logging.critical("Cannot access %s" % self.species_dir)
sys.exit(exc)
# Set the genus_species_strain_sex value for replacing template
genus_species_strain_sex = "{0}_{1}".format(self.genus.lower(), self.specieslower())
if self.sex and self.strain:
genus_species_strain_sex = "_".join([self.genus.lower(), self.species.lower(), self.strain, self.sex])
elif self.sex and not self.strain:
genus_species_strain_sex = "_".join([self.genus.lower(), self.specieslower(), self.sex])
elif self.genus and not self.species and not self.sex and not self.strain:
genus_species_strain_sex = self.genus.lower()
else:
genus_species_strain_sex = "{0}_{1}".format(self.genus.lower(), self.specieslower())
# # Method without Jinja templating (deprecated)
# with open(stack_template_path, 'r') as infile:
# organism_content = list()
# for line in infile:
# # Replace placeholders in the compose file and append line to output
# organism_content.append(
# line.replace("genus_species",
# str(self.genus.lower() + "_" + self.species)).replace("Genus species",
# str(self.genus_uppercase + " " + self.species)).replace("Genus/species",
# str(self.genus_uppercase + "/" + self.species)).replace("gspecies",
# str(self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex",
# genus_species_strain_sex))
# # Write/format the output compose file
# with open("./docker-compose.yml", 'w') as outfile:
# outfile.truncate(0) # Delete file content
# for line in organism_content: # Replace env variables by those in the config file
# for env_variable, value in self.config.items(): # env variables are stored in this dict
# # print("ENV VARIABLE: " + env_variable + "\t VALUE: " + value)
# if env_variable in line:
# line = line.replace(env_variable, value)
# break
# # Write the new line in the docker-compose
# outfile.write(line)
# # Create mounts for the current docker-compose
# self.create_mounts(working_dir=self.species_dir)
# jinja templating, handled using the python jinja module
# TODO: use ansible to handle the templating in production
# Jinja2 templating, handled using the python "jinja2" module
file_loader = FileSystemLoader(self.script_dir + "/templates")
env = Environment(loader=file_loader)
template = env.get_template("gspecies_compose_template.yml.j2")
# We need a dict holding all key (variables) - values that needs to be replaced in the template as our rendering dict
# To do so we need both input file vars and config vars
......@@ -196,41 +148,55 @@ class DeploySpeciesStack(speciesData.SpeciesData):
"strain": self.strain, "sex": self.sex, "Genus_species": self.genus_species[0].upper() + self.genus_species[1:]}
# Merge the two dicts
render_vars = {**self.config, **input_vars}
output = template.render(render_vars)
with open(os.path.join(self.species_dir, "docker-compose.yml", "w")) as gspecies_compose_file:
print("Writing gspecies compose yml file")
# Render the gspecies docker-compose file and write it
gspecies_compose_template = env.get_template("gspecies_compose_template.yml.j2")
gspecies_compose_output = gspecies_compose_template.render(render_vars)
with open(os.path.join(self.species_dir, "docker-compose.yml"), "w") as gspecies_compose_file:
logging.info("Writing %s docker-compose.yml" % self.genus_species)
gspecies_compose_file.truncate(0)
gspecies_compose_file.write(gspecies_compose_output)
# Create the volumes (directory) of the species docker-compose file
self.create_mounts(working_dir=".")
# Proceed to the traefik and authelia directories
os.chdir(self.main_dir)
self.make_dirs(["./traefik", "./traefik/authelia"])
# Render and try to write the traefik docker-compose file
# This new docker-compose file will not overwrite the one already present in the traefik dir
# unless the argument "--overwrite-all" is specified
if not os.path.isfile("./traefik/docker-compose.yml") or force:
traefik_compose_template = env.get_template("traefik_compose_template.yml.j2")
traefik_compose_output = traefik_compose_template.render(render_vars)
with open(os.path.join(self.main_dir, "docker-compose.yml"), 'w') as traefik_compose_file:
logging.info("Writing traefik docker-compose.yml")
traefik_compose_file.truncate(0)
traefik_compose_file.write(traefik_compose_output)
if self.config["authelia_config_path"]:
if not self.config["authelia_config_path"] == "" or not self.config["authelia_config_path"] == "/path/to/authelia/config":
if os.path.isfile(os.path.abspath(self.config["authelia_config_path"])):
try:
os.chdir(os.path.abspath(self.main_dir))
os.mkdir("./traefik")
os.mkdir("./traefik/authelia")
if self.config["custom_authelia_config_path"]:
if os.path.isfile(os.path.abspath(self.config["custom_authelia_config_path"])):
try:
shutil.copy(os.path.abspath(self.config["custom_authelia_config_path"]), "./traefik/authelia")
except FileNotFoundError:
logging.critical("Cannot copy custom Authelia config file (%s)" % self.config["custom_authelia_config_path"])
sys.exit()
shutil.copy(os.path.abspath(self.config["authelia_config_path"]), "./traefik/authelia")
except Exception as exc:
logging.critical("Cannot copy custom Authelia config file (%s)" % self.config["authelia_config_path"])
sys.exit(exc)
else:
logging.critical("Custom Authelia config file not found (%s)" % self.config["custom_authelia_config_path"])
if not os.path.isfile("./traefik/authelia/users.yml"):
logging.critical("Custom Authelia config file not found (%s)" % self.config["authelia_config_path"])
# Path to the authelia users in the repo
authelia_users_path = self.script_dir + "/templates/authelia_users_template.yml"
# Copy authelia "users" file
if not os.path.isfile("./traefik/authelia/users.yml") or force:
shutil.copy(authelia_users_path, "./traefik/authelia/users.yml")
except FileExistsError:
logging.debug("Traefik directory already exists: %s" % os.path.abspath("../traefik"))
try:
if not os.path.isfile("./traefik/docker-compose.yml"):
shutil.copy(traefik_template_path, "./traefik/docker-compose.yml")
else:
logging.debug("Traefik compose file already exists: %s" % os.path.abspath("./traefik/docker-compose.yml"))
except FileExistsError:
logging.debug("Traefik compose file already exists: %s" % os.path.abspath("./traefik/docker-compose.yml"))
# Create the mounts for the traefik and authelia services
traefik_dir = os.path.abspath(os.path.join(self.main_dir, "traefik"))
# Create the mounts for the traefik+authelia containers
if not os.path.isdir(os.path.join(traefik_dir, "docker_data")) or force:
self.create_mounts(working_dir=traefik_dir)
# Return to main directory
os.chdir(self.main_dir)
def create_mounts(self, working_dir):
......@@ -243,9 +209,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Change directory to create mount points for the container
try:
os.chdir(os.path.abspath(working_dir))
except Exception:
except Exception as exc:
logging.critical("Cannot access %s, exiting" % working_dir)
sys.exit()
sys.exit(exc)
compose_yml = os.path.abspath("./docker-compose.yml")
if not os.path.isfile(compose_yml):
raise Exception("Could not find docker-compose.yml at %s" % compose_yml)
......@@ -279,11 +245,11 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Go back to the "main" directory
try:
os.chdir(os.path.abspath(self.main_dir))
except Exception:
logging.critical("Cannot access main directory (%s), exiting" % self.main_dir)
sys.exit()
except OSError as exc:
logging.critical("Cannot access %s, exiting" % self.main_dir)
sys.exit(exc)
def deploy_stack(self):
def deploy_stack(self, input_list):
"""
Call the script "deploy.sh" used to initialize the swarm cluster if needed and
launch/update the current organism's stack
......@@ -294,6 +260,9 @@ class DeploySpeciesStack(speciesData.SpeciesData):
:return:
"""
to_deploy_species_li = []
# # Create our swarm cluster if it doesn't exist
# subprocess.Popen(["docker", "swarm", "init"],
# stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.main_dir)
......@@ -304,17 +273,17 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# Launch and update docker stacks
# noinspection PyArgumentList
deploy_stacks_popen = subprocess.Popen(["sh", self.script_dir + "/deploy.sh", self.genus_species,
self.main_dir + "/traefik"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
universal_newlines=True)
for stdout_line in iter(deploy_stacks_popen.stdout.readline, ""):
if "daemon" in stdout_line: # Ignore swarm init error output
pass
else:
logging.info("\t%s" % stdout_line.strip())
deploy_stacks_popen.stdout.close()
# deploy_stacks_popen = subprocess.Popen(["sh", self.script_dir + "/deploy.sh", self.genus_species,
# self.main_dir + "/traefik"],
# stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
# universal_newlines=True)
#
# for stdout_line in iter(deploy_stacks_popen.stdout.readline, ""):
# if "daemon" in stdout_line: # Ignore swarm init error output
# pass
# else:
# logging.info("\t%s" % stdout_line.strip())
# deploy_stacks_popen.stdout.close()
if __name__ == "__main__":
......@@ -339,6 +308,10 @@ if __name__ == "__main__":
type=str,
help="Where the stack containers will be located, defaults to current directory")
parser.add_argument("--overwrite-all",
help="Overwrite all docker-compose and conf files in the traefik and authelia directories (default=False)",
action="store_true")
args = parser.parse_args()
if args.verbose:
......@@ -359,6 +332,9 @@ if __name__ == "__main__":
main_dir = os.path.abspath(args.main_directory)
sp_dict_list = utilities.parse_input(os.path.abspath(args.input))
print(sp_dict_list)
utilities.get_species_to_deploy(sp_dict_list=sp_dict_list)
logging.info("Deploying stacks for organisms in input file %s" % args.input)
for sp_dict in sp_dict_list:
......@@ -377,7 +353,7 @@ if __name__ == "__main__":
# Set the instance url attribute
for env_variable, value in deploy_stack_for_current_organism.config.items():
if env_variable == "custom_host":
if env_variable == "hostname":
deploy_stack_for_current_organism.instance_url = value + \
deploy_stack_for_current_organism.genus_lowercase + \
"_" + deploy_stack_for_current_organism.species + \
......@@ -396,7 +372,7 @@ if __name__ == "__main__":
logging.info("Successfully generated the directory tree for %s" % deploy_stack_for_current_organism.full_name)
# Make compose files
deploy_stack_for_current_organism.make_compose_files()
deploy_stack_for_current_organism.make_compose_files(force=args.overwrite_all)
logging.info("Successfully generated the docker-compose files for %s" % deploy_stack_for_current_organism.full_name)
# Deploy the stack
......
......@@ -7,8 +7,6 @@ import os
import subprocess
import logging
import sys
import utilities
import speciesData
import fnmatch
import time
import json
......@@ -19,11 +17,13 @@ import shutil
from bioblend.galaxy.objects import GalaxyInstance
from bioblend import galaxy
import utilities
import speciesData
"""
gga_load_data.py
Usage: $ python3 gga_init.py -i input_example.yml [OPTIONS]
Usage: $ python3 gga_init.py -i input_example.yml --config config.yml [OPTIONS]
Do not call this script before the galaxy container is ready
"""
......@@ -36,7 +36,7 @@ class LoadData(speciesData.SpeciesData):
Contains methods and attributes to copy data into the src_data subfolders of an organism and then into the
galaxy instance's history of this given organism
Optional fasta headers reformat
Optional data file formatting
"""
......@@ -69,8 +69,9 @@ class LoadData(speciesData.SpeciesData):
"""
proteins_file = None
proteins_outfile = None
annotation_dir = None
organism_annotation_dir = os.path.abspath("./src_data/annotation/{0}/OGS{1}".format(self.species_folder_name, self.genome_version))
organism_annotation_dir = os.path.abspath("./src_data/annotation/{0}/OGS{1}".format(self.species_folder_name, self.ogs_version))
self.goto_species_dir()
......@@ -91,12 +92,12 @@ class LoadData(speciesData.SpeciesData):
subprocess.run(["mv", annotation_dir + "/outfile_proteins.fa", proteins_file],
stdout=subprocess.PIPE,
cwd=annotation_dir)
# subprocess.run(["rm", annotation_dir + "/outfile_proteins.fa"], stdout=subprocess.PIPE, cwd=annotation_dir)
subprocess.run(["rm", annotation_dir + "/outfile_proteins.fa"], stdout=subprocess.PIPE, cwd=annotation_dir)
else:
logging.warning("Skipping proteins fasta headers formatting (FileNotFound)")
def format_fasta_headers(self, infile, outfile, pattern, repl):
@staticmethod
def format_fasta_headers(infile, outfile, pattern, repl):
"""
Format the fasta headers of a given file, given a matching pattern and a replacement string
......@@ -124,6 +125,7 @@ class LoadData(speciesData.SpeciesData):
Find source data files in the parent_directory
Link data files
TODO: manage access to the "parent directory" subdirectories properly
TODO: implement search/tests for individual file paths
:return:
......@@ -139,38 +141,22 @@ class LoadData(speciesData.SpeciesData):
organism_genome_dir = os.path.abspath("./src_data/genome/{0}/v{1}".format(self.species_folder_name, self.genome_version))
for dirpath, dirnames, files in os.walk(self.source_data_dir):
if "0" in str(dirpath): # TODO: Ensures to take the correct files (other dirs hold files with valid names), this is for Phaeoexplorer only!
if "0" in str(dirpath):
for f in files:
if "Contaminants" not in str(f):
try:
if fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".fa"):
logging.info("Genome assembly file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_genome_dir)
# organism_genome_dir = os.path.abspath("./src_data/genome/" +
# self.species_folder_name + "/v" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_genome_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".gff"):
logging.info("GFF file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" +
self.sex.upper() + "_transcripts-gff.fa"):
logging.info("Transcripts file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + "_proteins.fa"):
logging.info("Proteins file - " + str(f))
# os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
# organism_annotation_dir = os.path.abspath("./src_data/annotation/" +
# self.species_folder_name + "/OGS" +
# self.genome_version)
shutil.copyfile(os.path.join(dirpath, f), os.path.join(organism_annotation_dir, f))
except FileExistsError as exc:
logging.warning("Error raised (FileExistsError)")
......@@ -184,7 +170,6 @@ class LoadData(speciesData.SpeciesData):
os.chdir(self.main_dir)
def set_get_history(self):
"""
Create or set the working history to the current species one
......@@ -206,7 +191,6 @@ class LoadData(speciesData.SpeciesData):
return self.history_id
def remove_homo_sapiens_from_db(self):
"""
Run the GMOD tool to remove the "Homo sapiens" default organism from the original database
......@@ -235,7 +219,6 @@ class LoadData(speciesData.SpeciesData):
logging.debug("Homo sapiens isn't in the instance's chado database")
pass
def purge_histories(self):
"""
Delete all histories in the instance
......@@ -252,7 +235,6 @@ class LoadData(speciesData.SpeciesData):
return histories
def setup_library(self):
"""
Create a "Project Data" library in galaxy, mirroring the "src_data" folder of the current organism
......@@ -376,7 +358,6 @@ class LoadData(speciesData.SpeciesData):
logging.info("Finished importing data")
def create_deep_folder(self, prj_lib, path, parent_folder=None, deep_name=""):
"""
Create a folder inside a folder in a galaxy library
......@@ -403,7 +384,6 @@ class LoadData(speciesData.SpeciesData):
return new_folder
def setup_data_libraries(self):
"""
Load data into the galaxy container with the galaxy_data_libs_SI.py script written by A. Bretaudeau
......@@ -425,7 +405,6 @@ class LoadData(speciesData.SpeciesData):
else:
logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
def generate_blast_banks(self):
"""
TODO: Automatically generate blast banks for a species and commit
......@@ -433,9 +412,6 @@ class LoadData(speciesData.SpeciesData):
:return:
"""
def connect_to_instance(self):
"""
Test the connection to the galaxy instance for the current organism
......@@ -522,8 +498,7 @@ if __name__ == "__main__":
load_data_for_current_species.config = utilities.parse_config(args.config)
# Set the instance url attribute
for env_variable, value in load_data_for_current_species.config.items():
if env_variable == "custom_host":
# TODO:
if env_variable == "hostname":
load_data_for_current_species.instance_url = "http://{0}:8888/sp/{1}_{2}/galaxy/".format(
value, load_data_for_current_species.genus_lowercase, load_data_for_current_species.species)
break
......@@ -567,7 +542,8 @@ if __name__ == "__main__":
# Set or get the history for the current organism
load_data_for_current_species.set_get_history()
# Remove H. sapiens from database if here TODO: set a dedicated history for removing H. sapiens (instead of doing it into a species history)
# Remove H. sapiens from database if here
# TODO: set a dedicated history for removing H. sapiens (instead of doing it into a species history)
load_data_for_current_species.remove_homo_sapiens_from_db()
# logging.info("Importing datasets into history for %s" % load_data_for_current_species.full_name)
......
......@@ -88,6 +88,7 @@ def check_galaxy_state(genus_lowercase, species, script_dir):
:param genus_lowercase:
:param species:
:param script_dir:
:return:
"""
......@@ -118,6 +119,33 @@ def get_species_history_id(instance, full_name):
return [history_id, show_history]
def get_species_to_deploy(sp_dict_list):
"""
Find and return which species (i.e. genus species) are not duplicated in the input dictionary used by gga scripts
Returns a list of species (directories) for which to deploy the stack
This aims to reduce the number of deployments for a single species by removing this step from the gga_init
loop iterating over input species
:param sp_dict_list:
:return:
"""
to_deploy_li = []
for sp in sp_dict_list:
for k, v in sp.items():
sp_gspecies = ""
for k2, v2 in v.items():
if k2 == "genus":
sp_gspecies = sp_gspecies + v2
elif k2 == "species":
sp_gspecies = sp_gspecies + "_" + v2
if sp_gspecies not in to_deploy_li and sp_gspecies != "":
to_deploy_li.append(sp_gspecies)
print(to_deploy_li)
return to_deploy_li
def write_metadata(metadata_file, metadata_dict):
"""
......@@ -129,4 +157,3 @@ def write_metadata(metadata_file, metadata_dict):
ret = 0
return metadata_file, metadata_dict, ret
......@@ -2,7 +2,7 @@
"a_galaxy_workflow": "true",
"annotation": "",
"format-version": "0.1",
"name": "Chado load Tripal synchronize",
"name": "Chado load Tripal synchronize (imported from uploaded file)",
"steps": {
"0": {
"annotation": "",
......@@ -16,7 +16,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 662.7333374023438
"top": 227
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -27,7 +27,7 @@
{
"label": null,
"output_name": "output",
"uuid": "df303ec8-a34f-47ff-8e23-f6fd4f7a4a25"
"uuid": "1ea45e76-73ee-4de4-9d60-7be3f3bd47f5"
}
]
},
......@@ -43,7 +43,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 707.7333374023438
"top": 317
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -54,7 +54,7 @@
{
"label": null,
"output_name": "output",
"uuid": "cae20d90-587c-4b3b-b3a4-5aedb8dde3bf"
"uuid": "8e968289-d670-401d-8700-82dcecc6e79e"
}
]
},
......@@ -70,7 +70,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 752.7333374023438
"top": 407
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -81,7 +81,7 @@
{
"label": null,
"output_name": "output",
"uuid": "79cc48a0-de9b-45d1-8372-7dabded47796"
"uuid": "06493160-17c4-4fcd-85fa-c9fd3c1ff4b5"
}
]
},
......@@ -97,7 +97,7 @@
"outputs": [],
"position": {
"left": 200,
"top": 797.7333374023438
"top": 497
},
"tool_id": null,
"tool_state": "{\"optional\": false}",
......@@ -108,7 +108,7 @@
{
"label": null,
"output_name": "output",
"uuid": "00f82694-ec6e-471c-90ab-66311651c023"
"uuid": "2ea3e81b-3048-4968-90c8-03df89b01caf"
}
]
},
......@@ -146,8 +146,8 @@
}
],
"position": {
"left": 343,
"top": 662.7333374023438
"left": 486,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_fasta/feature_load_fasta/2.3.3",
......@@ -170,63 +170,10 @@
]
},
"5": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0",
"errors": null,
"id": 5,
"input_connections": {
"reference_genome|genome": {
"id": 0,
"output_name": "output"
},
"track_groups_0|data_tracks_0|data_format|annotation": {
"id": 1,
"output_name": "output"
}
},
"inputs": [
{
"description": "runtime parameter for tool JBrowse",
"name": "reference_genome"
}
],
"label": null,
"name": "JBrowse",
"outputs": [
{
"name": "output",
"type": "html"
}
],
"position": {
"left": 343,
"top": 748.7333374023438
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.10+galaxy0",
"tool_shed_repository": {
"changeset_revision": "8774b28235bb",
"name": "jbrowse",
"owner": "iuc",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": \"true\", \"aboutDescription\": \"\", \"show_tracklist\": \"true\", \"show_nav\": \"true\", \"show_overview\": \"true\", \"show_menu\": \"true\", \"hideGenomeOptions\": \"false\"}, \"plugins\": {\"BlastView\": \"true\", \"ComboTrackSelector\": \"false\", \"GCContent\": \"false\"}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"RuntimeValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Default\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"RuntimeValue\"}, \"match_part\": {\"match_part_select\": \"false\", \"__current_case__\": 1}, \"index\": \"false\", \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": \"\"}}, \"jbstyle\": {\"style_classname\": \"feature\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": [{\"__index__\": 0, \"menu_action\": \"iframeDialog\", \"menu_label\": \"View transcript report\", \"menu_title\": \"Transcript {id}\", \"menu_url\": {\"__class__\": \"RuntimeValue\"}, \"menu_icon\": \"dijitIconBookmark\"}]}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "1.16.10+galaxy0",
"type": "tool",
"uuid": "00657cb2-12f9-4f93-98da-04feac9e1388",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "6cbfa232-911e-49b9-96ad-fa9ed236f806"
}
]
},
"6": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.3",
"errors": null,
"id": 6,
"id": 5,
"input_connections": {
"fasta": {
"id": 2,
......@@ -246,9 +193,21 @@
"description": "runtime parameter for tool Chado load gff",
"name": "analysis_id"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "fasta"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "gff"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "organism"
},
{
"description": "runtime parameter for tool Chado load gff",
"name": "wait_for"
}
],
"label": null,
......@@ -260,8 +219,8 @@
}
],
"position": {
"left": 486,
"top": 743.2333374023438
"left": 772,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/chado_feature_load_gff/feature_load_gff/2.3.3",
......@@ -271,7 +230,7 @@
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gff\": {\"__class__\": \"ConnectedValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(\\\\..+)$\", \"re_protein\": \"protein\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_state": "{\"add_only\": \"false\", \"analysis_id\": {\"__class__\": \"RuntimeValue\"}, \"fasta\": {\"__class__\": \"RuntimeValue\"}, \"gff\": {\"__class__\": \"RuntimeValue\"}, \"landmark_type\": \"contig\", \"no_seq_compute\": \"false\", \"organism\": {\"__class__\": \"RuntimeValue\"}, \"prot_naming\": {\"method\": \"regex\", \"__current_case__\": 1, \"re_protein_capture\": \"^mRNA(\\\\..+)$\", \"re_protein\": \"protein\\\\1\"}, \"psql_target\": {\"method\": \"remote\", \"__current_case__\": 0}, \"wait_for\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "2.3.3",
"type": "tool",
"uuid": "b100a055-0dab-4f2f-8c46-573713ed3fff",
......@@ -283,58 +242,14 @@
}
]
},
"7": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
"errors": null,
"id": 7,
"input_connections": {
"organisms_0|jbrowse": {
"id": 5,
"output_name": "output"
}
},
"inputs": [],
"label": null,
"name": "Add organisms to JBrowse container",
"outputs": [
{
"name": "output",
"type": "html"
}
],
"position": {
"left": 486,
"top": 662.7333374023438
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/jbrowse_to_container/jbrowse_to_container/0.5.1",
"tool_shed_repository": {
"changeset_revision": "11033bdad2ca",
"name": "jbrowse_to_container",
"owner": "gga",
"tool_shed": "toolshed.g2.bx.psu.edu"
},
"tool_state": "{\"organisms\": [{\"__index__\": 0, \"jbrowse\": {\"__class__\": \"RuntimeValue\"}, \"name\": {\"__class__\": \"RuntimeValue\"}, \"advanced\": {\"unique_id\": {\"__class__\": \"RuntimeValue\"}}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}",
"tool_version": "0.5.1",
"type": "tool",
"uuid": "7b7cca87-4000-45de-93a5-bd22cd661d0a",
"workflow_outputs": [
{
"label": null,
"output_name": "output",
"uuid": "8c23d473-4ffa-4a66-b071-aeecc105a529"
}
]
},
"8": {
"6": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
"errors": null,
"id": 8,
"id": 6,
"input_connections": {
"wait_for": {
"id": 6,
"id": 5,
"output_name": "results"
}
},
......@@ -353,8 +268,8 @@
}
],
"position": {
"left": 629,
"top": 662.7333374023438
"left": 1058,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_organism_sync/organism_sync/3.2.1.0",
......@@ -376,14 +291,14 @@
}
]
},
"9": {
"7": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
"errors": null,
"id": 9,
"id": 7,
"input_connections": {
"wait_for": {
"id": 8,
"id": 6,
"output_name": "results"
}
},
......@@ -402,8 +317,8 @@
}
],
"position": {
"left": 772,
"top": 662.7333374023438
"left": 1344,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
......@@ -425,14 +340,14 @@
}
]
},
"10": {
"8": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
"errors": null,
"id": 10,
"id": 8,
"input_connections": {
"wait_for": {
"id": 9,
"id": 7,
"output_name": "results"
}
},
......@@ -451,8 +366,8 @@
}
],
"position": {
"left": 915,
"top": 662.7333374023438
"left": 1630,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_analysis_sync/analysis_sync/3.2.1.0",
......@@ -474,14 +389,14 @@
}
]
},
"11": {
"9": {
"annotation": "",
"content_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0",
"errors": null,
"id": 11,
"id": 9,
"input_connections": {
"wait_for": {
"id": 10,
"id": 8,
"output_name": "results"
}
},
......@@ -500,8 +415,8 @@
}
],
"position": {
"left": 1058,
"top": 662.7333374023438
"left": 1916,
"top": 227
},
"post_job_actions": {},
"tool_id": "toolshed.g2.bx.psu.edu/repos/gga/tripal_feature_sync/feature_sync/3.2.1.0",
......@@ -525,6 +440,6 @@
}
},
"tags": [],
"uuid": "69699b44-94c8-4cc7-977e-74f266e58fdf",
"version": 3
"uuid": "13fe9125-bf94-4955-9305-908076475b70",
"version": 1
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment