Newer
Older
#!/usr/bin/python
# -*- coding: utf-8 -*-

Arthur Le Bars
committed

Arthur Le Bars
committed
import argparse
import fnmatch
import logging
import os
import re
import shutil
import subprocess
import sys
from datetime import datetime

import yaml

from gga_autoload.gga_load_data import table_parser

Arthur Le Bars
committed

Arthur Le Bars
committed
# Usage: $ python3 deploy_stacks.py example.yml [OPTIONS]

Arthur Le Bars
committed
def parse_input(input_file):
    """
    Parse the YAML input file into a list of per-species dictionaries.

    Every top-level entry of the file contributes its value to the returned
    list, except the "config" entry, which is skipped.

    :param input_file: path of the input file; its name must end with "yml" or "yaml"
    :return: list of the top-level values (one per species), in file order;
        an empty list when the file holds no data
    :raises SystemExit: (status 1) when the file name is not a YAML name, when
        the content cannot be parsed, or when the top level is not a mapping
    """
    if not str(input_file).endswith(("yml", "yaml")):
        logging.critical("Error, please input a YAML file")
        sys.exit(1)
    logging.debug("Input format used: YAML")

    with open(input_file, 'r') as stream:
        try:
            yaml_dict = yaml.safe_load(stream)
        except yaml.YAMLError as exit_code:
            # The exception must be converted explicitly: adding it to a str raises TypeError
            logging.critical(str(exit_code) + " (YAML input file might be incorrect)")
            sys.exit(1)

    if yaml_dict is None:
        # safe_load returns None for an empty document
        return []
    if not isinstance(yaml_dict, dict):
        logging.critical("Error, the YAML input file must contain a mapping at its top level")
        sys.exit(1)

    # "config" is not a species entry, so it must not reach DeploySpeciesStack
    return [v for k, v in yaml_dict.items() if k != "config"]

Arthur Le Bars
committed
class DeploySpeciesStack:
    """
    Deploy a stack of services for a given species.

    An instance is built from one species dictionary of the input file. Its
    methods create the species directory tree, generate the docker-compose
    files from the templates shipped next to the script, link the source data
    files and launch the stack.

    `main_dir` must be set by the caller before `make_directory_tree` or
    `deploy_stack` is used. Several methods change the current working
    directory of the process.
    """

    # Content of the nginx configuration written for each species
    _NGINX_CONF = "server {\n\tlisten 80;\n\tserver_name ~.;\n\tlocation /download/ {\n\t\talias /project_data/; \n\t\tautoindex on;\n\t}\n}"

    # (file name suffix, log label, src_data sub-tree) for each kind of source file;
    # the first matching entry wins
    _SOURCE_FILE_KINDS = (
        (".fa", "Genome assembly file", "genome"),
        (".gff", "GFF file", "annotation"),
        ("_transcripts-gff.fa", "Transcripts file", "annotation"),
        ("_proteins.fa", "Proteins file", "annotation"),
    )

    def __init__(self, parameters_dictionary):
        """
        Read the species attributes from the parsed input.

        :param parameters_dictionary: dictionary holding a "description" entry
            (species, genus, strain, sex, common_name, origin) and a "data"
            entry (performed_by, genome_version, ogs_version, parent_directory)
        :raises KeyError: when one of these keys is missing
        """
        description = parameters_dictionary["description"]
        data = parameters_dictionary["data"]

        self.parameters_dictionary = parameters_dictionary
        self.species = description["species"]
        self.genus = description["genus"]
        self.strain = description["strain"]
        self.sex = description["sex"]
        self.common = description["common_name"]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = description["origin"]
        self.performed = data["performed_by"]
        self.genome_version = self._version_or_default(data["genome_version"])
        self.ogs_version = self._version_or_default(data["ogs_version"])

        # Slicing (instead of indexing) keeps an empty genus from raising IndexError
        self.genus_lowercase = self.genus[:1].lower() + self.genus[1:]
        self.genus_uppercase = self.genus[:1].upper() + self.genus[1:]
        # Empty attributes are left out, exactly as make_directory_tree() does, so the
        # folder name is the same whether or not the tree was built by this instance
        self.species_folder_name = self._join_non_empty(
            [self.genus_lowercase, self.species, self.strain, self.sex])
        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
        self.abbreviation = " ".join([self.genus_lowercase[:1], self.species, self.strain, self.sex])
        self.genus_species = self.genus_lowercase + "_" + self.species

        # Testing with localhost/scratchgmodv1
        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"

        self.history_id = None
        self.library_id = None
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.org_id = None
        self.genome_analysis_id = None
        self.ogs_analysis_id = None
        self.tool_panel = None
        self.datasets = dict()
        self.source_files = dict()
        self.workflow_name = None

        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
        # TODO: set the key in config file --> saved for later (master api key access actions are limited)
        self.api_key = "master"

        # Directory/subdirectories where data files are located (fasta, gff, ...)
        if data["parent_directory"] in ("", "/path/to/closest/parent/dir"):
            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
        else:
            self.source_data_dir = data["parent_directory"]

        # Update the instance (in histories corresponding to the input) instead of creating a new one
        self.do_update = False

        # Placeholder re (raw string: "\w" is not a valid str escape sequence)
        self.species_name_regex_litteral = r"(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"

    @staticmethod
    def _version_or_default(version, default="1.0"):
        """Return *version* as a string, or *default* when it is empty or None."""
        if version is None or version == "":
            return default
        # An unquoted YAML version (e.g. 1.0) is parsed as a number; paths need a str
        return str(version)

    @staticmethod
    def _join_non_empty(items):
        """Join with "_" the items that are neither None nor the empty string."""
        return "_".join(item for item in items if item is not None and item != "")

    def _ensure_directories(self, directories):
        """Create every directory of *directories*, parents included, keeping existing ones."""
        found_existing = False
        try:
            for directory in directories:
                if os.path.isdir(directory):
                    found_existing = True
                # exist_ok: one pre-existing directory must not prevent creating the others
                os.makedirs(directory, exist_ok=True)
        except PermissionError:
            logging.critical("Insufficient permission to create src_data directory tree")
            sys.exit(1)
        if found_existing:
            if self.do_update:
                logging.info("Updating src_data directory tree")
            else:
                logging.debug("The src_data directory tree already exists")

    def _create_mounts(self, working_dir, quiet):
        """Run create_mounts.py (located next to this script) from *working_dir*."""
        output = subprocess.DEVNULL if quiet else None
        subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
                        stdout=output, stderr=output)

    def _link_source_file(self, source, link_path):
        """Create the symlink *link_path* pointing to *source*; failures are only logged."""
        try:
            os.symlink(source, link_path)
        except (FileExistsError, TypeError, NotADirectoryError) as err:
            logging.warning("Error raised (" + type(err).__name__ + ")")

    def make_directory_tree(self):
        """
        Generate the directory tree for an organism.

        Creates <main_dir>/<genus_species>/ with its nginx configuration and
        the src_data tree (annotation/<name>/OGS<ogs_version>,
        genome/<name>/v<genome_version>, tracks). The process is left inside
        the species directory; `main_dir`, `species_dir` and
        `species_folder_name` are updated.

        :raises SystemExit: when the species directory cannot be entered or
            the tree cannot be created for lack of permission
        """
        os.chdir(self.main_dir)
        self.main_dir = os.getcwd() + "/"
        self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"

        try:
            os.mkdir(self.species_dir)
        except FileExistsError:
            logging.debug("Directory " + self.species_dir + " already exists")

        try:
            os.chdir(self.species_dir)
        except OSError:
            logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
            sys.exit(1)

        # The species nginx conf: written once, an existing file is never overwritten
        nginx_conf_path = os.path.abspath("./nginx/conf/default.conf")
        os.makedirs(os.path.dirname(nginx_conf_path), exist_ok=True)
        if os.path.exists(nginx_conf_path):
            logging.debug("NginX conf exists")
        else:
            with open(nginx_conf_path, 'w') as conf:
                conf.write(self._NGINX_CONF)

        self.species_folder_name = self._join_non_empty(
            [self.genus_lowercase, self.species, self.strain, self.sex])

        # Creation (or updating) of the src_data directory tree
        self._ensure_directories([
            "./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version,
            "./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version,
            "./src_data/tracks",
        ])

    def _fill_placeholders(self, line, genus_species_strain_sex):
        """Return *line* with the template placeholders replaced by the species values."""
        replacements = (
            # Longest placeholder first: "genus_species" is a prefix of it
            ("genus_species_strain_sex", genus_species_strain_sex),
            ("genus_species", str(self.genus.lower() + "_" + self.species)),
            ("Genus species", str(self.genus_uppercase + " " + self.species)),
            ("Genus/species", str(self.genus_uppercase + "/" + self.species)),
            ("gspecies", str(self.genus.lower()[:1] + self.species)),
        )
        for placeholder, value in replacements:
            line = line.replace(placeholder, value)
        return line

    def make_compose_files(self):
        """
        Generate the docker-compose files of the species and of traefik.

        Works relative to the current working directory: writes
        ./docker-compose.yml from the stack template, runs create_mounts.py,
        then fills ../traefik (authelia configuration and users, traefik
        compose file) from the templates.

        :return: None
        """
        # Path to the templates used to generate the custom docker-compose files for an input species
        stack_template_path = self.script_dir + "/templates/stack-organism.yml"
        traefik_template_path = self.script_dir + "/templates/traefik.yml"
        authelia_config_path = self.script_dir + "/templates/authelia_config.yml"
        authelia_users_path = self.script_dir + "/templates/authelia_users.yml"

        # Every relative path below is resolved against this directory
        working_dir = os.getcwd()

        if self.sex and self.strain:
            genus_species_strain_sex = self.genus.lower() + "_" + self.species + "_" + self.strain + "_" + self.sex
        else:
            genus_species_strain_sex = self.genus.lower() + "_" + self.species

        # Replace placeholders in the compose file
        with open(stack_template_path, 'r') as infile:
            organism_content = [self._fill_placeholders(line, genus_species_strain_sex) for line in infile]
        with open("./docker-compose.yml", 'w') as outfile:
            outfile.writelines(organism_content)

        self._create_mounts(working_dir, quiet=True)  # Create mounts for the containers

        if os.path.isdir("../traefik"):
            logging.debug("Traefik directory already exists")
        os.makedirs("../traefik/authelia", exist_ok=True)
        # TODO: custom users (add a config file?)
        authelia_files = (
            (authelia_config_path, "../traefik/authelia/configuration.yml"),
            (authelia_users_path, "../traefik/authelia/users.yml"),
        )
        for template_path, target_path in authelia_files:
            # Files already in place are kept as they are
            if not os.path.exists(target_path):
                shutil.copy(template_path, target_path)

        # shutil.copy overwrites its destination: the traefik compose file is refreshed on every run
        shutil.copy(traefik_template_path, "../traefik/docker-compose.yml")

        self._create_mounts(working_dir, quiet=False)

    def get_source_data_files_from_path(self):
        """
        Link data files.

        Walks `source_data_dir` and symlinks the genome assembly, GFF,
        transcripts and proteins files of the species into the src_data tree
        of the species directory. The process is left inside the species
        directory.

        :raises SystemExit: when the species directory cannot be entered
        """
        try:
            os.chdir(self.species_dir)
        except (OSError, TypeError):
            # TypeError: species_dir is still None (make_directory_tree() was not run)
            logging.critical("Cannot access " + str(self.species_dir) + ", run with higher privileges")
            sys.exit(1)

        target_dirs = {
            # The annotation directory is named after the OGS version, as in make_directory_tree()
            "annotation": os.path.abspath(
                "./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version),
            "genome": os.path.abspath(
                "./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version),
        }
        # The first character of the species name is left out of the pattern
        # (presumably to tolerate a different case in the file names -- to be confirmed)
        name_pattern = "*" + self.species[1:] + "_" + self.sex.upper()

        for dirpath, _dirnames, files in os.walk(self.source_data_dir):
            # Ensures to take the correct files (other dirs hold files with the correct names, but
            # I don't know if they are the same), this is for Phaeoexplorer only
            if "0" not in str(dirpath):
                continue
            for f in files:
                if "Contaminants" in str(f):
                    continue
                for suffix, label, kind in self._SOURCE_FILE_KINDS:
                    if fnmatch.fnmatch(f, name_pattern + suffix):
                        logging.info(label + " - " + str(f))
                        self._link_source_file(os.path.join(dirpath, f),
                                               os.path.join(target_dirs[kind], f))
                        break

    def deploy_stack(self):
        """
        Call the script "deploy.sh" used to initialize the swarm cluster if needed and launch/update the stack
        """
        # Launch and update docker stacks (cf docs)
        subprocess.call(["sh", self.script_dir + "/deploy.sh", self.genus_species,
                         os.path.join(self.main_dir, "traefik")])

Arthur Le Bars
committed
def filter_empty_not_empty_items(li):
    """
    Split the items of an iterable into empty and non-empty ones.

    An item is empty when it is None or equal to the empty string.

    :param li: iterable of items to sort out
    :return: dictionary with the keys "empty" and "not_empty", each holding
        the list of matching items in their original order
    """
    buckets = {"empty": [], "not_empty": []}
    for entry in li:
        is_blank = entry is None or entry == ""
        buckets["empty" if is_blank else "not_empty"].append(entry)
    return buckets

Arthur Le Bars
committed
if __name__ == "__main__":
    # Command line entry point: generate the docker-compose files of every species of the input file
    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
                                                 "with galaxy instances for GGA"
                                                 ", following the protocol @ "
                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")

    parser.add_argument("input",
                        type=str,
                        help="Input file (yml)")
    # store_true: verbosity is only increased when the flag is given
    parser.add_argument("-v", "--verbose",
                        help="Increase output verbosity",
                        action="store_true")
    # The main directory is read below, so the option has to exist
    parser.add_argument("--dir",
                        type=str,
                        default=os.getcwd(),
                        help="Path of the main directory, either absolute or relative, "
                             "defaults to the current directory")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    logging.info("Deploy stacks: start")
    sp_dict_list = parse_input(args.input)
    for sp_dict in sp_dict_list:
        o = DeploySpeciesStack(parameters_dictionary=sp_dict)
        o.main_dir = os.path.abspath(args.dir)
        species_label = o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex

        # NOTE: only the compose files step is enabled; the other steps are kept disabled
        # o.make_directory_tree()
        # logging.info("Successfully generated the directory tree for " + species_label)
        o.make_compose_files()
        logging.info("Successfully generated the docker-compose files for " + species_label)
        # o.get_source_data_files_from_path()
        # logging.info("Successfully retrieved source data files for " + species_label)
        # o.deploy_stack()
        # logging.info("Successfully deployed containers stack for " + species_label)
    logging.info("Deploy stacks: done")