#!/usr/bin/python
# -*- coding: utf-8 -*-

import argparse
import os
import subprocess
import logging
import sys
import yaml
import re
from gga_autoload.gga_load_data import table_parser
import fnmatch
import shutil
from datetime import datetime

"""
deploy_stacks.py

Usage: $ python3 deploy_stacks.py example.yml [OPTIONS]
"""


def parse_input(input_file):
    """
    Parse the yml input file to extract data to create the SpeciesData objects
    Return a list of dictionaries. Each dictionary contains data tied to a species

    The top-level "config" entry of the file is not a species and is skipped.
    The process exits with a non-zero status if the file name does not end with
    "yml"/"yaml" or if the content is not valid YAML.

    :param input_file: path to the YAML input file
    :return: list of the per-species dictionaries (empty for an empty file)
    """
    if not str(input_file).endswith(("yml", "yaml")):
        logging.critical("Error, please input a YAML file")
        sys.exit(1)
    logging.debug("Input format used: YAML")

    with open(input_file, 'r') as stream:
        try:
            yaml_dict = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # The exception has to be converted explicitly: exception + str raises TypeError
            logging.critical(str(exc) + " (YAML input file might be incorrect)")
            sys.exit(1)

    if not yaml_dict:
        # safe_load returns None for an empty document
        return []
    return [value for key, value in yaml_dict.items() if key != "config"]


def _version_or_default(value, default="1.0"):
    """
    Return a version read from the input file as a string

    :param value: raw value from the input file (may be "", None or a number)
    :param default: version used when the value is empty or missing
    :return: the version as a string
    """
    if value is None or value == "":
        return default
    return str(value)


def _ensure_directories(paths):
    """
    Create every directory of "paths" that does not exist yet

    Unlike a sequence of os.mkdir calls in a single try block, an already existing
    directory does not prevent the creation of the following ones.

    :param paths: iterable of directory paths, parents listed before their children
    :return: True if at least one of the paths already existed
    """
    already_present = False
    for path in paths:
        try:
            os.mkdir(path)
        except FileExistsError:
            already_present = True
    return already_present


def _symlink_source_file(source, destination):
    """
    Create the symlink "destination" pointing to "source", logging a warning instead of failing
    when the link cannot be created because of an existing file or an invalid path component

    :param source: path of the data file to link
    :param destination: path of the symlink to create
    :return:
    """
    try:
        os.symlink(source, destination)
    except FileExistsError:
        logging.warning("Error raised (FileExistsError)")
    except TypeError:
        logging.warning("Error raised (TypeError)")
    except NotADirectoryError:
        logging.warning("Error raised (NotADirectoryError)")


class DeploySpeciesStack:
    """
    Deploy a stack of services for a given species
    """

    def __init__(self, parameters_dictionary):
        """
        Read the description and data of one species from its input dictionary

        :param parameters_dictionary: dictionary of one species, with the "description" and "data" sections
        """
        self.parameters_dictionary = parameters_dictionary
        description = parameters_dictionary["description"]
        data = parameters_dictionary["data"]

        self.species = description["species"]
        self.genus = description["genus"]
        self.strain = description["strain"]
        self.sex = description["sex"]
        self.common = description["common_name"]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = description["origin"]
        self.performed = data["performed_by"]
        self.genome_version = _version_or_default(data["genome_version"])
        self.ogs_version = _version_or_default(data["ogs_version"])

        # Slices instead of [0] so that an empty genus does not raise IndexError
        self.genus_lowercase = self.genus[:1].lower() + self.genus[1:]
        self.genus_uppercase = self.genus[:1].upper() + self.genus[1:]
        # Empty strain/sex are left out, exactly as make_directory_tree does
        self.species_folder_name = "_".join(
            filter_empty_not_empty_items([self.genus_lowercase, self.species, self.strain, self.sex])["not_empty"])
        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
        self.abbreviation = " ".join([self.genus_lowercase[:1], self.species, self.strain, self.sex])
        self.genus_species = self.genus_lowercase + "_" + self.species
        # Testing with localhost/scratchgmodv1
        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_species + "/galaxy/"

        self.instance = None
        self.history_id = None
        self.library_id = None
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.org_id = None
        self.genome_analysis_id = None
        self.ogs_analysis_id = None
        self.tool_panel = None
        self.datasets = dict()
        self.source_files = dict()
        self.workflow_name = None
        self.metadata = dict()

        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
        # TODO: set the key in config file --> saved for later (master api key access actions are limited)
        self.api_key = "master"

        # Directory/subdirectories where data files are located (fasta, gff, ...)
        if data["parent_directory"] in ("", "/path/to/closest/parent/dir"):
            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
        else:
            self.source_data_dir = data["parent_directory"]

        # Update the instance (in histories corresponding to the input) instead of creating a new one
        self.do_update = False
        # Placeholder re (raw string: "\w" is not a valid escape sequence in a regular string literal)
        self.species_name_regex_litteral = r"(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"

    def make_directory_tree(self):
        """
        Generate the directory tree for an organism (species directory, nginx conf and src_data)

        Changes the current working directory to the species directory and sets
        self.main_dir, self.species_dir and self.species_folder_name.

        :return:
        """
        os.chdir(self.main_dir)
        self.main_dir = os.getcwd() + "/"
        self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"

        try:
            os.mkdir(self.species_dir)
        except FileExistsError:
            logging.debug("Directory " + self.species_dir + " already exists")

        try:
            os.chdir(self.species_dir)
        except OSError:
            logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
            sys.exit(1)

        # The species nginx conf: an existing conf file is kept as it is
        nginx_conf_path = os.path.abspath("./nginx/conf/default.conf")
        os.makedirs("./nginx/conf", exist_ok=True)
        if os.path.exists(nginx_conf_path):
            logging.debug("NginX conf exists")
        else:
            with open(nginx_conf_path, 'w') as conf:
                conf.write("server {\n\tlisten 80;\n\tserver_name ~.;\n\tlocation /download/ {\n\t\talias /project_data/; \n\t\tautoindex on;\n\t}\n}")

        not_empty_attributes = filter_empty_not_empty_items(
            [self.genus_lowercase, self.species, self.strain, self.sex])["not_empty"]
        self.species_folder_name = "_".join(not_empty_attributes)

        annotation_species_dir = "./src_data/annotation/" + self.species_folder_name
        genome_species_dir = "./src_data/genome/" + self.species_folder_name

        # Creation (or updating) of the src_data directory tree, one tuple per depth
        src_data_levels = (
            # Depth 0-1
            ("./src_data", "./src_data/annotation", "./src_data/genome", "./src_data/tracks"),
            # Depth 2
            (annotation_species_dir, genome_species_dir),
            # Depth 3
            (annotation_species_dir + "/OGS" + self.ogs_version,
             genome_species_dir + "/v" + self.genome_version),
        )
        for level in src_data_levels:
            try:
                already_present = _ensure_directories(level)
            except PermissionError:
                logging.critical("Insufficient permission to create src_data directory tree")
                sys.exit(1)
            if already_present:
                if self.do_update:
                    logging.info("Updating src_data directory tree")
                else:
                    logging.debug("The src_data directory tree already exists")

    def make_compose_files(self):
        """
        Generate the docker-compose file of the species from the stack template, set up the
        traefik/authelia directory next to the species directory and create the container mounts

        All the relative paths are resolved against the current working directory, which is
        the species directory once make_directory_tree has been run.

        :return:
        """
        # Path to the templates used to generate the custom docker-compose files for an input species
        stack_template_path = self.script_dir + "/templates/stack-organism.yml"
        traefik_template_path = self.script_dir + "/templates/traefik.yml"
        authelia_config_path = self.script_dir + "/templates/authelia_config.yml"
        authelia_users_path = self.script_dir + "/templates/authelia_users.yml"
        create_mounts_command = ["python3", self.script_dir + "/create_mounts.py"]

        working_dir = os.getcwd()

        genus_species = self.genus.lower() + "_" + self.species
        if self.sex and self.strain:
            genus_species_strain_sex = genus_species + "_" + self.strain + "_" + self.sex
        else:
            genus_species_strain_sex = genus_species

        # Placeholders of the template. "genus_species_strain_sex" has to be replaced before
        # "genus_species", which is a prefix of it and would otherwise consume it
        replacements = (
            ("genus_species_strain_sex", genus_species_strain_sex),
            ("genus_species", genus_species),
            ("Genus species", self.genus_uppercase + " " + self.species),
            ("Genus/species", self.genus_uppercase + "/" + self.species),
            ("gspecies", self.genus.lower()[:1] + self.species),
        )

        with open(stack_template_path, 'r') as infile:
            organism_content = infile.read()
        for placeholder, value in replacements:
            organism_content = organism_content.replace(placeholder, str(value))
        with open("./docker-compose.yml", 'w') as outfile:
            outfile.write(organism_content)

        # Create mounts for the containers
        subprocess.call(create_mounts_command, cwd=working_dir,
                        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        traefik_dir = "../traefik"
        authelia_dir = traefik_dir + "/authelia"
        if os.path.isdir(traefik_dir):
            logging.debug("Traefik directory already exists")
        os.makedirs(authelia_dir, exist_ok=True)
        # Existing authelia files are never overwritten
        # TODO: custom users (add a config file?)
        for template_path, file_name in ((authelia_config_path, "configuration.yml"),
                                         (authelia_users_path, "users.yml")):
            destination = authelia_dir + "/" + file_name
            if not os.path.exists(destination):
                shutil.copy(template_path, destination)

        # shutil.copy replaces an existing destination: the traefik compose file is always refreshed
        shutil.copy(traefik_template_path, traefik_dir + "/docker-compose.yml")

        subprocess.call(create_mounts_command, cwd=working_dir)

    def get_source_data_files_from_path(self):
        """
        Link data files

        Walk self.source_data_dir and symlink the genome assembly, gff, transcripts and proteins
        files of the species into the src_data tree of the species directory.

        :return:
        """
        try:
            os.chdir(self.species_dir)
        except OSError:
            logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
            sys.exit(1)

        # Same directories as the ones created by make_directory_tree
        organism_annotation_dir = os.path.abspath(
            "./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
        organism_genome_dir = os.path.abspath(
            "./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)

        # (file name pattern, log prefix, destination directory): the first matching rule is applied
        name_stem = "*" + self.species[1:] + "_" + self.sex.upper()
        link_rules = (
            (name_stem + ".fa", "Genome assembly file - ", organism_genome_dir),
            (name_stem + ".gff", "GFF file - ", organism_annotation_dir),
            (name_stem + "_transcripts-gff.fa", "Transcripts file - ", organism_annotation_dir),
            (name_stem + "_proteins.fa", "Proteins file - ", organism_annotation_dir),
        )

        for dirpath, dirnames, files in os.walk(self.source_data_dir):
            # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know
            # if they are the same), this is for Phaeoexplorer only
            if "0" not in str(dirpath):
                continue
            for f in files:
                if "Contaminants" in str(f):
                    continue
                for pattern, log_prefix, destination_dir in link_rules:
                    if fnmatch.fnmatch(f, pattern):
                        logging.info(log_prefix + str(f))
                        _symlink_source_file(os.path.join(dirpath, f), os.path.join(destination_dir, f))
                        break

    def deploy_stack(self):
        """
        Call the script "deploy.sh" used to initialize the swarm cluster if needed and launch/update the stack

        :return:
        """
        # Launch and update docker stacks (cf docs)
        subprocess.call(["sh", self.script_dir + "/deploy.sh", self.genus_species,
                         os.path.join(self.main_dir, "traefik")])


def filter_empty_not_empty_items(li):
    """
    Split the items of a list between the empty ones (None or "") and the other ones

    :param li: iterable of items
    :return: dictionary with the keys "empty" and "not_empty", each holding a list of items in input order
    """
    ret = {"empty": [], "not_empty": []}
    for i in li:
        if i is None or i == "":
            ret["empty"].append(i)
        else:
            ret["not_empty"].append(i)
    return ret


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
                                                 "with galaxy instances for GGA"
                                                 ", following the protocol @ "
                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")

    parser.add_argument("input",
                        type=str,
                        help="Input file (yml)")

    parser.add_argument("-v", "--verbose",
                        help="Increase output verbosity",
                        action="store_true")

    parser.add_argument("--dir",
                        type=str,
                        default=".",
                        help="Directory where the species stacks are located, defaults to the working directory")

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.info("Deploy stacks: start")
    sp_dict_list = parse_input(args.input)
    for sp_dict in sp_dict_list:
        dss = DeploySpeciesStack(parameters_dictionary=sp_dict)
        dss.main_dir = os.path.abspath(args.dir)
        species_label = dss.genus[:1].upper() + ". " + dss.species + " " + dss.strain + " " + dss.sex

        # NOTE: the steps below are disabled; make_compose_files works in the current working directory
        # dss.make_directory_tree()
        # logging.info("Successfully generated the directory tree for " + species_label)

        dss.make_compose_files()
        logging.info("Successfully generated the docker-compose files for " + species_label)

        # dss.get_source_data_files_from_path()
        # logging.info("Successfully retrieved source data files for " + species_label)

        # dss.deploy_stack()
        # logging.info("Successfully deployed containers stack for " + species_label)

    logging.info("Deploy stacks: done")