#!/usr/bin/python
# -*- coding: utf-8 -*-
import argparse
import fnmatch
import logging
import os
import shutil
import subprocess
import sys
from datetime import datetime

import yaml

from gga_autoload.gga_load_data import table_parser

""" 
deploy_stacks.py

Usage: $ python3 deploy_stacks.py example.yml [OPTIONS]
"""
def parse_input(input_file):
    """
    Parse the yml input file to extract data to create the SpeciesData objects
    Return a list of dictionaries. Each dictionary contains data tied to a species

    The process exits with status 1 when the file name does not end with
    "yml"/"yaml", when the YAML cannot be parsed, or when its top level is not
    a mapping. An empty file yields an empty list.

    :param input_file: path to the YAML input file
    :return: list of the top-level values of the YAML mapping, in file order
    """
    if str(input_file).endswith(("yml", "yaml")):
        logging.debug("Input format used: YAML")
    else:
        logging.critical("Error, please input a YAML file")
        sys.exit(1)

    with open(input_file, 'r') as stream:
        try:
            yaml_dict = yaml.safe_load(stream)
        except yaml.YAMLError as yaml_error:
            # Lazy %-formatting: concatenating the exception with a str raises TypeError
            logging.critical("%s (YAML input file might be incorrect)", yaml_error)
            sys.exit(1)

    # safe_load returns None for an empty document
    if yaml_dict is None:
        logging.warning("Input file %s is empty, no species to process", input_file)
        return []
    if not isinstance(yaml_dict, dict):
        logging.critical("Error, the YAML input file must contain a mapping of species")
        sys.exit(1)

    return list(yaml_dict.values())


    Deploy a stack of services for a given species
    def __init__(self, parameters_dictionary):
        self.parameters_dictionary = parameters_dictionary
        self.species = parameters_dictionary["description"]["species"]
        self.genus = parameters_dictionary["description"]["genus"]
        self.strain = parameters_dictionary["description"]["strain"]
        self.sex = parameters_dictionary["description"]["sex"]
        self.common = parameters_dictionary["description"]["common_name"]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = parameters_dictionary["description"]["origin"]
        self.performed = parameters_dictionary["data"]["performed_by"]
        if parameters_dictionary["data"]["genome_version"] == "":
            self.genome_version = "1.0"
        else:
            self.genome_version = parameters_dictionary["data"]["genome_version"]
        if parameters_dictionary["data"]["ogs_version"] == "":
            self.ogs_version = "1.0"
        else:
            self.ogs_version = parameters_dictionary["data"]["ogs_version"]
        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
        self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
        self.full_name = " ".join([self.genus_uppercase, self.species, self.strain, self.sex])
        self.abbreviation = " ".join([self.genus_lowercase[0], self.species, self.strain, self.sex])
        self.genus_species = self.genus_lowercase + "_" + self.species
        self.instance_url = "http://scratchgmodv1:8888/sp/" + self.genus_lowercase + "_" + self.species + "/galaxy/"
        # Testing with localhost/scratchgmodv1
Arthur Le Bars's avatar
Arthur Le Bars committed
        self.instance = None
        self.history_id = None
        self.library_id = None
Arthur Le Bars's avatar
Arthur Le Bars committed
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.org_id = None
        self.genome_analysis_id = None
        self.ogs_analysis_id = None
        self.tool_panel = None
        self.datasets = dict()
        self.source_files = dict()
        self.workflow_name = None
Arthur Le Bars's avatar
Arthur Le Bars committed
        self.metadata = dict()
        self.api_key = "master"  # TODO: set the key in config file --> saved for later (master api key access actions are limited)
        if parameters_dictionary["data"]["parent_directory"] == "" or parameters_dictionary["data"]["parent_directory"] == "/path/to/closest/parent/dir":
            self.source_data_dir = "/projet/sbr/phaeoexplorer/"  # Testing path for phaeoexplorer data
        else:
            self.source_data_dir = parameters_dictionary["data"]["parent_directory"]
        # Directory/subdirectories where data files are located (fasta, gff, ...)
Arthur Le Bars's avatar
Arthur Le Bars committed
        self.do_update = False
        # Update the instance (in histories corresponding to the input) instead of creating a new one
        self.api_key = "master"
        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
        self.species_name_regex_litteral = "(?=\w*V)(?=\w*A)(?=\w*R)(?=\w*I)(?=\w*A)(?=\w*B)(?=\w*L)(?=\w*E)\w+"  # Placeholder re


    def make_directory_tree(self):
        Generate the directory tree for an organism and move datasets into src_data
Arthur Le Bars's avatar
Arthur Le Bars committed

Arthur Le Bars's avatar
Arthur Le Bars committed
        self.main_dir = os.getcwd() + "/"
        self.species_dir = os.path.join(self.main_dir, self.genus_species) + "/"
        try:
            os.mkdir(self.species_dir)
        except FileExistsError:
            logging.debug("Directory " + self.species_dir + " already exists")
        try:
            os.chdir(self.species_dir)
            working_dir = os.getcwd()
        except OSError:
            logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
Arthur Le Bars's avatar
Arthur Le Bars committed
        try:
            os.mkdir("./nginx/")
            os.mkdir("./nginx/conf")
            with open(os.path.abspath("./nginx/conf/default.conf"), 'w') as conf:
                conf.write("server {\n\tlisten 80;\n\tserver_name ~.;\n\tlocation /download/ {\n\t\talias /project_data/; \n\t\tautoindex on;\n\t}\n}")  # The species nginx conf
Arthur Le Bars's avatar
Arthur Le Bars committed
        except FileExistsError:
            logging.debug("NginX conf exists")
Arthur Le Bars's avatar
Arthur Le Bars committed

        # src_data_folders = ["annotation", "genome"]  # The directories to generate
        not_empty_attributes = filter_empty_not_empty_items([self.genus_lowercase, self.species, self.strain, self.sex])["not_empty"]
        self.species_folder_name = "_".join(not_empty_attributes)
        # self.species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
Arthur Le Bars's avatar
Arthur Le Bars committed
        organism_annotation_dir, organism_genome_dir = None, None

        # Creation (or updating) of the src_data directory tree
        # Depth 0-1
Arthur Le Bars's avatar
Arthur Le Bars committed
            os.mkdir("./src_data/annotation")
            os.mkdir("./src_data/genome")
            os.mkdir("./src_data/tracks")
        except FileExistsError:
            if self.do_update:
                logging.info("Updating src_data directory tree")
            else:
                logging.debug("The src_data directory tree already exists")
        except PermissionError:
            logging.critical("Insufficient permission to create src_data directory tree")
            sys.exit()

        # Depth 2
        try:
            os.mkdir("./src_data/annotation/" + self.species_folder_name)
            os.mkdir("./src_data/genome/" + self.species_folder_name)
        except FileExistsError:
            if self.do_update:
                logging.info("Updating src_data directory tree")
            else:
                logging.debug("The src_data directory tree already exists")
        except PermissionError:
            logging.critical("Insufficient permission to create src_data directory tree")
            sys.exit()

        # Depth 3
        try:
            os.mkdir("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.ogs_version)
            os.mkdir("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
            organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
            organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
Arthur Le Bars's avatar
Arthur Le Bars committed
            if self.do_update:
Arthur Le Bars's avatar
Arthur Le Bars committed
                logging.info("Updating src_data directory tree")
Arthur Le Bars's avatar
Arthur Le Bars committed
            else:
                logging.debug("The src_data directory tree already exists")
            logging.critical("Insufficient permission to create src_data directory tree")
Arthur Le Bars's avatar
Arthur Le Bars committed
            sys.exit()

        # Path to the templates used to generate the custom docker-compose files for an input species
        stack_template_path = self.script_dir + "/templates/stack-organism.yml"
        traefik_template_path = self.script_dir + "/templates/traefik.yml"
        authelia_config_path = self.script_dir + "/templates/authelia_config.yml"
        authelia_users_path = self.script_dir + "/templates/authelia_users.yml"

        if self.sex and self.strain:
            genus_species_strain_sex = self.genus.lower() + "_" + self.species + "_" + self.strain + "_" + self.sex
        else:
            genus_species_strain_sex = self.genus.lower() + "_" + self.species
Arthur Le Bars's avatar
Arthur Le Bars committed

        with open(stack_template_path, 'r') as infile:
            organism_content = list()
            for line in infile:
                # Replace placeholders in the compose file
Arthur Le Bars's avatar
Arthur Le Bars committed
                organism_content.append(
                    line.replace("genus_species", str(self.genus.lower() + "_" + self.species)).replace("Genus species",
                                                                                                        str(
                                                                                                            self.genus_uppercase + " " + self.species)).replace(
                        "Genus/species", str(self.genus_uppercase + "/" + self.species)).replace("gspecies", str(
                        self.genus.lower()[0] + self.species)).replace("genus_species_strain_sex",
                                                                       genus_species_strain_sex))
Arthur Le Bars's avatar
Arthur Le Bars committed
            with open("./docker-compose.yml", 'w') as outfile:
                for line in organism_content:
                    outfile.write(line)
            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
Arthur Le Bars's avatar
Arthur Le Bars committed

        try:
            os.mkdir("../traefik")
            os.mkdir("../traefik/authelia")
            shutil.copy(authelia_config_path, "../traefik/authelia/configuration.yml")
            shutil.copy(authelia_users_path, "../traefik/authelia/users.yml")  # TODO: custom users (add a config file?)
            subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir,
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)  # Create mounts for the containers
Arthur Le Bars's avatar
Arthur Le Bars committed
        except FileExistsError:
            logging.debug("Traefik directory already exists")
        try:
            shutil.copy(traefik_template_path, "../traefik/docker-compose.yml")
        except FileExistsError:
            logging.debug("Traefik compose file already exists")
        subprocess.call(["python3", self.script_dir + "/create_mounts.py"], cwd=working_dir)
Arthur Le Bars's avatar
Arthur Le Bars committed


    def get_source_data_files_from_path(self):
        try:
            os.chdir(self.species_dir)
            working_dir = os.getcwd()
        except OSError:
            logging.critical("Cannot access " + self.species_dir + ", run with higher privileges")
            sys.exit()
        organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
        organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
Arthur Le Bars's avatar
Arthur Le Bars committed

        for dirpath, dirnames, files in os.walk(self.source_data_dir):
            if "0" in str(dirpath):  # Ensures to take the correct files (other dirs hold files with the correct names, but I don't know if they are the same), this is for Phaeoexplorer only
                for f in files:
                    if "Contaminants" not in str(f):
Arthur Le Bars's avatar
Arthur Le Bars committed
                        try:
                            if fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".fa"):
                                logging.info("Genome assembly file - " + str(f))
Arthur Le Bars's avatar
Arthur Le Bars committed
                                organism_genome_dir = organism_genome_dir + "/" + f
                                os.symlink(os.path.join(dirpath, f), organism_genome_dir)
                                organism_genome_dir = os.path.abspath("./src_data/genome/" + self.species_folder_name + "/v" + self.genome_version)
Arthur Le Bars's avatar
Arthur Le Bars committed
                            elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + ".gff"):
                                logging.info("GFF file - " + str(f))
Arthur Le Bars's avatar
Arthur Le Bars committed
                                organism_annotation_dir = organism_annotation_dir + "/" + f
                                os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
                                organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
Arthur Le Bars's avatar
Arthur Le Bars committed
                            elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + "_transcripts-gff.fa"):
                                logging.info("Transcripts file - " + str(f))
Arthur Le Bars's avatar
Arthur Le Bars committed
                                organism_annotation_dir = organism_annotation_dir + "/" + f
                                os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
                                organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
Arthur Le Bars's avatar
Arthur Le Bars committed
                            elif fnmatch.fnmatch(f, "*" + self.species[1:] + "_" + self.sex.upper() + "_proteins.fa"):
                                logging.info("Proteins file - " + str(f))
Arthur Le Bars's avatar
Arthur Le Bars committed
                                organism_annotation_dir = organism_annotation_dir + "/" + f
                                os.symlink(os.path.join(dirpath, f), organism_annotation_dir)
                                organism_annotation_dir = os.path.abspath("./src_data/annotation/" + self.species_folder_name + "/OGS" + self.genome_version)
                        except FileExistsError:
                            logging.warning("Error raised (FileExistsError)")
Arthur Le Bars's avatar
Arthur Le Bars committed
                        except TypeError:
                            logging.warning("Error raised (TypeError)")
                        except NotADirectoryError:
                            logging.warning("Error raised (NotADirectoryError)")
Arthur Le Bars's avatar
Arthur Le Bars committed

    def deploy_stack(self):
Arthur Le Bars's avatar
Arthur Le Bars committed
        """
        Call the script "deploy.sh" used to initiliaze the swarm cluster if needed and launch/update the stack
Arthur Le Bars's avatar
Arthur Le Bars committed

        :return:
        """
        # Launch and update docker stacks (cf docs)
        subprocess.call(["sh", self.script_dir + "/deploy.sh", self.genus_species, self.main_dir + "/traefik"])
def filter_empty_not_empty_items(li):
    """
    Sort the items of a list into empty and not empty ones

    An item is empty when it is None or equal to the empty string.

    :param li: iterable of items to sort
    :return: dictionary with the keys "empty" and "not_empty", each holding
        the matching items in their original order
    """
    buckets = {"empty": [], "not_empty": []}
    for item in li:
        bucket_name = "empty" if (item is None or item == "") else "not_empty"
        buckets[bucket_name].append(item)
    return buckets


    parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
                                                 "with galaxy instances for GGA"
                                                 ", following the protocol @ "
                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")

    parser.add_argument("input",
                        type=str,
                        help="Input file (yml)")

Arthur Le Bars's avatar
Arthur Le Bars committed
    parser.add_argument("-v", "--verbose",
                        help="Increase output verbosity",
                        action="store_false")

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    sp_dict_list = parse_input(args.input)
    for sp_dict in sp_dict_list:
        o = DeploySpeciesStack(parameters_dictionary=sp_dict)
        o.main_dir = os.path.abspath(args.dir)

        # dss.make_directory_tree()
        # logging.info("Successfully generated the directory tree for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

        dss.make_compose_files()
        logging.info("Successfully generated the directory tree for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

        # dss.get_source_data_files_from_path()
        # logging.info("Successfully retrieved source data files for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

        # dss.deploy_stack()
        # logging.info("Successfully deployed containers stack for " + o.genus[0].upper() + ". " + o.species + " " + o.strain + " " + o.sex)

    logging.info("Deploy stacks: done")