# species_data.py
# -*- coding: utf-8 -*-

import logging
import os
import sys
from datetime import datetime
def clean_string(string):
    """
    Normalise a label so that it can be used in identifiers and file names

    Leading/trailing whitespace is stripped, then spaces, "-", "." and "#" are each
    replaced by "_", and "(", ")" and "'" are removed.

    :param string: the text to clean; None and "" are accepted
    :return: the cleaned text, or the input unchanged when it is None or ""
    """
    if string is None or string == "":
        return string

    # One pass over the text: every character is mapped independently, which gives the
    # same result as chaining one single-character replace() call per character
    translation_table = str.maketrans(" -.#", "____", "()'")
    return string.strip().translate(translation_table)

def set_service_attribute(service, parameters_dictionary):
    """
    Look up the value configured for a service in the organism parameters

    :param service: key of the service inside the services section
    :param parameters_dictionary: organism parameters; the services section is read
        under constants.ORG_PARAM_SERVICES
    :return: the configured value, or "0" when the service is absent, None or ""
    """
    unset_value = "0"
    configured_services = parameters_dictionary[constants.ORG_PARAM_SERVICES]

    # Service not listed at all
    if service not in configured_services:
        return unset_value

    # Service listed but left without a usable value
    configured_value = configured_services[service]
    if configured_value is None or configured_value == "":
        return unset_value

    return configured_value

class SpeciesData:
    """
    This class contains attributes and functions to interact with the galaxy container of the GGA environment
    Parent class of LoadData, GetData, DeploySpeciesStack, GgaPreprocess and RunWorkflow
    """

    def get_species_dir(self):
        """
        Build the path of the species directory inside the main directory

        :return: the main dir joined with genus_species when the main dir is an existing
            directory and genus_species is set; None otherwise (an error is logged)
        """

        species_dir = None
        # main_dir is initialised to None in __init__ and os.path.isdir(None) raises
        # TypeError, so the None case has to be ruled out before testing the directory
        if (self.main_dir is not None
                and os.path.isdir(self.main_dir)
                and self.genus_species is not None):
            species_dir = os.path.join(self.main_dir, self.genus_species)
        else:
            logging.error("Cannot set species dir with '{0}/{1}'".format(self.main_dir, self.genus_species))
        return species_dir

    def goto_species_dir(self):
        """
        Go to the species directory (starting from the main dir)

        Logs a critical message and exits the program when the directory cannot be entered

        :return: 1 once the current working directory has been changed
        """

        species_dir = self.get_species_dir()
        try:
            if species_dir is None:
                # get_species_dir() could not build a path; os.chdir(None) would raise a
                # TypeError that escapes the handler below, so take the failure path explicitly
                raise FileNotFoundError("species directory is not set")
            os.chdir(species_dir)
        except OSError:
            logging.critical("Cannot access %s", species_dir)
            # NOTE(review): exit status 0 is kept for backward compatibility although this
            # is a failure path - confirm whether callers rely on it before changing it
            sys.exit(0)
        return 1

    def __init__(self, parameters_dictionary):
        """
        Set the attributes describing one organism from its parameters

        :param parameters_dictionary: organism parameters, read through the
            constants.ORG_PARAM_* keys (name, description, data and services sections)
        """
        self.parameters_dictionary = parameters_dictionary
        self.name = parameters_dictionary[constants.ORG_PARAM_NAME]
        parameters_dictionary_description = parameters_dictionary[constants.ORG_PARAM_DESC]
        parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA]

        # Description of the organism
        self.species = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
        self.genus = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
        # Strain and sex are optional: a missing value is normalised to an empty string
        self.strain = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
        if self.strain is None:
            self.strain = ""
        self.sex = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
        if self.sex is None:
            self.sex = ""
        self.common_name = parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]
        self.database = parameters_dictionary_description[constants.ORG_PARAM_DESC_DATABASE]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN]

        # Versions default to "1.0" when left unset. None is treated like "" so that an
        # unset value cannot end up as the literal version "None" in the file names below
        genome_version = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION]
        if genome_version is None or genome_version == "":
            self.genome_version = "1.0"
        else:
            self.genome_version = str(genome_version)

        ogs_version = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION]
        if ogs_version is None or ogs_version == "":
            self.ogs_version = "1.0"
        else:
            self.ogs_version = str(ogs_version)

        # TODO: catch blocks if key is absent in input
        self.genome_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_PATH]
        self.contig_prefix = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_CONTIG_PREFIX]
        self.genome_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_PROGRAM]
        self.genome_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_SOURCENAME]
        self.genome_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_DATE]
        self.ogs_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_PROGRAM]
        self.ogs_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_SOURCENAME]
        self.ogs_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_DATE]
        self.transcripts_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH]
        self.proteins_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_PROTEINS_PATH]
        self.gff_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GFF_PATH]
        self.interpro_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_INTERPRO_PATH]
        self.blastp_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTP_PATH]
        self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH]
        self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH]

        # Optional services
        self.blast = set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary)
        self.go = set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary)

        # Case variants of the description attributes
        self.genus_lowercase = self.genus.lower()
        self.species_lowercase = self.species.lower()
        # strain and sex were normalised to strings above, so lower() cannot fail here
        self.strain_lowercase = self.strain.lower()
        self.sex_lowercase = self.sex.lower()
        self.genus_uppercase = self.genus[0].upper() + self.genus_lowercase[1:]

        # Names derived from the description; only the "not_empty" items returned by
        # utilities.filter_empty_not_empty_items() are joined
        self.genus_species = "{0}_{1}".format(self.genus_lowercase, self.species_lowercase)
        self.strain_sex = '_'.join(utilities.filter_empty_not_empty_items([self.strain_lowercase, self.sex_lowercase])["not_empty"])

        self.full_name = ' '.join(utilities.filter_empty_not_empty_items([self.genus_uppercase, self.species_lowercase, self.strain, self.sex])["not_empty"])
        self.full_name_lowercase = self.full_name.lower()

        self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items(
            [self.genus_lowercase,
             self.species_lowercase,
             self.strain_lowercase,
             self.sex_lowercase])["not_empty"])

        self.dataset_prefix = self.strain_sex
        # If both strain and sex are empty attributes, change the dataset prefix to nothing instead of "_"
        if self.dataset_prefix == "_":
            self.dataset_prefix = ""

        # Names of the dataset files
        self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
        self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE.replace("-",""))
        self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS)
        self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS)
        self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO)
        self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP)
        self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX)
        self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER)

        # Directory of the script being run (taken from sys.argv[0])
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.config = None  # Custom config used to set environment variables inside containers

        # # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
        self.instance_url = None
        self.instance = None
        self.history_id = None
        self.history_name = str(self.genus_species)