# speciesData.py
# -*- coding: utf-8 -*-

import os
import sys
from datetime import datetime
# NOTE(review): SpeciesData also calls the project-local `utilities` module
# (utilities.filter_empty_not_empty_items); no import for it is visible here - confirm it is imported.
class SpeciesData:
    """
    This class contains attributes and functions to interact with the galaxy container of the GGA environment
    Parent class of LoadData, GetData, DeploySpeciesStack, GgaPreprocess and RunWorkflow

    The constructor reads the "description" and "data" sections of the input dictionary and derives
    the different names (full name, abbreviation, folder name...) of the species from them.
    It relies on the module-level names ``datetime``, ``sys`` and ``utilities`` being in scope.
    """

    @staticmethod
    def _replace_parentheses(value):
        """Return ``value`` with every "(" and ")" replaced by an underscore."""
        return value.replace("(", "_").replace(")", "_")

    def __init__(self, parameters_dictionary):
        """
        Build the species attributes from the input dictionary.

        :param parameters_dictionary: dict with a "description" section (keys "species", "genus", "strain",
            "sex", "common_name", "origin") and a "data" section (keys "performed_by", "genome_version",
            "ogs_version", "genome_path", "transcripts_path", "proteins_path", "gff_path", "interpro_path",
            "blastp_path", "blastx_path", "orthofinder_path")
        :raises KeyError: if one of these sections or keys is absent
        :raises IndexError: if the genus is an empty string (its first character is read)
        """
        self.parameters_dictionary = parameters_dictionary
        description = parameters_dictionary["description"]
        data = parameters_dictionary["data"]

        # Parentheses are replaced by underscores in the fields used to build names
        self.species = self._replace_parentheses(description["species"])
        self.genus = self._replace_parentheses(description["genus"])
        self.strain = self._replace_parentheses(description["strain"])
        self.sex = self._replace_parentheses(description["sex"])
        self.common = self._replace_parentheses(description["common_name"])
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = description["origin"]
        self.performed = data["performed_by"]

        # An empty version string falls back to "1.0"
        self.genome_version = "1.0" if data["genome_version"] == "" else data["genome_version"]
        self.ogs_version = "1.0" if data["ogs_version"] == "" else data["ogs_version"]

        # TODO: catch blocks if key is absent in input
        self.genome_path = data["genome_path"]
        self.transcripts_path = data["transcripts_path"]
        self.proteins_path = data["proteins_path"]
        self.gff_path = data["gff_path"]
        self.interpro_path = data["interpro_path"]
        self.blastp_path = data["blastp_path"]
        self.blastx_path = data["blastx_path"]
        self.orthofinder_path = data["orthofinder_path"]

        # Only the first letter of the genus is changed, the rest is kept as given
        self.genus_lowercase = self.genus[0].lower() + self.genus[1:]
        self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
        self.chado_species_name = "{0} {1}".format(self.species, self.sex)

        # NOTE(review): filter_empty_not_empty_items presumably returns the non-empty items of the list
        # under the "not_empty" key - confirm against the utilities module
        full_name_items = utilities.filter_empty_not_empty_items(
            [self.genus_uppercase, self.species, self.strain, self.sex])["not_empty"]
        self.full_name = ' '.join(full_name_items)
        self.full_name = self.full_name.replace("__", "_").replace("_ ", "_").replace(" _", "_")
        # Remove trailing separators only. The previous slice [0:-2] dropped two characters,
        # which also removed the last real character of the name
        self.full_name = self.full_name.rstrip("_ ")

        self.full_name_lowercase = self.full_name.lower()
        abbreviation_items = utilities.filter_empty_not_empty_items(
            [self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"]
        self.abbreviation = "_".join(abbreviation_items)
        self.genus_species = self.genus_lowercase + "_" + self.species

        # Attributes below are only initialised here (None or empty containers)
        self.instance = None
        self.history_id = None
        self.library_id = None
        # Directory of the script that was launched (resolved from sys.argv[0])
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.org_id = None
        self.genome_analysis_id = None
        self.ogs_analysis_id = None
        self.tool_panel = None
        self.datasets = {}
        self.datasets_name = {}
        self.source_files = {}
        self.workflow_name = None
        self.metadata = {}
        self.api_key = None
        # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
        self.config = None
        # Custom config used to set environment variables inside containers, defaults to the one in the repo
        folder_name_items = utilities.filter_empty_not_empty_items(
            [self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), self.sex.lower()])["not_empty"]
        self.species_folder_name = "_".join(folder_name_items)
        self.existing_folders_cache = {}
        self.bam_metadata_cache = {}

        # NOTE: only species, genus, strain, sex and common name are sanitized (parentheses replaced);
        # the other str attributes are stored as given in the input