#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
import utilities
import logging
import constants

# Use the public module: `_datetime` is a private C accelerator of CPython.
from datetime import datetime

# Single-pass translation table used by clean_string():
# separators become "_", parentheses and single quotes are dropped.
_CLEAN_STRING_TABLE = str.maketrans({
    " ": "_",
    "-": "_",
    ".": "_",
    "#": "_",
    "(": None,
    ")": None,
    "'": None,
})


def clean_string(string):
    """
    Normalise a string so it can be used in identifiers and file names

    Leading/trailing whitespace is stripped, then spaces, "-", "." and "#" are
    replaced by "_" while "(", ")" and "'" are removed.

    :param string: the string to clean (may be None or empty)
    :return: the cleaned string, or the input unchanged when it is None or ""
    """

    if string is None or string == "":
        return string
    return string.strip().translate(_CLEAN_STRING_TABLE)


def set_service_attribute(service, parameters_dictionary):
    """
    Read the value configured for a service in the organism parameters

    :param service: key of the service inside the services sub-dictionary
    :param parameters_dictionary: organism parameters; must contain the
        constants.ORG_PARAM_SERVICES entry
    :return: the configured value, or "0" when the service is absent, None or ""
    """

    parameters_dictionary_services = parameters_dictionary[constants.ORG_PARAM_SERVICES]
    service_value = parameters_dictionary_services.get(service)
    if service_value is None or service_value == "":
        return "0"
    return service_value


def _version_or_default(version, default="1.0"):
    """
    Return a version as a string, falling back to a default when it is unset

    None is treated like "" so that an empty input value does not end up as
    the literal version string "None".

    :param version: raw version value read from the input (any type)
    :param default: value returned when version is None or ""
    :return: str
    """

    if version is None or version == "":
        return default
    return str(version)


class SpeciesData:
    """
    This class contains attributes and functions to interact with the galaxy container of the GGA environment

    Parent class of LoadData, GetData, DeploySpeciesStack, GgaPreprocess and RunWorkflow
    """

    def get_species_dir(self):
        """
        Build the path of the species directory ("<main_dir>/<genus_species>")

        An error is logged when main_dir is unset, is not an existing
        directory, or when genus_species is None.

        :return: the species directory path, or None if it cannot be built
        """

        species_dir = None
        # main_dir is initialised to None in __init__ and os.path.isdir(None)
        # raises TypeError, so check it explicitly first.
        if (self.main_dir is not None
                and os.path.isdir(self.main_dir)
                and self.genus_species is not None):
            species_dir = os.path.join(self.main_dir, self.genus_species)
        else:
            logging.error("Cannot set species dir with '{0}/{1}'".format(self.main_dir, self.genus_species))
        return species_dir

    def goto_species_dir(self):
        """
        Go to the species directory (starting from the main dir)

        Exits the program with status 1 when the directory cannot be accessed.

        :return: 1 on success
        """

        species_dir = self.get_species_dir()
        try:
            os.chdir(species_dir)
        except (OSError, TypeError):
            # TypeError covers species_dir being None (get_species_dir failed).
            logging.critical("Cannot access %s" % species_dir)
            # Non-zero status: this is a failure and must not look like
            # a successful run to the calling shell.
            sys.exit(1)
        return 1

    def __init__(self, parameters_dictionary):
        """
        Populate the species attributes from the organism parameters

        :param parameters_dictionary: organism parameters; the name,
            description, data and services entries (see the constants module
            for the keys) are read, and a missing key raises KeyError
        """

        self.parameters_dictionary = parameters_dictionary
        self.name = parameters_dictionary[constants.ORG_PARAM_NAME]
        parameters_dictionary_description = parameters_dictionary[constants.ORG_PARAM_DESC]
        parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA]

        self.species = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
        self.genus = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
        # Strain and sex are optional: normalise a missing value to ""
        self.strain = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
        if self.strain is None:
            self.strain = ""
        self.sex = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
        if self.sex is None:
            self.sex = ""
        self.common_name = parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]
        self.database = parameters_dictionary_description[constants.ORG_PARAM_DESC_DATABASE]
        self.date = datetime.today().strftime("%Y-%m-%d")
        self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN]

        # Unset versions default to "1.0"
        self.genome_version = _version_or_default(
            parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION])
        self.ogs_version = _version_or_default(
            parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION])

        # TODO: catch blocks if key is absent in input
        self.genome_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_PATH]
        self.contig_prefix = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_CONTIG_PREFIX]
        self.genome_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_PROGRAM]
        self.genome_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_SOURCENAME]
        self.genome_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_DATE]
        self.ogs_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_PROGRAM]
        self.ogs_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_SOURCENAME]
        self.ogs_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_DATE]
        self.transcripts_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH]
        self.proteins_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_PROTEINS_PATH]
        self.gff_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GFF_PATH]
        self.interpro_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_INTERPRO_PATH]
        self.blastp_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTP_PATH]
        self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH]
        self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH]

        self.blast = set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary)
        self.go = set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary)

        self.genus_lowercase = self.genus.lower()
        self.species_lowercase = self.species.lower()
        # strain and sex are always strings here (None was replaced by ""
        # above), so lower() needs no AttributeError guard.
        self.strain_lowercase = self.strain.lower()
        self.sex_lowercase = self.sex.lower()

        self.genus_uppercase = self.genus[0].upper() + self.genus_lowercase[1:]
        self.genus_species = "{0}_{1}".format(self.genus_lowercase, self.species_lowercase)
        # Only the "not_empty" items are joined, so empty strain/sex values
        # do not leave stray separators.
        self.strain_sex = '_'.join(utilities.filter_empty_not_empty_items(
            [self.strain_lowercase, self.sex_lowercase])["not_empty"])
        self.full_name = ' '.join(utilities.filter_empty_not_empty_items(
            [self.genus_uppercase, self.species_lowercase, self.strain, self.sex])["not_empty"])
        self.full_name_lowercase = self.full_name.lower()
        self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items(
            [self.genus_lowercase, self.species_lowercase, self.strain_lowercase, self.sex_lowercase])["not_empty"])

        self.dataset_prefix = self.strain_sex
        # If both strain and sex are empty attributes, change the dataset prefix to nothing instead of "_"
        if self.dataset_prefix == "_":
            self.dataset_prefix = ""

        self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)
        self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE.replace("-",""))
        self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS)
        self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS)
        self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO)
        self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP)
        self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX)
        self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER)

        # Directory of the script being run (resolved from sys.argv[0])
        self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
        self.main_dir = None
        self.species_dir = None
        self.config = None  # Custom config used to set environment variables inside containers

        # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
        self.instance_url = None
        self.instance = None
        self.history_id = None
        self.history_name = str(self.genus_species)