Newer
Older

Arthur Le Bars
committed
#!/usr/bin/env python3

Arthur Le Bars
committed
import sys
import utilities
import logging

Loraine Gueguen
committed
import constants

Arthur Le Bars
committed
from _datetime import datetime
def clean_string(string):
    """
    Normalize a string so it can be used in identifiers and file names.

    Spaces and hyphens are turned into underscores; parentheses are removed.

    :param string: text to clean; may be None or empty
    :return: the cleaned text, or the input unchanged when it is None or ""
    """
    if string is None or string == "":
        return string
    # NOTE(review): the previous chain ended with a dangling line continuation,
    # so a final replacement step may have been lost - confirm against history.
    return (
        string
        .replace(" ", "_")
        .replace("-", "_")
        .replace("(", "")
        .replace(")", "")
    )
def set_service_attribute(service, parameters_dictionary):
    """
    Look up the configured value of a service in the organism parameters.

    :param service: key of the service inside the services section
    :param parameters_dictionary: organism parameters; its constants.ORG_PARAM_SERVICES entry is read
    :return: the configured value, or "0" when the service is absent, None or ""
    """
    configured_services = parameters_dictionary[constants.ORG_PARAM_SERVICES]
    configured_value = configured_services.get(service)
    # A missing key, None and the empty string all fall back to the "0" default
    if configured_value is None or configured_value == "":
        return "0"
    return configured_value
class SpeciesData:
"""
This class contains attributes and functions to interact with the Galaxy container of the GGA environment.
Parent class of LoadData, GetData, DeploySpeciesStack, GgaPreprocess and RunWorkflow.
"""
def get_species_dir(self):
    """
    Build the path of the species directory inside the main directory.

    :return: main_dir joined with genus_species, or None (after logging an error)
        when main_dir is None or not an existing directory, or genus_species is None
    """
    main_dir = self.main_dir
    genus_species = self.genus_species
    # Test for None before calling os.path.isdir: isdir(None) raises TypeError
    # instead of returning False, which would bypass the error log below
    if main_dir is not None and genus_species is not None and os.path.isdir(main_dir):
        return os.path.join(main_dir, genus_species)
    logging.error("Cannot set species dir with '{0}/{1}'".format(main_dir, genus_species))
    return None
def goto_species_dir(self):
    """
    Go to the species directory (starting from the main dir)

    Logs a critical message and exits the program with status 1 when the
    directory cannot be entered.

    :return: 1 on success
    """
    species_dir = self.get_species_dir()
    try:
        os.chdir(species_dir)
    except (OSError, TypeError):
        # TypeError is raised by os.chdir when species_dir is None
        logging.critical("Cannot access %s" % species_dir)
        # Exit with a non-zero status so the failure is visible to the caller
        sys.exit(1)
    return 1
def __init__(self, parameters_dictionary):
self.parameters_dictionary = parameters_dictionary

Loraine Gueguen
committed
self.name = parameters_dictionary[constants.ORG_PARAM_NAME]
parameters_dictionary_description=parameters_dictionary[constants.ORG_PARAM_DESC]
parameters_dictionary_data = parameters_dictionary[constants.ORG_PARAM_DATA]
self.species = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SPECIES])
self.genus = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_GENUS])
self.strain = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_STRAIN])
self.sex = clean_string(parameters_dictionary_description[constants.ORG_PARAM_DESC_SEX])
self.common_name = parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]
self.common_name = parameters_dictionary_description[constants.ORG_PARAM_DESC_COMMON_NAME]
self.database = parameters_dictionary_description[constants.ORG_PARAM_DESC_DATABASE]

Arthur Le Bars
committed
self.date = datetime.today().strftime("%Y-%m-%d")

Loraine Gueguen
committed
self.origin = parameters_dictionary_description[constants.ORG_PARAM_DESC_ORIGIN]

Arthur Le Bars
committed

Loraine Gueguen
committed
if parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION] == "":

Loraine Gueguen
committed
self.genome_version = str(parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_VERSION])

Loraine Gueguen
committed
if parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION] == "":

Loraine Gueguen
committed
self.ogs_version = str(parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_VERSION])

Arthur Le Bars
committed
# TODO: catch blocks if key is absent in input

Loraine Gueguen
committed
self.genome_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_PATH]
self.contig_prefix = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_CONTIG_PREFIX]
Loraine Guéguen
committed
self.genome_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_PROGRAM]
self.genome_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_SOURCENAME]
self.genome_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_GENOME_ANALYSIS_DATE]
self.ogs_analysis_program = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_PROGRAM]
self.ogs_analysis_sourcename = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_SOURCENAME]
self.ogs_analysis_date = parameters_dictionary_data[constants.ORG_PARAM_DATA_OGS_ANALYSIS_DATE]

Loraine Gueguen
committed
self.transcripts_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_TRANSCRIPTS_PATH]
self.proteins_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_PROTEINS_PATH]
self.gff_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_GFF_PATH]
self.interpro_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_INTERPRO_PATH]
self.blastp_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTP_PATH]
self.blastx_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_BLASTX_PATH]
self.orthofinder_path = parameters_dictionary_data[constants.ORG_PARAM_DATA_ORTHOFINDER_PATH]

Arthur Le Bars
committed
self.blast = set_service_attribute(constants.ORG_PARAM_SERVICES_BLAST, parameters_dictionary)
self.go = set_service_attribute(constants.ORG_PARAM_SERVICES_GO, parameters_dictionary)

Loraine Gueguen
committed
try:
self.strain_lowercase = self.strain.lower()
except AttributeError as exc:
self.strain_lowercase = None
try:
self.sex_lowercase = self.sex.lower()
except AttributeError as exc:
self.sex_lowercase = None

Loraine Gueguen
committed
self.genus_uppercase = self.genus[0].upper() + self.genus_lowercase[1:]
self.genus_species = "{0}_{1}".format(self.genus_lowercase, self.species_lowercase)
self.strain_sex = '_'.join(utilities.filter_empty_not_empty_items([self.strain_lowercase, self.sex_lowercase])["not_empty"])

Loraine Gueguen
committed
self.full_name = ' '.join(utilities.filter_empty_not_empty_items([self.genus_uppercase, self.species_lowercase, self.strain, self.sex])["not_empty"])

Arthur Le Bars
committed
self.full_name_lowercase = self.full_name.lower()
self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items(
[self.genus_lowercase,
self.species_lowercase,
self.strain_lowercase,
self.sex_lowercase])["not_empty"])
self.dataset_prefix = self.strain_sex
# If both strain and sex are empty attributes, change the dataset prefix to nothing instead of "_"
if self.dataset_prefix == "_":
self.dataset_prefix = ""
self.genome_filename = "{0}_v{1}.fasta".format(self.dataset_prefix, self.genome_version)

Loraine Gueguen
committed
self.gff_filename = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, constants.DATA_DATE.replace("-",""))
self.transcripts_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_TRANSCRIPTS)
self.proteins_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_PROTEINS)
self.interpro_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_INTERPRO)
self.blastp_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTP)
self.blastx_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_BLASTX)
self.orthofinder_filename = "{0}_OGS{1}_{2}".format(self.dataset_prefix, self.ogs_version, constants.FILENAME_SUFFIX_ORTHOFINDER)
self.script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
self.main_dir = None
self.species_dir = None
self.config = None # Custom config used to set environment variables inside containers
# # Bioblend/Chado IDs for an organism analyses/organisms/datasets/history/library
self.instance_url = None
self.instance = None
self.history_id = None