Commit 87f9fb47 authored by Arthur Le Bars

Merge branch 'dev' of http://gitlab.sb-roscoff.fr/abims/e-infra/gga_load_data into workflow_v2

parents 8154c570 28a1d904
2 merge requests: !11 Workflow v2, !9 Release 2.0 (merge dev to master)
# Input file for the automated creation of GGA docker stacks
# The file consists of a list of species for which the script will create these stacks, load data into Galaxy and run workflows
# This file is internally turned into a list of dictionaries by the scripts
citrus_sinensis: # Dummy key to designate the species (not used by the script)
- name: citrus_sinensis
description:
# Species description, leave blank if unknown or you don't want it to be used
# These parameters are used to set up the various URLs and addresses in the different containers
@@ -13,11 +12,8 @@ citrus_sinensis:
strain: ""
common_name: ""
origin: ""
# the sex and strain, the script will look for files containing the genus, species, sex and strain of the species)
# If no file corresponding to the description is found, this path will be considered empty and the script will
# proceed to the next step (create the directory tree for the GGA docker stack)
data:
# Sequence of paths to the different datasets to copy and import into the galaxy container (as a shared library)
# Paths to the different datasets to copy and import into the galaxy container (as a shared library)
# Must be absolute paths to the dataset
genome_path: "/path/to/repo/examples/src_data/genome/v1.0/Citrus_sinensis-scaffold00001.fasta" # Mandatory!
transcripts_path: "/path/to/repo/examples/src_data/annotation/v1.0/Citrus_sinensis-orange1.1g015632m.g.fasta" # Mandatory!
@@ -35,8 +31,8 @@ citrus_sinensis:
ogs_version: "1.0"
performed_by: ""
services:
# Describe what optional services to deploy for the stack
# By default, only tripal, tripaldb and galaxy services will be deployed
# List the optional services to be deployed in the stack
# By default, only tripal, tripaldb, galaxy, jbrowse and elasticsearch services will be deployed
blast: "False"
wiki: "False"
apollo: "False"
\ No newline at end of file
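As the header comments note, the script turns this file into a list of per-species dictionaries. A minimal sketch of that loading step, assuming PyYAML and an input laid out like the example above (the helper name load_species_entries is illustrative, not part of the repository):

import logging
import sys

import yaml  # PyYAML


def load_species_entries(input_file):
    # Parse the YAML input and return the per-species dictionaries as a flat list
    with open(input_file, "r") as stream:
        try:
            content = yaml.safe_load(stream)
        except yaml.YAMLError as err:
            logging.critical("Input file is not in YAML format")
            sys.exit(err)
    # Top-level keys such as "citrus_sinensis" are only labels; the values carry the data
    entries = []
    for label, value in content.items():
        entries.extend(value if isinstance(value, list) else [value])
    return entries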
@@ -91,6 +91,12 @@ class GetData(speciesData.SpeciesData):
logging.info("src_data directory tree generated for %s" % self.full_name)
def get_last_modified_time_string(self, filePath):
# Return the last modification date of the file as a 'YYYYMMDD' string, e.g. '20190130'
lastModifiedTimestamp = os.path.getmtime(filePath)
lastModifiedTimeStructure = time.localtime(lastModifiedTimestamp)
lastModifiedDate = time.strftime("%Y%m%d", lastModifiedTimeStructure)
return lastModifiedDate
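# Illustrative usage (paths are placeholders, not part of the committed file): for an
# annotation file last modified on 30 January 2019,
#   self.get_last_modified_time_string("/path/to/annotation.gff")
# returns "20190130", the suffix appended to the renamed gff dataset below.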
def get_source_data_files_from_path(self):
"""
@@ -137,7 +143,7 @@ class GetData(speciesData.SpeciesData):
elif k in annotation_datasets:
dataset_fname = ""
if k == "gff_path":
dataset_fname = "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version)
dataset_fname = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.get_last_modified_time_string(os.path.abspath(v)))
elif k == "transcripts_path":
dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version)
elif k == "proteins_path":
@@ -194,7 +200,7 @@ if __name__ == "__main__":
parser.add_argument("-v", "--verbose",
help="Increase output verbosity",
action="store_false")
action="store_true")
parser.add_argument("--main-directory",
type=str,
@@ -206,7 +212,6 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger("urllib3").setLevel(logging.WARNING)
if not args.main_directory:
args.main_directory = os.getcwd()
@@ -62,11 +62,10 @@ class DeploySpeciesStack(speciesData.SpeciesData):
# use the default banner instead
if "banner_path" in self.config.keys():
if not config["banner_path"] == "" and os.path.isfile(os.path.abspath(config["banner_path"])):
banner_dest_path = os.path.abspath("./banner.png")
logging.info("Custom banner path: %s" % self.config["banner_path"])
# if os.path.samefile(os.path.abspath(config["banner_path"]), banner_dest_path):
# shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path)
banner_dest_path = os.path.join(self.species_dir, os.path.abspath("banner.png"))
if not os.path.isfile(banner_dest_path) or not os.path.samefile(os.path.abspath(config["banner_path"]),banner_dest_path):
os.symlink(os.path.abspath(self.config["banner_path"]), banner_dest_path)
logging.info("Custom banner added: symlink from %s" % self.config["banner_path"])
else:
logging.debug("Using default banner for Tripal pages")
self.config.pop("banner_path", None)
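# Editorial note, not part of the committed file: os.symlink() raises FileExistsError
# when the destination already exists, and os.path.samefile() follows symlinks, so the
# isfile()/samefile() check above skips re-linking when banner.png already points to
# the configured banner.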
@@ -321,7 +321,7 @@ if __name__ == "__main__":
parser.add_argument("-v", "--verbose",
help="Increase output verbosity",
action="store_false")
action="store_true")
parser.add_argument("--config",
type=str,
@@ -337,7 +337,6 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.getLogger("urllib3").setLevel(logging.WARNING)
# Parsing the config file if provided, using the default config otherwise
if not args.config:
@@ -23,7 +23,7 @@ class SpeciesData:
return string
def __init__(self, parameters_dictionary):
# self.config_dictionary = None
self.name = parameters_dictionary["name"]
self.parameters_dictionary = parameters_dictionary
parameters_dictionary_description=parameters_dictionary["description"]
self.species = self.clean_string(parameters_dictionary_description["species"])
@@ -59,9 +59,6 @@ class SpeciesData:
self.genus_uppercase = self.genus[0].upper() + self.genus[1:]
self.chado_species_name = "{0} {1}".format(self.species, self.sex)
self.full_name = ' '.join(utilities.filter_empty_not_empty_items([self.genus_uppercase, self.species, self.strain, self.sex])["not_empty"])
self.full_name = self.full_name.replace("__", "_").replace("_ ", "_").replace(" _", "_")
if self.full_name.endswith("_") or self.full_name.endswith(" "):
self.full_name = self.full_name[0:-2]
self.full_name_lowercase = self.full_name.lower()
self.abbreviation = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"])
@@ -96,14 +93,8 @@ class SpeciesData:
self.api_key = None # API key used to communicate with the galaxy instance. Cannot be used to do user-tied actions
self.datasets = dict()
self.config = None # Custom config used to set environment variables inside containers
self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), self.sex.lower()])["not_empty"])
self.species_folder_name = self.species_folder_name .replace("-", "_").replace('__', '_').replace("(", "_").replace(")", "_")
if self.species_folder_name.endswith("_"):
self.species_folder_name = self.species_folder_name[0:-2]
self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items(
[self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(),
self.sex.lower()])["not_empty"])
self.existing_folders_cache = {}
self.bam_metadata_cache = {}
# # Sanitize str attributes
# for var in vars(self):
# for attr in var if type(attr) == str:
# attr = attr.replace("(", "_").replace(")", "_")
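full_name, abbreviation and species_folder_name above are all built with utilities.filter_empty_not_empty_items, whose definition is not shown in this diff. Judging from how it is called, a minimal sketch could look like the following (an assumption about its behaviour, not the repository's actual implementation):

def filter_empty_not_empty_items(items):
    # Split a list into empty and non-empty entries so callers can join
    # only the informative parts of a species name or folder name
    result = {"empty": [], "not_empty": []}
    for item in items:
        if item is None or str(item).strip() == "":
            result["empty"].append(item)
        else:
            result["not_empty"].append(item)
    return result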
{{ name }}:
- name: {{ name }}
description:
genus: {{ genus }}
species: {{ species }}
@@ -13,7 +13,7 @@
genome_path: {{ genome_path }}
transcripts_path: {{ transcripts_path }}
proteins_path: {{ proteins_path }}
gff_path: {{ gff_path }} # Mandatory!
gff_path: {{ gff_path }}
interpro_path: {{ interpro_path }}
orthofinder_path: {{ orthofinder_path }}
blastp_path: {{ blastp_path }}
@@ -44,14 +44,12 @@ def parse_input(input_file):
:return:
"""
parsed_sp_dict_list = []
sp_dict_list = []
try:
with open(input_file, 'r') as stream:
try:
yaml_dict = yaml.safe_load(stream)
for k, v in yaml_dict.items():
parsed_sp_dict_list.append(v)
sp_dict_list = yaml.safe_load(stream)
except yaml.YAMLError as err:
logging.critical("Input file is not in YAML format")
sys.exit(err)
@@ -62,7 +60,7 @@ def parse_input(input_file):
logging.critical("The specified input file cannot be read (%s)" % input_file)
sys.exit()
return parsed_sp_dict_list
return sp_dict_list
def filter_empty_not_empty_items(li):
@@ -125,13 +123,9 @@ def get_species_history_id(instance, full_name):
def get_gspecies_string_from_sp_dict(sp_dict):
gspecies = ""
for k, v in sp_dict.items():
for k2, v2 in v.items():
if k2 == "genus":
gspecies = gspecies.lower() + v2
elif k2 == "species":
gspecies = gspecies.lower() + "_" + v2
genus = sp_dict["description"]["genus"]
species = sp_dict["description"]["species"]
gspecies = genus.lower() + "_" + species.lower()
return gspecies
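# Illustrative call, not part of the committed file (input shape assumed from the
# example YAML): with sp_dict["description"] == {"genus": "Citrus", "species": "sinensis"},
# the rewritten function returns "citrus_sinensis".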
def get_unique_species_str_list(sp_dict_list):
@@ -166,7 +160,6 @@ def get_unique_species_dict_list(sp_dict_list):
unique_species_dict = {}
unique_species_list_of_dict = []
unique_species_genus_species = get_unique_species_str_list(sp_dict_list=sp_dict_list)
for sp in sp_dict_list:
gspecies = get_gspecies_string_from_sp_dict(sp)