diff --git a/examples/citrus_sinensis.yml b/examples/citrus_sinensis.yml index 39f05f0896136d60ee020ae55627dc36b4e51bc4..14fe1dcd168a0bd2ca52111ff163abdba62b41dd 100644 --- a/examples/citrus_sinensis.yml +++ b/examples/citrus_sinensis.yml @@ -1,8 +1,7 @@ # Input file for the automated creation GGA docker stacks # The file consists in a "list" of species for which the script will have to create these stacks/load data into galaxy/run workflows -# This file is internally turned into a list of dictionaries by the scripts -citrus_sinensis: # Dummy value to designate the species (isn't used by the script) +- name: citrus_sinensis description: # Species description, leave blank if unknown or you don't want it to be used # These parameters are used to set up the various urls and adresses in different containers @@ -13,11 +12,8 @@ citrus_sinensis: # Dummy value to designate the species (isn't used by the scri strain: "" common_name: "" origin: "" - # the sex and strain, the script will look for files containing the genus, species, sex and strain of the species) - # If no file corresponding to the description is found, this path will be considered empty and the script will - # proceed to the next step (create the directory tree for the GGA docker stack) data: - # Sequence of paths to the different datasets to copy and import into the galaxy container (as a shared library) + # Paths to the different datasets to copy and import into the galaxy container (as a shared library) # Must be absolute paths to the dataset genome_path: "/path/to/repo/examples/src_data/genome/v1.0/Citrus_sinensis-scaffold00001.fasta" # Mandatory! transcripts_path: "/path/to/repo/examples/src_data/annotation/v1.0/Citrus_sinensis-orange1.1g015632m.g.fasta" # Mandatory! 
@@ -35,8 +31,8 @@ citrus_sinensis: # Dummy value to designate the species (isn't used by the scri ogs_version: "1.0" performed_by: "" services: - # Describe what optional services to deploy for the stack - # By default, only tripal, tripaldb and galaxy services will be deployed + # List the optional services to be deployed in the stack + # By default, only tripal, tripaldb, galaxy, jbrowse and elasticsearch services will be deployed blast: "False" wiki: "False" apollo: "False" \ No newline at end of file diff --git a/gga_get_data.py b/gga_get_data.py index 20ec80d37f1b1449fd216ae616d8f594976e952a..872d5c3a21d41eb6af1c9a547a262f806699525d 100755 --- a/gga_get_data.py +++ b/gga_get_data.py @@ -91,6 +91,12 @@ class GetData(speciesData.SpeciesData): logging.info("src_data directory tree generated for %s" % self.full_name) + def get_last_modified_time_string(self, filePath): + # give the last modification date for the file, with format '20190130' + lastModifiedTimestamp = os.path.getmtime(filePath) + lastModifiedTimeStructure = time.localtime(lastModifiedTimestamp) + lastModifiedDate = time.strftime("%Y%m%d", lastModifiedTimeStructure) + return lastModifiedDate def get_source_data_files_from_path(self): """ @@ -137,7 +143,7 @@ class GetData(speciesData.SpeciesData): elif k in annotation_datasets: dataset_fname = "" if k == "gff_path": - dataset_fname = "{0}_OGS{1}.gff".format(self.dataset_prefix, self.ogs_version) + dataset_fname = "{0}_OGS{1}_{2}.gff".format(self.dataset_prefix, self.ogs_version, self.get_last_modified_time_string(os.path.abspath(v))) elif k == "transcripts_path": dataset_fname = "{0}_OGS{1}_transcripts.fasta".format(self.dataset_prefix, self.ogs_version) elif k == "proteins_path": @@ -194,7 +200,7 @@ if __name__ == "__main__": parser.add_argument("-v", "--verbose", help="Increase output verbosity", - action="store_false") + action="store_true") parser.add_argument("--main-directory", type=str, @@ -206,7 +212,6 @@ if __name__ == "__main__": 
logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) - logging.getLogger("urllib3").setLevel(logging.WARNING) if not args.main_directory: args.main_directory = os.getcwd() diff --git a/gga_init.py b/gga_init.py index bd1beb4e7ebed69a7945fb0e8ed58a4545bf3483..673df13b3e83a6540410ddcfebcf303031e6d36c 100755 --- a/gga_init.py +++ b/gga_init.py @@ -62,11 +62,10 @@ class DeploySpeciesStack(speciesData.SpeciesData): # use the default banner instead if "banner_path" in self.config.keys(): if not config["banner_path"] == "" and os.path.isfile(os.path.abspath(config["banner_path"])): - banner_dest_path = os.path.abspath("./banner.png") - logging.info("Custom banner path: %s" % self.config["banner_path"]) - # if os.path.samefile(os.path.abspath(config["banner_path"]), banner_dest_path): - # shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path) - shutil.copy(os.path.abspath(self.config["banner_path"]), banner_dest_path) + banner_dest_path = os.path.join(self.species_dir, os.path.abspath("banner.png")) + if not os.path.isfile(banner_dest_path) or not os.path.samefile(os.path.abspath(config["banner_path"]),banner_dest_path): + os.symlink(os.path.abspath(self.config["banner_path"]), banner_dest_path) + logging.info("Custom banner added: symlink from %s" % self.config["banner_path"]) else: logging.debug("Using default banner for Tripal pages") self.config.pop("banner_path", None) diff --git a/gga_load_data.py b/gga_load_data.py index c4ed5594a9d3d173bd11d4ee4918a1152cd6a5ca..32c5eaa1c9ac944e99f35bfeccc3a56861a04820 100755 --- a/gga_load_data.py +++ b/gga_load_data.py @@ -321,7 +321,7 @@ if __name__ == "__main__": parser.add_argument("-v", "--verbose", help="Increase output verbosity", - action="store_false") + action="store_true") parser.add_argument("--config", type=str, @@ -337,7 +337,6 @@ if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) - 
logging.getLogger("urllib3").setLevel(logging.WARNING) # Parsing the config file if provided, using the default config otherwise if not args.config: diff --git a/speciesData.py b/speciesData.py index a8637b01484de65a257f12eec6e5afacc406d014..f2b97026d4d929a39e21c79b31058f498f253622 100755 --- a/speciesData.py +++ b/speciesData.py @@ -23,7 +23,7 @@ class SpeciesData: return string def __init__(self, parameters_dictionary): - # self.config_dictionary = None + self.name = parameters_dictionary["name"] self.parameters_dictionary = parameters_dictionary parameters_dictionary_description=parameters_dictionary["description"] self.species = self.clean_string(parameters_dictionary_description["species"]) @@ -59,9 +59,6 @@ class SpeciesData: self.genus_uppercase = self.genus[0].upper() + self.genus[1:] self.chado_species_name = "{0} {1}".format(self.species, self.sex) self.full_name = ' '.join(utilities.filter_empty_not_empty_items([self.genus_uppercase, self.species, self.strain, self.sex])["not_empty"]) - self.full_name = self.full_name.replace("__", "_").replace("_ ", "_").replace(" _", "_") - if self.full_name.endswith("_") or self.full_name.endswith(" "): - self.full_name = self.full_name[0:-2] self.full_name_lowercase = self.full_name.lower() self.abbreviation = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase[0], self.species, self.strain, self.sex])["not_empty"]) @@ -96,14 +93,8 @@ class SpeciesData: self.api_key = None # API key used to communicate with the galaxy instance. 
Cannot be used to do user-tied actions self.datasets = dict() self.config = None # Custom config used to set environment variables inside containers - self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items([self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), self.sex.lower()])["not_empty"]) - self.species_folder_name = self.species_folder_name .replace("-", "_").replace('__', '_').replace("(", "_").replace(")", "_") - if self.species_folder_name.endswith("_"): - self.species_folder_name = self.species_folder_name[0:-2] + self.species_folder_name = "_".join(utilities.filter_empty_not_empty_items( + [self.genus_lowercase.lower(), self.species.lower(), self.strain.lower(), + self.sex.lower()])["not_empty"]) self.existing_folders_cache = {} self.bam_metadata_cache = {} - - # # Sanitize str attributes - # for var in vars(self): - # for attr in var if type(attr) == str: - # attr = attr.replace("(", "_").replace(")", "_") diff --git a/templates/organisms.yml.j2 b/templates/organisms.yml.j2 index 83ab58f9ab4bb3282af991df6ad0c50c952725f5..915bbed25daf29704e3e1b2a6085a648ed849599 100644 --- a/templates/organisms.yml.j2 +++ b/templates/organisms.yml.j2 @@ -1,4 +1,4 @@ -{{ name }}: +- name: {{ name }} description: genus: {{ genus }} species: {{ species }} @@ -13,7 +13,7 @@ genome_path: {{ genome_path }} transcripts_path: {{ transcripts_path }} proteins_path: {{ proteins_path }} - gff_path: {{ gff_path }} # Mandatory! 
+ gff_path: {{ gff_path }} interpro_path: {{ interpro_path }} orthofinder_path: {{ orthofinder_path }} blastp_path: {{ blastp_path }} diff --git a/utilities.py b/utilities.py index 2bb532d727c8f6cb704695d5ba374ff18a7be12f..93c3e72d49d6709548402e87e3b14c2cd3945b9d 100755 --- a/utilities.py +++ b/utilities.py @@ -44,14 +44,12 @@ def parse_input(input_file): :return: """ - parsed_sp_dict_list = [] + sp_dict_list = [] try: with open(input_file, 'r') as stream: try: - yaml_dict = yaml.safe_load(stream) - for k, v in yaml_dict.items(): - parsed_sp_dict_list.append(v) + sp_dict_list = yaml.safe_load(stream) except yaml.YAMLError as err: logging.critical("Input file is not in YAML format") sys.exit(err) @@ -62,7 +60,7 @@ def parse_input(input_file): logging.critical("The specified input file cannot be read (%s)" % input_file) sys.exit() - return parsed_sp_dict_list + return sp_dict_list def filter_empty_not_empty_items(li): @@ -125,13 +123,9 @@ def get_species_history_id(instance, full_name): def get_gspecies_string_from_sp_dict(sp_dict): - gspecies = "" - for k, v in sp_dict.items(): - for k2, v2 in v.items(): - if k2 == "genus": - gspecies = gspecies.lower() + v2 - elif k2 == "species": - gspecies = gspecies.lower() + "_" + v2 + genus = sp_dict["description"]["genus"] + species = sp_dict["description"]["species"] + gspecies = genus.lower() + "_" + species.lower() return gspecies def get_unique_species_str_list(sp_dict_list): @@ -166,7 +160,6 @@ def get_unique_species_dict_list(sp_dict_list): unique_species_dict = {} unique_species_list_of_dict = [] - unique_species_genus_species = get_unique_species_str_list(sp_dict_list=sp_dict_list) for sp in sp_dict_list: gspecies = get_gspecies_string_from_sp_dict(sp)