diff --git a/gga_get_data.py b/gga_get_data.py index 339daf5e3ca0ea946a5d9105f1d198b194197a8f..3251b786c341c2e6c6d17c0359d0d550af09612e 100644 --- a/gga_get_data.py +++ b/gga_get_data.py @@ -153,11 +153,11 @@ if __name__ == "__main__": get_data_for_current_species.genus_species + "/") - # Change serexec permissions in repo - try: - os.chmod("%s/serexec" % get_data_for_current_species.script_dir, 0o0777) - except PermissionError: - logging.critical("Cannot access %s, exiting" % get_data_for_current_species.script_dir) + # # Change serexec permissions in repo + # try: + # os.chmod("%s/serexec" % get_data_for_current_species.script_dir, 0o0755) + # except PermissionError: + # logging.warning("serexec permissions incorrect in %s" % get_data_for_current_species.script_dir) # Retrieve datasets logging.info("Finding and copying datasets for %s" % get_data_for_current_species.full_name) diff --git a/gga_load_data.py b/gga_load_data.py index f0501c5a8cd484fa121b76fc633795ebc359d76d..e2b677b7f6f8efd24698ee0c8f26fbfc849e6db4 100644 --- a/gga_load_data.py +++ b/gga_load_data.py @@ -269,27 +269,6 @@ class LoadData(speciesData.SpeciesData): return new_folder - def setup_data_libraries(self): - """ - Load data into the galaxy container with the galaxy_data_libs_SI.py script written by A. 
Bretaudeau - - DEPRECATED - - :return: - """ - - self.goto_species_dir() - try: - logging.info("Loading data into the galaxy container") - subprocess.call(["../serexec","{0}_{1}_galaxy".format(self.genus_lowercase, self.species), - "/tool_deps/_conda/bin/python", - "/opt/galaxy_data_libs_SI.py"]) - except subprocess.CalledProcessError: - logging.info("Cannot load data into the galaxy container for " + self.full_name) - pass - else: - logging.info("Data successfully loaded into the galaxy container for " + self.full_name) - def connect_to_instance(self): """ Test the connection to the galaxy instance for the current organism @@ -379,11 +358,11 @@ if __name__ == "__main__": load_data_for_current_species.genus_lowercase, load_data_for_current_species.species) - # Change serexec permissions in repo - try: - os.chmod("%s/serexec" % load_data_for_current_species.script_dir, 0o0777) - except PermissionError: - logging.critical("Cannot access %s, exiting" % load_data_for_current_species.script_dir) + # # Change serexec permissions in repo + # try: + # os.chmod("%s/serexec" % load_data_for_current_species.script_dir, 0o0755) + # except PermissionError: + # logging.warning("serexec permissions incorrect in %s" % load_data_for_current_species.script_dir) # Check the galaxy container state and proceed if the galaxy services are up and running if utilities.check_galaxy_state(genus_lowercase=load_data_for_current_species.genus_lowercase, diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py index 67968163a8b287877e09e298a0c67b771a8c9288..f9a4c016b48cf80f9c04bb3006430a893be18ae9 100644 --- a/run_workflow_phaeoexplorer.py +++ b/run_workflow_phaeoexplorer.py @@ -310,6 +310,33 @@ class RunWorkflow(speciesData.SpeciesData): if k == "id": folders_ids[current_folder_name] = v + history_datasets_li = self.instance.datasets.get_datasets() + genome_dataset_hda_id, gff_dataset_hda_id, transcripts_dataset_hda_id, proteins_datasets_hda_id = None, None, None, None + 
interproscan_dataset_hda_id, blast_diamond_dataset_hda_id = None, None + + # Check for existing datasets for current organism (should have been run separately for multiple organism instances) + for dataset_dict in history_datasets_li[0:5]: # Limit of datasets is 6 (NOTE(review): [0:5] inspects only 5 entries; confirm intent) + # Datasets imports should be ordered correctly + if dataset_dict["name"].endswith("proteins.fa"): + proteins_datasets_hda_id = dataset_dict["id"] + logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id) + elif dataset_dict["name"].endswith("transcripts-gff.fa"): + transcripts_dataset_hda_id = dataset_dict["id"] + logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id) + elif dataset_dict["name"].endswith(".gff"): + gff_dataset_hda_id = dataset_dict["id"] + logging.debug("gff dataset hda ID: %s" % gff_dataset_hda_id) + elif "Interpro" in dataset_dict["name"]: + interproscan_dataset_hda_id = dataset_dict["id"] + logging.debug("InterproScan dataset hda ID: %s" % interproscan_dataset_hda_id) + elif "diamond-blastp" in dataset_dict["name"]: + blast_diamond_dataset_hda_id = dataset_dict["id"] + logging.debug("Blast Diamond dataset hda ID: %s" % blast_diamond_dataset_hda_id) + else: + genome_dataset_hda_id = dataset_dict["id"] + logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id) + + # Iterating over the folders to find datasets and map datasets to their IDs logging.debug("Datasets IDs: ") for k, v in folders_ids.items(): @@ -327,12 +354,7 @@ class RunWorkflow(speciesData.SpeciesData): final_sub_folder_content = self.instance.folders.show_folder(folder_id=sub_folder_content["folder_contents"][0]["id"], contents=True) for k2, v2 in final_sub_folder_content.items(): for e in v2: - # try: - # print(e["name"]) - # except TypeError: - # print("TypeError") if type(e) == dict: - # TODO: manage genome and ogs versions (differentiate between the correct folders using self.config) if "transcripts" in e["name"]: self.datasets["transcripts_file"] = e["ldda_id"] logging.debug("Transcripts 
file:\t" + e["name"] + ": " + e["ldda_id"]) @@ -350,18 +372,24 @@ class RunWorkflow(speciesData.SpeciesData): logging.debug("Blastp diamond file:\t" + e["name"] + ": " + e["ldda_id"]) logging.info("Uploading datasets into history %s" % self.history_id) - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"]) - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"]) - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"]) - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"]) - try: - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"]) - except Exception as exc: - logging.debug("Interproscan file could not be loaded in history {0} ({1})".format(self.history_id, exc)) - try: - self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"]) - except Exception as exc: - logging.debug("Blastp file could not be loaded in history {0} ({1})".format(self.history_id, exc)) + if genome_dataset_hda_id is None: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"]) + if gff_dataset_hda_id is None: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"]) + if transcripts_dataset_hda_id is None: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"]) + if proteins_datasets_hda_id is None: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"]) + if 
interproscan_dataset_hda_id is None: + try: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"]) + except Exception as exc: + logging.debug("Interproscan file not found in library (history: {0})\n{1}".format(self.history_id, exc)) + if blast_diamond_dataset_hda_id is None: + try: + self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"]) + except Exception as exc: + logging.debug("Blastp file not found in library (history: {0})\n{1}".format(self.history_id, exc)) _datasets = self.instance.datasets.get_datasets() with open(os.path.join(self.main_dir, "datasets_ids.json"), "w") as datasets_ids_outfile: diff --git a/utilities.py b/utilities.py index eef544818f73ad9ff58e00baecbef481e6a01a1f..564552c0049fa8e3107fbbff8a60212b5d1d5337 100644 --- a/utilities.py +++ b/utilities.py @@ -93,6 +93,11 @@ def check_galaxy_state(genus_lowercase, species, script_dir): """ # Run supervisorctl status in the galaxy container via serexec + # Change serexec permissions in repo + try: + os.chmod("%s/serexec" % script_dir, 0o0755) + except PermissionError: + logging.warning("serexec permissions incorrect in %s" % script_dir) galaxy_logs = subprocess.run(["%s/serexec" % script_dir, "{0}_{1}_galaxy".format(genus_lowercase, species), "supervisorctl", "status", "galaxy:"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) if "galaxy:galaxy_web RUNNING" in str(galaxy_logs.stdout) \