From f5552b4d8869e956d68e7eff526410b9a41667af Mon Sep 17 00:00:00 2001
From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr>
Date: Mon, 1 Mar 2021 11:07:35 +0100
Subject: [PATCH] serexec chmod moved to utilities, 'deploy' input list moved
 to utilities

---
 gga_get_data.py               | 10 +++---
 gga_load_data.py              | 31 +++---------------
 run_workflow_phaeoexplorer.py | 62 +++++++++++++++++++++++++----------
 utilities.py                  |  5 +++
 4 files changed, 60 insertions(+), 48 deletions(-)

diff --git a/gga_get_data.py b/gga_get_data.py
index 339daf5..3251b78 100644
--- a/gga_get_data.py
+++ b/gga_get_data.py
@@ -153,11 +153,11 @@ if __name__ == "__main__":
                                                                  get_data_for_current_species.genus_species +
                                                                  "/")
 
-        # Change serexec permissions in repo
-        try:
-            os.chmod("%s/serexec" % get_data_for_current_species.script_dir, 0o0777)
-        except PermissionError:
-            logging.critical("Cannot access %s, exiting" % get_data_for_current_species.script_dir)
+        # # Change serexec permissions in repo
+        # try:
+        #     os.chmod("%s/serexec" % get_data_for_current_species.script_dir, 0o0755)
+        # except PermissionError:
+        #     logging.warning("serexec permissions incorrect in %s" % get_data_for_current_species.script_dir)
 
         # Retrieve datasets
         logging.info("Finding and copying datasets for %s" % get_data_for_current_species.full_name)
diff --git a/gga_load_data.py b/gga_load_data.py
index f0501c5..e2b677b 100644
--- a/gga_load_data.py
+++ b/gga_load_data.py
@@ -269,27 +269,6 @@ class LoadData(speciesData.SpeciesData):
 
         return new_folder
 
-    def setup_data_libraries(self):
-        """
-        Load data into the galaxy container with the galaxy_data_libs_SI.py script written by A. Bretaudeau
-
-        DEPRECATED
-
-        :return:
-        """
-
-        self.goto_species_dir()
-        try:
-            logging.info("Loading data into the galaxy container")
-            subprocess.call(["../serexec","{0}_{1}_galaxy".format(self.genus_lowercase, self.species),
-                                        "/tool_deps/_conda/bin/python",
-                                        "/opt/galaxy_data_libs_SI.py"])
-        except subprocess.CalledProcessError:
-            logging.info("Cannot load data into the galaxy container for " + self.full_name)
-            pass
-        else:
-            logging.info("Data successfully loaded into the galaxy container for " + self.full_name)
-
     def connect_to_instance(self):
         """
         Test the connection to the galaxy instance for the current organism
@@ -379,11 +358,11 @@ if __name__ == "__main__":
             load_data_for_current_species.genus_lowercase,
             load_data_for_current_species.species)
         
-        # Change serexec permissions in repo
-        try:
-            os.chmod("%s/serexec" % load_data_for_current_species.script_dir, 0o0777)
-        except PermissionError:
-            logging.critical("Cannot access %s, exiting" % load_data_for_current_species.script_dir)
+        # # Change serexec permissions in repo
+        # try:
+        #     os.chmod("%s/serexec" % load_data_for_current_species.script_dir, 0o0755)
+        # except PermissionError:
+        #     logging.warning("serexec permissions incorrect in %s" % load_data_for_current_species.script_dir)
 
         # Check the galaxy container state and proceed if the galaxy services are up and running
         if utilities.check_galaxy_state(genus_lowercase=load_data_for_current_species.genus_lowercase,
diff --git a/run_workflow_phaeoexplorer.py b/run_workflow_phaeoexplorer.py
index 6796816..f9a4c01 100644
--- a/run_workflow_phaeoexplorer.py
+++ b/run_workflow_phaeoexplorer.py
@@ -310,6 +310,33 @@ class RunWorkflow(speciesData.SpeciesData):
                 if k == "id":
                     folders_ids[current_folder_name] = v
 
+        history_datasets_li = self.instance.datasets.get_datasets()
+        genome_dataset_hda_id, gff_dataset_hda_id, transcripts_dataset_hda_id, proteins_datasets_hda_id = None, None, None, None
+        interproscan_dataset_hda_id, blast_diamond_dataset_hda_id = None, None
+
+        # Check for existing datasets for current organism (should have been run separately for multiple organisms instances)
+        for dataset_dict in history_datasets_li[0:5]:  # Limit of datasets is 6 -- NOTE(review): [0:5] only inspects 5 entries, confirm intent
+            # Datasets are told apart by name only; any name matching no pattern below is assumed to be the genome
+            if dataset_dict["name"].endswith("proteins.fa"):
+                proteins_datasets_hda_id = dataset_dict["id"]
+                logging.debug("Proteins dataset hda ID: %s" % proteins_datasets_hda_id)
+            elif dataset_dict["name"].endswith("transcripts-gff.fa"):
+                transcripts_dataset_hda_id = dataset_dict["id"]
+                logging.debug("Transcripts dataset hda ID: %s" % transcripts_dataset_hda_id)
+            elif dataset_dict["name"].endswith(".gff"):
+                gff_dataset_hda_id = dataset_dict["id"]
+                logging.debug("gff dataset hda ID: %s" % gff_dataset_hda_id)
+            elif "Interpro" in dataset_dict["name"]:
+                interproscan_dataset_hda_id = dataset_dict["id"]
+                logging.debug("InterproScan dataset hda ID: %s" % interproscan_dataset_hda_id)
+            elif "diamond-blastp" in dataset_dict["name"]:
+                blast_diamond_dataset_hda_id = dataset_dict["id"]
+                logging.debug("Blast Diamond dataset hda ID: %s" % blast_diamond_dataset_hda_id)
+            else:
+                genome_dataset_hda_id = dataset_dict["id"]
+                logging.debug("Genome dataset hda id: %s" % genome_dataset_hda_id)
+
+
         # Iterating over the folders to find datasets and map datasets to their IDs
         logging.debug("Datasets IDs: ")
         for k, v in folders_ids.items():
@@ -327,12 +354,7 @@ class RunWorkflow(speciesData.SpeciesData):
                 final_sub_folder_content = self.instance.folders.show_folder(folder_id=sub_folder_content["folder_contents"][0]["id"], contents=True)
                 for k2, v2 in final_sub_folder_content.items():
                     for e in v2:
-                        # try:
-                        #     print(e["name"])
-                        # except TypeError:
-                        #     print("TypeError")
                         if type(e) == dict:
-                            # TODO: manage genome and ogs versions (differentiate between the correct folders using self.config)
                             if "transcripts" in e["name"]:
                                 self.datasets["transcripts_file"] = e["ldda_id"]
                                 logging.debug("Transcripts file:\t" + e["name"] + ": " + e["ldda_id"])
@@ -350,18 +372,24 @@ class RunWorkflow(speciesData.SpeciesData):
                                 logging.debug("Blastp diamond file:\t" + e["name"] + ": " + e["ldda_id"])
 
         logging.info("Uploading datasets into history %s" % self.history_id)
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
-        self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
-        try:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"])
-        except Exception as exc:
-            logging.debug("Interproscan file could not be loaded in history {0} ({1})".format(self.history_id, exc))
-        try:
-            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"])
-        except Exception as exc:
-            logging.debug("Blastp file could not be loaded in history {0} ({1})".format(self.history_id, exc))
+        if genome_dataset_hda_id is None:
+            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["genome_file"])
+        if gff_dataset_hda_id is None:
+            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["gff_file"])
+        if transcripts_dataset_hda_id is None:
+            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["transcripts_file"])
+        if proteins_datasets_hda_id is None:
+            self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["proteins_file"])
+        if interproscan_dataset_hda_id is None:
+            try:
+                self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["interproscan_file"])
+            except Exception as exc:
+                logging.debug("Interproscan file not found in library (history: {0})\n{1}".format(self.history_id, exc))
+        if blast_diamond_dataset_hda_id is None:
+            try:
+                self.instance.histories.upload_dataset_from_library(history_id=self.history_id, lib_dataset_id=self.datasets["blast_diamond_file"])
+            except Exception as exc:
+                logging.debug("Blastp file not found in library (history: {0})\n{1}".format(self.history_id, exc))
 
         _datasets = self.instance.datasets.get_datasets()
         with open(os.path.join(self.main_dir, "datasets_ids.json"), "w") as datasets_ids_outfile:
diff --git a/utilities.py b/utilities.py
index eef5448..564552c 100644
--- a/utilities.py
+++ b/utilities.py
@@ -93,6 +93,11 @@ def check_galaxy_state(genus_lowercase, species, script_dir):
     """
 
     # Run supervisorctl status in the galaxy container via serexec
+    # Change serexec permissions in repo
+    try:
+        os.chmod("%s/serexec" % script_dir, 0o0755)
+    except PermissionError:
+        logging.warning("serexec permissions incorrect in %s" % script_dir)
     galaxy_logs = subprocess.run(["%s/serexec" % script_dir, "{0}_{1}_galaxy".format(genus_lowercase, species),
                                   "supervisorctl", "status", "galaxy:"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     if "galaxy:galaxy_web                RUNNING" in str(galaxy_logs.stdout) \
-- 
GitLab