Loraine Gueguen
--- a/gga_load_data.py

+ 67

− 98
+++ b/gga_load_data.py

+ 67

− 98
 @@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 import re

-import bioblend
 import argparse
 import os
 import logging
 @@ -10,12 +9,11 @@ import sys
 import time
 import json
 import yaml
-import subprocess
-from bioblend import galaxy
 from bioblend.galaxy.objects import GalaxyInstance

 import utilities
-import speciesData
+import utilities_bioblend
+import species_data
 import constants

 """ 
 @@ -26,7 +24,7 @@ Usage: $ python3 gga_load_data.py -i input_example.yml --config config.yml [OPTI
 Do not call this script before the galaxy container is ready
 """

-class LoadData(speciesData.SpeciesData):
+class LoadData(species_data.SpeciesData):
    """
    Child of SpeciesData

 @@ -40,55 +38,6 @@ class LoadData(speciesData.SpeciesData):
        self.bam_metadata_cache = {}
        super().__init__(parameters_dictionary)

-    def get_history(self):
-        """
-        Create or set the working history to the current species one
-
-        :return:
-        """
-        try:
-            histories = self.instance.histories.get_histories(name=str(self.genus_species))
-            if len(histories) == 1:
-                self.history_id = histories[0]["id"]
-                logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
-            else:
-                logging.critical("Multiple histories exists for {1}: {2}".format(self.genus, self.species))
-        except IndexError:
-            logging.info("Creating history for {0} {1}".format(self.genus, self.species))
-            hist_dict = self.instance.histories.create_history(name=str(self.genus_species))
-            self.history_id = hist_dict["id"]
-            logging.debug("History ID set for {0} {1}: {2}".format(self.genus, self.species, self.history_id))
-
-        return self.history_id
-
-    def remove_homo_sapiens_from_db(self):
-        """
-        Run the GMOD tool to remove the "Homo sapiens" default organism from the original database
-        Will do nothing if H. sapiens isn't in the database
-
-        """
-
-        logging.debug("Getting 'Homo sapiens' ID in chado database")
-        get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
-            self.instance,
-            tool_id=constants.GET_ORGANISMS_TOOL, # If this version if not found, Galaxy will use the one that is found
-            history_id=self.history_id,
-            tool_inputs={"genus": "Homo", "species": "sapiens"})
-        get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output_dataset_id)
-
-        logging.info("Deleting Homo 'sapiens' in the instance's chado database")
-        try:
-            get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
-            sapiens_id = str(get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
-            utilities.run_tool(
-                self.instance,
-                tool_id=constants.DELETE_ORGANISMS_TOOL,
-                history_id=self.history_id,
-                tool_inputs={"organism": sapiens_id})
-        except IndexError:
-            logging.error("Homo sapiens isn't in the instance's chado database (IndexError)")
-            pass
-
    def purge_histories(self):
        """
        Delete all histories in the instance
 @@ -114,10 +63,10 @@ class LoadData(speciesData.SpeciesData):

        data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)

-        instance = GalaxyInstance(url=self.instance_url,
-                                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
-                                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
-                                              )
+        gio = GalaxyInstance(url=self.instance_url,
+                             email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                             password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
+                             )

        logging.info("Looking for project data in %s" % data_dir_root)
        folders = dict()
 @@ -129,20 +78,20 @@ class LoadData(speciesData.SpeciesData):

        if folders:
            # Delete pre-existing lib (probably created by a previous call)
-            existing = instance.libraries.get_previews(name='Project Data')
+            existing = gio.libraries.get_previews(name=constants.GALAXY_LIBRARY_NAME)
            for lib in existing:
                if not lib.deleted:
-                    logging.info('Pre-existing "Project Data" library %s found, removing it' % lib.id)
-                    instance.libraries.delete(lib.id)
+                    logging.info('Pre-existing {0} library {1} found, removing it'.format(constants.GALAXY_LIBRARY_NAME, lib.id))
+                    gio.libraries.delete(lib.id)

-            logging.info("Creating new 'Project Data' library")
-            prj_lib = instance.libraries.create('Project Data', 'Data for current genome annotation project')
-            self.library_id = prj_lib.id  # project data folder/library
-            logging.info("Library for {0}: {1}".format(self.full_name, self.library_id))
+            logging.info("Creating new %s library" % constants.GALAXY_LIBRARY_NAME)
+            prj_lib = gio.libraries.create(constants.GALAXY_LIBRARY_NAME, constants.GALAXY_LIBRARY_DESC)
+            library_id = prj_lib.id  # project data folder/library
+            logging.info("Library for {0}: {1}".format(self.full_name, library_id))

            for fname, files in folders.items():
                if fname and files:
-                    folder_name = re.sub(data_dir_root + "/", "", fname)
+                    folder_name = re.sub(re.compile(data_dir_root + "/"), "", str(fname))
                    logging.info("Creating folder: %s" % folder_name)
                    folder = self.create_deep_folder(prj_lib, folder_name)

 @@ -280,30 +229,35 @@ class LoadData(speciesData.SpeciesData):
                logging.info("Did not find metadata in %s " % meta_file)
            return self.get_bam_label(dirname, bam_file)

-    def create_galaxy_instance(self):
-        """
-        Test the connection to the galaxy instance for the current organism
-        Exit if we cannot connect to the instance
-
-        """
-
-        logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
-        self.instance = galaxy.GalaxyInstance(url=self.instance_url,
-                                              email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
-                                              password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
-                                              )
+def remove_homo_sapiens_from_db(instance, history_id):
+    """
+    Look up the "Homo sapiens" default organism in the chado database with the GMOD tools and delete it
+    Runs both tools on `instance` in history `history_id`; only logs an error if the lookup returns an empty list

-        try:
-            self.instance.histories.get_histories()
-        except bioblend.ConnectionError:
-            logging.critical("Cannot connect to galaxy instance (%s) " % self.instance_url)
-            sys.exit()
-        else:
-            logging.info("Successfully connected to galaxy instance (%s) " % self.instance_url)
+    """

-        return self.instance
+    logging.debug("Getting 'Homo sapiens' ID in chado database")
+    get_sapiens_id_json_output = utilities_bioblend.run_tool_and_download_single_output_dataset(
+        instance,
+        tool_id=constants.GET_ORGANISMS_TOOL, # If this version is not found, Galaxy will use the one that is found
+        history_id=history_id,
+        tool_inputs={"genus": "Homo", "species": "sapiens"},
+        time_sleep = 10  # NOTE(review): presumably a wait/polling delay in seconds -- confirm in utilities_bioblend
+    )
+
+    logging.info("Deleting Homo 'sapiens' in the instance's chado database")
+    try:
+        get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
+        sapiens_id = str(get_sapiens_id_final_output["organism_id"])  # needs to be str to be recognized by the chado tool
+        utilities_bioblend.run_tool(
+            instance,
+            tool_id=constants.DELETE_ORGANISMS_TOOL,
+            history_id=history_id,
+            tool_inputs={"organism": sapiens_id})
+    except IndexError:  # the [0] lookup above raises this when the parsed tool output is an empty list
+        logging.error("Homo sapiens isn't in the instance's chado database (IndexError)")
+        pass

-    
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Load data into Galaxy library")

 @@ -315,6 +269,10 @@ if __name__ == "__main__":
                        help="Increase output verbosity",
                        action="store_true")

+    parser.add_argument("-vv", "--very_verbose",
+                        help="Increase output verbosity",
+                        action="store_true")
+
    parser.add_argument("--config",
                        type=str,
                        help="Config path, default to 'examples/config.yml'")
 @@ -325,11 +283,15 @@ if __name__ == "__main__":

    args = parser.parse_args()

-    if args.verbose:
+    if args.verbose or args.very_verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

+    if not args.very_verbose:
+        logging.getLogger("urllib3").setLevel(logging.INFO)
+        logging.getLogger("bioblend").setLevel(logging.INFO)
+
    # Parsing the config file if provided, using the default config otherwise
    if args.config:
        config_file = os.path.abspath(args.config)
 @@ -364,30 +326,37 @@ if __name__ == "__main__":
        # Parse the config yaml file
        load_data_for_current_species.config = config
        # Set the instance url attribute -- Does not work with localhost on scratch (ALB)
-        load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
+        load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}/galaxy/".format(
                load_data_for_current_species.config[constants.CONF_ALL_HTTP_PORT],
-                load_data_for_current_species.genus_lowercase,
-                load_data_for_current_species.species)
+                load_data_for_current_species.genus_species)

        # Check the galaxy container state and proceed if the galaxy services are up and running
-        if utilities.check_galaxy_state(genus_lowercase=load_data_for_current_species.genus_lowercase,
-                                        species=load_data_for_current_species.species,
+        if utilities_bioblend.check_galaxy_state(network_name=load_data_for_current_species.genus_species,
                                        script_dir=load_data_for_current_species.script_dir):

            # Create the Galaxy instance
-            load_data_for_current_species.instance = load_data_for_current_species.create_galaxy_instance()
+            load_data_for_current_species.instance = utilities_bioblend.get_galaxy_instance(
+                instance_url=load_data_for_current_species.instance_url,
+                email=load_data_for_current_species.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+                password=load_data_for_current_species.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
+            )

            # Load the datasets into a galaxy library
            logging.info("Setting up library for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))
            load_data_for_current_species.setup_library()
            logging.debug("Successfully set up library in galaxy for {0} {1}".format(load_data_for_current_species.genus, load_data_for_current_species.species))

-            # Set or get the history for the current organism
-            load_data_for_current_species.get_history()
-            
+            # Get default history
+            history_id = utilities_bioblend.get_history(
+                instance=load_data_for_current_species.instance,
+                history_name="Unnamed history")
+
            # Remove H. sapiens from database if here
            # TODO: set a dedicated history for removing H. sapiens (instead of doing it into a species history)
-            load_data_for_current_species.remove_homo_sapiens_from_db()
+            remove_homo_sapiens_from_db(
+                instance=load_data_for_current_species.instance,
+                history_id=history_id
+            )

            # logging.info("Importing datasets into history for %s" % load_data_for_current_species.full_name)
            # load_data_for_current_species.import_datasets_into_history()  # Option "--load-history"