
Release_2.0

Merged: Loraine Gueguen requested to merge release_2.0 into dev
2 files changed: +25 −25
@@ -11,12 +11,12 @@ import time
import json
import yaml
import subprocess
from bioblend import galaxy
from bioblend.galaxy.objects import GalaxyInstance
import utilities
import speciesData
+import constants
"""
gga_load_data.py
@@ -26,13 +26,6 @@ Usage: $ python3 gga_load_data.py -i input_example.yml --config config.yml [OPTI
Do not call this script before the galaxy container is ready
"""
-# If this version if not found, Galaxy will use the one that is found
-GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
-DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"
-HOST_DATA_DIR='src_data'
-CONTAINER_DATA_DIR_ROOT='/project_data'
class LoadData(speciesData.SpeciesData):
"""
Child of SpeciesData
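The tool IDs and data-directory constants deleted above are not dropped: they move into the shared constants module imported at the top of the file. That module is not part of this diff, so the sketch below is only inferred from how its names are used here; the config-key values in particular are assumptions based on the string literals they replace.

```python
# constants.py -- inferred sketch, not the actual file from this merge request
GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"

HOST_DATA_DIR = 'src_data'
CONTAINER_DATA_DIR_ROOT = '/project_data'

# Config keys -- the string values are assumptions based on the literals they replace
CONF_GALAXY_DEFAULT_ADMIN_EMAIL = "galaxy_default_admin_email"
CONF_GALAXY_DEFAULT_ADMIN_PASSWORD = "galaxy_default_admin_password"
CONF_ALL_HTTP_PORT = "http_port"

# Assumed from the updated --config help text further down
DEFAULT_CONFIG = "examples/config.yml"
```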
@@ -42,6 +35,10 @@ class LoadData(speciesData.SpeciesData):
Optional data file formatting
"""
+def __init__(self, parameters_dictionary):
+self.existing_folders_cache = {}
+self.bam_metadata_cache = {}
+super().__init__(parameters_dictionary)
def get_history(self):
"""
@@ -72,8 +69,9 @@ class LoadData(speciesData.SpeciesData):
"""
logging.debug("Getting 'Homo sapiens' ID in chado database")
-get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(self.instance,
-tool_id=GET_ORGANISMS_TOOL,
+get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
+self.instance,
+tool_id=constants.GET_ORGANISMS_TOOL, # If this version is not found, Galaxy will use the one that is found
history_id=self.history_id,
tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output_dataset_id)
@@ -83,7 +81,8 @@ class LoadData(speciesData.SpeciesData):
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"]) # needs to be str to be recognized by the chado tool
utilities.run_tool(
-tool_id=DELETE_ORGANISMS_TOOL,
+self.instance,
+tool_id=constants.DELETE_ORGANISMS_TOOL,
history_id=self.history_id,
tool_inputs={"organism": sapiens_id})
except IndexError:
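Since the hunks above only show fragments, the refactored removal of the placeholder 'Homo sapiens' organism reads roughly as follows once pieced together (a sketch for readability; logging and the surrounding try/except are trimmed):

```python
# Look up the 'Homo sapiens' placeholder in the Chado database...
get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
    self.instance,
    tool_id=constants.GET_ORGANISMS_TOOL,  # if this exact version is missing, Galaxy falls back to the installed one
    history_id=self.history_id,
    tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_json_output = self.instance.datasets.download_dataset(
    dataset_id=get_sapiens_id_job_output_dataset_id)

# ...then delete it by organism_id (passed as a string, as the Chado tool expects)
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"])
utilities.run_tool(
    self.instance,
    tool_id=constants.DELETE_ORGANISMS_TOOL,
    history_id=self.history_id,
    tool_inputs={"organism": sapiens_id})
```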
@@ -113,11 +112,11 @@ class LoadData(speciesData.SpeciesData):
:return:
"""
-data_dir_root=os.path.join(self.get_species_dir(), HOST_DATA_DIR)
+data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)
instance = GalaxyInstance(url=self.instance_url,
-email=self.config["galaxy_default_admin_email"],
-password=self.config["galaxy_default_admin_password"]
+email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
logging.info("Looking for project data in %s" % data_dir_root)
@@ -185,8 +184,8 @@ class LoadData(speciesData.SpeciesData):
logging.info("Skipping useless file '%s'" % single_file)
continue
-single_file_relative_path = re.sub(data_dir_root, CONTAINER_DATA_DIR_ROOT, single_file)
-single_file_path_in_container=os.path.join(CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
+single_file_relative_path = re.sub(data_dir_root, constants.CONTAINER_DATA_DIR_ROOT, single_file)
+single_file_path_in_container=os.path.join(constants.CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
logging.info("Adding file '%s' with type '%s' and name '%s'" % (single_file_path_in_container, ftype, clean_name))
datasets = prj_lib.upload_from_galaxy_fs(
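Functionally this hunk only swaps the hard-coded roots for constants.HOST_DATA_DIR and constants.CONTAINER_DATA_DIR_ROOT; the rewrite still maps a host-side path under src_data onto the path the same file has once the data directory is mounted in the container. A toy example with invented paths:

```python
import os
import re

# Hypothetical host-side layout, for illustration only
data_dir_root = "/groups/gga/my_species/src_data"                # <species dir>/src_data
single_file = os.path.join(data_dir_root, "annotation/OGS1.0/genes.gff")

container_root = "/project_data"                                 # constants.CONTAINER_DATA_DIR_ROOT
single_file_relative_path = re.sub(data_dir_root, container_root, single_file)
print(single_file_relative_path)         # /project_data/annotation/OGS1.0/genes.gff

# The subsequent os.path.join keeps the second argument because it is already absolute,
# so the container path is unchanged by this step.
single_file_path_in_container = os.path.join(container_root, single_file_relative_path)
print(single_file_path_in_container)     # /project_data/annotation/OGS1.0/genes.gff
```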
@@ -290,8 +289,8 @@ class LoadData(speciesData.SpeciesData):
logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
self.instance = galaxy.GalaxyInstance(url=self.instance_url,
-email=self.config["galaxy_default_admin_email"],
-password=self.config["galaxy_default_admin_password"]
+email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
try:
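This is the second of the two connection sites switched to the constants-based config keys: the library-upload code above uses bioblend's object-oriented client, while this one uses the low-level client. A minimal sketch of both, with a placeholder URL and credentials standing in for what the real script reads through utilities.parse_config() and the constants.CONF_GALAXY_DEFAULT_ADMIN_* keys:

```python
from bioblend import galaxy
from bioblend.galaxy.objects import GalaxyInstance

instance_url = "http://localhost:8080/"        # placeholder
admin_email = "admin@example.org"              # placeholder
admin_password = "changeme"                    # placeholder

# Low-level client: tool runs, datasets.download_dataset(), ...
gi = galaxy.GalaxyInstance(url=instance_url, email=admin_email, password=admin_password)

# Object-oriented client: library handling and upload_from_galaxy_fs()
obj_gi = GalaxyInstance(url=instance_url, email=admin_email, password=admin_password)
```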
@@ -306,10 +305,7 @@ class LoadData(speciesData.SpeciesData):
if __name__ == "__main__":
-parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
-"with galaxy instances for GGA"
-", following the protocol @ "
-"http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+parser = argparse.ArgumentParser(description="Load data into Galaxy library")
parser.add_argument("input",
type=str,
@@ -321,7 +317,7 @@ if __name__ == "__main__":
parser.add_argument("--config",
type=str,
help="Config path, default to the 'config' file inside the script repository")
help="Config path, default to 'examples/config.yml'")
parser.add_argument("--main-directory",
type=str,
@@ -334,7 +330,11 @@ if __name__ == "__main__":
else:
logging.basicConfig(level=logging.INFO)
-config_file = os.path.abspath(args.config)
+# Parsing the config file if provided, using the default config otherwise
+if args.config:
+config_file = os.path.abspath(args.config)
+else:
+config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
config = utilities.parse_config(config_file)
main_dir = None
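The new fallback resolves the default configuration relative to the script's own location (via sys.argv[0]) rather than the caller's working directory, so the default is found no matter where the script is invoked from. Assuming constants.DEFAULT_CONFIG is 'examples/config.yml', as the updated --config help text suggests:

```python
import os
import sys

DEFAULT_CONFIG = "examples/config.yml"   # assumed value of constants.DEFAULT_CONFIG

# If the script lives at /opt/gga/gga_load_data.py and is run from /tmp,
# the fallback still resolves to /opt/gga/examples/config.yml.
config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), DEFAULT_CONFIG)
print(config_file)
```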
@@ -365,7 +365,7 @@ if __name__ == "__main__":
load_data_for_current_species.config = config
# Set the instance url attribute -- Does not work with localhost on scratch (ALB)
load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
-load_data_for_current_species.config["http_port"],
+load_data_for_current_species.config[constants.CONF_ALL_HTTP_PORT],
load_data_for_current_species.genus_lowercase,
load_data_for_current_species.species)
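For reference, the URL built here points at the per-species Galaxy instance served under /sp/<genus>_<species>/galaxy/; with a hypothetical species and port it expands as follows:

```python
# Hypothetical values -- the real ones come from the input YAML and the config file
http_port = "80"                               # read via constants.CONF_ALL_HTTP_PORT
genus_lowercase, species = "undaria", "pinnatifida"

instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(http_port, genus_lowercase, species)
print(instance_url)   # http://localhost:80/sp/undaria_pinnatifida/galaxy/
```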