
Release_2.0

Merged: Loraine Gueguen requested to merge release_2.0 into dev
2 files changed: +25 −25
@@ -11,12 +11,12 @@ import time
import json
import yaml
import subprocess
from bioblend import galaxy
from bioblend.galaxy.objects import GalaxyInstance
import utilities
import speciesData
+import constants
"""
gga_load_data.py
@@ -26,13 +26,6 @@ Usage: $ python3 gga_load_data.py -i input_example.yml --config config.yml [OPTI
Do not call this script before the galaxy container is ready
"""
-# If this version if not found, Galaxy will use the one that is found
-GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
-DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"
-HOST_DATA_DIR='src_data'
-CONTAINER_DATA_DIR_ROOT='/project_data'
class LoadData(speciesData.SpeciesData):
"""
Child of SpeciesData
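The tool IDs and data-directory constants deleted above are not dropped: they move into the shared constants module imported at the top of the file. That module is not part of this diff, so the sketch below is only inferred from how its names are used here; the config-key values in particular are assumptions based on the string literals they replace.

```python
# constants.py -- inferred sketch, not the actual file from this merge request
GET_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_get_organisms/organism_get_organisms/2.3.4+galaxy0"
DELETE_ORGANISMS_TOOL = "toolshed.g2.bx.psu.edu/repos/gga/chado_organism_delete_organisms/organism_delete_organisms/2.3.4+galaxy0"

HOST_DATA_DIR = 'src_data'
CONTAINER_DATA_DIR_ROOT = '/project_data'

# Config keys -- the string values are assumptions based on the literals they replace
CONF_GALAXY_DEFAULT_ADMIN_EMAIL = "galaxy_default_admin_email"
CONF_GALAXY_DEFAULT_ADMIN_PASSWORD = "galaxy_default_admin_password"
CONF_ALL_HTTP_PORT = "http_port"

# Assumed from the updated --config help text further down
DEFAULT_CONFIG = "examples/config.yml"
```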
@@ -42,6 +35,10 @@ class LoadData(speciesData.SpeciesData):
Optional data file formatting
"""
+def __init__(self, parameters_dictionary):
+self.existing_folders_cache = {}
+self.bam_metadata_cache = {}
+super().__init__(parameters_dictionary)
def get_history(self):
"""
@@ -72,8 +69,9 @@ class LoadData(speciesData.SpeciesData):
"""
logging.debug("Getting 'Homo sapiens' ID in chado database")
-get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(self.instance,
-tool_id=GET_ORGANISMS_TOOL,
+get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
+self.instance,
+tool_id=constants.GET_ORGANISMS_TOOL, # If this version is not found, Galaxy will use the one that is found
history_id=self.history_id,
tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_json_output = self.instance.datasets.download_dataset(dataset_id=get_sapiens_id_job_output_dataset_id)
@@ -83,7 +81,8 @@ class LoadData(speciesData.SpeciesData):
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"]) # needs to be str to be recognized by the chado tool
utilities.run_tool(
-tool_id=DELETE_ORGANISMS_TOOL,
+self.instance,
+tool_id=constants.DELETE_ORGANISMS_TOOL,
history_id=self.history_id,
tool_inputs={"organism": sapiens_id})
except IndexError:
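Since the hunks above only show fragments, the refactored removal of the placeholder 'Homo sapiens' organism reads roughly as follows once pieced together (a sketch for readability; logging and the surrounding try/except are trimmed):

```python
# Look up the 'Homo sapiens' placeholder in the Chado database...
get_sapiens_id_job_output_dataset_id = utilities.run_tool_and_get_single_output_dataset_id(
    self.instance,
    tool_id=constants.GET_ORGANISMS_TOOL,  # if this exact version is missing, Galaxy falls back to the installed one
    history_id=self.history_id,
    tool_inputs={"genus": "Homo", "species": "sapiens"})
get_sapiens_id_json_output = self.instance.datasets.download_dataset(
    dataset_id=get_sapiens_id_job_output_dataset_id)

# ...then delete it by organism_id (passed as a string, as the Chado tool expects)
get_sapiens_id_final_output = json.loads(get_sapiens_id_json_output)[0]
sapiens_id = str(get_sapiens_id_final_output["organism_id"])
utilities.run_tool(
    self.instance,
    tool_id=constants.DELETE_ORGANISMS_TOOL,
    history_id=self.history_id,
    tool_inputs={"organism": sapiens_id})
```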
@@ -113,11 +112,11 @@ class LoadData(speciesData.SpeciesData):
:return:
"""
-data_dir_root=os.path.join(self.get_species_dir(), HOST_DATA_DIR)
+data_dir_root=os.path.join(self.get_species_dir(), constants.HOST_DATA_DIR)
instance = GalaxyInstance(url=self.instance_url,
-email=self.config["galaxy_default_admin_email"],
-password=self.config["galaxy_default_admin_password"]
+email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
logging.info("Looking for project data in %s" % data_dir_root)
@@ -185,8 +184,8 @@ class LoadData(speciesData.SpeciesData):
logging.info("Skipping useless file '%s'" % single_file)
continue
-single_file_relative_path = re.sub(data_dir_root, CONTAINER_DATA_DIR_ROOT, single_file)
-single_file_path_in_container=os.path.join(CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
+single_file_relative_path = re.sub(data_dir_root, constants.CONTAINER_DATA_DIR_ROOT, single_file)
+single_file_path_in_container=os.path.join(constants.CONTAINER_DATA_DIR_ROOT, single_file_relative_path)
logging.info("Adding file '%s' with type '%s' and name '%s'" % (single_file_path_in_container, ftype, clean_name))
datasets = prj_lib.upload_from_galaxy_fs(
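Functionally this hunk only swaps the hard-coded roots for constants.HOST_DATA_DIR and constants.CONTAINER_DATA_DIR_ROOT; the rewrite still maps a host-side path under src_data onto the path the same file has once the data directory is mounted in the container. A toy example with invented paths:

```python
import os
import re

# Hypothetical host-side layout, for illustration only
data_dir_root = "/groups/gga/my_species/src_data"                # <species dir>/src_data
single_file = os.path.join(data_dir_root, "annotation/OGS1.0/genes.gff")

container_root = "/project_data"                                 # constants.CONTAINER_DATA_DIR_ROOT
single_file_relative_path = re.sub(data_dir_root, container_root, single_file)
print(single_file_relative_path)         # /project_data/annotation/OGS1.0/genes.gff

# The subsequent os.path.join keeps the second argument because it is already absolute,
# so the container path is unchanged by this step.
single_file_path_in_container = os.path.join(container_root, single_file_relative_path)
print(single_file_path_in_container)     # /project_data/annotation/OGS1.0/genes.gff
```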
@@ -290,8 +289,8 @@ class LoadData(speciesData.SpeciesData):
logging.info("Connecting to the galaxy instance (%s)" % self.instance_url)
self.instance = galaxy.GalaxyInstance(url=self.instance_url,
-email=self.config["galaxy_default_admin_email"],
-password=self.config["galaxy_default_admin_password"]
+email=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_EMAIL],
+password=self.config[constants.CONF_GALAXY_DEFAULT_ADMIN_PASSWORD]
)
try:
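This is the second of the two connection sites switched to the constants-based config keys: the library-upload code above uses bioblend's object-oriented client, while this one uses the low-level client. A minimal sketch of both, with a placeholder URL and credentials standing in for what the real script reads through utilities.parse_config() and the constants.CONF_GALAXY_DEFAULT_ADMIN_* keys:

```python
from bioblend import galaxy
from bioblend.galaxy.objects import GalaxyInstance

instance_url = "http://localhost:8080/"        # placeholder
admin_email = "admin@example.org"              # placeholder
admin_password = "changeme"                    # placeholder

# Low-level client: tool runs, datasets.download_dataset(), ...
gi = galaxy.GalaxyInstance(url=instance_url, email=admin_email, password=admin_password)

# Object-oriented client: library handling and upload_from_galaxy_fs()
obj_gi = GalaxyInstance(url=instance_url, email=admin_email, password=admin_password)
```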
@@ -306,10 +305,7 @@ class LoadData(speciesData.SpeciesData):
if __name__ == "__main__":
-parser = argparse.ArgumentParser(description="Automatic data loading in containers and interaction "
-"with galaxy instances for GGA"
-", following the protocol @ "
-"http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+parser = argparse.ArgumentParser(description="Load data into Galaxy library")
parser.add_argument("input",
type=str,
@@ -321,7 +317,7 @@ if __name__ == "__main__":
parser.add_argument("--config",
type=str,
help="Config path, default to the 'config' file inside the script repository")
help="Config path, default to 'examples/config.yml'")
parser.add_argument("--main-directory",
type=str,
@@ -334,7 +330,11 @@ if __name__ == "__main__":
else:
logging.basicConfig(level=logging.INFO)
-config_file = os.path.abspath(args.config)
+# Parsing the config file if provided, using the default config otherwise
+if args.config:
+config_file = os.path.abspath(args.config)
+else:
+config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), constants.DEFAULT_CONFIG)
config = utilities.parse_config(config_file)
main_dir = None
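The new fallback resolves the default configuration relative to the script's own location (via sys.argv[0]) rather than the caller's working directory, so the default is found no matter where the script is invoked from. Assuming constants.DEFAULT_CONFIG is 'examples/config.yml', as the updated --config help text suggests:

```python
import os
import sys

DEFAULT_CONFIG = "examples/config.yml"   # assumed value of constants.DEFAULT_CONFIG

# If the script lives at /opt/gga/gga_load_data.py and is run from /tmp,
# the fallback still resolves to /opt/gga/examples/config.yml.
config_file = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), DEFAULT_CONFIG)
print(config_file)
```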
@@ -365,7 +365,7 @@ if __name__ == "__main__":
load_data_for_current_species.config = config
# Set the instance url attribute -- Does not work with localhost on scratch (ALB)
load_data_for_current_species.instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(
-load_data_for_current_species.config["http_port"],
+load_data_for_current_species.config[constants.CONF_ALL_HTTP_PORT],
load_data_for_current_species.genus_lowercase,
load_data_for_current_species.species)
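For reference, the URL built here points at the per-species Galaxy instance served under /sp/<genus>_<species>/galaxy/; with a hypothetical species and port it expands as follows:

```python
# Hypothetical values -- the real ones come from the input YAML and the config file
http_port = "80"                               # read via constants.CONF_ALL_HTTP_PORT
genus_lowercase, species = "undaria", "pinnatifida"

instance_url = "http://localhost:{0}/sp/{1}_{2}/galaxy/".format(http_port, genus_lowercase, species)
print(instance_url)   # http://localhost:80/sp/undaria_pinnatifida/galaxy/
```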