From 6c7b8ea0f6e44bb96003c49e1e2f37d4b34b5193 Mon Sep 17 00:00:00 2001 From: Arthur Le Bars <arthur.le-bars@sb-roscoff.fr> Date: Tue, 18 Feb 2020 12:07:10 +0100 Subject: [PATCH] main function --- main.py | 229 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 177 insertions(+), 52 deletions(-) diff --git a/main.py b/main.py index 72f2443..bf999a9 100644 --- a/main.py +++ b/main.py @@ -1,59 +1,184 @@ from bioblend import galaxy +import bioblend as bb import argparse +import os +import sys +import subprocess +import json +from workflow import Workflow +from filetransfer import FileTransfer -""" - - +# TODO: script description """ - -parser = argparse.ArgumentParser(description="Input genus, species, strain, version") -parser.add_argument("--name", help="Sample species name, format: genus-species",type=str, required=True) -parser.add_argument("--strain", help="Strain of the sample species", type=str, required=True) -parser.add_argument("--version", help="Data version (e.g 1.0, 1.2, ...)", type=str, required=True) -parser.add_argument("--common-name", help="Vernacular/common name of the species",type=str, required=False) - -user_arguments = parser.parse_args() - -genus = user_arguments.name.split('-')[0] -species = user_arguments.name.split('-')[1] -strain = user_arguments.strain -vernacular = user_arguments.common_name -instance_url = "http://localhost/sp/" + genus + "_" + species + "/galaxy/" - -print("Species: " + genus + " " + species + " (Wakame)" + - "\nStrain: " + strain + - "\nAccessing " + instance_url + "...") - -gi = galaxy.GalaxyInstance(url=instance_url, - key="291ec50b48dd1f006985b32e39bc3696", - email="alebars@sb-roscoff.fr", - password="pouet", - verify=True) - -# gi = galaxy.GalaxyInstance(url="http://localhost/sp/undaria_pinnatifida/galaxy/", -# # key="291ec50b48dd1f006985b32e39bc3696", -# email="alebars@sb-roscoff.fr", -# password="pouet", -# verify=True) - -# gi = 
class Autoload:
    """Placeholder holder for the JSON input; not used by ``main()`` yet."""

    def __init__(self, json_in):
        # Stored exactly as given; presumably the JSON input path -- TODO confirm.
        self.json_in = json_in

    def main(self):
        """Stub: performs no work and returns ``None``."""
        return None


def load_species_dicts(json_path):
    """Return the per-species parameter dicts stored in the JSON file *json_path*.

    A top-level JSON array yields its items in order.  A single top-level JSON
    object is treated as one species and wrapped in a list (iterating it would
    otherwise yield its keys, which are not usable as species dicts).

    Raises ``OSError`` if the file cannot be opened and
    ``json.JSONDecodeError`` if it is not valid JSON.
    """
    with open(json_path, "r") as infile:
        content = json.load(infile)
    if isinstance(content, dict):
        return [content]
    return list(content)


def species_names(sp_dict):
    """Build the identifiers derived from one species dict.

    Reads the ``"genus"``, ``"species"`` and ``"strain"`` keys and returns the
    tuple ``(genus_lower, genus_species, genus_species_strain)``:

    * ``genus_lower``: genus with its first character lower-cased,
    * ``genus_species``: ``genus_lower + "_" + species``,
    * ``genus_species_strain``: ``genus_species`` with ``"_" + strain``
      appended, unless the strain is empty.

    Raises ``KeyError`` if one of the three keys is missing.
    """
    genus = sp_dict["genus"]
    # Slicing (not indexing) so that an empty genus does not raise IndexError.
    genus_lower = genus[:1].lower() + genus[1:]
    genus_species = genus_lower + "_" + sp_dict["species"]
    strain = sp_dict["strain"]
    if strain:
        return genus_lower, genus_species, genus_species + "_" + strain
    return genus_lower, genus_species, genus_species


def folder_ids_by_name(folders):
    """Map each folder's ``"name"`` to its ``"id"``.

    *folders* is an iterable of dicts (here, the value returned by
    ``gi.libraries.get_folders``).  Entries lacking either key are skipped.
    """
    return {
        folder["name"]: folder["id"]
        for folder in folders
        if "name" in folder and "id" in folder
    }


def main():
    """Command-line entry point: process every species listed in a JSON file.

    For each species dict the function connects to the species' Galaxy
    instance, creates (or reuses) a directory named after the species in the
    current working directory, builds and stores a workflow through the
    project-local ``Workflow`` class, and prints the resulting ids.  The
    original working directory is restored after each species, even when an
    error occurs.
    """
    parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
    parser.add_argument("json", type=str, help="Input JSON file")
    # Boolean switch: without action="store_true" argparse would demand a value.
    parser.add_argument("--just-load", action="store_true",
                        help="Only load data into galaxy, does not create nor run analyses in galaxy")
    # TODO: optional CLI arguments (--name, --strain, --version, --common-name)
    user_arguments = parser.parse_args()

    # One dict of parameters per species
    sp_dict_list = load_species_dicts(user_arguments.json)

    # Directory the script was started from; every species directory is created
    # below it and it is restored as the working directory after each species.
    main_dir = os.getcwd() + "/"

    for sp_dict in sp_dict_list:
        genus = sp_dict["genus"]
        species = sp_dict["species"]
        common = sp_dict["common"]
        strain = sp_dict["strain"]
        genus_lower, genus_species, genus_species_strain = species_names(sp_dict)

        # TODO: prompt y/n asking for the validity of info
        # Test address, change to abims-gga.sb-roscoff.fr/sp/ in production
        instance_url = "http://localhost/sp/" + genus_lower + "_" + species + "/galaxy/"

        print("Species: " + genus + " " + species + " (" + common + ")" + "\nStrain: " + strain +
              "\nAccessing instance " + instance_url)

        # Connect to the galaxy instance of the current species
        # TODO: API key connection issues
        # SECURITY NOTE(review): credentials are hard-coded in the source; they
        # should come from the environment or a protected config file.
        gi = galaxy.GalaxyInstance(url=instance_url,
                                   key="291ec50b48dd1f006985b32e39bc3696",
                                   email="alebars@sb-roscoff.fr",
                                   password="pouet",
                                   verify=True)

        # Create the current species directory (or reuse it) and move into it.
        # To be expanded when docker-swarm is implemented (or all config files
        # are ready), not useful for now.
        sp_dir = os.path.join(main_dir, genus_species) + "/"
        try:
            os.mkdir(sp_dir)
        except FileExistsError:
            print("Directory " + sp_dir + " already exists")
        try:
            os.chdir(sp_dir)
        except OSError:
            print("Cannot access " + sp_dir + ", run with higher privileges")
            break
        wd = os.getcwd()

        try:
            # Check connection to the current instance
            try:
                gi.histories.get_histories()
            except bb.ConnectionError:
                print("Cannot connect to GGA instance @ " + instance_url)
                # Nothing below can work without the instance: next species.
                continue
            print("Successfully connected to instance " + instance_url)

            # TODO: FTP stuff to retrieve the datasets (used in testing, not needed for production)
            # TODO: load the data into the current species directory and into the galaxy instance
            #       ("docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py")
            # TODO: datamapping to run workflow (get datasets from instance)

            # Generate workflow file and store it in the galaxy instance
            workflow = Workflow(parameters_dict=sp_dict, instance=gi)
            print("Generating custom workflow for " + genus_species_strain)
            workflow.generate(working_directory=wd, main_directory=main_dir)
            wf_id = workflow.store()
            hi_id = gi.histories.get_histories()[0]["id"]
            print("Workflow id: " + wf_id)
            print("History id: " + hi_id)
            wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
            print(wf_show["inputs"])

            # TODO: workflow.port()
            # TODO: gi.folders.update_folder() to enable data updates
            # TODO: gi.toolshed.get_repositories() is the tool shed access point
            # TODO: users management (gi.users.get_users())
            li_id = gi.libraries.get_libraries()[0]["id"]
            fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location
            print(fo_gi)
            # Folder ids: access to data to run the first tools (not consumed yet)
            fo_id = folder_ids_by_name(fo_gi)
        finally:
            # Always return to the starting directory so that the next species
            # (and the caller) are not left inside this species' directory.
            os.chdir(main_dir)
        print("\n")


if __name__ == "__main__":
    main()