from bioblend import galaxy
import bioblend as bb
import argparse
import os
import sys
import subprocess
import json
from workflow import Workflow
from filetransfer import FileTransfer

"""
Automated loading of species data into GGA (Galaxy Genome Annotation) instances.

Reads a JSON input file describing one or more species (keys: "genus", "species",
"common", "strain", "ogs version", "genome version", "performed by"), connects to
the Galaxy instance of each species, creates a per-species working directory,
imports the source datasets into a new history and generates a custom workflow.
"""


class Autoload:
    # Placeholder class, currently unused

    def __init__(self, json_in):
        self.json_in = json_in

    def main(self):
        return None


def main():
    parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
    parser.add_argument("json", type=str, help="Input JSON file")
    parser.add_argument("--just-load", action="store_true",
                        help="Only load data into galaxy, does not create nor run analyses in galaxy")

    # CLI stuff
    # parser.add_argument("--name", help="Sample species name, format: genus-species", type=str)
    # parser.add_argument("--strain", help="Strain of the sample species", type=str)
    # parser.add_argument("--version", help="Data version (e.g 1.0, 1.2, ...)", type=str)
    # parser.add_argument("--common-name", help="Vernacular/common name of the species", type=str)

    user_arguments = parser.parse_args()

    # List that will hold all dicts from the JSON input file, containing parameters for each species
    sp_dict_list = []

    # JSON parsing and loading
    with open(user_arguments.json, 'r') as infile:
        json_sp_dict = json.load(infile)
        json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
        for json_sp in json_sp_dict:
            sp_dict_list.append(json_sp)

    # Get variables from the current species dict
    for sp_dict in sp_dict_list:
        sp_params_list = []
        genus = sp_dict["genus"]
        genus_lower = genus[0].lower() + genus[1:]
        species = sp_dict["species"]
        genus_species = genus_lower + "_" + species
        common = sp_dict["common"]
        strain = sp_dict["strain"]
        if strain != "":
            genus_species_strain = genus_species + "_" + strain
        else:
            genus_species_strain = genus_species
        ogs_version = sp_dict["ogs version"]
        genome_version = sp_dict["genome version"]
        performed_by = sp_dict["performed by"]

        # CLI stuff
        # genus = user_arguments.name.split('-')[0]
        # genus_lower = genus[0].lower() + genus[1:]
        # genus_upper = genus[0].upper() + genus[1:]
        # species = user_arguments.name.split('-')[1]
        # strain = user_arguments.strain
        # vernacular = user_arguments.common_name

        # TODO: prompt y/n asking for the validity of info
        # Test address, change to abims-gga.sb-roscoff.fr/sp/ in production
        instance_url = "http://localhost/sp/" + genus_lower + "_" + species + "/galaxy/"

        print("Species: " + genus + " " + species + " (" + common + ")" +
              "\nStrain: " + strain +
              "\nAccessing instance " + instance_url)

        # Connect to the galaxy instance of the current species  TODO: API key connection issues
        gi = galaxy.GalaxyInstance(url=instance_url,
                                   key="0e993414b2f876515e74dd890f16ffc7",
                                   email="alebars@sb-roscoff.fr",
                                   password="pouet",
                                   verify=False)

        """
        This part creates the current species directory and moves into it.
        If it already exists, just move into it.
        To be expanded when docker-swarm is implemented (or all config files are ready), not useful for now.
        """
        main_dir = os.getcwd() + "/"
        sp_dir = os.path.join(main_dir, genus_species) + "/"

        try:
            os.mkdir(sp_dir)
        except FileExistsError:
            print("Directory " + sp_dir + " already exists")
        try:
            os.chdir(sp_dir)
            wd = os.getcwd()
        except OSError:
            print("Cannot access " + sp_dir + ", run with higher privileges")
            break
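        # Hedged sketch (not part of the original script): one way to address the
        # "secure pswd and API key" TODO below would be to read the credentials from
        # environment variables instead of hard-coding them, e.g.:
        #
        #     gi = galaxy.GalaxyInstance(url=instance_url,
        #                                key=os.environ.get("GGA_API_KEY"),
        #                                email=os.environ.get("GGA_EMAIL"),
        #                                password=os.environ.get("GGA_PASSWORD"),
        #                                verify=False)
        #
        # The variable names GGA_API_KEY, GGA_EMAIL and GGA_PASSWORD are assumptions,
        # not existing configuration.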
        # Production instance example  TODO: secure pswd and API key? + manage API keys (if needed)
        # gi = galaxy.GalaxyInstance(url="http://abims-gga.sb-roscoff.fr/sp/ectocarpus_species1/galaxy/",
        #                            key="84dfbee3c0efa9155518f01fbeff57c8",
        #                            email="gga@sb-roscoff.fr",
        #                            password="****")

        # Check connection to the current instance
        try:
            hl = gi.histories.get_histories()
        except bb.ConnectionError:
            print("Cannot connect to GGA instance @ " + instance_url)
        else:
            print("Successfully connected to instance " + instance_url)

        # TODO: FTP stuff to retrieve the datasets (used in testing, not needed for production)
        # try:
        #     os.mkdir("./src_data")
        # except FileExistsError:
        #     print("src_data folder already exists for " + genus_species_strain)
        # else:
        #     print("src_data folder created for " + genus_species_strain)
        #
        # TODO: load the data into the current species directory and load it into galaxy instance
        # setup_data_libraries_cl = \
        #     "docker-compose exec galaxy /tool_deps/_conda/bin/python /opt/setup_data_libraries.py"
        #
        # try:
        #     setup_data_libraries = subprocess.Popen(setup_data_libraries_cl.split(), stdout=subprocess.PIPE)
        #     # output message from the data loading script
        #     setup_data_libraries_output = setup_data_libraries.communicate()
        # except Exception:
        #     print("Cannot load data into container for " + genus_species_strain)
        #     break
        # else:
        #     print("Data successfully loaded into docker container for " + genus_species_strain)

        # TODO: datamapping to run workflow (get datasets from instance)
        datamap = {}

        # generate workflow file and run it in the galaxy instance
        workflow = Workflow(parameters_dict=sp_dict, instance=gi)
        print("Generating custom workflow for " + genus_species_strain)
        wf_dict = workflow.generate(working_directory=wd, main_directory=main_dir)

        gi.histories.create_history(name=str(genus_species_strain + "_" + genome_version))
        hi = gi.histories.get_histories(name=str(genus_species_strain + "_" + genome_version))
        hi_id = hi[0]["id"]
        # print(hi)
        li = gi.libraries.get_libraries()  # only one library
        # print(li)
        li_id = gi.libraries.get_libraries()[0]["id"]  # project data folder/library
        # print(li_id)
        fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location (project data)
        # print(fo_gi)
        fo_id = {}
        current_fo_name = ""
        # folders ids: access to data to run the first tools
        for i in fo_gi:
            for k, v in i.items():
                if k == "name":
                    fo_id[v] = 0
                    current_fo_name = v
                if k == "id":
                    fo_id[current_fo_name] = v
        print("IDs: ")
        datasets = {}
        for k, v in fo_id.items():
            print("\t" + k + ": " + v)
            if k == "/genome":
                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
                # print(sub_folder_content)
                for k2, v2 in sub_folder_content.items():
                    # print(k2)
                    # print(v2)
                    for e in v2:
                        if type(e) == dict:
                            if e["name"].endswith(".fa"):
                                datasets["genome_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
            elif k == "/annotation/" + genus_species:
                sub_folder_content = gi.folders.show_folder(folder_id=v, contents=True)
                # print(sub_folder_content)
                for k2, v2 in sub_folder_content.items():
                    # print(k2)
                    # print(v2)
                    for e in v2:
                        if type(e) == dict:
                            # TODO: manage several files of the same type
                            if e["name"].endswith("transcripts-gff.fa"):
                                datasets["transcripts_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
                            elif e["name"].endswith("proteins.fasta"):
                                datasets["proteins_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])
                            elif e["name"].endswith(".gff"):
                                datasets["gff_file"] = e["ldda_id"]
                                print("\t\t" + e["name"] + ": " + e["ldda_id"])

        current_hi = gi.histories.get_current_history()["id"]
        print("History ID: " + current_hi)
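        # Hedged sketch (not part of the original script) for the "datamapping to run
        # workflow" TODO above: the import_workflow_dict() call below returns a dict
        # describing the imported workflow; its "id" could be used to invoke the
        # workflow on the library datasets gathered in `datasets`, e.g.:
        #
        #     wf_imported = gi.workflows.import_workflow_dict(workflow_dict=json.loads(wf_dict))
        #     wf_inputs = {
        #         "0": {"id": datasets["genome_file"], "src": "ldda"},
        #         "1": {"id": datasets["gff_file"], "src": "ldda"},
        #     }
        #     gi.workflows.invoke_workflow(workflow_id=wf_imported["id"],
        #                                  inputs=wf_inputs,
        #                                  history_id=hi_id)
        #
        # The step indices ("0", "1") and the use of library datasets ("ldda") as
        # workflow inputs are assumptions about the generated workflow, not a
        # description of its actual inputs.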
        gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["genome_file"])
        gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["gff_file"])
        gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["transcripts_file"])
        gi.histories.upload_dataset_from_library(history_id=hi_id, lib_dataset_id=datasets["proteins_file"])

        # print(gi.tools.get_tool_panel())
        # print(wf_dict)
        wf_dict_2 = json.loads(wf_dict)  # add to workflow class
        gi.workflows.import_workflow_dict(workflow_dict=wf_dict_2)  # doesn't work with eval()

        # PRE FILLED METHOD
        # wf_id = workflow.store()
        # hi_id = gi.histories.get_histories()[0]["id"]
        # print("Workflow id: " + wf_id)
        # print("History id: " + hi_id)
        # wf_show = gi.workflows.show_workflow(workflow_id=wf_id)
        # # print(wf_show["inputs"])  # ->no inputs
        # # workflow.port()
        # li_id = gi.libraries.get_libraries()[0]["id"]
        # # gi.folders.update_folder()  # TODO: add method to enable data updates
        # tsi = gi.toolshed.get_repositories()  # tool shed repo access point
        # # print(gi.users.get_users())  # TODO: users management
        # # print(gi.libraries.show_library(li_id))
        #
        # # TODO: create a NEW history, import data to galaxy from library (src_data)
        # fo_gi = gi.libraries.get_folders(library_id=li_id)  # data location
        # fo_id = {}
        # current_fo_name = ""
        # # print(fo_gi)
        # # folders ids: access to data to run the first tools
        # for i in fo_gi:
        #     for k, v in i.items():
        #         if k == "name":
        #             fo_id[v] = 0
        #             current_fo_name = v
        #         if k == "id":
        #             fo_id[current_fo_name] = v
        # print("Folders id: ")
        # for k, v in fo_id.items():
        #     print("\t" + k + ": " + v)
        # workflow.show()
        #
        # gi.workflows.run_workflow(workflow_id=wf_id)  # pre filled workflow, use the set on runtime approach instead

        # for testing, purge configured @ ~/config/galaxy.yml.docker_sample
        # gi.histories.delete_history(history_id=hi_id, purge=True)

        os.chdir(main_dir)
        print("\n")


if __name__ == "__main__":
    main()