diff --git a/autoload.py b/autoload.py
index 82779cd0774c2a4424cb67f6ebd4d56b37815b1d..089a4d592abd688cf9429efee39bca511b561ca7 100644
--- a/autoload.py
+++ b/autoload.py
@@ -11,6 +11,7 @@ import numpy
 import pandas
 import logging
 import re
+from . import table_parser
 
 
 class Autoload:
@@ -82,7 +83,6 @@ class Autoload:
         - change headers for pep file
         - load data into the galaxy container with the galaxy_data_libs_SI.py script
 
-        :param method:
         :return:
         """
         os.chdir(self.main_dir)
@@ -101,10 +101,8 @@ class Autoload:
         species_folder_name = "_".join([self.genus_lowercase, self.species, self.strain, self.sex])
         try:
             os.mkdir("./src_data")
-            os.mkdir("./src_data/annotation")
-            os.mkdir("./src_data/genome")
-            os.mkdir("./src_data/annotation/" + species_folder_name)
-            os.mkdir("./src_data/genome/" + species_folder_name)
+            os.makedirs("./src_data/annotation/" + species_folder_name)
+            os.makedirs("./src_data/genome/" + species_folder_name)
         except FileExistsError:
             logging.debug("src_data directory tree already exists")
         except PermissionError:
@@ -141,7 +139,14 @@ class Autoload:
                               self.source_files["proteins_file"]]
         logging.info("changing fasta headers in " + self.source_files["proteins_file"])
         subprocess.run(modify_pep_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
-
+        # production version (uncomment on the production host)
+        # modify_transcript_headers = ["/usr/local/genome2/mmo/scripts/phaeoexplorer/phaeoexplorer-change_transcript_fasta_header.sh",
+        #                              self.source_files["transcripts_file"]]
+        # test version
+        modify_transcript_headers = ["/home/alebars/gga/phaeoexplorer-change_transcript_fasta_header.sh",
+                                     self.source_files["transcripts_file"]]
+        logging.info("changing fasta headers in " + self.source_files["transcripts_file"])
+        subprocess.run(modify_transcript_headers, stdout=subprocess.PIPE, cwd=annotation_dir)
         # src_data cleaning
         if os.path.exists(annotation_dir + "outfile"):
             subprocess.run(["mv", annotation_dir + "/outfile", self.source_files["proteins_file"]],
@@ -413,69 +418,75 @@ class Autoload:
         return None
 
 
-parser = argparse.ArgumentParser(description="Input genus, species, strain, version")
-parser.add_argument("json", type=str, help="Input JSON file")
-parser.add_argument("-v", "--verbose",
-                    help="Increase output verbosity",
-                    action="store_true")
-parser.add_argument("--init-instance",
-                    help="Initialization of galaxy instance. Run first in an empty instance",
-                    action="store_true")
-parser.add_argument("--load-data",
-                    help="Create src_data directory tree and load its data into the instance",
-                    action="store_true")
-parser.add_argument("--run-main",
-                    help="Run main workflow (load data into chado, sync all with tripal, "
-                         "index tripal data, populate materialized view, "
-                         "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse",
-                    action="store_true")
-args = parser.parse_args()
-
-if args.verbose:
-    logging.basicConfig(level=logging.DEBUG)
-else:
-    logging.basicConfig(level=logging.INFO)
-
-sp_dict_list = list()
-with open(args.json, 'r') as infile:
-    json_sp_dict = json.load(infile)
-    json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
-    for json_sp in json_sp_dict:
-        sp_dict_list.append(json_sp)
-
-for sp_dict in sp_dict_list:
-    al = Autoload(species_parameters_dictionary=sp_dict, args=args)
-    if args.init_instance:
-        logging.info("initializing the galaxy instance")
-        al.init_instance()
-        al.get_instance_attributes()
-    if args.load_data:
-        logging.info("loading data into galaxy")
-        al.load_data_in_galaxy()
-    if args.run_main:
-        logging.info("running main workflow")
-        al.get_organism_and_analyses_ids()
-        workflow_parameters = dict()
-        workflow_parameters["0"] = {}
-        workflow_parameters["1"] = {}
-        workflow_parameters["2"] = {}
-        workflow_parameters["3"] = {}
-        workflow_parameters["4"] = {"organism": al.org_id,
-                                    "analysis_id": al.genome_analysis_id,
-                                    "do_update": "true"}
-        workflow_parameters["5"] = {"organism": al.org_id,
-                                    "analysis_id": al.ogs_analysis_id}
-        workflow_parameters["6"] = {"organism_id": al.org_id}
-        workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
-        workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
-        workflow_parameters["9"] = {"organism_id": al.org_id}
-        workflow_parameters["10"] = {}
-        workflow_parameters["11"] = {}
-
-        al.datamap = dict()
-        al.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]}
-        al.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]}
-        al.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]}
-        al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
-
-        al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Automatic loading and interaction with galaxy instance (GGA)"
+                                                 ", following the protocol @ "
+                                                 "http://gitlab.sb-roscoff.fr/abims/e-infra/gga")
+    parser.add_argument("-j", "--json", type=str,
+                        help="Input JSON file, alternative to the input table")
+    parser.add_argument("table", type=str,
+                        help="Input table (tabulated file that describes all data)")
+    parser.add_argument("-v", "--verbose",
+                        help="Increase output verbosity",
+                        action="store_true")
+    parser.add_argument("--init-instance",
+                        help="Initialization of galaxy instance. Run first in an empty instance",
+                        action="store_true")
+    parser.add_argument("--load-data",
+                        help="Create src_data directory tree and load its data into the instance",
+                        action="store_true")
+    parser.add_argument("--run-main",
+                        help="Run main workflow (load data into chado, sync all with tripal, "
+                             "index tripal data, populate materialized view, "
+                             "create a jbrowse for the current genus_species_strain_sex and add organism to jbrowse)",
+                        action="store_true")
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
+    sp_dict_list = list()
+    # NOTE: only the JSON input is loaded below; table input handling is not part of this diff
+    with open(args.json, 'r') as infile:
+        json_sp_dict = json.load(infile)
+        json_sp_dump = json.dumps(json_sp_dict, indent=4, sort_keys=True)
+        for json_sp in json_sp_dict:
+            sp_dict_list.append(json_sp)
+
+    for sp_dict in sp_dict_list:
+        al = Autoload(species_parameters_dictionary=sp_dict, args=args)
+        if args.init_instance:
+            logging.info("initializing the galaxy instance")
+            al.init_instance()
+            al.get_instance_attributes()
+        if args.load_data:
+            logging.info("loading data into galaxy")
+            al.load_data_in_galaxy()
+        if args.run_main:
+            logging.info("running main workflow")
+            al.get_organism_and_analyses_ids()
+            workflow_parameters = dict()
+            workflow_parameters["0"] = {}
+            workflow_parameters["1"] = {}
+            workflow_parameters["2"] = {}
+            workflow_parameters["3"] = {}
+            workflow_parameters["4"] = {"organism": al.org_id,
+                                        "analysis_id": al.genome_analysis_id,
+                                        "do_update": "true"}
+            workflow_parameters["5"] = {"organism": al.org_id,
+                                        "analysis_id": al.ogs_analysis_id}
+            workflow_parameters["6"] = {"organism_id": al.org_id}
+            workflow_parameters["7"] = {"analysis_id": al.ogs_analysis_id}
+            workflow_parameters["8"] = {"analysis_id": al.genome_analysis_id}
+            workflow_parameters["9"] = {"organism_id": al.org_id}
+            workflow_parameters["10"] = {}
+            workflow_parameters["11"] = {}
+
+            al.datamap = dict()
+            al.datamap["0"] = {"src": "hda", "id": al.datasets["genome_file"]}
+            al.datamap["1"] = {"src": "hda", "id": al.datasets["gff_file"]}
+            al.datamap["2"] = {"src": "hda", "id": al.datasets["proteins_file"]}
+            al.datamap["3"] = {"src": "hda", "id": al.datasets["transcripts_file"]}
+
+            al.run_workflow(workflow_name="main", workflow_parameters=workflow_parameters, datamap=al.datamap)