diff --git a/.gitignore b/.gitignore index 3c4cdc9..728070f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ __pycache__/ *.pyc *.pyo *.pyd + + +#output folder of results +output diff --git a/bin/create-pathways.py b/bin/create-pathways.py index 30a8a68..0703799 100755 --- a/bin/create-pathways.py +++ b/bin/create-pathways.py @@ -22,24 +22,24 @@ def main(): taxon_id = "9606" - if args.input_file: + if args.pathway_list: # Read pathways from the input file try: - pathways_df = pd.read_csv(args.input_file, sep='\t') + pathways_df = pd.read_csv(args.pathway_list, sep='\t') pathways = dict(zip(pathways_df['ID'], pathways_df['PathwayName'])) except Exception as e: - logger.error(f"Error reading input file: {e}") + logger.error(f"Error reading pathway list file: {e}") return else: - logger.error("Input file (--input_file) is required.") + logger.error("Pathway file (--pathway-list) is required.") return # create a .tsv file for pathways list pathways_list_df = pd.DataFrame(list(pathways.items()), columns=['ID', 'PathwayName']) - pathways_list_df.to_csv(args.output, sep='\t', index=False) + pathways_list_df.to_csv(args.output_dir, sep='\t', index=False) for pathway_id, pathway_name in pathways.items(): - generate_pathway_file(pathway_id, taxon_id, pathway_name, decompose=args.decompose) + generate_pathway_file(pathway_id, taxon_id, pathway_name) if __name__ == "__main__": diff --git a/poetry.lock b/poetry.lock index 95ef7d1..70fd599 100644 --- a/poetry.lock +++ b/poetry.lock @@ -227,6 +227,54 @@ pygments = ">=2.0.0" six = ">=1.15.0" urllib3 = "*" +[[package]] +name = "pyarrow" +version = "15.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"}, + {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"}, + {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"}, + {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"}, + {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"}, + {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"}, + {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"}, + {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"}, + {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"}, + {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"}, + {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"}, + {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"}, + {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"}, + {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"}, + {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"}, + {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"}, + {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"}, +] + +[package.dependencies] +numpy = ">=1.16.6,<2" + [[package]] name = "pycodestyle" version = "2.11.1" @@ -331,4 +379,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "087a7f90c37cc65c3e13eb53304a15aa0ab2577865ec456bb45dc23b80952662" +content-hash = "5cb4023c5c07140844fa3c419241c505e2bea868f53b00f5be4f8425e2e6542f" diff --git a/pyproject.toml b/pyproject.toml index 0420a55..7b59b91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ python = "^3.9" py2neo = "^2021.2.4" pandas = "^2.2.0" numpy = "^1.26.3" +pyarrow = "^15.0.0" [tool.poetry.group.dev.dependencies] diff --git a/src/argument_parser.py b/src/argument_parser.py index 2087109..af1fec6 100644 --- a/src/argument_parser.py +++ b/src/argument_parser.py @@ -6,7 +6,8 @@ def parse_args(): parser = argparse.ArgumentParser(description='pathway_creation') parser.add_argument('--debug', action='store_true', help='Enable debugging') parser.add_argument('--verbose', action='store_true', help='Enable verbose logging') - parser.add_argument('--input_file', type=str, help='Input file containing pathway information') + parser.add_argument('--pathway-list', type=str, help='Input file containing pathway information') + parser.add_argument('--output-dir', type=str, default='output', help='Output folder (default: output)') return parser.parse_args() diff --git a/src/reaction_generator.py b/src/reaction_generator.py index 91a758c..c82697d 100755 --- a/src/reaction_generator.py +++ b/src/reaction_generator.py @@ -71,7 +71,7 @@ def break_apart_entity(entity_id): logger.debug(f"Debugging: break_apart_entity - labels: {labels}") logger.debug(f"Debugging: break_apart_entity - broken_apart_members: {broken_apart_members}") - if set(broken_apart_members) == set(member_ids): + if set(tuple(broken_apart_members)) == set(tuple(member_ids)): return [[entity_id]] else: uid = str(uuid.uuid4()) @@ -270,10 +270,12 @@ def get_reactions_df(pathway_id): reaction_ids = pd.unique(reaction_connections_df[['parent_reaction_id', 'child_reaction_id']].values.ravel('K')) reaction_ids = reaction_ids[~pd.isna(reaction_ids)] # removing NA value from list - reaction_inputs_and_outputs_filename = 'reaction_inputs_and_outputs_df_' + pathway_id + '.tsv' + reaction_inputs_and_outputs_df = None + + reaction_inputs_and_outputs_filename = 'reaction_inputs_and_outputs_df_' + str(pathway_id) + '.tsv' if os.path.isfile(reaction_inputs_and_outputs_filename): reaction_inputs_and_outputs_df = pd.read_table(reaction_inputs_and_outputs_filename, delimiter="\t") - + else: reaction_inputs_and_outputs_df = get_reaction_inputs_and_outputs(reaction_ids) reaction_inputs_and_outputs_df.to_csv(reaction_inputs_and_outputs_filename, sep="\t")