Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ __pycache__/
*.pyc
*.pyo
*.pyd


# Output folder for generated results
output
12 changes: 6 additions & 6 deletions bin/create-pathways.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,24 @@ def main():

taxon_id = "9606"

if args.input_file:
if args.pathway_list:
# Read pathway IDs and names from the tab-separated pathway list file
try:
pathways_df = pd.read_csv(args.input_file, sep='\t')
pathways_df = pd.read_csv(args.pathway_list, sep='\t')
pathways = dict(zip(pathways_df['ID'], pathways_df['PathwayName']))
except Exception as e:
logger.error(f"Error reading input file: {e}")
logger.error(f"Error reading pathway list file: {e}")
return
else:
logger.error("Input file (--input_file) is required.")
logger.error("Pathway file (--pathway-list) is required.")
return

# Write the pathway list (ID, PathwayName) out as a .tsv file
pathways_list_df = pd.DataFrame(list(pathways.items()), columns=['ID', 'PathwayName'])
pathways_list_df.to_csv(args.output, sep='\t', index=False)
pathways_list_df.to_csv(args.output_dir, sep='\t', index=False)

for pathway_id, pathway_name in pathways.items():
generate_pathway_file(pathway_id, taxon_id, pathway_name, decompose=args.decompose)
generate_pathway_file(pathway_id, taxon_id, pathway_name)


if __name__ == "__main__":
Expand Down
50 changes: 49 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ python = "^3.9"
py2neo = "^2021.2.4"
pandas = "^2.2.0"
numpy = "^1.26.3"
pyarrow = "^15.0.0"


[tool.poetry.group.dev.dependencies]
Expand Down
3 changes: 2 additions & 1 deletion src/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ def parse_args():
parser = argparse.ArgumentParser(description='pathway_creation')
parser.add_argument('--debug', action='store_true', help='Enable debugging')
parser.add_argument('--verbose', action='store_true', help='Enable verbose logging')
parser.add_argument('--input_file', type=str, help='Input file containing pathway information')
parser.add_argument('--pathway-list', type=str, help='Input file containing pathway information')
parser.add_argument('--output-dir', type=str, default='output', help='Output folder (default: output)')

return parser.parse_args()

Expand Down
8 changes: 5 additions & 3 deletions src/reaction_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def break_apart_entity(entity_id):
logger.debug(f"Debugging: break_apart_entity - labels: {labels}")
logger.debug(f"Debugging: break_apart_entity - broken_apart_members: {broken_apart_members}")

if set(broken_apart_members) == set(member_ids):
if set(tuple(broken_apart_members)) == set(tuple(member_ids)):
return [[entity_id]]
else:
uid = str(uuid.uuid4())
Expand Down Expand Up @@ -270,10 +270,12 @@ def get_reactions_df(pathway_id):
reaction_ids = pd.unique(reaction_connections_df[['parent_reaction_id', 'child_reaction_id']].values.ravel('K'))
reaction_ids = reaction_ids[~pd.isna(reaction_ids)] # removing NA value from list

reaction_inputs_and_outputs_filename = 'reaction_inputs_and_outputs_df_' + pathway_id + '.tsv'
reaction_inputs_and_outputs_df = None

reaction_inputs_and_outputs_filename = 'reaction_inputs_and_outputs_df_' + str(pathway_id) + '.tsv'
if os.path.isfile(reaction_inputs_and_outputs_filename):
reaction_inputs_and_outputs_df = pd.read_table(reaction_inputs_and_outputs_filename, delimiter="\t")

else:
reaction_inputs_and_outputs_df = get_reaction_inputs_and_outputs(reaction_ids)
reaction_inputs_and_outputs_df.to_csv(reaction_inputs_and_outputs_filename, sep="\t")

Expand Down