diff --git a/docs/changelog.rst b/docs/changelog.rst index 91eeac4d..1353234a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,6 +2,13 @@ Changelog ========== +0.10.2 +------------------------- + +- Fixed readthedocs +- Removed save_memory from LIONESS (CLI). The first PANDA needs to keep the PANDA value in memory +- Added BONOBO to CLI + 0.10.1 ------------------------- diff --git a/docs/functions/cli.rst b/docs/functions/cli.rst index 15e14f35..7806aa78 100644 --- a/docs/functions/cli.rst +++ b/docs/functions/cli.rst @@ -23,7 +23,7 @@ Commands .. click:: netZooPy.cli:cli :prog: netzoopy - :commands: panda,lioness,condor + :commands: panda,lioness,condor,bonobo :nested: full diff --git a/netZooPy/cli.py b/netZooPy/cli.py index 8d872914..baec3888 100755 --- a/netZooPy/cli.py +++ b/netZooPy/cli.py @@ -11,3 +11,4 @@ def cli(): cli.add_command(cl.panda) cli.add_command(cl.lioness) cli.add_command(cl.condor) +cli.add_command(cl.bonobo) diff --git a/netZooPy/command_line.py b/netZooPy/command_line.py index e03c620a..1d2e51a5 100755 --- a/netZooPy/command_line.py +++ b/netZooPy/command_line.py @@ -5,6 +5,7 @@ import click from netZooPy.panda.panda import Panda from netZooPy.lioness import Lioness +from netZooPy.ligress import Bonobo from netZooPy.condor import condor_object ############################################################################# @@ -119,9 +120,9 @@ def panda(expression, motif, ppi, output, computing='cpu', precision='double',wi help='precision option') @click.option('--ncores', type=int, show_default=True, default=1, help='Number of cores. Lioness CPU parallelizes over ncores') -@click.option('--save_memory', is_flag=True, show_default=False, - help='panda option. 
When true the result network is weighted adjacency matrix of size (nTFs, nGenes).\ - when false The result network has 4 columns in the form gene - TF - weight in motif prior - PANDA edge.') +#@click.option('--save_memory', is_flag=True, show_default=False, +# help='panda option. When true the result network is weighted adjacency matrix of size (nTFs, nGenes).\ +# when false The result network has 4 columns in the form gene - TF - weight in motif prior - PANDA edge.') @click.option('--save_tmp', is_flag=True, show_default=True, help='panda option') @click.option('--rm_missing', is_flag=True, show_default=False, @@ -198,7 +199,8 @@ def lioness(expression, motif, ppi, output_panda, output_lioness, el, fmt, compu # Run PANDA print('Start Panda run ...') - panda_obj = Panda(expression, motif, ppi, precision=precision, computing=computing, save_tmp=save_tmp, remove_missing=rm_missing, keep_expression_matrix=True, save_memory=save_memory, modeProcess=mode_process, start=panda_start, end=panda_end, with_header=with_header) + # For now we keep save_memory=False always, otherwise we won't have the needed information for lioness + panda_obj = Panda(expression, motif, ppi, precision=precision, computing=computing, save_tmp=save_tmp, remove_missing=rm_missing, keep_expression_matrix=True, save_memory=False, modeProcess=mode_process, start=panda_start, end=panda_end, with_header=with_header) print('Panda saved. Computing Lioness...') panda_obj.save_panda_results(output_panda, save_adjacency=as_adjacency, old_compatible=old_compatible) @@ -269,3 +271,73 @@ def condor( co.tar_memb.to_csv(tar_output) co.reg_memb.to_csv(reg_output) + + +################################################ +###### BONOBO ################################## +################################################ + +@click.command() +@click.option('-e', '--expression_file', 'expression_file', type=str, required=True, + help='Path to file containing the gene expression data or pandas dataframe. 
By default, the expression file does not have a header, and the cells are separated by a tab.') +@click.option('--output_folder', type=str, show_default=True, default='bonobo/', + help='Output folder for the bonobo files. If not specified, the bonobo files are saved in the current directory, in the bonobo subdirectory.') +@click.option('--output_format', type=str, show_default=True, default='.h5', + help='format of output bonobo matrix. By default it is an hdf file, can be a txt or csv.') +@click.option('--keep_in_memory', is_flag=True, show_default=True, + help='if True, the bonobo coexpression matrix is kept in memory, otherwise it is discarded after saving') +@click.option('--delta', type=float, show_default=True, default=None, + help='delta parameter. If default (None) delta is trained, otherwise pass a value. Recommended is 0.3.') +@click.option('--sparsify', is_flag=True, show_default=True, + help='if True, bonobo gets sparsified and relative pvalues are returned') +@click.option('--confidence', type=float, show_default=True, default=0.05, + help='if sparsify is True, this is the CI for the approximate zscore.') +@click.option('--save_pvals', is_flag=True, show_default=True, + help='if True, bonobo gets sparsified and relative pvalues are saved in the same format and folder of bonobo') +@click.option('--precision', type=str, show_default=True, default='single', + help='matrix precision (single or double), defaults to single precision.') +@click.option('--sample_names', type=str, show_default=True, default='', + help='Compute BONOBO only on a subset of samples. Pass a comma separated list of sample names. If not specified, all samples are used.') +def bonobo( + expression_file, + output_folder = 'bonobo/', + output_format = '.h5', + keep_in_memory = False, + delta = None, + sparsify = False, + confidence = 0.05, + save_pvals = False, + precision = 'single', + sample_names = '', +): + """ + Compute BONOBOs from an expression file. 
+ + Parameters the user cannot access from the CLI: + - computing: for now it is only CPU + - cores: number of cores to use, for now there is no parallelization + - online_coexpression: we have not implemented the online coexpression yet + """ + + if sample_names!='': + sample_names = sample_names.split(',') + print('WARNING: computing BONOBO only on a subset of samples. The sample names are:') + print(sample_names) + else: + sample_names = [] + + print('Initializing BONOBO object ...') + bonobo_obj_sparse = Bonobo(expression_file) + print('Running BONOBO ...') + print('Files saved in %s' %output_folder) + + bonobo_obj_sparse.run_bonobo(keep_in_memory=keep_in_memory, + output_fmt=output_format, + delta = delta, + sparsify=sparsify, + output_folder=output_folder, + confidence = confidence, + save_pvals=save_pvals, + precision = precision, + sample_names=sample_names) + diff --git a/netZooPy/ligress/bonobo.py b/netZooPy/ligress/bonobo.py index 71747a93..aa384aad 100755 --- a/netZooPy/ligress/bonobo.py +++ b/netZooPy/ligress/bonobo.py @@ -227,6 +227,7 @@ def run_bonobo( precision (str, optional): matrix precision, defaults to single precision. sparsify (bool, optiona): if True, bonobo gets sparsified and relative pvalues are returned confidence (float, optional): if sparsify is True, this is the CI for the approximate zscore. + save_pvals (bool, optional): if True, the pvalues are saved and returned """ ligress_start = time.time() @@ -238,7 +239,7 @@ def run_bonobo( elif precision == "double": atype = "float64" else: - sys.exit("Precision %s unknonw" % str(precision)) + sys.exit("ERROR: Precision %s unknown" % str(precision)) # let's sort the expression and ppi data self.expression_data = self.expression_data.astype(atype)