fdth-python/make_fdt_cat_multiple.py at main · yuriccosta/fdth-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
from make_fdt_cat_simple import make_fdt_cat_simple

class FDTResult:
    """
    Container for a set of frequency distribution tables (FDTs) that knows
    how to render itself as human-readable text.

    Attributes:
        results (dict): Maps each column name to the table built for that
                        column. Values are expected to be DataFrames; the
                        only method this class calls on them is
                        ``to_string(index=True)``.
    """
    def __init__(self, results):
        """
        Store the already-computed tables; no copying or validation is done.

        Parameters:
            results (dict): Mapping of column name to its FDT.
        """
        self.results = results

    def __str__(self):
        """
        Render every stored table as text, in the dictionary's iteration order.

        Each entry contributes a ``--- <name> ---`` header, a ``Table:`` line,
        the table itself (index included) and one blank separator line.

        Returns:
            str: The concatenated sections, or an empty string when there are
                 no tables.
        """
        lines = []
        for name, table in self.results.items():
            lines.extend((
                f"--- {name} ---",
                "Table:",
                table.to_string(index=True),
                "",  # blank line separating this section from the next
            ))
        return "\n".join(lines)


def make_fdt_cat_multiple(df, sort=False, decreasing=False):
    """
    Build one frequency distribution table (FDT) per categorical column of a DataFrame.

    Columns are picked by dtype: only those reported as 'category' or 'object'
    by ``DataFrame.select_dtypes`` are processed; every other column is ignored.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        sort (bool): Forwarded unchanged to ``make_fdt_cat_simple``; intended
                     to request sorting of each table by frequency.
                     Default is False.
        decreasing (bool): Forwarded unchanged to ``make_fdt_cat_simple``;
                           intended to select descending order when ``sort``
                           is True. Default is False.

    Returns:
        FDTResult: Wrapper around a dict mapping each selected column name to
                   the table returned by ``make_fdt_cat_simple``. The dict is
                   empty when no column has a matching dtype.

    Raises:
        ValueError: If the input is not a pandas DataFrame.
    """
    # Guard clause: reject anything that is not a DataFrame up front.
    if not isinstance(df, pd.DataFrame):
        raise ValueError("The parameter 'df' must be a pandas DataFrame.")

    # Restrict the work to columns whose dtype marks them as categorical.
    selected = df.select_dtypes(include=['category', 'object'])

    tables = {
        name: make_fdt_cat_simple(df[name], sort=sort, decreasing=decreasing)
        for name in selected.columns
    }
    return FDTResult(tables)