-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmake_fdt_cat_multiple.py
More file actions
67 lines (54 loc) · 2.22 KB
/
make_fdt_cat_multiple.py
File metadata and controls
67 lines (54 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
from make_fdt_cat_simple import make_fdt_cat_simple
class FDTResult:
    """
    Container for a collection of frequency distribution tables (FDTs).

    Holds a mapping from column name to that column's table and renders the
    whole collection as human-readable text via ``str()``.

    Attributes:
        results (dict): Maps each column name to the DataFrame holding the
            FDT computed for that column.
    """

    def __init__(self, results):
        """
        Store the already-computed tables.

        Parameters:
            results (dict): FDTs keyed by the name of the column they
                describe.
        """
        self.results = results

    def __str__(self):
        """
        Render every stored table as a single block of text.

        Each entry contributes, in order: a ``--- <name> ---`` header, a
        ``Table:`` label, the table itself (index included) and an empty
        line that separates it from the next entry.

        Returns:
            str: The rendered tables joined by newlines; an empty string
            when no results are stored.
        """
        lines = []
        for name, table in self.results.items():
            lines.extend(
                (
                    f"--- {name} ---",
                    "Table:",
                    table.to_string(index=True),
                    "",
                )
            )
        return "\n".join(lines)
def make_fdt_cat_multiple(df, sort=False, decreasing=False):
    """
    Build a frequency distribution table (FDT) for each categorical column.

    Columns whose dtype is ``category`` or ``object`` are selected, and each
    one is handed to ``make_fdt_cat_simple`` together with the ``sort`` and
    ``decreasing`` options. All other columns are ignored.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        sort (bool): If True, the tables are sorted by frequency.
            Default is False.
        decreasing (bool): Only meaningful when ``sort`` is True: descending
            order if True, ascending otherwise. Default is False.

    Returns:
        FDTResult: Wrapper around a dict mapping each selected column name
        to its FDT.

    Raises:
        ValueError: If ``df`` is not a pandas DataFrame.
    """
    # Guard clause: reject anything that is not a DataFrame up front.
    if not isinstance(df, pd.DataFrame):
        raise ValueError("The parameter 'df' must be a pandas DataFrame.")

    # Keep only the columns treated as categorical.
    selected = df.select_dtypes(include=['category', 'object'])

    tables = {
        name: make_fdt_cat_simple(df[name], sort=sort, decreasing=decreasing)
        for name in selected.columns
    }
    return FDTResult(tables)