ff-factor/__main__.py at main · johnnymo87/ff-factor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from lib.experiment_with_shuffling import experiment_with_shuffling
from lib.factor_returns import FactorReturns
from lib.investment_returns import InvestmentReturns
from lib.investments import Investments
# Pandas to read csv file and other things
import pandas as pd
# To prepare design matrices using R-like formulas
from patsy import dmatrices
# Statsmodels to run our multiple regression model
import statsmodels.api as sm

# Regression formula, in patsy's R-like syntax, handed to `dmatrices` below.
# The left-hand side (`port_excess`) is the investment's return minus the
# risk-free rate; the right-hand side names the five factor-return columns
# used as regressors. Every name must exist as a column of the merged
# investment/factor data frame.
FIVE_FACTOR_FORMULA = """
port_excess ~ market_minus_risk_free + small_minus_big + high_minus_low + robust_minus_weak + conservative_minus_aggressive
"""

if __name__ == '__main__':
    # Script entry point. For every investment of the chosen market type:
    #   1. regress its excess returns on the five factor returns (OLS),
    #   2. keep only the statistically significant factor loadings,
    #   3. reshape to one row per ticker and attach expense ratio and
    #      dividend yield,
    # then hand the resulting table to `experiment_with_shuffling`.

    # Pick exactly one market type to analyze.
    # market_type = 'US'
    # market_type = 'Developed ex US'
    market_type = 'Emerging'

    # Get the Fama-French data
    ff_data = FactorReturns.fetch(market_type)
    ff_starts_at = ff_data.occurred_at.min()
    ff_ends_at = ff_data.occurred_at.max()

    # Get the investments to study
    # Investments().backfill_facts(market_type)
    print(f'Looking for investment returns through {ff_ends_at}')
    investments = Investments().query.for_analysis(market_type, ff_ends_at)
    print(f'Found {len(list(investments))} investments of market type {market_type}')
    # Optional one-off step: backfill the returns before analyzing.
    # for investment in investments:
    #     try:
    #         InvestmentReturns.backfill_returns(investment.ticker_symbol, ff_starts_at, ff_ends_at)
    #     except KeyError as e:
    #         print(f'Skipping {investment.ticker_symbol} due to lack of Yahoo API response')

    # Fit one regression per investment, keyed by ticker symbol.
    results = {}
    for investment in investments:
        ticker_symbol = investment.ticker_symbol
        # print(f'Analyzing {ticker_symbol}')
        # Get the returns of the investment
        ticker_data = InvestmentReturns.fetch(ticker_symbol, ff_starts_at, ff_ends_at)
        if len(ticker_data) < 12:
            print(f'Less than 12 months of data, skipping {ticker_symbol}!')
            continue

        # Join the FF and investment returns data
        all_data = pd.merge(ticker_data, ff_data, on='occurred_at')
        all_data['port_excess'] = all_data.percentage_change - all_data.risk_free

        # Run OLS regression
        endogenous, exogenous = dmatrices(FIVE_FACTOR_FORMULA, data=all_data, return_type='dataframe')
        results[ticker_symbol] = sm.OLS(endogenous, exogenous).fit()

    # pd.concat raises an opaque ValueError on an empty list, so stop here
    # with a clear message when there is nothing to analyze.
    if not results:
        raise SystemExit(
            f'No investments of market type {market_type} had enough data to analyze')

    # Collect the fitted parameters into one long frame: one row per
    # (ticker, factor) holding the coefficient, t-value and p-value.
    coefficient_frames = []
    for ticker, result in results.items():
        frame = pd.DataFrame({
            'coef': result.params,
            'tvalue': result.tvalues,
            'pvalue': result.pvalues,
        })
        frame['factor'] = frame.index
        frame['ticker'] = ticker
        coefficient_frames.append(frame)

    df = pd.concat(coefficient_frames)
    # df = pd.merge(df, investments.to_data_frame(), on='ticker')

    # Remove inverse funds (market beta <= 0) and leveraged funds
    # (market beta >= 2), judged by the market factor's coefficient.
    market_beta = df[df.factor == 'market_minus_risk_free']
    excluded = market_beta[(market_beta.coef <= 0) | (market_beta.coef >= 2)]
    df = df[~df.ticker.isin(excluded.ticker)]
    # Exclude 'Intercept' because it is almost always very close to zero
    df = df[~df.factor.isin(['Intercept'])]
    # Exclude 'market_minus_risk_free' because it is usually close to one
    # df = df[~df.factor.isin(['market_minus_risk_free'])]

    # Exclude statistically insignificant results
    df = df[df.pvalue <= 0.05]

    # Reshape from long to wide: one row per ticker, one column per factor,
    # using short column names.
    renamed = {
        'market_minus_risk_free': 'mmrf',
        'small_minus_big': 'smb',
        'high_minus_low': 'hml',
        'robust_minus_weak': 'rmw',
        'conservative_minus_aggressive': 'cma',
    }
    df = (
        df[['ticker', 'factor', 'coef']]
        .pivot(index='ticker', columns='factor', values='coef')
        .rename(columns=renamed)
    )
    df = df.reset_index()  # Make index integers rather than ticker

    investments_df = investments.to_data_frame()[['ticker', 'expense_ratio', 'dividend_yield']]
    investments_df = investments_df.fillna(0)
    # Throw out funds missing their expense ratio (missing became 0 above)
    investments_df = investments_df[investments_df.expense_ratio > 0]

    df = df.merge(investments_df, on='ticker')

    # Replacing all NaNs with zero. This isn't perfect because:
    # * Factors with just barely insignificant p-values will be zero, when in
    #   fact they might be negative.
    # * Dividend yields that are missing in the API will appear as zero.
    df = df.fillna(0)

    # Reorder columns. `filter` keeps only the listed columns that exist, so
    # a factor that was never significant for any ticker is simply absent.
    df = df.filter(['ticker', 'mmrf', 'smb', 'hml', 'rmw', 'cma', 'expense_ratio', 'dividend_yield'])

    # print('Consider catching a debugger here to play with the data frames')
    # print('Write "import pdb; pdb.set_trace()" and run "python ."')
    # print(df.head())

    experiment_with_shuffling(df, market_type)