-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path__main__.py
More file actions
111 lines (93 loc) · 4.51 KB
/
__main__.py
File metadata and controls
111 lines (93 loc) · 4.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from lib.experiment_with_shuffling import experiment_with_shuffling
from lib.factor_returns import FactorReturns
from lib.investment_returns import InvestmentReturns
from lib.investments import Investments
# Pandas to read csv file and other things
import pandas as pd
# To prepare design matrices using R-like formulas
from patsy import dmatrices
# Statsmodels to run our multiple regression model
import statsmodels.api as sm
# R-style formula handed to patsy's dmatrices below: `port_excess` is the
# response and the five factor columns are the regressors. The regression
# output also carries an 'Intercept' term, which the script filters out later.
FIVE_FACTOR_FORMULA = """
port_excess ~ market_minus_risk_free + small_minus_big + high_minus_low + robust_minus_weak + conservative_minus_aggressive
"""
if __name__ == '__main__':
    # Script flow:
    #   1. Fetch the factor returns for one market type.
    #   2. Regress each investment's excess return on the five factors (OLS).
    #   3. Keep the statistically significant coefficients, one row per ticker.
    #   4. Attach expense ratio / dividend yield and pass the table on to
    #      experiment_with_shuffling.

    # market_type = 'US'
    # market_type = 'Developed ex US'
    market_type = 'Emerging'

    # Get the French-Fama Data
    ff_data = FactorReturns.fetch(market_type)
    ff_starts_at = ff_data.occurred_at.min()
    ff_ends_at = ff_data.occurred_at.max()

    # Get the investments to study
    # Investments().backfill_facts(market_type)
    print(f'Looking for investment returns through {ff_ends_at}')
    investments = Investments().query.for_analysis(market_type, ff_ends_at)
    print(f'Found {len(list(investments))} investments of market type {market_type}')

    # for investment in investments:
    #     try:
    #         InvestmentReturns.backfill_returns(investment.ticker_symbol, ff_starts_at, ff_ends_at)
    #     except KeyError as e:
    #         print(f'Skipping {investment.ticker_symbol} due to lack of Yahoo API response')

    # Fit one regression per investment, keyed by ticker symbol.
    results = {}
    for investment in investments:
        ticker_symbol = investment.ticker_symbol
        # print(f'Analyzing {ticker_symbol}')

        # Get the returns of the investment
        ticker_data = InvestmentReturns.fetch(ticker_symbol, ff_starts_at, ff_ends_at)
        if len(ticker_data) < 12:
            print(f'Less than 12 months of data, skipping {ticker_symbol}!')
            continue

        # Join the FF and investment returns data
        all_data = pd.merge(ticker_data, ff_data, on='occurred_at')
        all_data['port_excess'] = all_data.percentage_change - all_data.risk_free

        # Run OLS regression
        endogenous, exogenous = dmatrices(FIVE_FACTOR_FORMULA, data=all_data, return_type='dataframe')
        results[ticker_symbol] = sm.OLS(endogenous, exogenous).fit()

    # pd.concat raises "No objects to concatenate" on an empty list, so stop
    # with an explicit message when every investment was skipped (or none
    # were found) instead of failing with an opaque traceback.
    if not results:
        raise SystemExit(
            f'No investments of market type {market_type} had enough data to analyze'
        )

    # Build one long table: a row per (ticker, factor) with its coefficient,
    # t-value and p-value. 'ticker' and 'factor' stay ordinary columns because
    # the filters and the pivot below address them by column name.
    coefficient_frames = []
    for ticker, result in results.items():
        frame = pd.DataFrame({
            'coef': result.params,
            'tvalue': result.tvalues,
            'pvalue': result.pvalues,
        })
        frame['factor'] = frame.index
        frame['ticker'] = ticker
        coefficient_frames.append(frame)
    df = pd.concat(coefficient_frames)
    # df = pd.merge(df, investments.to_data_frame(), on='ticker')

    # Remove inverse funds
    inversed = df[(df.coef <= 0) & (df.factor == 'market_minus_risk_free')]
    df = df[~df.ticker.isin(inversed.ticker)]

    # Remove leveraged funds
    leveraged = df[(df.coef >= 2) & (df.factor == 'market_minus_risk_free')]
    df = df[~df.ticker.isin(leveraged.ticker)]

    # Exclude 'Intercept' because it is almost always very close to zero
    df = df[~df.factor.isin(['Intercept'])]
    # Exclude 'market_minus_risk_free' because it is usually close to one
    # df = df[~df.factor.isin(['market_minus_risk_free'])]

    # Exclude statistically insignificant results
    df = df[df.pvalue <= 0.05]

    renamed = {
        'market_minus_risk_free': 'mmrf',
        'small_minus_big': 'smb',
        'high_minus_low': 'hml',
        'robust_minus_weak': 'rmw',
        'conservative_minus_aggressive': 'cma',
    }
    # Go from long to wide: one row per ticker, one column per factor.
    df = (
        df[['ticker', 'factor', 'coef']]
        .pivot(index='ticker', columns='factor', values='coef')
        .rename(columns=renamed)
    )
    df = df.reset_index()  # Make index integers rather than ticker

    investments_df = investments.to_data_frame()[['ticker', 'expense_ratio', 'dividend_yield']]
    investments_df = investments_df.fillna(0)
    # Throw out funds missing their expense ratio
    investments_df = investments_df[investments_df.expense_ratio > 0]
    df = df.merge(investments_df, on='ticker')

    # Replacing all NaNs with zero. This isn't perfect because:
    # * Factors with just barely insignificant p-values will be zero, when in
    #   fact they might be negative.
    # * Dividend yields that are missing in the API will appear as zero.
    df = df.fillna(0)

    # Reorder columns
    # NOTE(review): DataFrame.filter keeps only the listed labels that exist,
    # so a factor with no significant coefficient for any fund is silently
    # absent from the result - confirm experiment_with_shuffling tolerates that.
    df = df.filter(['ticker', 'mmrf', 'smb', 'hml', 'rmw', 'cma', 'expense_ratio', 'dividend_yield'])

    # print('Consider catching a debugger here to play with the data frames')
    # print('Write "import pdb; pdb.set_trace()" and run "python ."')
    # print(df.head())
    experiment_with_shuffling(df, market_type)