#| echo: false

import pandas as pd
import numpy as np
import seaborn as sns
sns.set_theme(style="whitegrid")

import cvxpy as cvx
import matplotlib.pyplot as plt

#| echo: false
dowjones_components = [
    'AAPL','AXP','BA','BAC','CAT',
    'CSCO','CVX','DD','DIS','GE',
    'HD','HPQ','IBM','INTC','JNJ',
    'JPM','KO','MCD','MMM',
    'MRK','MSFT','PFE','PG','T',
    'TRV','VZ','WMT','XOM'
]

symbols = pd.read_csv('data/tickers.csv').set_index('code')
symbols['name'] # full company names

code
AAPL                              Apple Inc (AAPL)
AXP                     American Express Co. (AXP)
BA                             The Boeing Co. (BA)
BAC                    Bank of America Corp. (BAC)
CAT                         Caterpillar Inc. (CAT)
CSCO                     Cisco Systems Inc. (CSCO)
CVX                      Chevron Corporation (CVX)
DD            E.I. du Pont de Nemours and Co. (DD)
DIS                  Disney (Walt) Co. (The) (DIS)
GE                        General Electric Co (GE)
HD                             Home Depot Inc (HD)
HPQ                       Hewlett-Packard Co (HPQ)
IBM     International Business Machines Corp (IBM)
INTC                             Intel Corp (INTC)
JNJ                        Johnson & Johnson (JNJ)
JPM                          JP Morgan Chase (JPM)
KO                         Coca-Cola Co (The) (KO)
MCD                          McDonald's Corp (MCD)
MMM                                    3M Co (MMM)
MRK                          Merck & Co. Inc (MRK)
MSFT                  Microsoft Corporation (MSFT)
PFE                               Pfizer Inc (PFE)
PG                       Procter & Gamble Co. (PG)
T                                     AT&T Inc (T)
TRV            Travelers Companies Inc (The) (TRV)
UTX                United Technologies Corp. (UTX)
VZ                 Verizon Communications Inc (VZ)
WMT                          Wal-Mart Stores (WMT)
XOM                        Exxon Mobil Corp. (XOM)
Name: name, dtype: object

#| code-fold: true
import yfinance as yf

store = pd.HDFStore('data/yfinance-data-store.h5')

if 'rawdata' in store:
    rawdata = store['rawdata']
    print('Data loaded from store')
else:
    print('Downloading data from Yahoo Finance')
    print('set auto_adjust=False to get adjusted closing price as separate column')
    rawdata = yf.download(dowjones_components, start="2000-01-01", end="2025-11-01", auto_adjust=True)
    store['rawdata'] = rawdata

Data loaded from store

rawdata.head()

rawdata.tail()

## Hierarchical Index
rawdata.columns

MultiIndex([( 'Close', 'AAPL'),
            ( 'Close',  'AXP'),
            ( 'Close',   'BA'),
            ( 'Close',  'BAC'),
            ( 'Close',  'CAT'),
            ( 'Close', 'CSCO'),
            ( 'Close',  'CVX'),
            ( 'Close',   'DD'),
            ( 'Close',  'DIS'),
            ( 'Close',   'GE'),
            ...
            ('Volume',  'MMM'),
            ('Volume',  'MRK'),
            ('Volume', 'MSFT'),
            ('Volume',  'PFE'),
            ('Volume',   'PG'),
            ('Volume',    'T'),
            ('Volume',  'TRV'),
            ('Volume',   'VZ'),
            ('Volume',  'WMT'),
            ('Volume',  'XOM')],
           names=['Price', 'Ticker'], length=140)

rawdata.columns = rawdata.columns.set_names(['Value', 'Symbol'])
rawdata.head()

rawdata.columns

MultiIndex([( 'Close', 'AAPL'),
            ( 'Close',  'AXP'),
            ( 'Close',   'BA'),
            ( 'Close',  'BAC'),
            ( 'Close',  'CAT'),
            ( 'Close', 'CSCO'),
            ( 'Close',  'CVX'),
            ( 'Close',   'DD'),
            ( 'Close',  'DIS'),
            ( 'Close',   'GE'),
            ...
            ('Volume',  'MMM'),
            ('Volume',  'MRK'),
            ('Volume', 'MSFT'),
            ('Volume',  'PFE'),
            ('Volume',   'PG'),
            ('Volume',    'T'),
            ('Volume',  'TRV'),
            ('Volume',   'VZ'),
            ('Volume',  'WMT'),
            ('Volume',  'XOM')],
           names=['Value', 'Symbol'], length=140)

rawdata.columns.levels

FrozenList([['Close', 'High', 'Low', 'Open', 'Volume'], ['AAPL', 'AXP', 'BA', 'BAC', 'CAT', 'CSCO', 'CVX', 'DD', 'DIS', 'GE', 'HD', 'HPQ', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM', 'MRK', 'MSFT', 'PFE', 'PG', 'T', 'TRV', 'VZ', 'WMT', 'XOM']])

rawdata['Close'].head()

rawdata.loc[:, 'High':'Low'].head()

idx = pd.IndexSlice
rawdata.loc[:, idx[:, 'AAPL']].head()

aapl = rawdata.loc[:, idx['Close':'Open', 'AAPL']]
aapl.head()

aapl = rawdata.loc[:, idx['Close':'Open', 'AAPL']].droplevel('Symbol', axis=1).tail(60)
aapl.head()

mmm = rawdata.loc[:, idx['Close':'Open', 'MMM']].droplevel('Symbol', axis=1).tail(60).reset_index()

fig, ax = plt.subplots(figsize=(12, 6))

# width of the candlestick body in days
body_width = 0.6  # adjust (e.g., 0.5 - 0.8) to change bar thickness

for _, r in mmm.iterrows():
  o = r['Open']; c = r['Close']; h = r['High']; l = r['Low']
  color = 'green' if c >= o else 'red'
  x = r['Date']
  ax.vlines(x, l, h, color=color, linewidth=1)
  lower = min(o, c)
  height = abs(c - o)
  ax.add_patch(
    plt.Rectangle(
      (x - pd.Timedelta(days=body_width / 2), lower),
      pd.Timedelta(days=body_width),
      height if height != 0 else 0.0001,  # ensure visible even if open == close
      facecolor=color,
      edgecolor='black',
      linewidth=0.5
    )
  )

ax.set_title('MMM Candlestick (Last 60 Trading Days)')
ax.set_ylabel('Price')
ax.set_xlabel('Date')
ax.xaxis.set_major_locator(plt.MaxNLocator(12))
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

first10 = rawdata.loc[:, idx['Close', ['MMM', 'MRK', 'PFE', 'PG', 'JNJ']]].tail(60)
first10.head()

healthcare = first10.stack('Symbol', future_stack=True)
healthcare

plt.figure(figsize=(12, 6))
sns.lineplot(data=healthcare, x='Date', y='Close', hue='Symbol')
plt.xticks(rotation=-90)
plt.tight_layout()
plt.show()

rawdata['Close'].isna().sum(axis=0)

Symbol
AAPL    0
AXP     0
BA      0
BAC     0
CAT     0
CSCO    0
CVX     0
DD      0
DIS     0
GE      0
HD      0
HPQ     0
IBM     0
INTC    0
JNJ     0
JPM     0
KO      0
MCD     0
MMM     0
MRK     0
MSFT    0
PFE     0
PG      0
T       0
TRV     0
VZ      0
WMT     0
XOM     0
dtype: int64

anymissing = rawdata['Close'].isna().any(axis=1)
rawdata.loc[anymissing]

(rawdata.isna().sum()>0).any()

False

symbols = symbols.loc[rawdata.columns.levels[1].to_list()]
symbols

rawdata.head()

closedata= rawdata['Close']
closedata.head()

logret = np.log(closedata).diff()
logret.head()

mu = logret[1:].mean()
mu

Symbol
AAPL    0.000889
AXP     0.000371
BA      0.000315
BAC     0.000224
CAT     0.000589
CSCO    0.000113
CVX     0.000352
DD      0.000163
DIS     0.000246
GE      0.000134
HD      0.000354
HPQ     0.000129
IBM     0.000257
INTC    0.000074
JNJ     0.000320
JPM     0.000401
KO      0.000248
MCD     0.000407
MMM     0.000331
MRK     0.000188
MSFT    0.000412
PFE     0.000113
PG      0.000264
T       0.000210
TRV     0.000421
VZ      0.000144
WMT     0.000302
XOM     0.000290
dtype: float64

cov_sample = logret.cov()
cov_sample

import matplotlib.pyplot as plt 
import seaborn as sns
fig, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.heatmap(logret.cov(),  square=True, ax=ax[0]).set_title('Covariance Matrix')
sns.heatmap(logret.corr(), square=True, ax=ax[1]).set_title('Correlation Matrix');
# fig.show();

clmap = sns.clustermap(logret.corr(), square=True, figsize=(8, 8));
ordering = clmap.dendrogram_col.reordered_ind # save the hierarchical clustering generated variable ordering

/opt/conda/lib/python3.11/site-packages/seaborn/matrix.py:1124: UserWarning: ``square=True`` ignored in clustermap
  warnings.warn(msg)

fig, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.heatmap(logret.cov().iloc[ordering, ordering],  square=True, ax=ax[0]).set_title('Covariance Matrix')
sns.heatmap(logret.corr().iloc[ordering, ordering], square=True, ax=ax[1]).set_title('Correlation Matrix');
# fig.show()

s, _ = cov_sample.shape # calculated from data

w = cvx.Variable(s) # variables
risk = cvx.quad_form(w, cov_sample.values)  # objective function
prob = cvx.Problem(cvx.Minimize(risk), # optimization problem 
               [cvx.sum(w) == 1])      # fully invested portfolio constraint
prob.solve()

8.182720882145511e-05

w.value

array([ 0.01652722, -0.06677811, -0.00556869, -0.01420911,  0.00303467,
       -0.01916917,  0.03920442, -0.03985089,  0.01652121, -0.00538882,
       -0.02358231,  0.01211534,  0.09040282, -0.01447738,  0.2142491 ,
       -0.02886968,  0.1425609 ,  0.14683368,  0.06653689,  0.01455767,
        0.02968867,  0.02761711,  0.12084072,  0.00462494,  0.01039099,
        0.09993229,  0.13561062,  0.0266449 ])

s,_ = cov_sample.shape

w = cvx.Variable(s)
risk = cvx.quad_form(w, cov_sample.values)
prob = cvx.Problem(cvx.Minimize(risk), 
               [
                   cvx.sum(w) == 1,
                   mu.values@w >= 0.001,
                   # w >= 0
               ]) 
prob.solve()

0.00025582983343653144

w.value

array([ 0.37727003,  0.0103586 ,  0.00531868, -0.19475594,  0.38319326,
       -0.1993208 ,  0.15270708, -0.22931477, -0.04446237, -0.22544009,
       -0.0039972 , -0.1268713 ,  0.07198615, -0.20353497,  0.43703468,
        0.20627437,  0.00312622,  0.32205393,  0.14227969, -0.07175241,
        0.18423752, -0.21650405,  0.10500898,  0.0260248 ,  0.16675933,
       -0.11354043,  0.12224684, -0.08638584])

w.value.sum()

0.9999999999999997

np.dot(mu.values,w.value)

0.0010000000000000002

earned = np.dot(logret.bfill().values, w.value)

earnedcp = (1+earned[1:]).cumprod()
ecp = pd.DataFrame(data=earnedcp.T, index=logret.index.values[1:], columns=['Portfolio Value'])
ecp.plot(figsize=(10, 8));

np.log10(ecp).plot(figsize=(10, 8));

ecp

retinfo = logret.mean(axis=0).agg(['min', 'mean', 'max'])
stdinfo = logret.std(axis=0).agg(['min', 'mean', 'max'])

print('portfolio average returns:', earned.mean())
print('          average stddev :', earned.std())
print('')
print("component stocks average: minimum: %f\n                          average: %f\n                          maximum: %f" % tuple(retinfo))
print("component stocks stddev : minimum: %f\n                          average: %f\n                          maximum: %f" % tuple(stdinfo))

portfolio average returns: 0.0009969739339166317
          average stddev : 0.015994079712789433

component stocks average: minimum: 0.000074
                          average: 0.000295
                          maximum: 0.000889
component stocks stddev : minimum: 0.012085
                          average: 0.018796
                          maximum: 0.027256

(1+earned.mean())**365 - 1

0.43866299874773973

from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaled_df = scaler.fit_transform(logret.dropna().values)
scaled_df = pd.DataFrame(scaled_df, columns=logret.columns, index=logret.index[1:])

clmap = sns.clustermap(scaled_df.loc['2020':'2020'].corr(), square=True, figsize=(8, 8));
ordering = clmap.dendrogram_col.reordered_ind # save the hierarchical clustering generated variable ordering

/opt/conda/lib/python3.11/site-packages/seaborn/matrix.py:1124: UserWarning: ``square=True`` ignored in clustermap
  warnings.warn(msg)

fig, ax = plt.subplots(1, 2, figsize=(20, 8))

sns.heatmap(scaled_df.loc['2019':'2019'].corr().iloc[ordering, ordering],  square=True, ax=ax[0]).set_title('Correlation Matrix: 2019')
sns.heatmap(scaled_df.loc['2020':'2020'].corr().iloc[ordering, ordering],  square=True, ax=ax[1]).set_title('Correlation Matrix: 2020');
# fig.show()

# Rolling backtest with 300-day lookback and 50-day rebalance horizon

def estimate_moments(returns_df, cov_fn=None):
  """
  Estimate mean vector and covariance matrix from a returns window.
  - returns_df: DataFrame of daily returns (rows: dates, cols: assets)
  - cov_fn: optional function taking returns_df and returning a covariance matrix
  """
  window = returns_df.dropna()
  mu_hat = window.mean()
  if cov_fn is None:
    cov_hat = window.cov()
  else:
    cov_hat = cov_fn(window)
    # Convert to DataFrame with consistent indexing if needed
    if not isinstance(cov_hat, pd.DataFrame):
      cov_hat = pd.DataFrame(cov_hat, index=window.columns, columns=window.columns)
  return mu_hat, cov_hat


def compute_min_var_weights(mu_vec, cov_mat, target_return=None, long_only=False, ridge=1e-6, solver_opts=None):
  """
  Solve for portfolio weights minimizing variance:
    min w' Σ w
    s.t. 1' w = 1
       μ' w >= target_return (optional)
       w >= 0 (if long_only)
  - mu_vec: pd.Series indexed by asset
  - cov_mat: pd.DataFrame with same index/columns
  - ridge: small diagonal loading for numerical stability
  """
  cols = mu_vec.index
  Sigma = cov_mat.copy()
  # numerical stabilizer
  Sigma.values[range(Sigma.shape[0]), range(Sigma.shape[0])] += ridge

  n = len(cols)
  w_var = cvx.Variable(n)
  constraints = [cvx.sum(w_var) == 1]
  if long_only:
    constraints.append(w_var >= 0)
  if target_return is not None:
    constraints.append(mu_vec.values @ w_var >= float(target_return))

  objective = cvx.Minimize(cvx.quad_form(w_var, Sigma.values))
  prob_local = cvx.Problem(objective, constraints)
  prob_local.solve(**(solver_opts or {}))
  w_val = w_var.value
  if w_val is None or np.any(np.isnan(w_val)):
    raise RuntimeError("Solver returned invalid weights")
  weights = pd.Series(w_val, index=cols, name='weight')
  return weights


def compute_portfolio_returns(returns_df, weights):
  """
  Compute daily portfolio returns given returns matrix and weights.
  - returns_df: DataFrame (dates x assets)
  - weights: pd.Series indexed by assets
  """
  # align columns
  common = [c for c in returns_df.columns if c in weights.index]
  if len(common) == 0:
    return pd.Series([], dtype=float)
  w = weights.loc[common]
  R = returns_df[common]
  # dot product per day
  return R.dot(w)


def rolling_backtest(returns,
           lookback=300,
           horizon=50,
           rebalance=50,
           cov_fn=None,
           target_return=None,
           long_only=True,
           ridge=1e-6,
           solver_opts=None):
  """
  Rolling backtest:
  - Use past `lookback` days to estimate mu, Sigma
  - Invest for next `horizon` days with fixed weights
  - Rebalance every `rebalance` days
  Returns:
    dict with:
    'returns': pd.Series of daily portfolio returns over backtest
    'weights': pd.DataFrame of weights per rebalance date (rows=dates, cols=assets)
  """
  dates = returns.index
  n = len(dates)
  out_returns_parts = []
  weights_records = []

  start = lookback
  while start < n:
    end_invest = min(start + horizon, n)

    window = returns.iloc[start - lookback:start]
    mu_hat, cov_hat = estimate_moments(window, cov_fn=cov_fn)
    w_hat = compute_min_var_weights(mu_hat, cov_hat,
                    target_return=target_return,
                    long_only=long_only,
                    ridge=ridge,
                    solver_opts=solver_opts)

    period_rets = returns.iloc[start:end_invest]
    port_rets = compute_portfolio_returns(period_rets, w_hat)
    out_returns_parts.append(port_rets)

    # store weights at the rebalance date
    w_row = w_hat.reindex(returns.columns).fillna(0.0)
    w_row.name = dates[start]
    weights_records.append(w_row)

    # advance to next rebalance
    start += rebalance

  out_returns = pd.concat(out_returns_parts).sort_index()
  weights_df = pd.DataFrame(weights_records)
  return {'returns': out_returns, 'weights': weights_df}

# Run the requested backtest on the existing `logret` DataFrame
bt = rolling_backtest(
  logret,
  lookback=300,
  horizon=50,
  rebalance=50,
  cov_fn=None,          # default: sample covariance
  target_return=None,   # pure minimum-variance
  long_only=False,       # set False to allow shorting
  ridge=1e-6,
)

bt_returns = bt['returns']
bt_weights = bt['weights']

# Example outputs: cumulative growth from log returns
bt_wealth = np.exp(bt_returns.cumsum())
bt_results = {
  'final_wealth': float(bt_wealth.iloc[-1]) if len(bt_wealth) else np.nan,
  'mean_daily_return': float(bt_returns.mean()) if len(bt_returns) else np.nan,
  'std_daily_return': float(bt_returns.std()) if len(bt_returns) else np.nan,
}

bt_results

{'final_wealth': 5.631200904168396,
 'mean_daily_return': 0.0002788516818128177,
 'std_daily_return': 0.009221033517696169}

# perform backtest using ledoit-wolf shrinkage covariance estimator
from sklearn.covariance import LedoitWolf
def ledoit_wolf_cov(returns_df):
  lw = LedoitWolf().fit(returns_df.dropna().values)
  cov_est = lw.covariance_
  return pd.DataFrame(cov_est, index=returns_df.columns, columns=returns_df.columns)
bt_lw = rolling_backtest(
  logret,
  lookback=300,
  horizon=50,
  rebalance=50,
  cov_fn=ledoit_wolf_cov,
  target_return=None,
  long_only=False,
  ridge=1e-6,
)
bt_lw_returns = bt_lw['returns']
bt_lw_wealth = np.exp(bt_lw_returns.cumsum())
bt_lw_results = {
  'final_wealth': float(bt_lw_wealth.iloc[-1]) if len(bt_lw_wealth) else np.nan,
  'mean_daily_return': float(bt_lw_returns.mean()) if len(bt_lw_returns) else np.nan,
  'std_daily_return': float(bt_lw_returns.std()) if len(bt_lw_returns) else np.nan,
}

bt_lw_results

{'final_wealth': 5.843559184966783,
 'mean_daily_return': 0.000284824146608879,
 'std_daily_return': 0.009095051514796566}

# perform backtest using graphical lasso covariance estimator
from sklearn.covariance import GraphicalLasso
def graphical_lasso_cov(returns_df, alpha=0.01):
  gl = GraphicalLasso(alpha=alpha).fit(returns_df.dropna().values)
  cov_est = gl.covariance_
  return pd.DataFrame(cov_est, index=returns_df.columns, columns=returns_df.columns)

bt_gl = rolling_backtest(
  logret,
  lookback=300,
  horizon=50,
  rebalance=50,
  cov_fn=graphical_lasso_cov,
  target_return=None,
  long_only=False,
  ridge=1e-6,
)
bt_gl_returns = bt_gl['returns']
bt_gl_wealth = np.exp(bt_gl_returns.cumsum())
bt_gl_results = {
  'final_wealth': float(bt_gl_wealth.iloc[-1]) if len(bt_gl_wealth) else np.nan,
  'mean_daily_return': float(bt_gl_returns.mean()) if len(bt_gl_returns) else np.nan,
  'std_daily_return': float(bt_gl_returns.std()) if len(bt_gl_returns) else np.nan,
}

bt_gl_results

{'final_wealth': 7.13234951776576,
 'mean_daily_return': 0.0003169797847334469,
 'std_daily_return': 0.010463294745776675}

# Plot the growth of the three portfolios
plt.figure(figsize=(12, 6))
plt.plot(bt_wealth.index, bt_wealth.values, label='Sample Covariance', color='blue')
plt.plot(bt_lw_wealth.index, bt_lw_wealth.values, label='Ledoit-Wolf', color='orange')
plt.plot(bt_gl_wealth.index, bt_gl_wealth.values, label='Graphical Lasso', color='green')
plt.title('Portfolio Growth Over Time')
plt.xlabel('Date')
plt.ylabel('Cumulative Growth')
plt.legend()
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

store['logret'] = logret
store.close()

Price	Close										...	Volume
Ticker	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2000-01-03	0.839280	32.323586	25.940268	12.490799	12.574042	35.148087	15.875281	11.853917	22.736227	129.774673	...	2599386	6265782	53228400	12873345	4275000	7668476	336400	4663843	25109700	13458200
2000-01-04	0.768521	31.103098	25.899942	11.749414	12.412425	33.176201	15.875281	11.529919	24.068050	124.583733	...	3245705	7894689	54119000	14208974	4270800	9497846	494400	5005878	20235300	14510800
2000-01-05	0.779767	30.313122	27.513647	11.878352	12.703340	33.074562	16.160040	11.904197	25.066923	124.367424	...	4424482	7963018	64059600	12981591	5098400	12035160	736000	6368681	21056100	17485000
2000-01-06	0.712287	30.930702	27.796047	12.893729	13.349821	32.525688	16.848200	12.239370	24.068050	126.030174	...	7147057	4989004	54976600	11115273	6524200	9471366	660400	4705763	19633500	19461600
2000-01-07	0.746027	31.381020	28.602884	12.555267	13.786201	34.436577	17.144821	12.513094	23.687531	130.910217	...	4905035	10871218	62013600	17962163	9832000	7792534	594700	5043907	23930700	16603800

Price	Close										...	Volume
Ticker	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2025-10-27	268.549652	361.670013	223.000000	53.020000	527.070007	71.389999	153.564911	34.050209	112.339996	312.839996	...	2922300	7226600	18734700	34974900	7775300	86220400	1108100	27487200	16375900	10975400
2025-10-28	268.739471	361.029999	223.330002	52.869999	524.469971	72.620003	152.427628	34.267780	111.650002	309.790009	...	2448100	8345900	29986700	50152400	5819300	77662500	1341300	30719200	13323000	9535200
2025-10-29	269.438812	358.220001	213.580002	52.580002	585.489990	71.330002	153.386917	34.376568	110.239998	314.279999	...	3163400	10921900	36023000	65438400	7405500	102715000	1262700	53730200	11986400	12570600
2025-10-30	271.137146	358.880005	200.080002	53.029999	583.150024	72.910004	151.824356	34.087868	111.839996	310.750000	...	2143700	16678800	41023100	157478000	6760700	82167800	1473500	47371000	14065100	16172900
2025-10-31	270.108154	360.730011	201.020004	53.450001	577.260010	73.110001	155.977966	34.163181	112.620003	308.950012	...	2096600	12385000	34006400	132697400	7930300	89936800	1900900	52181800	20237000	20213400

Value	Close										...	Volume
Symbol	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2000-01-03	0.839280	32.323586	25.940268	12.490799	12.574042	35.148087	15.875281	11.853917	22.736227	129.774673	...	2599386	6265782	53228400	12873345	4275000	7668476	336400	4663843	25109700	13458200
2000-01-04	0.768521	31.103098	25.899942	11.749414	12.412425	33.176201	15.875281	11.529919	24.068050	124.583733	...	3245705	7894689	54119000	14208974	4270800	9497846	494400	5005878	20235300	14510800
2000-01-05	0.779767	30.313122	27.513647	11.878352	12.703340	33.074562	16.160040	11.904197	25.066923	124.367424	...	4424482	7963018	64059600	12981591	5098400	12035160	736000	6368681	21056100	17485000
2000-01-06	0.712287	30.930702	27.796047	12.893729	13.349821	32.525688	16.848200	12.239370	24.068050	126.030174	...	7147057	4989004	54976600	11115273	6524200	9471366	660400	4705763	19633500	19461600
2000-01-07	0.746027	31.381020	28.602884	12.555267	13.786201	34.436577	17.144821	12.513094	23.687531	130.910217	...	4905035	10871218	62013600	17962163	9832000	7792534	594700	5043907	23930700	16603800

Symbol	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2000-01-03	0.839280	32.323586	25.940268	12.490799	12.574042	35.148087	15.875281	11.853917	22.736227	129.774673	...	19.343803	25.427525	35.668091	11.650893	27.024214	6.324178	17.456139	15.576514	14.239633	17.255524
2000-01-04	0.768521	31.103098	25.899942	11.749414	12.412425	33.176201	15.875281	11.529919	24.068050	124.583733	...	18.575180	24.534512	34.463211	11.216841	26.504219	5.954147	17.224714	15.072998	13.706803	16.925014
2000-01-05	0.779767	30.313122	27.513647	11.878352	12.703340	33.074562	16.160040	11.904197	25.066923	124.367424	...	19.113220	25.498028	34.826580	11.399590	25.999968	6.046654	17.092470	15.576514	13.427073	17.847692
2000-01-06	0.712287	30.930702	27.796047	12.893729	13.349821	32.525688	16.848200	12.239370	24.068050	126.030174	...	20.650473	25.709532	33.659988	11.810803	27.197546	5.929497	17.423077	15.497396	13.573600	18.770374
2000-01-07	0.746027	31.381020	28.602884	12.555267	13.786201	34.436577	17.144821	12.513094	23.687531	130.910217	...	21.060410	28.177097	34.099834	12.610372	29.372086	5.980319	18.117361	15.382957	14.599282	18.715279

Value	High										...	Low
Symbol	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2000-01-03	0.843498	33.813864	26.908489	12.958197	12.671014	35.859586	16.302419	12.043848	22.783793	132.964966	...	19.279752	25.004515	34.271967	11.559513	26.646032	6.273719	17.257773	15.495303	13.959902	17.159124
2000-01-04	0.829439	32.040944	26.545424	12.361867	12.864961	34.802485	15.970201	11.753367	24.258312	128.044396	...	18.575180	24.346507	34.348461	10.965547	26.157553	5.853229	17.026349	14.878089	13.680161	16.856158
2000-01-05	0.828971	31.522557	27.957416	11.975055	12.978094	33.989347	16.432933	11.904197	25.209621	127.179215	...	18.677664	24.464008	33.468707	11.262521	25.842392	5.979376	17.059409	15.202938	13.253907	17.145353
2000-01-06	0.802260	31.368160	28.038103	12.893729	13.511441	33.135545	16.931254	12.647163	25.209619	127.125146	...	19.330995	25.451026	33.162739	11.399596	26.488456	5.844790	16.827981	15.268532	13.360470	17.668665
2000-01-07	0.757274	31.599747	28.965967	12.797025	14.254899	34.477234	17.251606	12.596887	24.448575	131.396868	...	20.483939	26.179553	32.837586	11.810801	27.528451	5.878671	17.621448	15.121398	13.746770	18.508708

Portfolio Allocation: Dow Jones component stocks¶

Stock Symbols¶

Load/Import Data from Yahoo Finance¶

Preview Data¶

Hierarchical Indexing¶

Subset First Level 1¶

Subset First Level 2¶

Subset Second Level 1¶

Subset Second Level 2¶

Subset Second Level 3¶

Candlestick Chart¶

Closing prices¶

Visualization¶

Data Cleaning¶

Count missing values¶

Data Cleaning¶

Closing Prices¶

Data Preprocessing¶

Log returns from stock prices¶

Estimate expected returns¶

Estimate covariance matrix (volatility structure)¶

Visualize volatility structure¶

Calculate Portfolio Allocation¶

Minimum Variance Portfolio¶

MVP with target return¶

Backtesting: portfolio returns¶

Summarize results¶

Visualization of Relationships¶

Correlation Matrix: Year 2020¶

Correlation Matrix: 2019 vs. 2020¶

Reality check¶

Backtesting¶

Value	Close	High	Low	Open
Date
2025-08-08	228.868134	230.514661	218.789348	220.366030
2025-08-11	226.959976	229.337676	224.542322	227.699265
2025-08-12	229.427582	230.576477	226.850094	227.789171
2025-08-13	233.104034	234.772415	230.206834	230.846229
2025-08-14	232.554565	234.892296	230.626442	233.833325

	name
code
AAPL	Apple Inc (AAPL)
AXP	American Express Co. (AXP)
BA	The Boeing Co. (BA)
BAC	Bank of America Corp. (BAC)
CAT	Caterpillar Inc. (CAT)
CSCO	Cisco Systems Inc. (CSCO)
CVX	Chevron Corporation (CVX)
DD	E.I. du Pont de Nemours and Co. (DD)
DIS	Disney (Walt) Co. (The) (DIS)
GE	General Electric Co (GE)
HD	Home Depot Inc (HD)
HPQ	Hewlett-Packard Co (HPQ)
IBM	International Business Machines Corp (IBM)
INTC	Intel Corp (INTC)
JNJ	Johnson & Johnson (JNJ)
JPM	JP Morgan Chase (JPM)
KO	Coca-Cola Co (The) (KO)
MCD	McDonald's Corp (MCD)
MMM	3M Co (MMM)
MRK	Merck & Co. Inc (MRK)
MSFT	Microsoft Corporation (MSFT)
PFE	Pfizer Inc (PFE)
PG	Procter & Gamble Co. (PG)
T	AT&T Inc (T)
TRV	Travelers Companies Inc (The) (TRV)
VZ	Verizon Communications Inc (VZ)
WMT	Wal-Mart Stores (WMT)
XOM	Exxon Mobil Corp. (XOM)

Symbol	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Symbol
AAPL	0.000632	0.000203	0.000163	0.000211	0.000179	0.000260	0.000116	0.000164	0.000167	0.000177	...	0.000122	0.000084	0.000222	0.000085	0.000065	0.000103	0.000124	0.000089	0.000095	0.000109
AXP	0.000203	0.000498	0.000251	0.000408	0.000246	0.000226	0.000191	0.000259	0.000239	0.000275	...	0.000178	0.000124	0.000195	0.000134	0.000095	0.000151	0.000220	0.000127	0.000113	0.000174
BA	0.000163	0.000251	0.000495	0.000249	0.000206	0.000168	0.000175	0.000226	0.000195	0.000232	...	0.000149	0.000098	0.000157	0.000110	0.000078	0.000116	0.000160	0.000089	0.000089	0.000164
BAC	0.000211	0.000408	0.000249	0.000743	0.000276	0.000234	0.000210	0.000296	0.000235	0.000309	...	0.000195	0.000130	0.000191	0.000148	0.000102	0.000164	0.000247	0.000134	0.000110	0.000188
CAT	0.000179	0.000246	0.000206	0.000276	0.000407	0.000182	0.000185	0.000259	0.000178	0.000221	...	0.000177	0.000100	0.000156	0.000110	0.000077	0.000115	0.000157	0.000097	0.000092	0.000173
CSCO	0.000260	0.000226	0.000168	0.000234	0.000182	0.000516	0.000125	0.000183	0.000191	0.000199	...	0.000139	0.000089	0.000237	0.000104	0.000074	0.000121	0.000145	0.000112	0.000106	0.000119
CVX	0.000116	0.000191	0.000175	0.000210	0.000185	0.000125	0.000300	0.000184	0.000144	0.000164	...	0.000121	0.000103	0.000121	0.000102	0.000065	0.000106	0.000145	0.000089	0.000064	0.000240
DD	0.000164	0.000259	0.000226	0.000296	0.000259	0.000183	0.000184	0.000478	0.000193	0.000230	...	0.000186	0.000117	0.000157	0.000125	0.000097	0.000126	0.000171	0.000104	0.000100	0.000171
DIS	0.000167	0.000239	0.000195	0.000235	0.000178	0.000191	0.000144	0.000193	0.000370	0.000194	...	0.000136	0.000094	0.000162	0.000100	0.000073	0.000120	0.000141	0.000107	0.000092	0.000136
GE	0.000177	0.000275	0.000232	0.000309	0.000221	0.000199	0.000164	0.000230	0.000194	0.000436	...	0.000168	0.000106	0.000159	0.000115	0.000083	0.000126	0.000172	0.000108	0.000097	0.000156
HD	0.000159	0.000214	0.000170	0.000221	0.000169	0.000170	0.000119	0.000182	0.000168	0.000173	...	0.000136	0.000093	0.000152	0.000100	0.000080	0.000107	0.000145	0.000097	0.000140	0.000109
HPQ	0.000245	0.000228	0.000198	0.000233	0.000204	0.000268	0.000151	0.000208	0.000195	0.000201	...	0.000143	0.000094	0.000204	0.000099	0.000060	0.000113	0.000141	0.000102	0.000084	0.000137
IBM	0.000159	0.000168	0.000136	0.000174	0.000145	0.000190	0.000111	0.000151	0.000136	0.000158	...	0.000110	0.000077	0.000144	0.000083	0.000062	0.000104	0.000116	0.000089	0.000077	0.000109
INTC	0.000286	0.000240	0.000201	0.000241	0.000200	0.000316	0.000139	0.000204	0.000192	0.000206	...	0.000148	0.000098	0.000260	0.000106	0.000078	0.000122	0.000151	0.000111	0.000103	0.000133
JNJ	0.000064	0.000095	0.000082	0.000098	0.000080	0.000073	0.000079	0.000088	0.000075	0.000082	...	0.000080	0.000102	0.000071	0.000101	0.000074	0.000073	0.000079	0.000067	0.000060	0.000078
JPM	0.000205	0.000365	0.000231	0.000498	0.000247	0.000239	0.000188	0.000266	0.000225	0.000283	...	0.000173	0.000126	0.000195	0.000134	0.000091	0.000159	0.000233	0.000138	0.000113	0.000171
KO	0.000072	0.000107	0.000096	0.000102	0.000086	0.000078	0.000082	0.000097	0.000084	0.000089	...	0.000081	0.000075	0.000078	0.000074	0.000084	0.000080	0.000089	0.000074	0.000065	0.000083
MCD	0.000092	0.000123	0.000113	0.000122	0.000094	0.000099	0.000086	0.000107	0.000099	0.000102	...	0.000080	0.000073	0.000085	0.000068	0.000069	0.000072	0.000103	0.000065	0.000070	0.000078
MMM	0.000122	0.000178	0.000149	0.000195	0.000177	0.000139	0.000121	0.000186	0.000136	0.000168	...	0.000241	0.000085	0.000114	0.000099	0.000080	0.000093	0.000128	0.000080	0.000082	0.000118
MRK	0.000084	0.000124	0.000098	0.000130	0.000100	0.000089	0.000103	0.000117	0.000094	0.000106	...	0.000085	0.000279	0.000088	0.000141	0.000081	0.000089	0.000105	0.000087	0.000070	0.000098
MSFT	0.000222	0.000195	0.000157	0.000191	0.000156	0.000237	0.000121	0.000157	0.000162	0.000159	...	0.000114	0.000088	0.000358	0.000094	0.000068	0.000100	0.000128	0.000094	0.000094	0.000110
PFE	0.000085	0.000134	0.000110	0.000148	0.000110	0.000104	0.000102	0.000125	0.000100	0.000115	...	0.000099	0.000141	0.000094	0.000252	0.000077	0.000091	0.000105	0.000085	0.000075	0.000101
PG	0.000065	0.000095	0.000078	0.000102	0.000077	0.000074	0.000065	0.000097	0.000073	0.000083	...	0.000080	0.000081	0.000068	0.000077	0.000177	0.000078	0.000083	0.000071	0.000074	0.000066
T	0.000103	0.000151	0.000116	0.000164	0.000115	0.000121	0.000106	0.000126	0.000120	0.000126	...	0.000093	0.000089	0.000100	0.000091	0.000078	0.000260	0.000112	0.000168	0.000079	0.000110
TRV	0.000124	0.000220	0.000160	0.000247	0.000157	0.000145	0.000145	0.000171	0.000141	0.000172	...	0.000128	0.000105	0.000128	0.000105	0.000083	0.000112	0.000321	0.000100	0.000089	0.000132
VZ	0.000089	0.000127	0.000089	0.000134	0.000097	0.000112	0.000089	0.000104	0.000107	0.000108	...	0.000080	0.000087	0.000094	0.000085	0.000071	0.000168	0.000100	0.000224	0.000072	0.000091
WMT	0.000095	0.000113	0.000089	0.000110	0.000092	0.000106	0.000064	0.000100	0.000092	0.000097	...	0.000082	0.000070	0.000094	0.000075	0.000074	0.000079	0.000089	0.000072	0.000217	0.000067
XOM	0.000109	0.000174	0.000164	0.000188	0.000173	0.000119	0.000240	0.000171	0.000136	0.000156	...	0.000118	0.000098	0.000110	0.000101	0.000066	0.000110	0.000132	0.000091	0.000067	0.000273

	Portfolio Value
2000-01-04	0.981337
2000-01-05	0.984165
2000-01-06	0.988135
2000-01-07	1.028528
2000-01-10	0.974107
...	...
2025-10-27	283.112617
2025-10-28	276.528236
2025-10-29	286.172518
2025-10-30	290.575876
2025-10-31	285.749925

Value	Close
Symbol	MMM	MRK	PFE	PG	JNJ
Date
2025-08-08	151.942062	79.900742	24.154673	152.443970	172.067017
2025-08-11	153.934250	79.247192	24.154673	153.903763	172.553452
2025-08-12	156.451752	79.514557	24.223461	154.013000	171.521011
2025-08-13	158.780930	81.900978	24.724636	154.330780	173.149063
2025-08-14	155.262375	81.950493	24.675501	152.672379	173.446884

Symbol	AAPL	AXP	BA	BAC	CAT	CSCO	CVX	DD	DIS	GE	...	MMM	MRK	MSFT	PFE	PG	T	TRV	VZ	WMT	XOM
Date
2000-01-03	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2000-01-04	-0.088077	-0.038490	-0.001556	-0.061189	-0.012937	-0.057737	0.000000	-0.027713	0.056926	-0.040822	...	-0.040546	-0.035751	-0.034364	-0.037967	-0.019429	-0.060292	-0.013346	-0.032859	-0.038137	-0.019340
2000-01-05	0.014527	-0.025727	0.060441	0.010914	0.023167	-0.003068	0.017778	0.031946	0.040664	-0.001738	...	0.028554	0.038520	0.010488	0.016161	-0.019209	0.015417	-0.007707	0.032859	-0.020619	0.053082
2000-01-06	-0.090514	0.020169	0.010212	0.082023	0.049638	-0.016734	0.041702	0.027767	-0.040664	0.013281	...	0.077358	0.008261	-0.034071	0.035437	0.045031	-0.019566	0.019158	-0.005092	0.010854	0.050406
2000-01-07	0.046281	0.014454	0.028614	-0.026601	0.032165	0.057089	0.017452	0.022118	-0.015936	0.037990	...	0.019657	0.091648	0.012983	0.065505	0.076918	0.008535	0.039075	-0.007412	0.072846	-0.002940