This R session will illustrate the incorporation of prior information in the estimation of \(\boldsymbol{\mu}\) and \(\boldsymbol{\Sigma}\).
(Useful R links: Cookbook R, Quick-R, R documentation, CRAN, METACRAN.)
Suppose we have an \(N\)-dimensional i.i.d. Gaussian time series \[ \mathbf{x}_t \sim \mathcal{N}(\boldsymbol{\mu}, \boldsymbol{\Sigma}). \] The sample estimators for the mean and covariance matrix are, respectively, the sample mean \[ \hat{\boldsymbol{\mu}} = \frac{1}{T}\sum_{t=1}^T \mathbf{x}_t \] and the sample covariance matrix \[ \hat{\boldsymbol{\Sigma}} = \frac{1}{T-1}\sum_{t=1}^T (\mathbf{x}_t - \hat{\boldsymbol{\mu}})(\mathbf{x}_t - \hat{\boldsymbol{\mu}})^T. \] We will next explore the idea of shrinkage to improve these sample estimators when \(T\) is not large enough.
The James-Stein estimator is \[ \hat{\boldsymbol{\mu}}^\textsf{JS} = (1-\rho) \hat{\boldsymbol{\mu}} + \rho \mathbf{t} \] where \(\mathbf{t}\) is the shrinkage target and \(0\le\rho\le1\) is the amount of shrinkage. Common choices for the target \(\mathbf{t}\) are:

- the zero vector: \(\mathbf{t} = \mathbf{0}\);
- an arbitrary constant vector, e.g., \(\mathbf{t} = 0.1\times\mathbf{1}\);
- the grand mean: \(\mathbf{t} = \frac{\mathbf{1}^T\hat{\boldsymbol{\mu}}}{N}\mathbf{1}\);
- the volatility-weighted grand mean: \(\mathbf{t} = \frac{\mathbf{1}^T\hat{\boldsymbol{\Sigma}}^{-1}\hat{\boldsymbol{\mu}}}{\mathbf{1}^T\hat{\boldsymbol{\Sigma}}^{-1}\mathbf{1}}\mathbf{1}\).
A choice of \(\rho\) that improves the estimation error is \[ \rho = \frac{1}{T}\frac{N\bar{\lambda}-2\lambda_\textsf{max}}{\|\hat{\boldsymbol{\mu}} - \mathbf{t}\|^2} \] where \(\bar{\lambda}\) and \(\lambda_\textsf{max}\) are the average and maximum values, respectively, of the eigenvalues of \(\boldsymbol{\Sigma}\) (or \(\hat{\boldsymbol{\Sigma}}\) in practice). Note that this closed-form value is not guaranteed to lie in \([0,1]\), so in practice it may be clipped to that range.
We will first explore this estimator using synthetic data. Let’s start by generating the data:
# generate Gaussian synthetic return data
library(mvtnorm)
set.seed(357)
N <- 100
T <- 120
mu <- runif(N)
U <- t(rmvnorm(n = round(0.7*N), sigma = 0.1*diag(N)))
Sigma <- U %*% t(U) + diag(N)
X <- rmvnorm(n = T, mean = mu, sigma = Sigma)
str(X)
#> num [1:120, 1:100] 3.57 0.19 -0.807 5.042 0.281 ...
# sample estimates (sample mean and sample covariance matrix)
mu_sm <- colMeans(X)
Sigma_scm <- cov(X)
Now let’s try the James-Stein estimator with the four targets:
# define the four targets
t_1 <- rep(0, N)            # zero vector
t_2 <- rep(0.1, N)          # arbitrary constant vector
t_3 <- rep(mean(mu_sm), N)  # grand mean (GM)
t_4 <- rep(sum(solve(Sigma_scm, mu_sm))/sum(solve(Sigma_scm, rep(1, N))), N)  # volatility-weighted (VW) grand mean
# compute the corresponding four rho's
lambdas <- eigen(Sigma_scm)$values
lmd_mean <- mean(lambdas)
lmd_max <- max(lambdas)
rho_1 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_1, "2")^2
rho_2 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_2, "2")^2
rho_3 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_3, "2")^2
rho_4 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_4, "2")^2
# finally the James-Stein estimators
mu_JS_1 <- (1-rho_1)*mu_sm + rho_1*t_1
mu_JS_2 <- (1-rho_2)*mu_sm + rho_2*t_2
mu_JS_3 <- (1-rho_3)*mu_sm + rho_3*t_3
mu_JS_4 <- (1-rho_4)*mu_sm + rho_4*t_4
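As a side note, the steps above can be bundled into a single reusable function. The following is just a sketch (the function name and the clipping of \(\rho\) to \([0,1]\) are our own additions, since the closed-form value is not guaranteed to lie in that range):

# sketch of a reusable James-Stein estimator (not part of the original session)
JS_estimator <- function(mu_sm, Sigma_scm, t, T) {
  lambdas <- eigen(Sigma_scm, symmetric = TRUE, only.values = TRUE)$values
  rho <- (1/T) * (length(mu_sm)*mean(lambdas) - 2*max(lambdas)) / norm(mu_sm - t, "2")^2
  rho <- max(0, min(1, rho))  # clip to [0, 1] as the definition requires
  return((1 - rho)*mu_sm + rho*t)
}
# e.g., JS_estimator(mu_sm, Sigma_scm, t_3, T) coincides with mu_JS_3 whenever rho lies in [0, 1]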
Let’s now compare the estimation error measured in terms of squared error and PRIAL (percentage relative improvement in average loss, expressed here as a fraction): \[ \begin{align} \textsf{SE} &= \|\hat{\boldsymbol{\mu}}^\textsf{JS} - \boldsymbol{\mu}\|^2\\ \textsf{PRIAL} &= \frac{\|\hat{\boldsymbol{\mu}} - \boldsymbol{\mu}\|^2 - \|\hat{\boldsymbol{\mu}}^\textsf{JS} - \boldsymbol{\mu}\|^2}{\|\hat{\boldsymbol{\mu}} - \boldsymbol{\mu}\|^2} \end{align} \]
# compute squared error and PRIAL
SE_mu <- c(norm(mu_sm - mu, "2")^2,
           norm(mu_JS_1 - mu, "2")^2,
           norm(mu_JS_2 - mu, "2")^2,
           norm(mu_JS_3 - mu, "2")^2,
           norm(mu_JS_4 - mu, "2")^2)
names(SE_mu) <- c("sample mean",
                  "JS with 0 target",
                  "JS with 0.1 target",
                  "JS with GM target",
                  "JS with VW target")
print(SE_mu)
#> sample mean JS with 0 target JS with 0.1 target JS with GM target JS with VW target
#> 5.971340 5.594483 5.274438 3.318186 3.150180
ref <- norm(mu_sm - mu, "2")^2
PRIAL_mu <- (ref - SE_mu)/ref
print(PRIAL_mu)
#> sample mean JS with 0 target JS with 0.1 target JS with GM target JS with VW target
#> 0.00000000 0.06311096 0.11670777 0.44431464 0.47245011
# plots
barplot(SE_mu, main = "Squared error in estimation of mu",
        col = heat.colors(5), cex.names = 0.75, las = 1)
barplot(PRIAL_mu, main = "PRIAL in estimation of mu",
        col = heat.colors(5), cex.names = 0.75, las = 1)
The improvement in performance is quite clear.
We can now plot the error as a function of the number of samples to get a more complete picture:
# first generate all the data
set.seed(357)
N <- 100
T_max <- 300
mu <- runif(N)
U <- t(rmvnorm(n = round(0.7*N), sigma = 0.1*diag(N)))
Sigma <- U %*% t(U) + diag(N)
X <- rmvnorm(n = T_max, mean = mu, sigma = Sigma)
# now loop over subsets of the samples
SE_mu_vs_T <- NULL
T_sweep <- ceiling(seq(1.01*N, T_max, length.out = 20))
for (T in T_sweep) {
  X_ <- X[1:T, ]
  # sample estimates
  mu_sm <- colMeans(X_)
  Sigma_scm <- cov(X_)
  # James-Stein estimators (t_1 and t_2 are the constant targets defined above)
  t_3 <- rep(mean(mu_sm), N)
  t_4 <- rep(sum(solve(Sigma_scm, mu_sm))/sum(solve(Sigma_scm, rep(1, N))), N)
  lambdas <- eigen(Sigma_scm)$values
  lmd_mean <- mean(lambdas)
  lmd_max <- max(lambdas)
  rho_1 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_1, "2")^2
  rho_2 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_2, "2")^2
  rho_3 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_3, "2")^2
  rho_4 <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t_4, "2")^2
  mu_JS_1 <- (1-rho_1)*mu_sm + rho_1*t_1
  mu_JS_2 <- (1-rho_2)*mu_sm + rho_2*t_2
  mu_JS_3 <- (1-rho_3)*mu_sm + rho_3*t_3
  mu_JS_4 <- (1-rho_4)*mu_sm + rho_4*t_4
  # compute errors
  SE_mu_vs_T <- rbind(SE_mu_vs_T, c(norm(mu_sm - mu, "2")^2,
                                    norm(mu_JS_1 - mu, "2")^2,
                                    norm(mu_JS_2 - mu, "2")^2,
                                    norm(mu_JS_3 - mu, "2")^2,
                                    norm(mu_JS_4 - mu, "2")^2))
}
colnames(SE_mu_vs_T) <- names(SE_mu)
rownames(SE_mu_vs_T) <- paste("T =", T_sweep)
# compute PRIAL
PRIAL_mu_vs_T <- (SE_mu_vs_T[, 1] - SE_mu_vs_T[, -1])/SE_mu_vs_T[, 1]
# plots
matplot(T_sweep, SE_mu_vs_T,
        main = "Squared error in estimation of mu", xlab = "T", ylab = "squared error",
        type = "b", pch = 20, col = rainbow(5))
legend("topright", inset = 0.01, legend = colnames(SE_mu_vs_T), pch = 20, col = rainbow(5))
matplot(T_sweep, PRIAL_mu_vs_T,
        main = "PRIAL in estimation of mu", xlab = "T", ylab = "PRIAL",
        type = "b", pch = 20, col = rainbow(4))
legend("topright", inset = 0.01, legend = colnames(PRIAL_mu_vs_T), pch = 20, col = rainbow(4))
Indeed, we can clearly see the large improvement obtained by shrinking the sample mean. In particular, the James-Stein estimators that use the grand mean and the volatility-weighted grand mean as targets are the best by a wide margin.
The shrinkage estimator for the covariance matrix is \[ \hat{\boldsymbol{\Sigma}}^\textsf{sh} = (1-\rho) \hat{\boldsymbol{\Sigma}} + \rho \mathbf{T} \] where \(\mathbf{T}\) is the shrinkage target and \(0\le\rho\le1\) is the amount of shrinkage. Note that as long as \(\mathbf{T}\) is positive definite (as both targets below are), the shrinkage estimator is invertible even when the number of observations is smaller than \(N\), unlike the SCM.
Common choices for the target \(\mathbf{T}\) are:

- the scaled identity: \(\mathbf{T} = \frac{\textsf{Tr}(\hat{\boldsymbol{\Sigma}})}{N}\mathbf{I}\);
- the diagonal of the sample covariance matrix: \(\mathbf{T} = \textsf{Diag}(\hat{\boldsymbol{\Sigma}})\).
To determine \(\rho\) we will use two different criteria:

- the Ledoit-Wolf criterion, which minimizes the expected estimation error \(\mathbb{E}\|\hat{\boldsymbol{\Sigma}}^\textsf{sh}-\boldsymbol{\Sigma}\|_F^2\);
- maximization of the out-of-sample Sharpe ratio, characterized via random matrix theory (RMT).
Let’s start by generating some synthetic data:
# generate Gaussian synthetic return data
library(mvtnorm)
set.seed(357)
N <- 100
T <- 120
mu <- runif(N)
U <- t(rmvnorm(n = round(0.7*N), sigma = 0.1*diag(N)))
Sigma <- U %*% t(U) + diag(N)
X <- rmvnorm(n = T, mean = mu, sigma = Sigma)
# sample estimates (sample mean and sample covariance matrix)
mu_sm <- colMeans(X)
Sigma_scm <- cov(X)
The two targets are easily computed:
# two target matrices T=Identity and T=Diagonal
T_I <- sum(diag(Sigma_scm))/N * diag(N)
T_D <- diag(diag(Sigma_scm))
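As a quick sanity check (our own aside, not in the original session), note that the scaled-identity target preserves the trace of the SCM, i.e., the overall variance level:

# the scaled-identity target preserves the trace (total variance) of the SCM
all.equal(sum(diag(T_I)), sum(diag(Sigma_scm)))
#> [1] TRUE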
We next compute the Ledoit-Wolf choice of \(\rho\): \[ \rho =\min\left(1,\frac{\frac{1}{T^{2}}\sum_{t=1}^{T}\|\hat{\boldsymbol{\Sigma}}-\mathbf{r}_{t}\mathbf{r}_{t}^{T}\|_{F}^{2}}{\|\hat{\boldsymbol{\Sigma}}-\mathbf{T}\|_{F}^{2}}\right) \] where \(\mathbf{r}_t\) denotes the demeaned return vector at time \(t\) (the code below centers the data accordingly).
# Ledoit-Wolf shrinkage estimators with two targets
rho_LW <- function(X, Sigma_scm, Sigma_T) {
  X <- scale(X, center = TRUE, scale = FALSE)  # demean the returns
  T <- nrow(X)
  # explicit (slow) computation of the numerator sum:
  #tmp <- 0
  #for (t in 1:T)
  #  tmp <- tmp + norm(Sigma_scm - X[t, ] %o% X[t, ], "F")^2
  # fast vectorized computation replacing the loop above
  tmp <- (1-T)*norm(Sigma_scm, "F")^2 + sum(apply(X, 1, norm, "2")^4)
  rho <- min(1, (1/T^2) * tmp / norm(Sigma_scm - Sigma_T, "F")^2)
  return(rho)
}
rho <- rho_LW(X, Sigma_scm, T_I)
Sigma_LW_I <- (1-rho)*Sigma_scm + rho*T_I
rho <- rho_LW(X, Sigma_scm, T_D)
Sigma_LW_D <- (1-rho)*Sigma_scm + rho*T_D
Let’s now compare the estimation error measured in terms of squared error and PRIAL: \[ \begin{align} \textsf{SE} &= \|\hat{\boldsymbol{\Sigma}}^\textsf{sh} - \boldsymbol{\Sigma}\|_F^2\\ \textsf{PRIAL} &= \frac{\|\hat{\boldsymbol{\Sigma}} - \boldsymbol{\Sigma}\|_F^2 - \|\hat{\boldsymbol{\Sigma}}^\textsf{sh} - \boldsymbol{\Sigma}\|_F^2}{\|\hat{\boldsymbol{\Sigma}} - \boldsymbol{\Sigma}\|_F^2} \end{align} \]
# performance in terms of squared error and PRIAL
SE_Sigma <- c(norm(Sigma_scm - Sigma, "F")^2,
              norm(Sigma_LW_I - Sigma, "F")^2,
              norm(Sigma_LW_D - Sigma, "F")^2)
names(SE_Sigma) <- c("SCM",
                     "LW with I target",
                     "LW with D target")
print(SE_Sigma)
#> SCM LW with I target LW with D target
#> 5012.484 3031.465 3073.355
ref <- norm(Sigma_scm - Sigma, "F")^2
PRIAL_Sigma <- (ref - SE_Sigma)/ref
print(PRIAL_Sigma)
#> SCM LW with I target LW with D target
#> 0.0000000 0.3952170 0.3868599
# plots
barplot(SE_Sigma, main = "Squared error in estimation of Sigma",
        col = heat.colors(5), cex.names = 0.75, las = 1)
barplot(PRIAL_Sigma, main = "PRIAL in estimation of Sigma",
        col = heat.colors(5), cex.names = 0.75, las = 1)
Let’s try now the shrinkage estimator based on maximizing the Sharpe ratio: \[ \begin{array}{ll} \underset{\rho_{1}\geq0}{\textsf{maximize}} & \begin{array}{c} \frac{\hat{\boldsymbol{\mu}}^{T}(\hat{\boldsymbol{\Sigma}}^{\textsf{sh}})^{-1}\hat{\boldsymbol{\mu}}-\delta}{\sqrt{b\hat{\boldsymbol{\mu}}^{T}(\hat{\boldsymbol{\Sigma}}^{\textsf{sh}})^{-1}\hat{\boldsymbol{\Sigma}}(\hat{\boldsymbol{\Sigma}}^{\textsf{sh}})^{-1}\hat{\boldsymbol{\mu}}}}\end{array}\\ \textsf{subject to} & \begin{array}[t]{l} \hat{\boldsymbol{\Sigma}}^{\textsf{sh}}=\rho_{1}\mathbf{T}+\hat{\boldsymbol{\Sigma}}\\ \delta=D/(1-D)\\ D=\frac{1}{T}\textsf{Tr}(\hat{\boldsymbol{\Sigma}}(\hat{\boldsymbol{\Sigma}}^{\textsf{sh}})^{-1})\\ b=\frac{T}{\textsf{Tr}(\mathbf{W}(\mathbf{I}+\delta\mathbf{W})^{-2})} \end{array} \end{array} \] where \(\mathbf{W}=\mathbf{I}-\frac{1}{T}\mathbf{1}\mathbf{1}^{T}\).
# Max-SR shrinkage estimators with two targets
rho_maxSR <- function(mu_sm, Sigma_scm, Sigma_T, T) {
  W <- diag(T) - (1/T)*matrix(1, T, T)  # centering matrix W = I - (1/T)*1*1^T
  rho1_sweep <- exp(seq(-10, 10, length.out = 100))  # logarithmic grid for rho1
  obj <- NULL
  for (rho1 in rho1_sweep) {
    Sigma_sh <- rho1*Sigma_T + Sigma_scm
    D <- (1/T) * sum(diag(Sigma_scm %*% solve(Sigma_sh)))
    delta <- D/(1-D)
    b <- T/sum(diag(W %*% solve((diag(T)+delta*W) %*% (diag(T)+delta*W))))
    inv_S_sh_mu <- solve(Sigma_sh, mu_sm)
    num <- mu_sm %*% inv_S_sh_mu - delta
    den <- sqrt(b * inv_S_sh_mu %*% Sigma_scm %*% inv_S_sh_mu)
    obj <- c(obj, num/den)
  }
  #plot(obj)  # uncomment to inspect the objective over the grid
  i_max <- which.max(obj)
  rho1 <- rho1_sweep[i_max]
  # normalizing Sigma_sh by (1+rho1) gives (1-rho)*SCM + rho*T with rho = rho1/(1+rho1)
  return(rho1/(1 + rho1))
}
rho <- rho_maxSR(mu_sm, Sigma_scm, T_I, T)
Sigma_maxSR_I <- (1-rho)*Sigma_scm + rho*T_I
rho <- rho_maxSR(mu_sm, Sigma_scm, T_D, T)
Sigma_maxSR_D <- (1-rho)*Sigma_scm + rho*T_D
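Before evaluating performance, it can be instructive to compare the shrinkage intensities selected by the two criteria; the following quick comparison (our own addition) uses the identity target:

# compare the shrinkage intensities chosen by the two criteria (identity target)
c(LW = rho_LW(X, Sigma_scm, T_I), maxSR = rho_maxSR(mu_sm, Sigma_scm, T_I, T))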
We can now evaluate the performance:
# performance in terms of squared error, SR, and PRIAL
SE_Sigma <- c(norm(Sigma_scm - Sigma, "F")^2,
              norm(Sigma_maxSR_I - Sigma, "F")^2,
              norm(Sigma_maxSR_D - Sigma, "F")^2)
names(SE_Sigma) <- c("SCM",
                     "Max-SR with I target",
                     "Max-SR with D target")
print(SE_Sigma)
#> SCM Max-SR with I target Max-SR with D target
#> 5012.484 3812.347 3834.279
ref <- norm(Sigma_scm - Sigma, "F")^2
PRIAL_Sigma <- (ref - SE_Sigma)/ref
print(PRIAL_Sigma)
#> SCM Max-SR with I target Max-SR with D target
#> 0.0000000 0.2394296 0.2350540
# Sharpe ratio achieved by the portfolio w = Sigma_est^(-1) mu_sm (up to scaling) under the true mu and Sigma
compute_SR <- function(Sigma_est, mu_sm, mu, Sigma) {
  w <- solve(Sigma_est, mu_sm)  # unnormalized weights (the normalization cancels in the SR)
  return(mu %*% w / sqrt(w %*% Sigma %*% w))
}
SR_Sigma <- c(compute_SR(Sigma_scm, mu_sm, mu, Sigma),
              compute_SR(Sigma_maxSR_I, mu_sm, mu, Sigma),
              compute_SR(Sigma_maxSR_D, mu_sm, mu, Sigma))
names(SR_Sigma) <- names(SE_Sigma)
print(SR_Sigma)
#> SCM Max-SR with I target Max-SR with D target
#> 1.472879 2.403143 2.372630
# plots
barplot(SE_Sigma, main = "Squared error in estimation of Sigma",
        col = heat.colors(5), cex.names = 0.75, las = 1)
barplot(PRIAL_Sigma, main = "PRIAL in estimation of Sigma",
        col = heat.colors(5), cex.names = 0.75, las = 1)
We can now plot the performance measures as a function of the number of samples to get a more complete picture (we will use only the identity target):
# first generate all the data
set.seed(357)
N <- 100
T_max <- 300
mu <- runif(N)
U <- t(rmvnorm(n = round(0.7*N), sigma = 0.1*diag(N)))
Sigma <- U %*% t(U) + diag(N)
X <- rmvnorm(n = T_max, mean = mu, sigma = Sigma)
# now loop over subsets of the samples
SR_Sigma_vs_T <- SE_Sigma_vs_T <- NULL
T_sweep <- ceiling(seq(1.01*N, T_max, length.out = 20))
for (T in T_sweep) {
  X_ <- X[1:T, ]
  # sample estimates
  mu_sm <- colMeans(X_)
  Sigma_scm <- cov(X_)
  # methods: Ledoit-Wolf, max-SR
  T_I <- sum(diag(Sigma_scm))/N * diag(N)
  rho <- rho_LW(X_, Sigma_scm, T_I)
  Sigma_LW <- (1-rho)*Sigma_scm + rho*T_I
  rho <- rho_maxSR(mu_sm, Sigma_scm, T_I, T)
  Sigma_maxSR <- (1-rho)*Sigma_scm + rho*T_I
  # compute performance
  SE_Sigma_vs_T <- rbind(SE_Sigma_vs_T, c(norm(Sigma_scm - Sigma, "F")^2,
                                          norm(Sigma_LW - Sigma, "F")^2,
                                          norm(Sigma_maxSR - Sigma, "F")^2))
  SR_Sigma_vs_T <- rbind(SR_Sigma_vs_T, c(compute_SR(Sigma_scm, mu_sm, mu, Sigma),
                                          compute_SR(Sigma_LW, mu_sm, mu, Sigma),
                                          compute_SR(Sigma_maxSR, mu_sm, mu, Sigma)))
}
colnames(SR_Sigma_vs_T) <- colnames(SE_Sigma_vs_T) <- c("SCM", "LW", "max-SR")
rownames(SR_Sigma_vs_T) <- rownames(SE_Sigma_vs_T) <- paste("T =", T_sweep)
# compute PRIAL
PRIAL_Sigma_vs_T <- (SE_Sigma_vs_T[, 1] - SE_Sigma_vs_T[, -1])/SE_Sigma_vs_T[, 1]
# plots
matplot(T_sweep, SE_Sigma_vs_T,
        main = "Squared error in estimation of Sigma", xlab = "T", ylab = "squared error",
        type = "b", pch = 20, col = rainbow(3))
legend("topright", inset = 0.01, legend = colnames(SE_Sigma_vs_T), pch = 20, col = rainbow(3))
matplot(T_sweep, PRIAL_Sigma_vs_T,
        main = "PRIAL in estimation of Sigma", xlab = "T", ylab = "PRIAL",
        type = "b", pch = 20, col = rainbow(2))
legend("topright", inset = 0.01, legend = colnames(PRIAL_Sigma_vs_T), pch = 20, col = rainbow(2))
matplot(T_sweep, SR_Sigma_vs_T,
        main = "Sharpe ratio", xlab = "T", ylab = "SR",
        type = "b", pch = 20, col = rainbow(3))
legend("bottomright", inset = 0.01, legend = colnames(SR_Sigma_vs_T), pch = 20, col = rainbow(3))
We start by loading some stock market data and dividing it into a training set and test set:
library(xts)
library(quantmod)
library(PerformanceAnalytics)
# set begin-end date and stock namelist
begin_date <- "2015-01-01"
end_date <- "2017-08-31"
# load list of symbols (old version here: http://trading.chrisconlan.com/SPstocks_current.csv)
stock_namelist <- as.character(read.csv("SPstocks_current.csv",
                                        stringsAsFactors = FALSE, header = FALSE)[, 1])
# download data from YahooFinance
N <- length(stock_namelist)
N <- 300  # use only the first 300 stocks to keep the download manageable
prices <- xts()
for (stock_index in 1:N)
  prices <- cbind(prices, Ad(getSymbols(stock_namelist[stock_index],
                                        from = begin_date, to = end_date, auto.assign = FALSE)))
colnames(prices) <- stock_namelist[1:N]
tclass(prices) <- "Date"
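# defensive check (our own addition, not in the original session): downloaded
# data may contain missing values, so drop any stocks with NAs before computing returns
if (anyNA(prices))
  prices <- prices[, colSums(is.na(prices)) == 0]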
# compute log-returns and linear returns
X_log <- diff(log(prices))[-1]
X_lin <- (prices/lag(prices) - 1)[-1]
N <- ncol(X_log) # number of stocks
T <- nrow(X_log) # number of days
# split data into training data and test data
T_trn <- round(0.7*T) # 70% of data
X_log_trn <- X_log[1:T_trn, ]
X_log_tst <- X_log[(T_trn+1):T, ]
X_lin_trn <- X_lin[1:T_trn, ]
X_lin_tst <- X_lin[(T_trn+1):T, ]
Let’s consider three estimations for \(\boldsymbol{\Sigma}\): the SCM, the shrinkage Ledoit-Wolf estimator, and the shrinkage max-SR estimator:
# estimate Sigma with the training data (SCM, Ledoit-Wolf, and max-SR)
mu_sm <- colMeans(X_log_trn)
Sigma_scm <- cov(X_log_trn)
T_I <- sum(diag(Sigma_scm))/N * diag(N)
rho <- rho_LW(X_log_trn, Sigma_scm, T_I)
Sigma_LW <- (1-rho)*Sigma_scm + rho*T_I
rho <- rho_maxSR(mu_sm, Sigma_scm, T_I, T)
Sigma_maxSR <- (1-rho)*Sigma_scm + rho*T_I
We start with the Global Minimum Variance Portfolio (GMVP) since it does not require \(\boldsymbol{\mu}\) and we can focus on the estimation of \(\boldsymbol{\Sigma}\): \[ \mathbf{w}_\textsf{GMVP} = \frac{1}{\mathbf{1}^T\boldsymbol{\Sigma}^{-1}\mathbf{1}}\boldsymbol{\Sigma}^{-1}\mathbf{1} \]
# create function for GMVP
portfolioGMVP <- function(Sigma) {
  ones <- rep(1, nrow(Sigma))
  Sigma_inv_1 <- solve(Sigma, ones)  # same as: solve(Sigma) %*% ones
  w <- (1/as.numeric(ones %*% Sigma_inv_1)) * Sigma_inv_1
  return(w)
}
# compute the three versions of GMVP
w_GMVP_scm <- portfolioGMVP(Sigma_scm)
w_GMVP_LW <- portfolioGMVP(Sigma_LW)
w_GMVP_maxSR <- portfolioGMVP(Sigma_maxSR)
w_GMVP_all <- cbind(w_GMVP_scm, w_GMVP_LW, w_GMVP_maxSR)
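# quick sanity check (our own addition): each portfolio's weights sum to 1 by construction
colSums(w_GMVP_all)  # expect c(1, 1, 1) up to numerical precision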
# compute returns of the three portfolios
ret_GMVP_all <- xts(X_lin %*% w_GMVP_all, index(X_lin))
ret_GMVP_all_trn <- ret_GMVP_all[1:T_trn, ]
ret_GMVP_all_tst <- ret_GMVP_all[-c(1:T_trn), ]
# performance
table.AnnualizedReturns(ret_GMVP_all_trn)
#> w_GMVP_scm w_GMVP_LW w_GMVP_maxSR
#> Annualized Return -0.0735 -0.0426 0.0867
#> Annualized Std Dev 0.0472 0.0513 0.1247
#> Annualized Sharpe (Rf=0%) -1.5553 -0.8320 0.6951
table.AnnualizedReturns(ret_GMVP_all_tst)
#> w_GMVP_scm w_GMVP_LW w_GMVP_maxSR
#> Annualized Return 0.1950 0.1575 0.1381
#> Annualized Std Dev 0.1398 0.0992 0.0683
#> Annualized Sharpe (Rf=0%) 1.3947 1.5879 2.0209
We can see that, in-sample, the GMVP based on the SCM unsurprisingly achieves the lowest standard deviation (it is optimized for precisely that data), while the shrinkage-based versions look similar or worse. Out-of-sample, however, the comparison is reversed and the shrinkage methods are clearly better.
Let’s plot the wealth evolution over time:
# plots
{ chart.CumReturns(ret_GMVP_all, main = "Performance of different GMVPs",
                   wealth.index = TRUE, legend.loc = "topleft", colorset = rich8equal)
  addEventLines(xts("training", index(X_lin[T_trn])), srt = 90, pos = 2, lwd = 2, col = "darkblue") }
chart.CumReturns(ret_GMVP_all_tst, main = "Out-of-sample performance of different GMVPs",
                 wealth.index = TRUE, legend.loc = "topleft", colorset = rich8equal)
chart.Drawdown(ret_GMVP_all_tst, main = "Out-of-sample drawdown of different GMVPs",
               legend.loc = "bottomleft", colorset = rich8equal)
One can see the clear superiority of the portfolio based on the shrinkage estimate whose \(\rho\) is computed via RMT to maximize the out-of-sample Sharpe ratio.
Finally, let’s repeat a similar experiment but designing instead a maximum Sharpe ratio portfolio: \[ \begin{array}{ll} \underset{\tilde{\mathbf{w}}}{\textsf{minimize}} & \tilde{\mathbf{w}}^T\boldsymbol{\Sigma}\tilde{\mathbf{w}}\\ {\textsf{subject to}} & \tilde{\mathbf{w}}^T\boldsymbol{\mu} = 1\\ & \tilde{\mathbf{w}}\ge\mathbf{0} \end{array} \] and then \(\mathbf{w} = \tilde{\mathbf{w}}/(\mathbf{1}^T\tilde{\mathbf{w}})\). We will also consider the James-Stein shrinkage estimator for \(\boldsymbol{\mu}\):
# James-Stein estimator with GM target
t <- rep(mean(mu_sm), N)
lambdas <- eigen(Sigma_scm)$values
lmd_mean <- mean(lambdas)
lmd_max <- max(lambdas)
rho <- (1/T)*(N*lmd_mean - 2*lmd_max)/norm(mu_sm - t, "2")^2
mu_JS <- (1-rho)*mu_sm + rho*t
# now re-compute the max-SR estimator for Sigma combined with the JS estimator for mu:
T_I <- sum(diag(Sigma_scm))/N * diag(N)
rho <- rho_maxSR(mu_JS, Sigma_scm, T_I, T)
Sigma_JS_maxSR <- (1-rho)*Sigma_scm + rho*T_I
library(CVXR)
# create function for the max-SR design (solve the convex reformulation, then renormalize)
portfolioMaxSharpeRatio <- function(mu, Sigma) {
  w_ <- Variable(nrow(Sigma))
  prob <- Problem(Minimize(quad_form(w_, Sigma)),
                  constraints = list(w_ >= 0, t(mu) %*% w_ == 1))
  result <- solve(prob)
  return(as.vector(result$getValue(w_)/sum(result$getValue(w_))))
}
# compute seven versions of max-SR portfolio
w_maxSR_scm <- portfolioMaxSharpeRatio(mu_sm, Sigma_scm)
w_maxSR_LW <- portfolioMaxSharpeRatio(mu_sm, Sigma_LW)
w_maxSR_maxSR <- portfolioMaxSharpeRatio(mu_sm, Sigma_maxSR)
w_maxSR_JS_scm <- portfolioMaxSharpeRatio(mu_JS, Sigma_scm)
w_maxSR_JS_LW <- portfolioMaxSharpeRatio(mu_JS, Sigma_LW)
w_maxSR_JS_maxSR <- portfolioMaxSharpeRatio(mu_JS, Sigma_maxSR)
w_JS_maxSR_JS_maxSR <- portfolioMaxSharpeRatio(mu_JS, Sigma_JS_maxSR)
w_maxSR_all <- cbind(w_maxSR_scm, w_maxSR_LW, w_maxSR_maxSR,
                     w_maxSR_JS_scm, w_maxSR_JS_LW, w_maxSR_JS_maxSR, w_JS_maxSR_JS_maxSR)
# compute returns of the seven portfolios
ret_maxSR_all <- xts(X_lin %*% w_maxSR_all, index(X_lin))
ret_maxSR_all_trn <- ret_maxSR_all[1:T_trn, ]
ret_maxSR_all_tst <- ret_maxSR_all[-c(1:T_trn), ]
# performance
table.AnnualizedReturns(ret_maxSR_all_trn)
#> w_maxSR_scm w_maxSR_LW w_maxSR_maxSR w_maxSR_JS_scm w_maxSR_JS_LW w_maxSR_JS_maxSR
#> Annualized Return 0.5970 0.5970 0.3536 0.5609 0.5621 0.3252
#> Annualized Std Dev 0.1817 0.1815 0.1494 0.1727 0.1728 0.1454
#> Annualized Sharpe (Rf=0%) 3.2857 3.2893 2.3674 3.2474 3.2522 2.2362
#> w_JS_maxSR_JS_maxSR
#> Annualized Return 0.2150
#> Annualized Std Dev 0.1486
#> Annualized Sharpe (Rf=0%) 1.4469
table.AnnualizedReturns(ret_maxSR_all_tst)
#> w_maxSR_scm w_maxSR_LW w_maxSR_maxSR w_maxSR_JS_scm w_maxSR_JS_LW w_maxSR_JS_maxSR
#> Annualized Return 0.6228 0.6248 0.3410 0.5734 0.5765 0.3159
#> Annualized Std Dev 0.1861 0.1856 0.0943 0.1691 0.1691 0.0882
#> Annualized Sharpe (Rf=0%) 3.3466 3.3665 3.6174 3.3906 3.4091 3.5797
#> w_JS_maxSR_JS_maxSR
#> Annualized Return 0.2525
#> Annualized Std Dev 0.0798
#> Annualized Sharpe (Rf=0%) 3.1651
Again, we can see totally different results in-sample and out-of-sample. In-sample, the portfolio based on the SCM has top performance, but this advantage vanishes out-of-sample, where the portfolios based on the max-SR shrinkage estimator shine in terms of Sharpe ratio (as expected!).
Let’s plot the wealth evolution over time:
# plots
{ chart.CumReturns(ret_maxSR_all, main = "Performance of different max-SR portfolios",
                   wealth.index = TRUE, legend.loc = "topleft", colorset = rich8equal)
  addEventLines(xts("training", index(X_lin[T_trn])), srt = 90, pos = 2, lwd = 2, col = "darkblue") }
chart.CumReturns(ret_maxSR_all_tst, main = "Out-of-sample performance of different max-SR portfolios",
                 wealth.index = TRUE, legend.loc = "topleft", colorset = rich8equal)
chart.Drawdown(ret_maxSR_all_tst, main = "Out-of-sample drawdown of different max-SR portfolios",
               legend.loc = "bottomleft", colorset = rich8equal)
The top performers, by a wide margin, are those in which \(\rho\) is obtained via the max-SR criterion. And among those three, the best is the one that fully uses the James-Stein estimator. Now let’s look at both the GMVP and max-SR portfolios together:
{ chart.CumReturns(cbind(ret_GMVP_all, ret_maxSR_all),
                   main = "Performance of the GMVP and max-SR portfolios",
                   wealth.index = TRUE, legend.loc = "topleft", colorset = rich10equal)
  addEventLines(xts("training", index(X_lin[T_trn])), srt = 90, pos = 2, lwd = 2, col = "darkblue") }