Factor Models

#
# Industry Factor Model
#
# This R code is based on the industry factor model example in Chapter 15 of "Modeling Financial 
# Time Series with S-Plus,Second Edition" by Zivot and Wang, Springer, 2006. Some of the code was
# also borrowed from Guy Yollen's example code for his Financial Data Modeling and Analysis in R,
# AMATH 542, University of Washington.
#
# This code uses the berndtInvest data set, which is part of the fEcofin, written by Rmetrics.
# At the time this code was written, the fEcofin library had disappeared from CRAN.  However, I had
# loaded a copy before it went away. To allow this code to run without the fEcofin library, the
# berndtInvest data is loaded from a .csv file.
#
 
# Return the indexes in charArrayA where elements of charArrayB appear, or 0
which.ix = function(charArrayA, charArrayB)
{
  ix = c()
  for (word in charArrayB) {
    t = which(charArrayA == word)
    if (length(t) > 0) {
      ix = c(ix, t)
    }
  }
  if (length(ix) == 0) {
    ix = c(0)
  }
  return(ix)
}
 
#
# This function is passed a matrix of returns and an optional
# covariance matrix. The result is the weights, mean return and 
# standard deviation of the global minimum variance portfolio,
# where the only constraint is full investment 
# (e.g., shorting is allowed).
#
analyticGMV = function(returns, S = var(returns))
{
  Sinv = solve(S)
  one = rep(1, ncol(S))
  denom = as.numeric(t(one) %*% Sinv %*% one)
  w = (Sinv %*% one) / denom
  sd = 1 / sqrt(denom)
  rslt = list(w = round(t(w), 8), mu = mean(returns %*% w), sd = sd)
  return(rslt)
}
 
 
# 
# Calculate the weights, mean return and standard deviation of the 
# tangency portfolio.
#
analyticTangency = function(returns, rf, S = var(returns))
{
  mu = apply(returns, 2, mean)
  Sinv = solve(S)
  one = rep(1, ncol(S))
  mu_e = mu - rf
  w = (Sinv %*% mu_e) / as.numeric((t(one) %*% Sinv %*% mu_e))
  sd = sqrt(t(w) %*% S %*% w)
  rslt = list(w = round(t(w), 6), mu = sum(w * mu), sd = sd)
  return(rslt)
}
 
#
# Given two Markowitz mean/variance optimized portfolio weights, calculate a set of
# means and variance values for the efficient frontier using the "two fund theorm".
# 
#  returns: a matrix of portfolio returns
#  gmv_wts: the weights for the Global Minimum Variance portfolio
#  tan_wts: the weights for the tangency portfolio
#  S      : The covariance matrix
#
frontierPoints = function(returns, gmv_wts, tan_wts, S, alpha = seq(from=-0.1, to=1.1, by=0.05))
{
  y_mu = list()
  x_sd = list()
  gmv_wts_t = as.vector(gmv_wts)
  tan_wts_t = as.vector(tan_wts)
  ix = 1
  mu = apply(returns, 2, mean)
  for (i in alpha) {
    w = i * gmv_wts_t + ((1-i) * tan_wts_t)
    y_mu[[ix]] = mu %*% w
    x_sd[[ix]] = sqrt( t(w) %*% S %*% w)
    ix = ix + 1
  }
  rslt = list(mu=y_mu, sd=x_sd)
  return(rslt)
}
 
# This is a hack to get around a problem in the fPortfolio code.  
# This code uses the covariance function, cov(), which is
# depricated and now returns a  "cov" object.
cov = function( time_series.mat )
{
  return(var(time_series.mat))
}
 
library(zoo)
library(fPortfolio)
 
berndt = read.csv(file="berndtInvest.csv")
date.col = berndt[[1]]
dates = as.Date(date.col)
skipCols = c(1, which.ix(colnames(berndt), c("MARKET", "RKFREE")))
rf.ts = as.numeric(berndt[[ "RKFREE"]])
rf = mean(rf.ts)
returns = as.matrix(berndt[-skipCols]) # remove the date, market data and risk free rate columns
names = colnames(returns)
tech = c("DATGEN", "DEC", "IBM", "TANDY")
techIx = which.ix(names, tech)
oil = c("CONTIL", "DELTA",  "MOBIL", "PANAM", "TEXACO")
oilIx = which.ix(names, oil)
n.stocks = ncol(returns) 
tech.dum = oil.dum = other.dum = matrix(0,n.stocks,1)
tech.dum[techIx,] = 1
oil.dum[oilIx,] = 1
other.dum = 1 - tech.dum - oil.dum 
B = cbind(tech.dum,oil.dum,other.dum)
dimnames(B) = list(colnames(returns),c("TECH","OIL","OTHER"))
 
F.hat = solve(t(B) %*% B) %*% t(B) %*% t(returns)
E.hat = t(returns) - B %*% F.hat
# Calculate the variances across the rows
diagD.hat = apply(E.hat, 1, var)
Dinv.hat = diag( diagD.hat^(-1))
# multivariate FGLS regression to estimate K x T matrix of factor returns
H = solve(t(B) %*% Dinv.hat %*% B) %*% t(B) %*% Dinv.hat
# create factor mimicking portfolios
F.hat = H %*% t(returns)
colnames(H) = colnames(returns)
 
F.hat = t(F.hat)
cov.ind = B %*% var(F.hat) %*% t(B) + diag(diagD.hat)
 
gmv.ind = analyticGMV(returns, cov.ind)
tan.ind = analyticTangency( returns, rf, cov.ind)
gmv.ind.w = gmv.ind$w
tan.ind.w = tan.ind$w
 
gmv.ret = analyticGMV(returns)
tan.ret = analyticTangency(returns, rf)
gmv.ret.w = gmv.ret$w
tan.ret.w = tan.ret$w
 
gmv.x = c(gmv.ind$sd, gmv.ret$sd)
gmv.y = c(gmv.ind$mu, gmv.ret$mu)
tan.x = c(tan.ind$sd, tan.ret$sd)
tan.y = c(tan.ind$mu, tan.ret$mu)
 
frontier.ind = frontierPoints(returns, gmv.ind.w, tan.ind.w, cov.ind )
alpha = seq(from=-2, to=2, by=0.05)
frontier.ret = frontierPoints(returns, gmv.ret.w, tan.ret.w, var(returns), alpha)
ind.sd = as.numeric(frontier.ind$sd)
ind.mu = as.numeric(frontier.ind$mu)
ret.sd = as.numeric(frontier.ret$sd)
ret.mu = as.numeric(frontier.ret$mu)
 
# Use fPortfolio's portfolioFrontier function to calculate the long-long portfolio frontier
spec = portfolioSpec()
setNFrontierPoints(spec)=100
returns.ts = as.timeSeries( returns )
longOnly = portfolioFrontier(data=returns.ts, spec=spec)
longOnly.port = longOnly@portfolio
longOnly.mu = longOnly.port@portfolio$targetReturn[,"mu"]
longOnly.sd = longOnly.port@portfolio$targetRisk[,"Sigma"]
 
xmin = min(ind.sd, ret.sd)
xmax = max(ind.sd, ret.sd)
ymin = min(ind.mu, ret.mu)
ymax = max(ind.mu, ret.mu)
 
ix = which(longOnly.mu >= ymin)
longOnly.mu.f = longOnly.mu[ix]
longOnly.sd.f = longOnly.sd[ix]
sharpe = longOnly.mu.f / longOnly.sd.f
longOnly.s = which.max(sharpe)
 
par(mfrow=c(1,1))
plot(x=ret.sd, y=ret.mu, 
     xlim=c(xmin, xmax), ylim=c(ymin, ymax),
     type="l", col="black", ylab="Monthly Portfolio Return", 
     xlab=expression(paste("monthly ", sigma)), lwd=2 )
lines(x = ind.sd, y=ind.mu, type="l", col="red", lwd=2)
lines(x = longOnly.sd.f, y = longOnly.mu.f, col="blue", lwd=2)
points(x = gmv.x, y = gmv.y, pch=15, col="magenta")
points(x = tan.x, y = tan.y, pch=15, col="green")
points(y = longOnly.mu.f[longOnly.s], x = longOnly.sd.f[longOnly.s], pch=15, col="red")
grid(col="grey")
legend(x="topleft", legend=c("MV portfolio", "BARRA Industry Factor", "Long Only"), col=c("black", "red", "blue"), lwd=4)
par(mfrow=c(1,1))