#
# Read the "fixed" CRSP/Compustat data and synthesize factor values. Some of these values
# are inspired by Chapter 5 of Quantitative Equity Portfolio Management by Qian, Hua and
# Sorensen. A set of ratio factors is from "Investing with Momentum: The Past, Present, and Future"
# by John B. Guerard, Jr., Ganlin Xu and Mustafa Gultekin.
#
# For a discussion on the CRSP/Compustat data and factor calculation see
# http://www.bearcave.com/finance/wrds_data.html#qian_factor_calc
#
library(tseries)
library(timeDate)
library(zoo)
NUM_SHARES = "CSHOQ" # Common shares outstanding
SHARE_PRICE = "PRCCQ" # Quarterly close price
CASH = "CHEQ" # Cash and Short-Term Investments
TICKER = "tic"
FQTR = "fqtr"
MILLIONS =1000000
DATE = "datadate"
#
# vec : a vector of values
# stockFact.df : a compustat data frame that includes a date column
# return: vec as a zoo object with dates from stockFact.df as the zoo index.
#
addDate = function(vec, stockFact.df)
{
dates = as.Date(strptime(as.vector(stockFact.df[,DATE]), format="%Y-%m-%d", tz="EST"))
vec.z = as.zoo(vec)
index(vec.z) = dates
return(vec.z)
}
#
# Calculate the "market valule" which is the number of shares time the share price.
#
marketValue = function(stockFact.df)
{
numShares = stockFact.df[,NUM_SHARES] * MILLIONS
sharePrice = stockFact.df[,SHARE_PRICE]
mv = numShares * sharePrice
mv.z = addDate(mv, stockFact.df)
return(mv.z)
}
#
# Calculate an approximate tax rate for a company.
#
# Tax rate = income taxes / pre-tax income
#
taxRate = function(stockFact.df)
{
txtq = stockFact.df[,"TXTQ"]
piq = stockFact.df[,"PIQ"]
trate = txtq / piq
# get rid of NA values
trate[is.na(trate)] = 0
# There should not be a tax rate over 0.4 or less than zero
trate[ trate < 0] = 0
# The maximum US tax rate is 0.4 according to KPMG
trate[ trate > 0.4] = 0.4
trate.z = addDate(trate, stockFact.df)
return( trate.z )
}
#
# Calculate the Enterprise Value (EV). The EV is the cost of purchasing the company
# which consists of the outstanding shares, plus the debt, plus preferred stock minus
# cash. The EV is returned as a zoo object.
#
# Like everything else on a corporate balance sheet, enterprise value can be complicated.
# The reason that cash is subtracted from the value is based on the idea that a
# purchaser could use the case to offset the purchase. However, this is generally not true
# for financial companies which are reqired too keep large cash reserves. Usually regulators
# do not allow these reserves to be used by those those buying a company Mitt Romney style.
# So in these cases, the enterprise value is equal to the non-cash factors.
#
# stockFact.df - a data frame of Compustat balance sheet factors
# return: a zoo object consisting of the enterprise value with a quarterly date
# index.
#
enterpriseValue = function(stockFact.df, ticker, MV)
{
gicBANK = 4010
gicFINANCE = 4020 # diversified financial
gicINSURE = 4030 # insurance
fin = c(gicBANK, gicFINANCE, gicINSURE)
debt_cl = stockFact.df[, "DLCQ"] * MILLIONS
lgterm_debt = stockFact.df[,"DLTTQ"] * MILLIONS
prf_stk = stockFact.df[, "PSTKQ"] * MILLIONS
cash = stockFact.df[,CASH] * MILLIONS
gicGroup = as.vector(stockFact.df[,"GGROUP"])
EV = ifelse (gicGroup %in% fin, (MV + debt_cl + lgterm_debt + prf_stk),
((MV + debt_cl + lgterm_debt + prf_stk) - cash))
EV.z = addDate(EV, stockFact.df)
return(EV.z)
}
#
# Calculate a quarterly value from the year-to-date value.
#
# Note that the first quarter is not differenced. So the difference
# starts over, with every year period. An example of the data is
# shown below.
#
# [1,] 1 41.668
# [2,] 2 77.207
# [3,] 3 144.263
# [4,] 4 227.231
# [5,] 1 62.257
# ...
# [26,] 2 277.510
# [27,] 3 464.412
# [28,] 4 785.428
# [29,] 1 170.983
# [30,] 2 392.292
#
# The code below does the yearly difference in parallel.
#
year2DateQ = function(stockFact.df, factor)
{
fqtr = stockFact.df[,FQTR]
cf = stockFact.df[,factor]
ix1 = which(fqtr == 1)
ix2 = which(fqtr == 2)
ix3 = which(fqtr == 3)
ix4 = which(fqtr == 4)
delta = cf
delta[ix2] = cf[ix2] - cf[ix1[1:length(ix2)]]
delta[ix3] = cf[ix3] - cf[ix2[1:length(ix3)]]
delta[ix4] = cf[ix4] - cf[ix3[1:length(ix4)]]
qv.z = addDate(delta, stockFact.df)
return(qv.z)
}
cashFlow2EV = function(stockFact.df, EV) {
cfoQ = year2DateQ(stockFact.df, "OANCFY") * MILLIONS
tax_rate = taxRate( stockFact.df )
interest = stockFact.df[,"XINTQ"] * MILLIONS
cfo2ev = (cfoQ + (interest * (1-tax_rate))) / EV
return(cfo2ev)
}
returnOnNetAss = function( stockFact.df )
{
commonEquity = stockFact.df[,"CEQQ"] * MILLIONS
curDebt = stockFact.df[,"DLCQ"] * MILLIONS
ltDebt = stockFact.df[,"DLTTQ"] * MILLIONS
prefStock = stockFact.df[,"PSTKQ"] * MILLIONS
cash = stockFact.df[,"CHEQ"] * MILLIONS
assets = (commonEquity + curDebt + ltDebt + prefStock) - cash
income = stockFact.df[,"IBQ"] * MILLIONS
tax_rate = taxRate(stockFact.df)
interest = stockFact.df[,"XINTQ"] * MILLIONS
rona = ifelse((sapply(assets, FUN=function(val) { identical(val,0)}) == "TRUE"),
0.0,
(income + (interest * (1-tax_rate))) / assets)
rona.z = addDate(rona, stockFact.df)
return(rona.z)
}
ebitda2EV = function(stockFact.df, EV)
{
salesQ = stockFact.df[, "SALEQ"] * MILLIONS
costOfGoods = stockFact.df[, "COGSQ"] * MILLIONS
genExp = stockFact.df[,"XSGAQ"] * MILLIONS
ebitda = salesQ - costOfGoods - genExp
ebitda.z = addDate(ebitda, stockFact.df)
ebitda2ev.z = ebitda.z / EV
return( ebitda2ev.z )
}
#
# Calculate the Qian et al factor BB2EV
#
# This factor is entirely composed of year-to-date values, which are converted to
# quarterly factors
#
externalFin2EV = function(stockFact.df, EV)
{
# Purchase of Common and Preferred Stock
prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS
# Sale of Common and Preferred Stock
sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS
# Long-Term Debt - Reduction
dltrq = year2DateQ(stockFact.df, "DLTRY") * MILLIONS
# Changes in Current Debt
dlcchq = year2DateQ(stockFact.df, "DLCCHY") * MILLIONS
# Long-Term Debt - Issuance
dltisq = year2DateQ(stockFact.df, "DLTISY") * MILLIONS
# Cash Dividend
divq = year2DateQ(stockFact.df, "DVY") * MILLIONS
# equity repurchase
eq_repo = prstkcq - sstkq
# debt repurchase
debt_repo = dltrq - dlcchq - dltisq
bb = ((divq + eq_repo) - debt_repo)
bb.z = addDate(bb, stockFact.df)
bb2ev = bb.z / EV
return(bb2ev)
}
netBuyback = function( stockFact.df, MV)
{
# Cash Dividend
divq = year2DateQ(stockFact.df, "DVY") * MILLIONS
# Purchase of Common and Preferred Stock
prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS
# Sale of Common and Preferred Stock
sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS
bb = (divq + prstkcq) - sstkq
bb.z = addDate(bb, stockFact.df)
BB2P = bb.z/MV
return( BB2P )
}
stockFactors = function(factors.df, ticker)
{
ix = which(factors.df[,TICKER] == ticker)
stockFact.df = factors.df[ix,]
return(stockFact.df)
}
#
# Build the factors from Chapter 5 of Quantitative Equity Portfolio Management by
# Qian, Hua and Sorensen.
#
buildSynthFactors = function(factors.df, ticker)
{
stockFact.df = stockFactors(factors.df, ticker)
MV = marketValue(stockFact.df)
EV.z = enterpriseValue(stockFact.df, ticker, MV)
CFO2EV = cashFlow2EV(stockFact.df, EV.z)
RONA = returnOnNetAss( stockFact.df )
EBITDA2EV = ebitda2EV( stockFact.df, EV.z)
income = stockFact.df[,"IBCOMQ"] * MILLIONS
E2PFY0 = income / MV
BB2P = netBuyback( stockFact.df, MV)
BB2EV = externalFin2EV(stockFact.df, EV.z)
B2P = (stockFact.df[,"CEQQ"] * MILLIONS) / EV.z
S2EV = (stockFact.df[,"SALEQ"] * MILLIONS) / EV.z
date = stockFact.df[,"datadate"]
sym = stockFact.df[,"tic"]
ggroup = stockFact.df[,"GGROUP"]
gsector = stockFact.df[,"GSECTOR"]
synthFactors.df = data.frame(date, sym, CFO2EV, RONA, EBITDA2EV, E2PFY0, BB2P, BB2EV, B2P, S2EV, ggroup, gsector)
return(synthFactors.df)
} # buildsynthFactors
dataDir = "../data"
# Input files
compustatFactors = "sp500_compustat_fixed.csv"
# Output files
FactorsFile = "synth_factors.csv"
factorPath = paste(dataDir, compustatFactors, sep="/")
synthFactorsPath = paste(dataDir, FactorsFile, sep="/")
factors.df = read.csv(file=factorPath)
factors.df[is.na(factors.df)] = 0
stocks = sort(unique(as.vector(factors.df[, TICKER])))
synthFactors.df = data.frame()
for (sym in stocks) {
t = buildSynthFactors(factors.df, sym)
synthFactors.df = rbind(synthFactors.df, t)
}
synthFactors.df[is.na(synthFactors.df)] = 0
write.csv(x = synthFactors.df, file=synthFactorsPath, quote=F, row.names=F)