# # Read the "fixed" CRSP/Compustat data and synthesize factor values. Some of these values # are inspired by Chapter 5 of Quantitative Equity Portfolio Management by Qian, Hua and # Sorensen. A set of ratio factors is from "Investing with Momentum: The Past, Present, and Future" # by John B. Guerard, Jr., Ganlin Xu and Mustafa Gultekin. # # For a discussion on the CRSP/Compustat data and factor calculation see # http://www.bearcave.com/finance/wrds_data.html#qian_factor_calc # library(tseries) library(timeDate) library(zoo) NUM_SHARES = "CSHOQ" # Common shares outstanding SHARE_PRICE = "PRCCQ" # Quarterly close price CASH = "CHEQ" # Cash and Short-Term Investments TICKER = "tic" FQTR = "fqtr" MILLIONS =1000000 DATE = "datadate" # # vec : a vector of values # stockFact.df : a compustat data frame that includes a date column # return: vec as a zoo object with dates from stockFact.df as the zoo index. # addDate = function(vec, stockFact.df) { dates = as.Date(strptime(as.vector(stockFact.df[,DATE]), format="%Y-%m-%d", tz="EST")) vec.z = as.zoo(vec) index(vec.z) = dates return(vec.z) } # # Calculate the "market valule" which is the number of shares time the share price. # marketValue = function(stockFact.df) { numShares = stockFact.df[,NUM_SHARES] * MILLIONS sharePrice = stockFact.df[,SHARE_PRICE] mv = numShares * sharePrice mv.z = addDate(mv, stockFact.df) return(mv.z) } # # Calculate an approximate tax rate for a company. # # Tax rate = income taxes / pre-tax income # taxRate = function(stockFact.df) { txtq = stockFact.df[,"TXTQ"] piq = stockFact.df[,"PIQ"] trate = txtq / piq # get rid of NA values trate[is.na(trate)] = 0 # There should not be a tax rate over 0.4 or less than zero trate[ trate < 0] = 0 # The maximum US tax rate is 0.4 according to KPMG trate[ trate > 0.4] = 0.4 trate.z = addDate(trate, stockFact.df) return( trate.z ) } # # Calculate the Enterprise Value (EV). The EV is the cost of purchasing the company # which consists of the outstanding shares, plus the debt, plus preferred stock minus # cash. The EV is returned as a zoo object. # # Like everything else on a corporate balance sheet, enterprise value can be complicated. # The reason that cash is subtracted from the value is based on the idea that a # purchaser could use the case to offset the purchase. However, this is generally not true # for financial companies which are reqired too keep large cash reserves. Usually regulators # do not allow these reserves to be used by those those buying a company Mitt Romney style. # So in these cases, the enterprise value is equal to the non-cash factors. # # stockFact.df - a data frame of Compustat balance sheet factors # return: a zoo object consisting of the enterprise value with a quarterly date # index. # enterpriseValue = function(stockFact.df, ticker, MV) { gicBANK = 4010 gicFINANCE = 4020 # diversified financial gicINSURE = 4030 # insurance fin = c(gicBANK, gicFINANCE, gicINSURE) debt_cl = stockFact.df[, "DLCQ"] * MILLIONS lgterm_debt = stockFact.df[,"DLTTQ"] * MILLIONS prf_stk = stockFact.df[, "PSTKQ"] * MILLIONS cash = stockFact.df[,CASH] * MILLIONS gicGroup = as.vector(stockFact.df[,"GGROUP"]) EV = ifelse (gicGroup %in% fin, (MV + debt_cl + lgterm_debt + prf_stk), ((MV + debt_cl + lgterm_debt + prf_stk) - cash)) EV.z = addDate(EV, stockFact.df) return(EV.z) } # # Calculate a quarterly value from the year-to-date value. # # Note that the first quarter is not differenced. So the difference # starts over, with every year period. An example of the data is # shown below. # # [1,] 1 41.668 # [2,] 2 77.207 # [3,] 3 144.263 # [4,] 4 227.231 # [5,] 1 62.257 # ... # [26,] 2 277.510 # [27,] 3 464.412 # [28,] 4 785.428 # [29,] 1 170.983 # [30,] 2 392.292 # # The code below does the yearly difference in parallel. # year2DateQ = function(stockFact.df, factor) { fqtr = stockFact.df[,FQTR] cf = stockFact.df[,factor] ix1 = which(fqtr == 1) ix2 = which(fqtr == 2) ix3 = which(fqtr == 3) ix4 = which(fqtr == 4) delta = cf delta[ix2] = cf[ix2] - cf[ix1[1:length(ix2)]] delta[ix3] = cf[ix3] - cf[ix2[1:length(ix3)]] delta[ix4] = cf[ix4] - cf[ix3[1:length(ix4)]] qv.z = addDate(delta, stockFact.df) return(qv.z) } cashFlow2EV = function(stockFact.df, EV) { cfoQ = year2DateQ(stockFact.df, "OANCFY") * MILLIONS tax_rate = taxRate( stockFact.df ) interest = stockFact.df[,"XINTQ"] * MILLIONS cfo2ev = (cfoQ + (interest * (1-tax_rate))) / EV return(cfo2ev) } returnOnNetAss = function( stockFact.df ) { commonEquity = stockFact.df[,"CEQQ"] * MILLIONS curDebt = stockFact.df[,"DLCQ"] * MILLIONS ltDebt = stockFact.df[,"DLTTQ"] * MILLIONS prefStock = stockFact.df[,"PSTKQ"] * MILLIONS cash = stockFact.df[,"CHEQ"] * MILLIONS assets = (commonEquity + curDebt + ltDebt + prefStock) - cash income = stockFact.df[,"IBQ"] * MILLIONS tax_rate = taxRate(stockFact.df) interest = stockFact.df[,"XINTQ"] * MILLIONS rona = ifelse((sapply(assets, FUN=function(val) { identical(val,0)}) == "TRUE"), 0.0, (income + (interest * (1-tax_rate))) / assets) rona.z = addDate(rona, stockFact.df) return(rona.z) } ebitda2EV = function(stockFact.df, EV) { salesQ = stockFact.df[, "SALEQ"] * MILLIONS costOfGoods = stockFact.df[, "COGSQ"] * MILLIONS genExp = stockFact.df[,"XSGAQ"] * MILLIONS ebitda = salesQ - costOfGoods - genExp ebitda.z = addDate(ebitda, stockFact.df) ebitda2ev.z = ebitda.z / EV return( ebitda2ev.z ) } # # Calculate the Qian et al factor BB2EV # # This factor is entirely composed of year-to-date values, which are converted to # quarterly factors # externalFin2EV = function(stockFact.df, EV) { # Purchase of Common and Preferred Stock prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS # Sale of Common and Preferred Stock sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS # Long-Term Debt - Reduction dltrq = year2DateQ(stockFact.df, "DLTRY") * MILLIONS # Changes in Current Debt dlcchq = year2DateQ(stockFact.df, "DLCCHY") * MILLIONS # Long-Term Debt - Issuance dltisq = year2DateQ(stockFact.df, "DLTISY") * MILLIONS # Cash Dividend divq = year2DateQ(stockFact.df, "DVY") * MILLIONS # equity repurchase eq_repo = prstkcq - sstkq # debt repurchase debt_repo = dltrq - dlcchq - dltisq bb = ((divq + eq_repo) - debt_repo) bb.z = addDate(bb, stockFact.df) bb2ev = bb.z / EV return(bb2ev) } netBuyback = function( stockFact.df, MV) { # Cash Dividend divq = year2DateQ(stockFact.df, "DVY") * MILLIONS # Purchase of Common and Preferred Stock prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS # Sale of Common and Preferred Stock sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS bb = (divq + prstkcq) - sstkq bb.z = addDate(bb, stockFact.df) BB2P = bb.z/MV return( BB2P ) } stockFactors = function(factors.df, ticker) { ix = which(factors.df[,TICKER] == ticker) stockFact.df = factors.df[ix,] return(stockFact.df) } # # Build the factors from Chapter 5 of Quantitative Equity Portfolio Management by # Qian, Hua and Sorensen. # buildSynthFactors = function(factors.df, ticker) { stockFact.df = stockFactors(factors.df, ticker) MV = marketValue(stockFact.df) EV.z = enterpriseValue(stockFact.df, ticker, MV) CFO2EV = cashFlow2EV(stockFact.df, EV.z) RONA = returnOnNetAss( stockFact.df ) EBITDA2EV = ebitda2EV( stockFact.df, EV.z) income = stockFact.df[,"IBCOMQ"] * MILLIONS E2PFY0 = income / MV BB2P = netBuyback( stockFact.df, MV) BB2EV = externalFin2EV(stockFact.df, EV.z) B2P = (stockFact.df[,"CEQQ"] * MILLIONS) / EV.z S2EV = (stockFact.df[,"SALEQ"] * MILLIONS) / EV.z date = stockFact.df[,"datadate"] sym = stockFact.df[,"tic"] ggroup = stockFact.df[,"GGROUP"] gsector = stockFact.df[,"GSECTOR"] synthFactors.df = data.frame(date, sym, CFO2EV, RONA, EBITDA2EV, E2PFY0, BB2P, BB2EV, B2P, S2EV, ggroup, gsector) return(synthFactors.df) } # buildsynthFactors dataDir = "../data" # Input files compustatFactors = "sp500_compustat_fixed.csv" # Output files FactorsFile = "synth_factors.csv" factorPath = paste(dataDir, compustatFactors, sep="/") synthFactorsPath = paste(dataDir, FactorsFile, sep="/") factors.df = read.csv(file=factorPath) factors.df[is.na(factors.df)] = 0 stocks = sort(unique(as.vector(factors.df[, TICKER]))) synthFactors.df = data.frame() for (sym in stocks) { t = buildSynthFactors(factors.df, sym) synthFactors.df = rbind(synthFactors.df, t) } synthFactors.df[is.na(synthFactors.df)] = 0 write.csv(x = synthFactors.df, file=synthFactorsPath, quote=F, row.names=F)