# # # This is a monthly version of factor_calc.r. The code here calculates factors # on a montlhy basis. The actual corporate data is only available quarterly. # However, the market value (MV) and the enterprice value (EV), which are # calculated from the close price changes constantly. In this code, he MV and EV # are calculated monthly and these values are used to calculate the synthesized # factors like EBITDA2EV. Factors that don't have a stock price component are # omitted. # # From factor_calc.r (the quarterly version) # # Read the "fixed" CRSP/Compustat data and synthesize factor values. Some of these values # are inspired by Chapter 5 of Quantitative Equity Portfolio Management by Qian, Hua and # Sorensen. A set of ratio factors is from "Investing with Momentum: The Past, Present, and Future" # by John B. Guerard, Jr., Ganlin Xu and Mustafa Gultekin. # # For a discussion on the CRSP/Compustat data and factor calculation see # http://www.bearcave.com/finance/wrds_data.html#qian_factor_calc # library(tseries) library(timeDate) library(zoo) NUM_SHARES = "CSHOQ" # Common shares outstanding SHARE_PRICE = "PRCCQ" # Quarterly close price CASH = "CHEQ" # Cash and Short-Term Investments TICKER = "tic" FQTR = "fqtr" MILLIONS =1000000 DATE = "datadate" # # vec : a vector of values # stockFact.df : a compustat data frame that includes a date column # return: vec as a zoo object with dates from stockFact.df as the zoo index. # addDate = function(vec, stockFact.df) { dates = stockFact.df[,DATE] if (class(dates) != "Date") { dates = as.Date(strptime(as.vector(dates, format="%Y-%m-%d", tz="EST"))) } vec.z = as.zoo(vec) index(vec.z) = dates return(vec.z) } # # Calculate the "market valule" which is the number of shares time the share price. # marketValue = function(stockFact.df) { numShares = stockFact.df[,NUM_SHARES] * MILLIONS sharePrice = stockFact.df[,SHARE_PRICE] mv = numShares * sharePrice mv.z = addDate(mv, stockFact.df) return(mv.z) } # # Calculate an approximate tax rate for a company. # # Tax rate = income taxes / pre-tax income # taxRate = function(stockFact.df) { txtq = stockFact.df[,"TXTQ"] piq = stockFact.df[,"PIQ"] trate = txtq / piq # get rid of NA values trate[is.na(trate)] = 0 # There should not be a tax rate over 0.4 or less than zero trate[ trate < 0] = 0 # The maximum US tax rate is 0.4 according to KPMG trate[ trate > 0.4] = 0.4 trate.z = addDate(trate, stockFact.df) return( trate.z ) } # # Calculate the Enterprise Value (EV). The EV is the cost of purchasing the company # which consists of the outstanding shares, plus the debt, plus preferred stock minus # cash. The EV is returned as a zoo object. # # Like everything else on a corporate balance sheet, enterprise value can be complicated. # The reason that cash is subtracted from the value is based on the idea that a # purchaser could use the case to offset the purchase. However, this is generally not true # for financial companies which are reqired too keep large cash reserves. Usually regulators # do not allow these reserves to be used by those those buying a company Mitt Romney style. # So in these cases, the enterprise value is equal to the non-cash factors. # # stockFact.df - a data frame of Compustat balance sheet factors # return: a zoo object consisting of the enterprise value with a quarterly date # index. # enterpriseValue = function(stockFact.df, ticker, MV) { gicBANK = 4010 gicFINANCE = 4020 # diversified financial gicINSURE = 4030 # insurance fin = c(gicBANK, gicFINANCE, gicINSURE) debt_cl = stockFact.df[, "DLCQ"] * MILLIONS lgterm_debt = stockFact.df[,"DLTTQ"] * MILLIONS prf_stk = stockFact.df[, "PSTKQ"] * MILLIONS cash = stockFact.df[,CASH] * MILLIONS gicGroup = as.vector(stockFact.df[,"GGROUP"]) EV = ifelse (gicGroup %in% fin, (MV + debt_cl + lgterm_debt + prf_stk), ((MV + debt_cl + lgterm_debt + prf_stk) - cash)) EV.z = addDate(EV, stockFact.df) return(EV.z) } # # Calculate a quarterly value from the year-to-date value. # # Note that the first quarter is not differenced. So the difference # starts over, with every year period. An example of the data is # shown below. # # [1,] 1 41.668 # [2,] 2 77.207 # [3,] 3 144.263 # [4,] 4 227.231 # [5,] 1 62.257 # ... # [26,] 2 277.510 # [27,] 3 464.412 # [28,] 4 785.428 # [29,] 1 170.983 # [30,] 2 392.292 # # The code below does the yearly difference in parallel. # year2DateQ = function(stockFact.df, factor) { fqtr = stockFact.df[,FQTR] cf = stockFact.df[,factor] ix1 = which(fqtr == 1) ix2 = which(fqtr == 2) ix3 = which(fqtr == 3) ix4 = which(fqtr == 4) delta = cf delta[ix2] = cf[ix2] - cf[ix1[1:length(ix2)]] delta[ix3] = cf[ix3] - cf[ix2[1:length(ix3)]] delta[ix4] = cf[ix4] - cf[ix3[1:length(ix4)]] qv.z = addDate(delta, stockFact.df) return(qv.z) } cashFlow2EV = function(stockFact.df, EV) { cfoQ = year2DateQ(stockFact.df, "OANCFY") * MILLIONS tax_rate = taxRate( stockFact.df ) interest = stockFact.df[,"XINTQ"] * MILLIONS cfo2ev = (cfoQ + (interest * (1-tax_rate))) / EV return(cfo2ev) } returnOnNetAss = function( stockFact.df ) { commonEquity = stockFact.df[,"CEQQ"] * MILLIONS curDebt = stockFact.df[,"DLCQ"] * MILLIONS ltDebt = stockFact.df[,"DLTTQ"] * MILLIONS prefStock = stockFact.df[,"PSTKQ"] * MILLIONS cash = stockFact.df[,"CHEQ"] * MILLIONS assets = (commonEquity + curDebt + ltDebt + prefStock) - cash income = stockFact.df[,"IBQ"] * MILLIONS tax_rate = taxRate(stockFact.df) interest = stockFact.df[,"XINTQ"] * MILLIONS rona = ifelse((sapply(assets, FUN=function(val) { identical(val,0)}) == "TRUE"), 0.0, (income + (interest * (1-tax_rate))) / assets) rona.z = addDate(rona, stockFact.df) return(rona.z) } ebitda2EV = function(stockFact.df, EV) { salesQ = stockFact.df[, "SALEQ"] * MILLIONS costOfGoods = stockFact.df[, "COGSQ"] * MILLIONS genExp = stockFact.df[,"XSGAQ"] * MILLIONS ebitda = salesQ - costOfGoods - genExp ebitda.z = addDate(ebitda, stockFact.df) ebitda2ev.z = ebitda.z / EV return( ebitda2ev.z ) } # # Calculate the Qian et al factor BB2EV # # This factor is entirely composed of year-to-date values, which are converted to # quarterly factors # externalFin2EV = function(stockFact.df, EV) { # Purchase of Common and Preferred Stock prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS # Sale of Common and Preferred Stock sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS # Long-Term Debt - Reduction dltrq = year2DateQ(stockFact.df, "DLTRY") * MILLIONS # Changes in Current Debt dlcchq = year2DateQ(stockFact.df, "DLCCHY") * MILLIONS # Long-Term Debt - Issuance dltisq = year2DateQ(stockFact.df, "DLTISY") * MILLIONS # Cash Dividend divq = year2DateQ(stockFact.df, "DVY") * MILLIONS # equity repurchase eq_repo = prstkcq - sstkq # debt repurchase debt_repo = dltrq - dlcchq - dltisq bb = ((divq + eq_repo) - debt_repo) bb.z = addDate(bb, stockFact.df) bb2ev = bb.z / EV return(bb2ev) } netBuyback = function( stockFact.df, MV) { # Cash Dividend divq = year2DateQ(stockFact.df, "DVY") * MILLIONS # Purchase of Common and Preferred Stock prstkcq = year2DateQ(stockFact.df, "PRSTKCY") * MILLIONS # Sale of Common and Preferred Stock sstkq = year2DateQ(stockFact.df, "SSTKY") * MILLIONS bb = (divq + prstkcq) - sstkq bb.z = addDate(bb, stockFact.df) BB2P = bb.z/MV return( BB2P ) } stockFactors = function(factors.df, ticker) { ix = which(factors.df[,TICKER] == ticker) stockFact.df = factors.df[ix,] return(stockFact.df) } # # Build the factors from Chapter 5 of Quantitative Equity Portfolio Management by # Qian, Hua and Sorensen. # buildSynthFactors = function(factors.df, ticker) { stockFact.df = stockFactors(factors.df, ticker) MV = marketValue(stockFact.df) EV.z = enterpriseValue(stockFact.df, ticker, MV) CFO2EV = cashFlow2EV(stockFact.df, EV.z) EBITDA2EV = ebitda2EV( stockFact.df, EV.z) income = stockFact.df[,"IBCOMQ"] * MILLIONS E2PFY0 = income / MV BB2P = netBuyback( stockFact.df, MV) BB2EV = externalFin2EV(stockFact.df, EV.z) B2P = (stockFact.df[,"CEQQ"] * MILLIONS) / EV.z date = stockFact.df[,DATE] sym = stockFact.df[,TICKER] ggroup = stockFact.df[,"GGROUP"] gsector = stockFact.df[,"GSECTOR"] cusip = stockFact.df[,"cusip"] close = stockFact.df[,SHARE_PRICE] synthFactors.df = data.frame(date, sym, cusip, CFO2EV, EBITDA2EV, E2PFY0, BB2P, BB2EV, B2P, close, ggroup, gsector) return(synthFactors.df) } # buildsynthFactors # # Fix the quarterly dates so that they are all on the same boundary # fixQuarterDates = function( dates ) { months = as.numeric(format(dates, format="%m")) ix = which(months %in% c(3, 6, 9, 12)) cols = c("TICKER", "COMNAM", "CUSIP", "PRC") qtr = dates[ix] q1Ix = which(as.numeric(format(qtr, format="%m")) == 3) q1Yr = format(qtr[q1Ix], format="%Y") qtr[q1Ix] = as.Date(paste(q1Yr, "03-31", sep="-")) q2Ix = which(as.numeric(format(qtr, format="%m")) == 6) q2Yr = format(qtr[q2Ix], format="%Y") qtr[q2Ix] = as.Date(paste(q2Yr, "06-30", sep="-")) q3Ix = which(as.numeric(format(qtr, format="%m")) == 9) q3Yr = format(qtr[q3Ix], format="%Y") qtr[q3Ix] = as.Date(paste(q3Yr, "09-30", sep="-")) q4Ix = which(as.numeric(format(qtr, format="%m")) == 12) q4Yr = format(qtr[q4Ix], format="%Y") qtr[q4Ix] = as.Date(paste(q4Yr, "12-31", sep="-")) dates[ix] = qtr return(dates) } # # Expand the quarterly factor data into monthly data by replicating the factor rows and # adding the monthly dates and close prices. # expandFactors = function(factors.df, monthClose.df, cusips) { expFactors.df = data.frame() for (sip in cusips) { factorBlock.df = factors.df[factors.df[,"cusip"] == sip,] closeBlock.df = monthClose.df[monthClose.df[,"cusip"] == sip,] # There's a nasty case where there is a break in the factor data, which is the case # for RRI which becomes GEN (this stock is identified by it's CUSIP) factorDate = as.Date(as.vector(factorBlock.df[,DATE])) deltaDays = diff(factorDate) # If there's more than two time breaks, we skip the stock skip = FALSE border = which(deltaDays > 92) if (length(border) > 0) { if (length(border) == 2) { start = border[1] + 1 end = border[2] - 1 } else if (length(border) == 1) { # find the bigger half and use that. border = border[1] len = nrow(factorBlock.df) half = len/2 start = border+1 end = len if (border > half) { start = 1 end = border } } else { skip = TRUE } if (! skip) { factorBlock.df = factorBlock.df[start:end,] factorDate = as.Date(as.vector(factorBlock.df[,DATE])) } } if (! skip) { closeDate = as.Date(as.vector(closeBlock.df[,"date"])) minFacDate = min(factorDate) maxFacDate = max(factorDate) closeIx = which(closeDate %in% c(minFacDate, maxFacDate)) if ((length(closeIx) == 2) && (diff(closeIx) > 4) ) { closeBlock.df = closeBlock.df[closeIx[1]:closeIx[2],] range = seq(from=1, to=nrow(factorBlock.df)) expRange = rep(range, rep(3, length(range))) # remove the last two indexes expBlock.df = factorBlock.df[expRange,] end = min(nrow(expBlock.df), nrow(closeBlock.df)) expBlock.df = expBlock.df[1:end,] if (nrow(expBlock.df) != nrow(closeBlock.df)) { sym = unique(as.vector(factorBlock.df[,TICKER])) s = sprintf("%s: nrow(expBlock.df) = %d, nrow(closeBlock.df) = %d", sym, nrow(expBlock.df), nrow(closeBlock.df)) print(s) browser() # debug break point } expBlock.df[,DATE] = as.Date(as.vector(closeBlock.df[,"date"])) expBlock.df[,SHARE_PRICE] = as.vector(closeBlock.df[,"close"]) expFactors.df = rbind(expFactors.df, expBlock.df) } } } return(expFactors.df) } dataDir = "../data" # Input files # Corrected CRSP/Compustat data compustatFactors = "sp500_compustat_fixed.csv" # Monthly close price data (for calculating the market value) monthly_close = "sp500_close_mon.csv" # Output files FactorsFile = "monthly_synth_factors.csv" factorPath = paste(dataDir, compustatFactors, sep="/") closePath = paste(dataDir, monthly_close, sep="/") synthFactorsPath = paste(dataDir, FactorsFile, sep="/") factors.df = read.csv(file=factorPath) factors.df[is.na(factors.df)] = 0 facCUSIP = as.vector(factors.df[,"cusip"]) facCUSIPTrim = strtrim(facCUSIP, (nchar(facCUSIP)-1)) factors.df[,"cusip"] = facCUSIPTrim monthCloseRaw.df = read.csv(file=closePath) sprtrnCol = which(colnames(monthCloseRaw.df) == "sprtrn") if (length(sprtrnCol)) { monthCloseRaw.df = monthCloseRaw.df[,-sprtrnCol] } monthCUSIP = unique(as.vector(monthCloseRaw.df[,"CUSIP"])) # # Using the monthly data I am only trying to investigate the predictive power of the value factors, not # necessarily build portfolios. Here I weed out any stocks that don't have monthly close prices. This ends up # removing about 25% of the stocks. allCUSIP = sort(unique(as.vector(factors.df[, "cusip"]))) cusips = allCUSIP[ allCUSIP %in% monthCUSIP] monthCloseFilt.df = monthCloseRaw.df[as.vector(monthCloseRaw.df[,"CUSIP"]) %in% cusips, ] monthDates = as.Date(strptime(as.vector(as.character(monthCloseFilt.df[,"date"])), format="%Y-%m-%d", tz="EST")) ticker = as.vector(monthCloseFilt.df[,"TICKER"]) close = as.vector(monthCloseFilt.df[,"PRC"]) monthClose.df = data.frame(monthDates, ticker, as.vector(monthCloseFilt.df[,"CUSIP"]), close) colnames(monthClose.df) = c("date", "sym", "cusip", "close") dataRoot = "../r_data" expFactorFile = "expandedFactors.RData" expFactorPath = paste(dataRoot, expFactorFile, sep="/") if (file.exists(expFactorPath)) { load(expFactorPath) } else { expFactors.df = expandFactors(factors.df, monthClose.df, cusips) save(expFactors.df, file=expFactorPath) } # expFactors.df = expandFactors(tFac.df, tMon.df, tcusips) stocks = unique(as.vector(expFactors.df[,TICKER])) synthFactors.df = data.frame() for (sym in stocks) { t = buildSynthFactors(expFactors.df, sym) synthFactors.df = rbind(synthFactors.df, t) } synthFactors.df[is.na(synthFactors.df)] = 0 write.csv(x = synthFactors.df, file=synthFactorsPath, quote=F, row.names=F)