# # Process the S&P Index list (which includes the S&P 500 and other S&P indices) to build a file # of S&P Index stocks for a particular set of quarters. # # Processing the Compustat S&P 500 data involves finding the shares that make up the index # for a particular year. To do this the Compustat North America Index Constituents file is read # from the starting data. Each year is built up of stocks that have no end in the index and stocks # whose end date is beyond that year. # # This result is stored in a file. # # In order to support reading the data from the Compustat corporate factors, the GEVKEY Compustat # key code is output. # # Under the conm column: # # S&P 500 Comp-Ltd # S&P 500/Barra Growth Index # S&P 500/Barra Value Index # S&P 500 Growth # S&P 500 Value # # There also appears to be sections and groups. For example: # # [1] "S&P 500 Comp-Ltd" "S&P 500/Barra Growth Index" # [3] "S&P 500/Barra Value Index" "SP500 Energy .S" # [5] "SP500 Materials .S" "SP500 Industrials .S" # [7] "SP500 Consumr Discretion .S" "SP500 Consumer Staples .S" # [9] "SP500 Health Care .S" "SP500 Financials .S" # [11] "SP500 Information Tech .S" "SP500 Telecom Services .S" # [13] "SP500 Utilities .S" "SP500 Energy .G" # [15] "SP500 Materials .G" "SP500 Capital Goods .G" # [17] "SP500 CMMRCL&PRFSSNL SVC.G" "SP500 Transportation .G" # [19] "SP500 Auto & Components .G" "SP500 Cnsmr Durbl&Apprel .G # library(tseries) library(zoo) library(gridExtra) printTable = function( tab ) { grid.newpage() par(mfrow=c(1,1)) grid.table(round(tab, 6), gpar.rowtext=gpar(fontsize = 16), show.box=T, show.hlines=T, show.vlines=T, separator = "black") par(mfrow=c(1,1)) } pathRoot = "../data" # # INPUT Files # # This is the file of compustat data indexInfo = "s_and_p_500.csv" # # OUTPUT Files # # All Filtered S&P 500 values, by year allSP500 = "sp500ByMonths_all.csv" # S&P 500 monthly close prices monthly_close = "sp500_close_mon.csv" path = paste(pathRoot, indexInfo, sep="/") allSP500Path = paste(pathRoot, allSP500, sep="/") closePath = paste(pathRoot, monthly_close, sep="/") TICKER = "co_tic" FROM = "from" TO = "thru" NAME = "co_conm" KEY = "gvkey" CUSIP = "co_cusip" startDate = as.Date("1998-03-31") endDate = as.Date("2013-09-30") quarters = as.Date(timeSequence(from=startDate, to=endDate, by="quarter")) # Quarters should be year-03-31, year-06-30, year-09-30, year-12-31 # We have: year-03-30, year-06-30, year-09-30, year-12-30 # So the May and December quarterly dates need to be fixed qtrs = format(quarters, format="%m-%d") years = format(quarters, format="%Y") qtrs[qtrs == "03-30"] = "03-31" qtrs[qtrs == "12-30"] = "12-31" quarters = as.Date(paste(years, qtrs, sep="-")) sp500 = read.csv(file = path) closePrices = read.csv(file=closePath) closeDates = sort(as.Date(unique(strptime(as.vector(closePrices[,"date"]), format="%Y-%m-%d", tz="EST")))) bounds = which(closeDates %in% c(startDate, endDate)) months = closeDates[bounds[1]:bounds[2]] fromDates = as.Date(strptime(as.vector(sp500[,FROM]), format="%m/%d/%Y", tz="EST")) sp500Mon = data.frame() for (i in 1:length(months)) { mon = months[i] startBlock = sp500[fromDates <= mon,] startToDates = as.Date(strptime(as.vector(startBlock[,TO]), format="%m/%d/%Y", tz="EST")) startEndIx = sort(c(which( is.na(startToDates) ), which(startToDates >= mon))) monSAndP = startBlock[startEndIx, c(KEY, CUSIP, NAME, TICKER)] monVec = rep(mon, times = nrow(monSAndP)) monSAndP = data.frame(monVec, monSAndP) colnames(monSAndP) = c("month", KEY, CUSIP, NAME, TICKER) sp500Mon = rbind(sp500Mon, monSAndP) } write.csv(sp500Mon, file=allSP500Path, row.names=F)