piyush sachdeva - homework 5

8/8/2019 piyush sachdeva - Homework 5

1/48

Lucas Triana

Part 1

Homework 5

# loading the data
bonddata <- read.table("http://www.stat.cmu.edu/~cschafer/MSCF/bonddata.txt",
                       sep = ",", header = TRUE)

# creating the data frame: the predictors plus the response (trade_price).
# Columns are selected by name, so bonddata does not have to be attach()ed
# and the columns are not pushed through a cbind() matrix first.
# NOTE(review): assumes every selected column is numeric in the file - confirm.
model_columns <- c("weight", "current_coupon", "time_to_maturity",
                   "reporting_delay", "trade_size", "curve_based_price",
                   "received_time_diff_last1", "trade_price_last1",
                   "trade_size_last1", "curve_based_price_last1",
                   "is_callable", "trade_type", "trade_type_last1",
                   "trade_price")
newdata <- bonddata[, model_columns]

# removing influential observation # 1457
newdata <- newdata[-1457, ]

# initial histograms of weight, time_to_maturity, trade_size, trade_size_last1
# (each comment sits on its own line so it no longer swallows the call after it)
hist(newdata$weight, main = "Weight")
hist(newdata$time_to_maturity, main = "Time to maturity")
hist(newdata$trade_size, main = "Trade size")
hist(newdata$trade_size_last1, main = "Trade size last 1")


3/48

# log transformation of the previous variables (to spread the data more evenly)
log_weight <- log(newdata$weight)
hist(log_weight, main = "log Weight")


4/48


5/48

log_trade_size_last1 <- log(newdata$trade_size_last1)
hist(log_trade_size_last1, main = "log Trade size last 1")

# transformation of reporting delay and received time diff last1 into
# categorical variables; cut() already returns a factor, so no as.factor()
# wrapper is needed.
categorical_reporting_delay <- cut(newdata$reporting_delay,
                                   c(-Inf, 2, 10, 100, Inf))
categorical_received_time_diff_last1 <- cut(newdata$received_time_diff_last1,
                                            c(-Inf, 500, 75000, 4000000, Inf))

# fitting GAM from mgcv
library(mgcv)

## Loading required package: nlme## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.


6/48

# assembling the data frame with the transformed variables.
# Untransformed columns are read from newdata explicitly, so newdata does not
# have to be attach()ed. log_time_to_maturity and log_trade_size are expected
# to have been computed earlier, in the same way as log_weight.
#
# The categorical variables are stored as factors of their integer codes
# (levels "1", "2", ...), which is what the previous cbind() + factor() route
# produced; this keeps coefficient names such as categorical_reporting_delay2.
transformeddata <- data.frame(
  log_weight = log_weight,
  current_coupon = newdata$current_coupon,
  log_time_to_maturity = log_time_to_maturity,
  categorical_reporting_delay =
    factor(as.integer(categorical_reporting_delay)),
  log_trade_size = log_trade_size,
  curve_based_price = newdata$curve_based_price,
  categorical_received_time_diff_last1 =
    factor(as.integer(categorical_received_time_diff_last1)),
  trade_price_last1 = newdata$trade_price_last1,
  log_trade_size_last1 = log_trade_size_last1,
  curve_based_price_last1 = newdata$curve_based_price_last1,
  is_callable = factor(newdata$is_callable),
  trade_type = factor(newdata$trade_type),
  trade_type_last1 = factor(newdata$trade_type_last1),
  trade_price = newdata$trade_price
)


7/48

# fits the GAM: smooth terms for the continuous predictors, parametric terms
# for the categorical ones
holdgam <- gam(
  trade_price ~ s(log_weight) + s(current_coupon) + s(log_time_to_maturity) +
    categorical_reporting_delay + s(log_trade_size) + s(curve_based_price) +
    categorical_received_time_diff_last1 + s(trade_price_last1) +
    s(log_trade_size_last1) + s(curve_based_price_last1) + is_callable +
    trade_type + trade_type_last1,
  data = transformeddata
)

summary(holdgam)

#### Family: gaussian## Link function: identity#### Formula:## trade_price ~ s(log_weight) + s(current_coupon) + s(log_time_to_maturity) +## categorical_reporting_delay + s(log_trade_size) + s(curve_based_price) +## categorical_received_time_diff_last1 + s(trade_price_last1) +## s(log_trade_size_last1) + s(curve_based_price_last1) + is_callable +## trade_type + trade_type_last1

#### Parametric coefficients:## Estimate Std. Error t value## (Intercept) 105.56558 0.22310 473.181## categorical_reporting_delay2 -0.23967 0.09514 -2.519## categorical_reporting_delay3 -0.29058 0.09462 -3.071## categorical_reporting_delay4 -0.42087 0.12867 -3.271## categorical_received_time_diff_last12 -0.33503 0.20980 -1.597## categorical_received_time_diff_last13 -0.55544 0.27039 -2.054## categorical_received_time_diff_last14 -1.47683 0.45408 -3.252## is_callable1 -0.18380 0.12913 -1.423## trade_type3 1.55252 0.09667 16.060## trade_type4 0.73781 0.09016 8.184

## trade_type_last13 -0.94159 0.09679 -9.728## trade_type_last14 -0.42791 0.09327 -4.588## Pr(>|t|)## (Intercept) < 2e-16 ***## categorical_reporting_delay2 0.01187 *## categorical_reporting_delay3 0.00217 **## categorical_reporting_delay4 0.00110 **## categorical_received_time_diff_last12 0.11048## categorical_received_time_diff_last13 0.04012 *## categorical_received_time_diff_last14 0.00117 **## is_callable1 0.15483## trade_type3 < 2e-16 ***## trade_type4 5.63e-16 ***

## trade_type_last13 < 2e-16 ***## trade_type_last14 4.83e-06 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Approximate significance of smooth terms:## edf Ref.df F p-value## s(log_weight) 1.000 1.000 4.562 0.03284 *## s(current_coupon) 2.435 3.108 2.403 0.06374 .


8/48

## s(log_time_to_maturity) 5.231 6.444 1.751 0.09992 .## s(log_trade_size) 1.681 2.115 12.755 2.28e-06 ***## s(curve_based_price) 8.510 8.936 19.838 < 2e-16 ***## s(trade_price_last1) 4.846 6.217 278.604 < 2e-16 ***## s(log_trade_size_last1) 4.744 5.758 3.180 0.00497 **## s(curve_based_price_last1) 8.948 8.994 6.757 1.47e-09 ***

## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### R-sq.(adj) = 0.985 Deviance explained = 98.5%## GCV = 2.0643 Scale est. = 2.0014 n = 1619

# Predictions for trade_price for bonds of trade type 3 and 4: difference
# between the two trade_type coefficients. They are looked up by name (these
# were positions 10 and 9 in the summary above), so the result does not
# silently change if a term is added to or removed from the model.
type3vs4pricedif <- coef(holdgam)["trade_type4"] - coef(holdgam)["trade_type3"]

# plot of fitted relationships between continuous predictors and the response
plot(holdgam, pages = 4, scale = 0, scheme = 1)


9/48


10/48

# residuals vs fitted values
# close the multi-page device used for the smooth-term plots
dev.off()


11/48

## null device## 1

plot(as.numeric(holdgam$fitted.values), as.numeric(holdgam$residuals),
     pch = 16, xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted values vs. Residuals", cex.axis = 1.3, cex.lab = 1.3)

# dashed reference line at zero residual
abline(h = 0, lwd = 2, col = 4, lty = 2)

print("Analyzing the fitted values vs. the residuals one can argue that there is no strong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero.")

## [1] "Analyzing the fitted values vs. the residuals one can argue that there is no strong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero."

# Actual response vs fitted values
plot(as.numeric(holdgam$fitted.values), as.numeric(transformeddata$trade_price),
     pch = 16, xlab = "Fitted Values", ylab = "response",
     main = "Fitted values vs. response", cex.axis = 1.3, cex.lab = 1.3)

# line of perfect agreement (response == fitted value)
abline(a = 0, b = 1, lwd = 2, col = 4, lty = 1)

print("Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response.")

## [1] "Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response."

# qq plot of the GAM residuals
qqnorm(as.numeric(holdgam$residuals), cex.axis = 1.3, cex.lab = 1.3, pch = 16,
       main = "QQ PLot")
qqline(as.numeric(holdgam$residuals))
print("There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable.")

## [1] "There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable."

# fitting the linear model: same predictors as the GAM but with no smooth
# terms. It is fitted through gam() so that the object has the same structure
# as holdgam for the comparison below.
holdlinear <- gam(
  trade_price ~ log_weight + current_coupon + log_time_to_maturity +
    categorical_reporting_delay + log_trade_size + curve_based_price +
    categorical_received_time_diff_last1 + trade_price_last1 +
    log_trade_size_last1 + curve_based_price_last1 + is_callable +
    trade_type + trade_type_last1,
  data = transformeddata
)

summary(holdlinear)

#### Family: gaussian## Link function: identity#### Formula:## trade_price ~ log_weight + current_coupon + log_time_to_maturity +


12/48

## categorical_reporting_delay + log_trade_size + curve_based_price +## categorical_received_time_diff_last1 + trade_price_last1 +## log_trade_size_last1 + curve_based_price_last1 + is_callable +## trade_type + trade_type_last1#### Parametric coefficients:

## Estimate Std. Error t value Pr(>|t|)## (Intercept) 1.24374 0.50380 2.469 0.013664## log_weight 0.12614 0.04974 2.536 0.011304## current_coupon 0.03085 0.02633 1.172 0.241572## log_time_to_maturity 0.03324 0.03924 0.847 0.397070## categorical_reporting_delay2 -0.18564 0.09816 -1.891 0.058791## categorical_reporting_delay3 -0.26398 0.09763 -2.704 0.006924## categorical_reporting_delay4 -0.40421 0.13214 -3.059 0.002258## log_trade_size 0.11996 0.02313 5.187 2.41e-07## curve_based_price 0.42527 0.03433 12.388 < 2e-16## categorical_received_time_diff_last12 -0.35606 0.21590 -1.649 0.099316## categorical_received_time_diff_last13 -0.63120 0.27888 -2.263 0.023748## categorical_received_time_diff_last14 -1.61833 0.46655 -3.469 0.000537## trade_price_last1 0.69842 0.01683 41.500 < 2e-16## log_trade_size_last1 -0.08395 0.02273 -3.693 0.000229## curve_based_price_last1 -0.13939 0.03589 -3.883 0.000107## is_callable1 -0.03317 0.11815 -0.281 0.778937## trade_type3 1.62292 0.09957 16.299 < 2e-16## trade_type4 0.81764 0.09287 8.804 < 2e-16## trade_type_last13 -0.89904 0.09972 -9.016 < 2e-16## trade_type_last14 -0.37862 0.09602 -3.943 8.39e-05#### (Intercept) *## log_weight *## current_coupon## log_time_to_maturity## categorical_reporting_delay2 .## categorical_reporting_delay3 **## categorical_reporting_delay4 **## log_trade_size ***## curve_based_price ***## categorical_received_time_diff_last12 .## categorical_received_time_diff_last13 *## categorical_received_time_diff_last14 ***## trade_price_last1 ***## log_trade_size_last1 ***## curve_based_price_last1 ***## is_callable1## trade_type3 ***## trade_type4 ***## trade_type_last13 ***## trade_type_last14 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1###### R-sq.(adj) = 0.984 Deviance explained = 98.4%## GCV = 2.2 Scale est. = 2.1728 n = 1619


13/48

# Diagnostics for the linear model. Every plot in this section reads from
# holdlinear; the response and QQ plots previously re-plotted the GAM
# (holdgam) by mistake.
# NOTE(review): the printed commentary below was written against the earlier
# plots - re-check it against the corrected ones.

# residuals vs fitted values
plot(as.numeric(holdlinear$fitted.values), as.numeric(holdlinear$residuals),
     pch = 16, xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted values vs. Residuals", cex.axis = 1.3, cex.lab = 1.3)
abline(h = 0, lwd = 2, col = 3, lty = 2)
print("Analyzing the fitted values vs. the residuals one can argue that there is no strong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero.")
## [1] "Analyzing the fitted values vs. the residuals one can argue that there is no strong evidence of uneven variance across the data. Also it seems reasonable to assume that the data are spread around zero."

# Actual response vs fitted values
plot(as.numeric(holdlinear$fitted.values),
     as.numeric(transformeddata$trade_price),
     pch = 16, xlab = "Fitted Values", ylab = "response",
     main = "Fitted values vs. response", cex.axis = 1.3, cex.lab = 1.3)
abline(a = 0, b = 1, lwd = 2, col = 3, lty = 1)
print("Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response.")
## [1] "Analyzing the fitted values vs. the response one can argue that the model is reasonably good in its predicting power given that the data are scattered around the perfect agreement line here plotted. This introduces some worries as the prediction is too good which might indicate possible overffiting or spurious relationships between the regressors and the response."

# qq plot
qqnorm(as.numeric(holdlinear$residuals), cex.axis = 1.3, cex.lab = 1.3,
       pch = 4, main = "QQ PLot")
qqline(as.numeric(holdlinear$residuals))
print("There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable.")
## [1] "There is clear evidence of heavy tails on both the lower and upper end of the plot, perhaps modifying the normality assumption would be advisable."

# AIC comparison (lower is better)
holdgam$aic

## [1] 5768.452

holdlinear$aic

## [1] 5872.798

print("Analyzing diagnostic plots for both models and their AIC values (which are distribution dependent and given that the qq plots are showing little reliability on the normality assumption), one could argue that there is no justification in adding more complexity in the model given that the simple linear relationships seem to explain as much variability as in the complex model.")

## [1] "Analyzing diagnostic plots for both models and their AIC values (which are distribution dependent and given that the qq plots are showing little reliability on the normality assumption), one could argue that there is no justification in adding mor


14/48

e complexity in the model given that the simple linear relationships seem to explainas much variability as in the complex model."


15/48

Lucas Triana

Part 2

Homework 5

library(quantmod)

# symbols of the stocks whose volatility is modelled (first column of the file)
market_symbols <- read.table(
  "http://www.stat.cmu.edu/~cschafer/MSCF/Project/ChallengeSymbols2015.txt"
)

# including the following predictors
# ^GSPC S&P500
# ^VIX  CBOE Volatility Index
# ^VXN  CBOE NASDAQ Volatility Index
# ^VXO  CBOE S&P 100 Volatility Index
my_symbols <- read.table("mySymbols.txt")

# keep only the symbol column of each table, as a plain vector
market_symbols <- as.vector(market_symbols$V1)
my_symbols <- as.vector(my_symbols$V1)

# vectors that will store the volatility for the two months, one entry per
# market symbol. (length(data) was used before, but at this point `data` is
# still the base R function, so that only ever allocated a single element.)
# Entries start as NA so a symbol that is never filled in is dropped by
# na.omit() later instead of looking like a real value.
VOLMONTH1 <- rep(NA_real_, length(market_symbols))
VOLMONTH2 <- rep(NA_real_, length(market_symbols))

# data frames that will store the price values and returns:
# one row per market symbol, four columns per predictor symbol
returns <- data.frame(matrix(ncol = length(my_symbols) * 4,
                             nrow = length(market_symbols)))
pricePerasset <- data.frame(matrix(ncol = length(my_symbols) * 4,
                                   nrow = length(market_symbols)))

#loop that renames the columns for the data frames needed in the regression

j=1

k=1

for (i in 1:length(my_symbols)*4)

{

colname=sprintf("%s_weekly return", my_symbols[k])name=sprintf("%s%d", "X", j)

names(returns)[names(returns)==name]


16/48

names(returns)[names(returns)==name]


17/48

# price last month close (21 trading days)
pricePerasset[length(market_symbols) - j, 6] <- asset$VIX.Adjusted[nrow(asset) - 21 - j]
# price last quarter close (63 trading days)
pricePerasset[length(market_symbols) - j, 7] <- asset$VIX.Adjusted[nrow(asset) - 63 - j]
# price last year close (252 trading days)
pricePerasset[length(market_symbols) - j, 8] <- asset$VIX.Adjusted[nrow(asset) - 252 - j]

# each return series is built once and then indexed, instead of being
# recomputed for the value and again for its length
weekly_returns <- weeklyReturn(asset, type = "log")
monthly_returns <- monthlyReturn(asset, type = "log")
quarterly_returns <- quarterlyReturn(asset, type = "log")

# weekly returns
returns[length(market_symbols) - j, 5] <- weekly_returns[length(weekly_returns) - j]
# monthly returns
returns[length(market_symbols) - j, 6] <- monthly_returns[length(monthly_returns) - j]
# quarterly returns
returns[length(market_symbols) - j, 7] <- quarterly_returns[length(quarterly_returns) - j]
# yearly returns
# NOTE(review): this column is filled from quarterlyReturn(), so it duplicates
# column 7. It is left unchanged on purpose: yearlyReturn() over the download
# window may yield fewer periods than there are rows to fill - confirm the
# intent before switching.
returns[length(market_symbols) - j, 8] <- quarterly_returns[length(quarterly_returns) - j]

}

# ^VXN data
asset <- getSymbols(my_symbols[3], from = (Sys.Date() - (7560)),
                    to = (Sys.Date() - (30)), auto.assign = FALSE)

print(my_symbols[3])

# the return series do not depend on j, so they are computed once
weekly_returns <- weeklyReturn(asset, type = "log")
monthly_returns <- monthlyReturn(asset, type = "log")
quarterly_returns <- quarterlyReturn(asset, type = "log")

# NOTE(review): the monthly/quarterly/yearly return assignments were missing
# from this copy of the script; they are restored following the pattern of the
# ^VIX block (columns 9-12) - confirm against the original source.
for (j in 0:(length(market_symbols) - 1)) {
  target_row <- length(market_symbols) - j

  # price last week close (5 trading days)
  pricePerasset[target_row, 9] <- asset$VXN.Adjusted[nrow(asset) - 5 - j]
  # price last month close (21 trading days)
  pricePerasset[target_row, 10] <- asset$VXN.Adjusted[nrow(asset) - 21 - j]
  # price last quarter close (63 trading days)
  pricePerasset[target_row, 11] <- asset$VXN.Adjusted[nrow(asset) - 63 - j]
  # price last year close (252 trading days)
  pricePerasset[target_row, 12] <- asset$VXN.Adjusted[nrow(asset) - 252 - j]

  # weekly returns
  returns[target_row, 9] <- weekly_returns[length(weekly_returns) - j]
  # monthly returns
  returns[target_row, 10] <- monthly_returns[length(monthly_returns) - j]
  # quarterly returns
  returns[target_row, 11] <- quarterly_returns[length(quarterly_returns) - j]
  # yearly returns
  # NOTE(review): filled from the quarterly series, as in the ^VIX block;
  # yearlyReturn() may not provide one value per row - confirm the intent.
  returns[target_row, 12] <- quarterly_returns[length(quarterly_returns) - j]
}

# ^VXO data
asset <- getSymbols(my_symbols[4], from = (Sys.Date() - (7560)),
                    to = (Sys.Date() - (30)), auto.assign = FALSE)

print(my_symbols[4])

# the return series do not depend on j, so they are computed once
weekly_returns <- weeklyReturn(asset, type = "log")
monthly_returns <- monthlyReturn(asset, type = "log")
quarterly_returns <- quarterlyReturn(asset, type = "log")

# NOTE(review): the monthly/quarterly/yearly return assignments were truncated
# in this copy of the script; they are restored following the pattern of the
# ^VIX block (columns 13-16) - confirm against the original source.
for (j in 0:(length(market_symbols) - 1)) {
  target_row <- length(market_symbols) - j

  # price last week close (5 trading days)
  pricePerasset[target_row, 13] <- asset$VXO.Adjusted[nrow(asset) - 5 - j]
  # price last month close (21 trading days)
  pricePerasset[target_row, 14] <- asset$VXO.Adjusted[nrow(asset) - 21 - j]
  # price last quarter close (63 trading days)
  pricePerasset[target_row, 15] <- asset$VXO.Adjusted[nrow(asset) - 63 - j]
  # price last year close (252 trading days)
  pricePerasset[target_row, 16] <- asset$VXO.Adjusted[nrow(asset) - 252 - j]

  # weekly returns
  returns[target_row, 13] <- weekly_returns[length(weekly_returns) - j]
  # monthly returns
  returns[target_row, 14] <- monthly_returns[length(monthly_returns) - j]
  # quarterly returns
  returns[target_row, 15] <- quarterly_returns[length(quarterly_returns) - j]
  # yearly returns
  # NOTE(review): filled from the quarterly series, as in the ^VIX block;
  # yearlyReturn() may not provide one value per row - confirm the intent.
  returns[target_row, 16] <- quarterly_returns[length(quarterly_returns) - j]
}

# Computes the daily log returns and the volatility (root mean square of the
# daily returns) of each market symbol for two download windows.
# seq_along() keeps the loop from running when market_symbols is empty.
for (i in seq_along(market_symbols)) {
  # first window: 60 to 30 days before today
  month1 <- getSymbols(market_symbols[i], from = (Sys.Date() - 60),
                       to = (Sys.Date() - 30), auto.assign = FALSE)
  DAILYRETURNS <- dailyReturn(month1, type = "log")
  volatilitymonth1 <- sqrt(sum(DAILYRETURNS^2) / length(DAILYRETURNS))
  VOLMONTH1[i] <- volatilitymonth1

  # second window: 32 days before today up to today
  month2 <- getSymbols(market_symbols[i], from = (Sys.Date() - 32),
                       to = Sys.Date(), auto.assign = FALSE)
  DAILYRETURNS2 <- dailyReturn(month2, type = "log")
  volatilitymonth2 <- sqrt(sum(DAILYRETURNS2^2) / length(DAILYRETURNS2))
  VOLMONTH2[i] <- volatilitymonth2
}

# one modelling table: returns, prices and the two volatility vectors.
# The outer data.frame() call is kept because it turns the column names into
# syntactically valid ones, which the model formulas rely on.
data <- data.frame(cbind(returns, pricePerasset,
                         as.data.frame(VOLMONTH1), as.data.frame(VOLMONTH2)))

noNAdata <- na.omit(data) # removing N/A data


20/48


21/48


22/48


23/48


24/48


25/48


26/48


27/48


28/48


29/48


30/48

# histogram drawing of the predictors and response to see if any
# transformations are needed (one histogram per column, titled with the
# column name and labelled with the column index)
for (i in seq_len(ncol(noNAdata))) {
  hist(as.matrix(noNAdata[i]), main = colnames(noNAdata)[i], xlab = i)
}

# noticing that several columns of the data might need transformations
coltotransform <- c(3, 4, 17, 19, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33)

# skewness of each candidate column, filled in by the loop below
skew <- numeric(length(coltotransform))

# columns whose skewness seems to be largely positive or negative are to be
# transformed:
# two transformations will take place, log(x) (if all data are positive),
# x^2 or e^x depending on the skew
# (only the log(x) case is applied by the loop below)
library(moments)
counter <- 0

for (i in seq_along(coltotransform)) {
  col_idx <- coltotransform[i]
  skew[i] <- skewness(as.matrix(noNAdata[col_idx]), na.rm = FALSE)

  # log() is only applied when every value in the column is strictly positive
  if (min(noNAdata[col_idx]) > 0) {
    temp <- noNAdata[col_idx]
    noNAdata[col_idx] <- log(noNAdata[col_idx])

    # transformed column first, original values overlaid on the same axes;
    # the title and label use the data-frame column (col_idx), not the loop
    # counter, so they name the column that was actually transformed
    hist(as.matrix(noNAdata[col_idx]),
         main = colnames(noNAdata)[col_idx], xlab = col_idx)
    hist(as.matrix(temp), col = 3, lwd = 2, lty = 2,
         main = colnames(noNAdata)[col_idx], xlab = col_idx, add = TRUE)

    # running count of the columns transformed so far
    counter <- counter + 1
    print(counter)
  }
}


32/48


33/48


34/48


35/48


36/48


37/48

# Box-Cox transformation search for the response
# NOTE(review): boxcox() is normally provided by the MASS package - confirm
# that loading car makes it available in the installed version.
library(car)

BC_Transformation <- boxcox(VOLMONTH2 ~ ., data = noNAdata)

# finds the optimal value for lambda: the grid point with the highest
# log-likelihood. which.max() avoids comparing floating-point values with ==.
lambda <- BC_Transformation$x[which.max(BC_Transformation$y)]


38/48

# stepwise simple linear analysis
# The data frame is passed through `data =`, so nothing is attach()ed (the
# earlier attach() call named an object that is not the noNAdata frame).
fullmod <- lm(VOLMONTH2 ~ ., data = noNAdata) # full model
summary(fullmod)

# stepwise selection in both directions, starting from the full model
linearmod <- step(fullmod, direction = "both")

summary(linearmod)

Call:
lm(formula = VOLMONTH2 ~ X.GSPC_weekly.return + X.VXN_weekly.return +

X.VXN_monthly.return + X.VXO_monthly.return + X.GSPC_monthly.price +X.VIX_quarterly.price + X.VXN_yearly.price + X.VXO_monthly.price +VOLMONTH1, data = noNAdata)

Residuals:Min 1Q Median 3Q Max

-0.007720 -0.002627 0.000164 0.001879 0.009041

Coefficients:Estimate Std. Error t value Pr(>|t|)

(Intercept) 1.550e-01 5.348e-02 2.898 0.00564 **X.GSPC_weekly.return -1.538e-01 8.126e-02 -1.893 0.06439 .X.VXN_weekly.return -1.418e-02 9.224e-03 -1.538 0.13068X.VXN_monthly.return -1.194e-02 7.371e-03 -1.620 0.11170X.VXO_monthly.return 1.667e-02 6.542e-03 2.548 0.01409 *X.GSPC_monthly.price -5.809e-05 2.196e-05 -2.645 0.01102 *X.VIX_quarterly.price 7.347e-03 3.705e-03 1.983 0.05308 .X.VXN_yearly.price 3.345e-03 7.823e-04 4.275 9.02e-05 ***X.VXO_monthly.price -1.412e-02 6.311e-03 -2.237 0.02997 *VOLMONTH1 1.249e-02 9.805e-04 12.736 < 2e-16 ***---Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.003814 on 48 degrees of freedomMultiple R-squared: 0.8028, Adjusted R-squared: 0.7658F-statistic: 21.71 on 9 and 48 DF, p-value: 4.576e-14

#Influential observation search

# Cook's distance of every observation in the stepwise model, with a dashed
# horizontal reference drawn at 4 / (number of observations)
cookd <- as.numeric(cooks.distance(linearmod))
n_obs <- length(cookd)
reference_level <- 4 / n_obs

plot(cookd, xlab = "Observation", ylab = "Cook's Distance",
     main = "Cook's Distance")
lines(c(1, n_obs), c(reference_level, reference_level),
      lwd = 2, col = 3, lty = 2)

-2 -1 0 1 2

-140

-60

log-Likelih

ood

95%


39/48

# residuals vs fitted values for the stepwise linear model
lm_fitted <- as.numeric(linearmod$fitted.values)
lm_resid <- as.numeric(linearmod$residuals)
plot(lm_fitted, lm_resid,
     pch = 16, xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted values vs. Residuals", cex.axis = 1.3, cex.lab = 1.3)
abline(h = 0, lwd = 2, col = 3, lty = 2)

# residuals over time (plotted in observation order)
plot(linearmod$residuals,
     xlab = "Time", ylab = "Residuals", main = "Evolution of residuals",
     cex.axis = 1.3, cex.lab = 1.3, pch = 16)


0 10 20 30 40 50 600.00

0.20

Cook's Distance

Observation

Cook'sDis

tance

0.005 0.020-0.0

05

Fitted values vs. Residuals

Fitted Values

es

uas


40/48

#Actual response vs fitted values
# Plots the observed VOLMONTH2 against the fitted values of linearmod.

plot(as.numeric(linearmod$fitted.values),as.numeric(noNAdata$VOLMONTH2),

pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response",cex.axis=1.3,cex.lab=1.3)


#qq plot
# NOTE(review): this reads residuals from `finalmod`, while the rest of this
# section uses `linearmod`; no assignment to `finalmod` is visible in this
# part of the script - confirm which model is intended.

qqnorm(as.numeric(finalmod$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")

qqline(as.numeric(finalmod$residuals))

0 20 40 60-0.005

Evolution of residuals

Time

esu

as

0.005 0.0200.0

1

Fitted values vs. response

Fitted Values

res

ponse


41/48

# fit gam
library(mgcv)

attributes(noNAdata)

# noticing that there is a lack of data compared to the number of predictors
# and rows in the data frame, those variables chosen by the stepwise linear
# fit will be the ones included in gam
# (data frame noNAdata of size 58x34)
holdgam <- gam(
  VOLMONTH2 ~ s(X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    s(X.VXN_monthly.return) + (X.VXO_monthly.return) +
    s(X.GSPC_monthly.price) + s(X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + s(X.VXO_monthly.price) + s(VOLMONTH1),
  data = noNAdata
)

summary(holdgam)

Family: gaussian
Link function: identity

Formula:VOLMONTH2 ~ s(X.GSPC_weekly.return) + (X.VXN_weekly.return) +

s(X.VXN_monthly.return) + (X.VXO_monthly.return) + s(X.GSPC_monthly.price) +

s(X.VIX_quarterly.price) + (X.VXN_yearly.price) + s(X.VXO_monthly.price)+

s(VOLMONTH1)

Parametric coefficients:Estimate Std. Error t value Pr(>|t|)

(Intercept) -0.0255803 0.0110534 -2.314 0.025176 *

X.VXN_weekly.return -0.0076158 0.0091063 -0.836 0.407304X.VXO_monthly.return 0.0147098 0.0061787 2.381 0.021479 *X.VXN_yearly.price 0.0028573 0.0007523 3.798 0.000426 ***---Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Approximate significance of smooth terms:edf Ref.df F p-value

s(X.GSPC_weekly.return) 1.000 1.000 1.922 0.1722s(X.VXN_monthly.return) 1.000 1.000 1.691 0.1999s(X.GSPC_monthly.price) 1.000 1.000 5.514 0.0231 *

-2 -1 0 1 2-0.005

QQ PLot

Theoretical Quantilesampe

u

an

es


42/48

s(X.VIX_quarterly.price) 1.000 1.000 3.825 0.0565 .s(X.VXO_monthly.price) 1.905 2.342 3.399 0.0355 *s(VOLMONTH1) 2.128 2.616 73.487


43/48


# close the device used for the smooth-term plots before drawing diagnostics
dev.off()

# residuals vs fitted values
plot(as.numeric(holdgam$fitted.values), as.numeric(holdgam$residuals),
     pch = 16, xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted values vs. Residuals",
     cex.axis = 1.3, cex.lab = 1.3)

# actual response vs fitted values, with the line of perfect agreement
plot(as.numeric(holdgam$fitted.values), as.numeric(noNAdata$VOLMONTH2),
     pch = 16, xlab = "Fitted Values", ylab = "response",
     main = "Fitted values vs. response",
     cex.axis = 1.3, cex.lab = 1.3)
abline(a = 0, b = 1, lwd = 2, col = 4, lty = 1)

2.4 3.0-0.010

X.VXO_monthly.pris(X.VXO_monthly.price,1.9

-5.5 -3.5-0.02

0.01

VOLMONTH1s(VOLMONTH1,2.13)

0.005 0.020 0.035-0.006


Fitted Values

es

uas


44/48

# qq plot of the GAM residuals
qqnorm(as.numeric(holdgam$residuals), cex.axis = 1.3, cex.lab = 1.3, pch = 16,
       main = "QQ PLot")
qqline(as.numeric(holdgam$residuals))

# fitting PPR (projection pursuit regression) with two terms on the same
# predictors that were used for the GAM
holdppr <- ppr(
  VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    (X.VXN_monthly.return) + (X.VXO_monthly.return) +
    (X.GSPC_monthly.price) + (X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + (X.VXO_monthly.price) + (VOLMONTH1),
  nterms = 2, data = noNAdata, sm.method = "gcvspline"
)

summary(holdppr)

Call:
ppr(formula = VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +

(X.VXN_monthly.return) + (X.VXO_monthly.return) + (X.GSPC_monthly.price)+

(X.VIX_quarterly.price) + (X.VXN_yearly.price) + (X.VXO_monthly.price) +(VOLMONTH1), data = noNAdata, nterms = 2, sm.method = "gcvspline")

Goodness of fit:

0.005 0.020 0.0350.01


Fitted Values

respon

se

-2 -1 0 1 2-0.006

QQ PLot

Theoretical Quantilesam

pe

uan

es


45/48


46/48

# qq plot of the PPR residuals
qqnorm(as.numeric(holdppr$residuals), cex.axis = 1.3, cex.lab = 1.3, pch = 16,
       main = "QQ PLot")
qqline(as.numeric(holdppr$residuals))

# fit NNET
library(nnet)

# The response is continuous, so the output unit must be linear. The argument
# is spelled `linout`; the earlier spelling `lineout` is not an nnet argument,
# so it had no effect and the network was fitted with the default logistic
# output unit.
holdnnet <- nnet(
  VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
    (X.VXN_monthly.return) + (X.VXO_monthly.return) +
    (X.GSPC_monthly.price) + (X.VIX_quarterly.price) +
    (X.VXN_yearly.price) + (X.VXO_monthly.price) + (VOLMONTH1),
  data = noNAdata, size = 4, linout = TRUE, decay = 0.001, maxit = 2000
)

# NOTE(review): the transcript that follows was recorded before the `linout`
# correction and needs to be regenerated.
summary(holdnnet)

a 9-4-1 network with 45 weights
options were - decay=0.001
b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1 i8->h1 i9->h1
0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00

b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2 i8->h2 i9->h20.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00


0.01 0.030.01


Fitted Values

respon

se

-2 -1 0 1 2-0.003

QQ PLot

Theoretical Quantilesam

pe

uan

es


47/48


b->o h1->o h2->o h3->o h4->o-0.81 -0.81 -0.81 -0.81 -0.81


# Diagnostics for the neural network. All three plots read from holdnnet; the
# QQ plot previously re-used the PPR residuals (holdppr) by mistake.

# residuals vs fitted values
plot(as.numeric(holdnnet$fitted.values), as.numeric(holdnnet$residuals),
     pch = 16, xlab = "Fitted Values", ylab = "Residuals",
     main = "Fitted values vs. Residuals", cex.axis = 1.3, cex.lab = 1.3)

# actual response vs fitted values
plot(as.numeric(holdnnet$fitted.values), as.numeric(noNAdata$VOLMONTH2),
     pch = 16, xlab = "Fitted Values", ylab = "response",
     main = "Fitted values vs. response",
     cex.axis = 1.3, cex.lab = 1.3)

# qq plot
qqnorm(as.numeric(holdnnet$residuals), cex.axis = 1.3, cex.lab = 1.3, pch = 16,
       main = "QQ PLot")
qqline(as.numeric(holdnnet$residuals))

0.01723805 0.01723830-0.0

1


Fitted Values

es

uas

0.01723805 0.01723830

0.01


Fitted Values

response


48/48

# Writes the object `finalmod` to LTRIANAL_models.Robj.
# NOTE(review): no assignment to `finalmod` is visible in this part of the
# script - confirm which fitted model it is meant to hold.
save(finalmod, file="LTRIANAL_models.Robj")

-2 -1 0 1 2-0.003

QQ PLot

Theoretical Quantilesampe

u

an

es

piyush sachdeva - homework 5

Documents