This repository was archived by the owner on Sep 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathlinearAnalysis.R
More file actions
115 lines (102 loc) · 4.5 KB
/
linearAnalysis.R
File metadata and controls
115 lines (102 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
linear_models <- emptySectorList
for (i in 1:length(sectorPrices)) {
sectDat <- sectorPrices[[i]][["Training Set"]]
form <- paste(colnames(sectDat)[2]," ~ ",colnames(sectDat)[3],sep='')
for (j in colnames(sectDat)[4:ncol(sectDat)]) {
if (j == "EPSGrowthRate"|| j == "PutCallOpenInterest" || j == "InstitutionOwnership") {
next()
} else {
form <- paste(form,as.character(j),sep = " + ")
}
}
linear_models[[i]] <- lm(form,sectDat)
}
names(linear_models) <- sectorNames
pValMatrix <- as.data.frame(matrix(
nrow = length(colnames(sectDat)[3:ncol(sectDat)][-(which(colnames(sectDat) == "EPSGrowthRate")-2)]),
ncol = length(sectorNames))
)
rownames(pValMatrix) <- colnames(sectDat)[3:ncol(sectDat)][-(which(colnames(sectDat) == "EPSGrowthRate")-2)]
colnames(pValMatrix) <- sectorNames
for (i in 1:length(linear_models)) {
sTest <- summary(linear_models[[i]])
for (metric in rownames(pValMatrix)) {
pValMatrix[metric, i] <- sTest[["coefficients"]][2:nrow(sTest[["coefficients"]]),4][metric]
}
}
plotHeatMap = function(df) {
tmp <- cbind(rownames(df),df)
tmp <- melt(tmp)
colnames(tmp) <- c("Factors", "Sectors", "Value")
pValGGPlot <<- 'ggplot(tmp, aes(x = Sectors, y = Factors, fill = Value)) + geom_tile() +
scale_fill_gradient2(low = "white", high = "blue", midpoint = 0, space = "Lab") +
labs(title = "Cold Plot", subtitle = "The bluer the area, the worse the P-Value",caption = "Rounded to 4 Decimal Places") +
geom_text(aes(label = round(Value,digits = 4)))'
eval(parse(text = pValGGPlot))
}
# png(filename = "Cold Plot.png",width = 862,height = 550,units = "px")
# plotHeatMap(pValMatrix)
# dev.off()
plotHeatMap(pValMatrix)
#write.csv(pValMatrix,file = "output/P Value Matrix.csv")
coeffMatrix <- as.data.frame(matrix(nrow = nrow(pValMatrix),ncol = length(sectorNames)))
rownames(coeffMatrix) <- rownames(pValMatrix)
colnames(coeffMatrix) <- sectorNames
for (i in 1:length(linear_models)) {
sTest <- summary(linear_models[[i]])
for(metric in rownames(coeffMatrix)) {
coeffMatrix[metric, i] <- sTest[["coefficients"]][2:nrow(sTest[["coefficients"]]),1][metric]
}
}
plotHeatMap2 = function(df) {
tmp <- cbind(rownames(df),df)
tmp <- melt(tmp)
colnames(tmp) <- c("Factors", "Sectors", "Value")
coeffHeatGGPlot <<- 'ggplot(tmp, aes(x = Sectors, y = Factors, fill = Value)) + geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, space = "Lab") +
labs(title = "Coeff Heatmap, Linear",caption = "Rounded to 2 Decimal Places") +
geom_text(aes(label = round(Value,digits = 2)))'
eval(parse(text = coeffHeatGGPlot))
}
plotHeatMap2(coeffMatrix)
plotRSquared = function(lst) {
tmp <- sapply(lst, function(x) summary(x)$r.squared)
tmp <- cbind(names(tmp),as.data.frame(tmp))
colnames(tmp) <- c("Sectors", "RSquared")
rSquareGGPlot <<- 'ggplot(tmp, aes(x=Sectors,y=RSquared,ymax=1)) + geom_bar(stat = "identity") + geom_text(aes(label = round(RSquared,digits = 4)),nudge_y=.02) + labs(title = "R-Squareds, Linear Model (New)")'
eval(parse(text = rSquareGGPlot))
}
plotRSquared(linear_models)
predictedValuesLinear <- emptySectorList
predictedValuesLinearMat <- as.data.frame(matrix(nrow = nrow(sectorPrices[[1]][["Testing Set"]]), ncol = length(linear_models)))
for (i in 1:length(linear_models)) {
preds <- predict.lm(linear_models[[i]],newdata = sectorPrices[[i]][["Testing Set"]][,-1])
predictedValuesLinear[[i]] <- preds
predictedValuesLinearMat[,i] <- preds
}
names(predictedValuesLinear) <- sectorNames
colnames(predictedValuesLinearMat) <- names(predictedValuesLinear)
linearError <- sapply(1:ncol(predictedValuesLinearMat), FUN=function(i) {
sum(log(predictedValuesLinearMat[,i] / sectorPrices[[i]][["Testing Set"]][,2]) ^ 2)
})
for (i in 1:length(linearError)) {
print(paste(sectorNames[i], " Total Error: ", round(linearError[i], 4), sep = ''))
}
names(linearError) <- sectorNames
plotErrLinear <- function() {
tmp <- linearError
tmp <- cbind(names(tmp),as.data.frame(tmp))
colnames(tmp) <- c("Sectors", "Error")
errPlot <<- 'ggplot(tmp, aes(x=Sectors,y=Error)) + geom_bar(stat = "identity") + geom_text(aes(label = percent(linearError, 0.001)),nudge_y=.02) + labs(title = "Mean Error, Linear Model")'
eval(parse(text = errPlot))
}
plotErrLinear()
size <- size + 1
if (size == 1) {
accuracyMatrix[1,] <- linearError
rownames(accuracyMatrix) <- "Linear Regression"
} else {
accuracyMatrix <- rbind(accuracyMatrix, linearError)
rownames(accuracyMatrix)[size] <- "Linear Regression"
}
print("Linear Analysis: Done!")