UA-60924200-1
Code and Graphs - PDF / .Rmd DOWNLOAD
Data Set - Download .csv
Classification Trees - Code Dump as seen below
# Load the data set from a local CSV file and print per-column summaries.
# attach() is intentionally not used: it copies the column names onto the
# search path, where they can silently mask (or be masked by) other objects.
# Columns are accessed explicitly through `d$...` or a `data =` argument.
d <- read.csv("C:/STAT/d.csv")
summary(d)
##       ApC             F1.R          F2.R            F3.R
##  Min.   :0.000   Min.   :0.0   Min.   :0.000   Min.   :0.000
##  1st Qu.:1.000   1st Qu.:2.0   1st Qu.:1.000   1st Qu.:0.000
##  Median :1.000   Median :3.0   Median :3.000   Median :1.000
##  Mean   :0.814   Mean   :3.1   Mean   :2.982   Mean   :1.896
##  3rd Qu.:1.000   3rd Qu.:5.0   3rd Qu.:5.000   3rd Qu.:4.000
##  Max.   :1.000   Max.   :5.0   Max.   :5.000   Max.   :5.000
##       F4.R            F5.R
##  Min.   :0.000   Min.   :0.000
##  1st Qu.:0.000   1st Qu.:0.000
##  Median :0.000   Median :0.000
##  Mean   :1.248   Mean   :1.368
##  3rd Qu.:2.000   3rd Qu.:3.000
##  Max.   :5.000   Max.   :5.000
# Number of observations, counted on the response column ApC.
length(d[["ApC"]])
## [1] 5000
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Split the data into a 70% training set and a 30% test set.
# createDataPartition() samples at random, so the seed is fixed first;
# without it every run produces a different split and a different tree.
set.seed(1234)
inTrain <- createDataPartition(y = d$ApC, p = 0.7, list = FALSE)
trn <- d[inTrain, ]   # rows selected for training
tst <- d[-inTrain, ]  # all remaining rows, held out for testing

# Report the dimensions (rows, columns) of both subsets.
dim(trn)
dim(tst)
## [1] 3500 6
## [1] 1500 6
# qplot(F1.R,F2.R,colour=d,data=trn)
library(rpart)
# Fit an rpart tree through caret, using every other column of the
# training set as a predictor of ApC, then print the selected tree.
# NOTE(review): ApC is passed as a numeric 0/1 column, so the printed tree
# reports deviance and a numeric yval (a regression tree). Convert ApC to a
# factor first if class labels are wanted -- confirm the intent.
mFit <- train(
  ApC ~ .,
  data = trn,
  method = "rpart"
)
print(mFit$finalModel)
## n= 3500
##
## node), split, n, deviance, yval
##       * denotes terminal node
##
## 1) root 3500 522.97140 0.8171429
##   2) F1.R< 2.5 1330 332.03010 0.5187970
##     4) F3.R< 2.5 946 207.01900 0.3234672
##       8) F5.R< 3 714  66.33053 0.1036415 *
##       9) F5.R>=3 232   0.00000 1.0000000 *
##     5) F3.R>=2.5 384   0.00000 1.0000000 *
##   3) F1.R>=2.5 2170   0.00000 1.0000000 *
library(rattle)
# Draw the tree selected by the caret fit.
fancyRpartPlot(mFit$finalModel)

# Classification tree fitted directly with rpart, using minsplit = 500 and
# cp = 0.001 as the control settings; then plot it.
mFit_minsplit500cp0.001 <- rpart(
  ApC ~ .,
  data = trn,
  method = "class",
  control = rpart.control(minsplit = 500, cp = 0.001)
)
fancyRpartPlot(mFit_minsplit500cp0.001)

# Same model with smaller control values (minsplit = 200, cp = 0.0005);
# then plot it.
mFit_minsplit200cp0.0005 <- rpart(
  ApC ~ .,
  data = trn,
  method = "class",
  control = rpart.control(minsplit = 200, cp = 0.0005)
)
fancyRpartPlot(mFit_minsplit200cp0.0005)