Logistic Regression and Classification Trees - Retail Sales Data - 5000 Observations

UA-60924200-1

Code and Graphs - Download PDF / .Rmd

Data Set - Download .csv 

Classification Trees - Code Dump as seen below 

# Load the retail-sales data set from its CSV file into the data frame `d`.
d <- read.csv("C:/STAT/d.csv")
# attach(d) was dropped: the calls shown in this script reach columns through
# `d$...` or a `data =` argument, and attaching copies the columns onto the
# search path where they can silently mask, or be masked by, other objects.
# Print per-column summary statistics as a quick sanity check of the import.
summary(d)

##       ApC             F1.R          F2.R            F3.R     
##  Min.   :0.000   Min.   :0.0   Min.   :0.000   Min.   :0.000 
##  1st Qu.:1.000   1st Qu.:2.0   1st Qu.:1.000   1st Qu.:0.000 
##  Median :1.000   Median :3.0   Median :3.000   Median :1.000 
##  Mean   :0.814   Mean   :3.1   Mean   :2.982   Mean   :1.896 
##  3rd Qu.:1.000   3rd Qu.:5.0   3rd Qu.:5.000   3rd Qu.:4.000 
##  Max.   :1.000   Max.   :5.0   Max.   :5.000   Max.   :5.000 
##       F4.R            F5.R     
##  Min.   :0.000   Min.   :0.000 
##  1st Qu.:0.000   1st Qu.:0.000 
##  Median :0.000   Median :0.000 
##  Mean   :1.248   Mean   :1.368 
##  3rd Qu.:2.000   3rd Qu.:3.000 
##  Max.   :5.000   Max.   :5.000

# Number of observations: one row of `d` per record.
nrow(d)

## [1] 5000

library(caret)

## Loading required package: lattice
## Loading required package: ggplot2

# createDataPartition() draws its sample at random, so pin the RNG seed first;
# otherwise every run yields a different train/test split (and different trees).
set.seed(1234)  # arbitrary value; any fixed seed makes the split repeatable

# Row indices for a 70% training sample, returned as a matrix (list = FALSE)
# so they can be used directly to subset `d`.
inTrain <- createDataPartition(y = d$ApC, p = 0.7, list = FALSE)
trn <- d[inTrain, ]   # training rows
tst <- d[-inTrain, ]  # held-out rows: everything not selected above
# Report rows x columns of each partition.
dim(trn)
dim(tst)

## [1] 3500    6

## [1] 1500    6

# Optional exploratory scatter plot, left disabled as in the original script.
# NOTE(review): the call passes `colour=d` (the whole data frame), which looks
# like a typo for the outcome column -- confirm before enabling.
# qplot(F1.R,F2.R,colour=d,data=trn)
library(rpart)

# Fit a tree through caret's train() using the rpart method, with ApC as the
# outcome and every other column of `trn` as a predictor.
# NOTE(review): ApC is stored as numeric 0/1 (see summary(d)), and the model
# printed below reports "deviance" and "yval", the layout rpart uses for
# regression trees. If a classification tree is intended here, ApC presumably
# needs to be a factor -- confirm before changing, since the recorded output
# would differ.
mFit <- train(
  ApC ~ .,
  method = "rpart",
  data = trn
)

# Show the tree that train() kept as its final model.
print(mFit[["finalModel"]])

## n= 3500
##
## node), split, n, deviance, yval
##       * denotes terminal node
##
## 1) root 3500 522.97140 0.8171429 
##   2) F1.R< 2.5 1330 332.03010 0.5187970 
##     4) F3.R< 2.5 946 207.01900 0.3234672 
##       8) F5.R< 3 714  66.33053 0.1036415 *
##       9) F5.R>=3 232   0.00000 1.0000000 *
##     5) F3.R>=2.5 384   0.00000 1.0000000 *
##   3) F1.R>=2.5 2170   0.00000 1.0000000 *

library(rattle)

# Draw the tree kept by train() as a decorated rpart plot.
fancyRpartPlot(mFit[["finalModel"]])

# Classification tree fitted directly with rpart (method = "class") on the
# training rows, using minsplit = 500 and complexity parameter cp = 0.001.
mFit_minsplit500cp0.001 <- rpart(
  ApC ~ .,
  data = trn,
  method = "class",
  control = rpart.control(minsplit = 500, cp = 0.001)
)

# Plot the resulting tree.
fancyRpartPlot(mFit_minsplit500cp0.001)

# Second classification tree on the same training rows, with looser controls
# than the fit above: minsplit = 200 and complexity parameter cp = 0.0005.
mFit_minsplit200cp0.0005 <- rpart(
  ApC ~ .,
  data = trn,
  method = "class",
  control = rpart.control(minsplit = 200, cp = 0.0005)
)

# Plot the resulting tree.
fancyRpartPlot(mFit_minsplit200cp0.0005)