# Initial post - proof of concept - reach me for further detailed code and
# R examples ...
#
# SVM on simulated data - 1
# Source: https://lagunita.stanford.edu/c4x/HumanitiesandScience/StatLearning/asset/ch9.html

# svm() and its predict/plot methods come from e1071. Load it from the default
# library paths rather than a hard-coded, machine-specific directory.
library(e1071)

# ---- Linear SVM on simulated two-class data ----

# Twenty 2-D standard-normal points; the ten rows labelled +1 are shifted by 1
# in both coordinates. `y + 3` maps the labels -1/+1 to palette colours 2/4.
set.seed(10111)
x <- matrix(rnorm(40), 20, 2)
y <- rep(c(-1, 1), c(10, 10))
x[y == 1, ] <- x[y == 1, ] + 1
plot(x, col = y + 3, pch = 19)

# Same simulation under different names: re-seeding with the same value
# reproduces the same draws, so m/n mirror x/y.
set.seed(10111)
m <- matrix(rnorm(40), 20, 2)
n <- rep(c(-1, 1), c(10, 10))
m[n == 1, ] <- m[n == 1, ] + 1
plot(m, col = n + 3, pch = 19)

# Fit on a data frame with the label as a factor so svm() does classification.
d <- data.frame(x, y = as.factor(y))
svmfit <- svm(y ~ ., data = d, kernel = "linear", cost = 10, scale = FALSE)
print(svmfit)
plot(svmfit, d)

# Build a regular lattice covering the observed range of a two-column matrix.
#
# Arguments:
#   x  numeric matrix; only its first two columns are used.
#   n  number of grid values per axis (default 75).
# Returns:
#   data frame with columns X1 and X2 holding all n * n combinations.
make.grid <- function(x, n = 75) {
  # Row 1 holds the column minima, row 2 the column maxima.
  grange <- apply(x, 2, range)
  x1 <- seq(from = grange[1, 1], to = grange[2, 1], length.out = n)
  x2 <- seq(from = grange[1, 2], to = grange[2, 2], length.out = n)
  expand.grid(X1 = x1, X2 = x2)
}

# Colour the lattice by predicted class, then overlay the training points and
# mark the rows listed in svmfit$index with open diamonds.
xgrid <- make.grid(x)
ygrid <- predict(svmfit, xgrid)
plot(xgrid, col = c("red", "blue")[as.numeric(ygrid)], pch = 20, cex = 0.2)
points(x, col = y + 3, pch = 19)
# drop = FALSE keeps a matrix even if a single row is selected.
points(x[svmfit$index, , drop = FALSE], pch = 5, cex = 2)

# Linear coefficients: beta is the coefs-weighted sum of the indexed rows and
# beta0 is svmfit$rho.
beta <- drop(t(svmfit$coefs) %*% x[svmfit$index, , drop = FALSE])
beta0 <- svmfit$rho
plot(xgrid, col = c("red", "blue")[as.numeric(ygrid)], pch = 20, cex = 0.2)
points(x, col = y + 3, pch = 19)
points(x[svmfit$index, , drop = FALSE], pch = 5, cex = 2)
# Lines where beta . x - beta0 equals 0 (solid) and -1 / +1 (dashed), each
# solved for the second coordinate as intercept + slope * first coordinate.
abline(beta0 / beta[2], -beta[1] / beta[2])
abline((beta0 - 1) / beta[2], -beta[1] / beta[2], lty = 2)
abline((beta0 + 1) / beta[2], -beta[1] / beta[2], lty = 2)

# ---- Radial-kernel SVM on the ESL mixture data ----

# Loading the .rda creates the `ESL.mixture` list used below (needs network
# access).
load(url(
  "http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/ESL.mixture.rda"
))
# Disabled experiments kept from the original notes:
# mm <- data.frame(c(1:8))
# save(mm, file = "ESL.mixture.rda")
# ?save
names(ESL.mixture)

# Extract the elements used by the rest of the script explicitly instead of
# attach()ing the list; this also replaces the simulated x and y from above.
x <- ESL.mixture[["x"]]
y <- ESL.mixture[["y"]]
px1 <- ESL.mixture[["px1"]]
px2 <- ESL.mixture[["px2"]]
prob <- ESL.mixture[["prob"]]
plot(x, col = y + 1)
# plot(x, col = y)
# plot(x, y)

d1 <- data.frame(y = factor(y), x)
fit <- svm(
  factor(y) ~ .,
  data = d1, scale = FALSE, kernel = "radial", cost = 5
)
str(fit)

# Predict over the px1 x px2 lattice; the column names X1/X2 match the names
# data.frame() gives the two columns of x in d1.
# NOTE(review): assumes ESL.mixture$x has no column names of its own - confirm.
xgrid <- expand.grid(X1 = px1, X2 = px2)
ygrid <- predict(fit, xgrid)
plot(xgrid, col = as.numeric(ygrid), pch = 20, cex = 0.2)
# Overlay the training points on the current plot (x, y, px1, px2, prob and
# fit are expected to be in scope from the ESL mixture steps earlier in the
# script).
points(x, col = y + 1, pch = 19)

# Rebuild the prediction lattice first so everything below uses the same grid,
# then pull the decision values off the prediction by their full attribute
# name (the original relied on partial matching of "decision").
xgrid <- expand.grid(X1 = px1, X2 = px2)
func <- attr(predict(fit, xgrid, decision.values = TRUE), "decision.values")
ygrid <- predict(fit, xgrid)
plot(xgrid, col = as.numeric(ygrid), pch = 20, cex = 0.2)
points(x, col = y + 1, pch = 19)

# Reshape to the lattice dimensions derived from px1/px2 instead of the
# hard-coded 69 x 99, so contour() always receives a conforming matrix.
# Black contour: decision value 0. Blue contour: prob = 0.5.
grid_dim <- c(length(px1), length(px2))
contour(px1, px2, matrix(func, grid_dim[1], grid_dim[2]),
  levels = 0, add = TRUE
)
contour(px1, px2, matrix(prob, grid_dim[1], grid_dim[2]),
  levels = 0.5, add = TRUE, col = "blue", lwd = 2
)

# ------------------------------------------
# SVM_TEXT
# https://groups.google.com/forum/#!forum/rtexttools-help
library(RTextTools)

# NOTE(review): the path is kept exactly as written; if the file lives in the
# home directory this was presumably meant to be "~/Data.csv" - confirm.
d <- read.csv("~Data.csv")
# Columns are read explicitly as d["Text"] and d$IsSunny below, so the data
# frame is deliberately not attach()ed.

# Create the document term matrix
dtMatrix <- create_matrix(d["Text"])
dtMatrix
# create_matrix -- this code is showing "Acronym" ---
#   if (attr(weighting, "Acronym") == "tf-idf")

# Configure training set
# NOTE(review): trainSize = 1:11 assumes Data.csv has at least 11 rows -
# confirm against the data file.
cont <- create_container(dtMatrix, d$IsSunny, trainSize = 1:11, virgin = FALSE)
cont

# train a SVM Model
SVM <- train_model(cont, "SVM", kernel = "linear", cost = 1)
SVM

# new data for Prediction - This is TEST Data .
Pred_d <- list(
  "sunny sunny sunny rainy rainy",
  "rainy sunny rainy rainy",
  "hello",
  "",
  "this is another rainy world"
)
Pred_d

# Console transcript from the original session, kept as a comment because it
# is not valid R. trace(..., edit = TRUE) opens an editor, so run it by hand:
#   > trace("create_matrix",edit=TRUE)
#   Tracing function "create_matrix" in package "RTextTools"
#   [1] "create_matrix"
# NOTE(review): presumably the edit changes the "Acronym" attribute lookup
# quoted above so that create_matrix(originalMatrix = ...) stops failing -
# confirm the exact change.

# create a prediction document term matrix
Pred_Matrix <- create_matrix(Pred_d, originalMatrix = dtMatrix)

# create corresponding container; the labels are placeholder zeros
Pred_size <- length(Pred_d)
Pred_cont <- create_container(
  Pred_Matrix,
  labels = rep(0, Pred_size),
  testSize = seq_len(Pred_size),
  virgin = FALSE
)
Pred_cont

# predict
results <- classify_model(Pred_cont, SVM)
results
# ------------------------------------- TBD ---------------------------
# Graphical Classification of Text
#
# Work in progress. Only the two statements below are live; the rest of this
# section is a disabled copy of the e1071 walkthrough plus an error dump from
# the RTextTools run.

# Load e1071 from the default library paths rather than a hard-coded,
# machine-specific directory.
library(e1071)

# Expects x and y to be in scope already. NOTE: this reuses the name `d`,
# replacing the data frame read from the CSV in the SVM_TEXT section.
d <- data.frame(x, y = as.factor(y))

# svmfit = svm(y ~ ., data = d, kernel = "linear", cost = 10, scale = FALSE)
# print(svmfit)
# plot(svmfit, d)
#
# make.grid = function(x, n = 75) {
#   grange = apply(x, 2, range)
#   x1 = seq(from = grange[1, 1], to = grange[2, 1], length = n)
#   x2 = seq(from = grange[1, 2], to = grange[2, 2], length = n)
#   expand.grid(X1 = x1, X2 = x2)
# }
# xgrid = make.grid(x)
# ygrid = predict(svmfit, xgrid)
# plot(xgrid, col = c("red", "blue")[as.numeric(ygrid)], pch = 20, cex = 0.2)
# points(x, col = y + 3, pch = 19)
# points(x[svmfit$index, ], pch = 5, cex = 2)
#
# #
# beta = drop(t(svmfit$coefs) %*% x[svmfit$index, ])
# beta0 = svmfit$rho
# plot(xgrid, col = c("pink", "cyan")[as.numeric(ygrid)], pch = 20, cex = 0.2,main="Support Vectors")
# points(x, col = y + 3, pch = 19)
# points(x[svmfit$index, ], pch = 5, cex = 2)
# abline(beta0/beta[2], -beta[1]/beta[2])
# abline((beta0 - 1)/beta[2], -beta[1]/beta[2], lty = 2)
# abline((beta0 + 1)/beta[2], -beta[1]/beta[2], lty = 2)
#
# #
#
# load(url("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/ESL.mixture.rda"))
# # mm<-data.frame(c(1:8))
# # save(mm,file="ESL.mixture.rda")
# # ?save
# names(ESL.mixture)
# rm(x, y)
# attach(ESL.mixture)
# plot(x, col = y + 1)
# # plot(x, col = y)
# # plot(x,y)
# d1= data.frame(y = factor(y), x)
# fit = svm(factor(y) ~ ., data = d1, scale = FALSE, kernel = "radial", cost = 5)
# str(fit)
# #
# xgrid = expand.grid(X1 = px1, X2 = px2)
# ygrid = predict(fit, xgrid)
# plot(xgrid, col = as.numeric(ygrid), pch = 20, cex = 0.2)
# points(x, col = y + 1, pch = 19)
# #
# func = predict(fit, xgrid, decision.values = TRUE)
# func = attributes(func)$decision
# xgrid = expand.grid(X1 = px1, X2 = px2)
# ygrid = predict(fit, xgrid)
# plot(xgrid, col = as.numeric(ygrid), pch = 20, cex = 0.2)
# points(x, col = y + 1, pch = 19)
#
# contour(px1, px2, matrix(func, 69, 99), level = 0, add = TRUE)
# contour(px1, px2, matrix(prob, 69, 99), level = 0.5, add = TRUE, col = "blue",
#   lwd = 2)
#
#
# --
# Error Code Dump -- Fixed with --- trace("create_matrix",edit=TRUE)
## - Quitting from lines 3-25 (SVM_TEXT_.spin.Rmd)
# Error in if (attr(weighting, "Acronym") == "tf-idf") weight <- 1e-09 :
#   argument is of length zero
# Calls: <Anonymous> ... withCallingHandlers -> withVisible -> eval -> eval -> create_matrix
# Execution halted
# sessionInfo()
# ?tm::weightTfIdf
# tm::weightTfIdf
# tm::weightTfIdf
# C:\Users\Rohit\Documents\R\win-library\3.1\RTextTools\R
# trace("create_matrix",edit=TRUE)