patternpythonMinor
Performing machine learning
Viewed 0 times
machineperforminglearning
Problem
I've written the code below to do some work on machine learning in R. I'm not overly happy with some bits of it, and I suspect I could improve it quite a bit. Bits I'm specifically interested in looking at are how to deal with appending to vectors in the loop, and whether I can combine all of the functions
```
library(class)
library(nnet)
library(epibasix)
library(CVThresh)
library(e1071)
# Read the raw table and derive a single numeric label column from the ten
# indicator columns V257..V266 (V257 flags digit 0, ..., V266 flags digit 9).
df <- read.table("semeion.data.data")
# 42 is a sentinel that remains on rows where no indicator column is set.
df$digit <- rep(42, nrow(df))
# Assign from 9 down to 0 so that the lowest flagged digit wins if a row has
# more than one indicator set.
for (digit in 9:0) {
  flagged <- which(as.logical(df[[paste0("V", 257 + digit)]]))
  df$digit[flagged] <- digit
}
# Keep the first 256 columns (the features, presumably pixel values -- TODO
# confirm) plus the derived label, which is column 267.
data <- df[, c(1:256, 267)]
# Cross-tabulate x against y on a shared set of levels.
#
# Both inputs are converted to factors over the sorted union of their
# observed levels, so the resulting contingency table always has the same
# categories on its rows (x) and its columns (y).
squareTable <- function(x, y) {
  x_levels <- levels(factor(x))
  y_levels <- levels(factor(y))
  shared_levels <- sort(unique(c(x_levels, y_levels)))
  table(
    x = factor(x, levels = shared_levels),
    y = factor(y, levels = shared_levels)
  )
}
# k-fold cross-validation of a k-nearest-neighbour classifier.
#
# data:    data frame whose columns 1..256 are the features and whose column
#          257 is the class label.
# k:       number of folds.
# nearest: number of neighbours handed to knn().
#
# Returns the mean of the per-fold kappa values reported by epiKappa().
kf.knn <- function(data, k, nearest)
{
  feature.cols <- 1:256
  label.col <- 257

  N <- nrow(data)
  # Shuffle the rows so the fold index sets below select random rows.
  data <- data[sample(seq_len(N)), ]
  # NOTE(review): assumes row i of cv.index holds the row numbers of fold i,
  # which is how it is used below -- confirm against the CVThresh docs.
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index

  total <- 0
  for (i in seq_len(k))
  {
    # Fold i is the test set; every other fold is used for training.
    test.rows <- as.vector(folds.index[i, ])
    train.rows <- as.vector(folds.index[-i, ])

    test <- data[test.rows, feature.cols]
    test.labels <- data[test.rows, label.col]
    train <- data[train.rows, feature.cols]
    train.labels <- data[train.rows, label.col]

    knnpredict <- knn(train, test, train.labels, nearest)
    # squareTable() puts actual and predicted labels on the same level set, so
    # the confusion matrix stays square even when a class is missing from
    # this fold's test labels or from its predictions.
    confusion <- squareTable(test.labels, knnpredict)
    total <- total + epiKappa(confusion)$kappa
  }
  total / k
}
kf.nnet <- function(data, k, hidden)
{
N <- nrow(data)
kf.knn, kf.svm, etc. into one function with various arguments.
```
library(class)
library(nnet)
library(epibasix)
library(CVThresh)
library(e1071)
# Read the raw table and derive a single numeric label column from the ten
# indicator columns V257..V266 (V257 flags digit 0, ..., V266 flags digit 9).
df <- read.table("semeion.data.data")
# 42 is a sentinel that remains on rows where no indicator column is set.
df$digit <- rep(42, nrow(df))
# Assign from 9 down to 0 so that the lowest flagged digit wins if a row has
# more than one indicator set.
for (digit in 9:0) {
  flagged <- which(as.logical(df[[paste0("V", 257 + digit)]]))
  df$digit[flagged] <- digit
}
# Keep the first 256 columns (the features, presumably pixel values -- TODO
# confirm) plus the derived label, which is column 267.
data <- df[, c(1:256, 267)]
# Cross-tabulate x against y on a shared set of levels.
#
# Both inputs are converted to factors over the sorted union of their
# observed levels, so the resulting contingency table always has the same
# categories on its rows (x) and its columns (y).
squareTable <- function(x, y) {
  x_levels <- levels(factor(x))
  y_levels <- levels(factor(y))
  shared_levels <- sort(unique(c(x_levels, y_levels)))
  table(
    x = factor(x, levels = shared_levels),
    y = factor(y, levels = shared_levels)
  )
}
# k-fold cross-validation of a k-nearest-neighbour classifier.
#
# data:    data frame whose columns 1..256 are the features and whose column
#          257 is the class label.
# k:       number of folds.
# nearest: number of neighbours handed to knn().
#
# Returns the mean of the per-fold kappa values reported by epiKappa().
kf.knn <- function(data, k, nearest)
{
  feature.cols <- 1:256
  label.col <- 257

  N <- nrow(data)
  # Shuffle the rows so the fold index sets below select random rows.
  data <- data[sample(seq_len(N)), ]
  # NOTE(review): assumes row i of cv.index holds the row numbers of fold i,
  # which is how it is used below -- confirm against the CVThresh docs.
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index

  total <- 0
  for (i in seq_len(k))
  {
    # Fold i is the test set; every other fold is used for training.
    test.rows <- as.vector(folds.index[i, ])
    train.rows <- as.vector(folds.index[-i, ])

    test <- data[test.rows, feature.cols]
    test.labels <- data[test.rows, label.col]
    train <- data[train.rows, feature.cols]
    train.labels <- data[train.rows, label.col]

    knnpredict <- knn(train, test, train.labels, nearest)
    # squareTable() puts actual and predicted labels on the same level set, so
    # the confusion matrix stays square even when a class is missing from
    # this fold's test labels or from its predictions.
    confusion <- squareTable(test.labels, knnpredict)
    total <- total + epiKappa(confusion)$kappa
  }
  total / k
}
kf.nnet <- function(data, k, hidden)
{
N <- nrow(data)
Solution
You can avoid appending to vectors (which can cause re-allocation of space and can considerably slow things down in principle; though in your case of only a length 10 vector that shouldn't be noticeable) if you allocate them to the needed size initially and then assign within them.
I'm not clear what you are asking about combining functions; you can add an additional argument, a character vector, and then have the code run through a series of if/else if checks on that parameter to see which algorithm to use, if that is what you mean.
# Both vectors are created at their final length up front, so the loop only
# assigns into existing slots instead of growing them. Slot 1 is never
# written (the loop starts at 2) and keeps its initial value of 0.
result <- numeric(10)
# equivalent long form: result <- vector("numeric", 10)
n <- numeric(10)
for (i in 2:10) {
  avg <- kf.nnet(data, 5, i)
  n[i] <- i
  result[i] <- avg
  cat("!!!!!!!", i, avg, "\n")
}
I'm not clear what you are asking about combing function; you can add an additional argument which is a character vector which then the code just has a series of if/elseif's checking that parameter to see which algorithm to use, if that is what you mean.
Code Snippets
# Both vectors are created at their final length up front, so the loop only
# assigns into existing slots instead of growing them. Slot 1 is never
# written (the loop starts at 2) and keeps its initial value of 0.
result <- numeric(10)
# equivalent long form: result <- vector("numeric", 10)
n <- numeric(10)
for (i in 2:10) {
  avg <- kf.nnet(data, 5, i)
  n[i] <- i
  result[i] <- avg
  cat("!!!!!!!", i, avg, "\n")
}
Context
StackExchange Code Review Q#1681, answer score: 5
Revisions (0)
No revisions yet.