[R] multinomial classification. 다중분류

2017. 5. 19. 11:39

binary classification은 두 개로 분류하는 것이다.

이 binary classification을 하는 것을 여러개 붙이면 다중 분류도 가능하다.

즉, isAclass()로 A인지 아닌지, isBclass()로 B인지 아닌지, isCclass()로 C인지 아닌지를 판단하는 모듈들이 있으면, 이들을 조합하여 A, B, C 중 하나로 분류할 수 있다.

(x1, x2)의 특성을 갖는 데이터 X를 세 가지(A, B, C)로 분류한 학습 데이터가 있다고 하자. 새로운 X가 올 경우 학습 모델을 갖고 분류를 추정할 수 있다.

아래 그림에서 검은색은 훈련 데이터로, 이미 A, B, C 분류 결과가 주어져 있다. 이를 기반으로 학습하여 다중 분류 모델을 만들고, 실습 데이터(빨간색)를 분류하여 표시한 그래프이다.

원하는대로 적절하게 잘 분류하였다.

훈련 데이터는 (x1,x2) 좌표와 클래스 구분결과를 one hot인코딩한 데이터이다.

아래는 sigmoid만 사용한 방식.

# Multinomial classification demo: three classes (A, B, C), learned with one
# sigmoid output per class.
# Rough layout of the classes in the plot:
#   A = left side, B = bottom side, C = upper-right side.

# Toggle for the scatter plots drawn further down.
PLOTSHOW <- TRUE

# Training inputs, one sample per row: a constant 1 (bias term) followed by
# the two feature coordinates.
X <- matrix(
  c(1, 1, 1,   1, 1, 5,   1, 2, 6,   1, 2, 3,   1, 4, 6,
    1, 2, 1,   1, 3, 2,   1, 4, 2,   1, 6, 3,   1, 8, 1,
    1, 1, 10,  1, 4, 8,   1, 6, 6,   1, 7, 5,   1, 9, 3),
  ncol = 3, byrow = TRUE
)

# One-hot training labels: rows 1-5 are class A, 6-10 class B, 11-15 class C.
Y <- diag(3)[rep(1:3, each = 5), ]

# Initial weights to be learned; column 1 belongs to class A, column 2 to B,
# column 3 to C.
W <- matrix(c(1, 2, 3,  2, 3, 2,  3, 4, 1), nrow = 3)

# Draw the training samples, marking every point with its class letter.
if (PLOTSHOW) {
  # Empty 10 x 10 canvas; the points are added afterwards.
  plot(1:10, 1:10, type = "n")

  # One plotting symbol per sample, picked from the one-hot label columns.
  pchs <- logical(nrow(X))
  pchs[Y[, 1] == 1] <- "A"
  pchs[Y[, 2] == 1] <- "B"
  pchs[Y[, 3] == 1] <- "C"

  # Columns 2 and 3 of X hold the two feature coordinates.
  points(X[, 2], X[, 3], pch = pchs)
}

# Convert a score/probability matrix into one-hot rows by selecting the
# highest value in each row.
#
# T: numeric matrix, one sample per row and one class per column.
# Returns a matrix with the same dimensions as T holding a single 1 per row
# at the column of that row's maximum (first maximum on ties) and 0 elsewhere.
Onehot <- function(T) {
  OH <- matrix(0, nrow = nrow(T), ncol = ncol(T))
  ohw <- apply(T, 1, which.max)
  # Index by (row, column) pairs. The previous `for (i in seq(ohw))` expanded
  # to 1:ohw when T had a single row, writing past the end of OH.
  OH[cbind(seq_along(ohw), ohw)] <- 1
  OH
}

# Logistic (sigmoid) function applied element-wise to the linear scores:
#   G(X, W) = 1 / (1 + e^(-Z)),  Z = X %*% W
#
# X: input matrix, one sample per row.
# W: weight matrix, one column per output.
# Returns a matrix of the same shape as X %*% W.
G <- function(X, W) {
  scores <- X %*% W
  1 / (1 + exp(-scores))
}

# Mean logistic (binary cross-entropy) cost, summed over all output columns:
#   -1/m * sum( Y*log(s) + (1-Y)*log(1-s) ),  s = sigmoid(X %*% W)
#
# X: input matrix, one sample per row (m = nrow(X)).
# W: weight matrix, one column per output.
# Y: 0/1 target matrix with the same shape as X %*% W.
# Returns a single number.
#
# The logs are evaluated through softplus(z) = log(1 + e^z), using
#   log(s) = -softplus(-z)  and  log(1 - s) = -softplus(z).
# Taking log() of an already-rounded sigmoid returns NaN (0 * log(0)) as soon
# as the sigmoid saturates to exactly 0 or 1.
Cost <- function(X, W, Y) {
  m <- nrow(X)
  Z <- X %*% W
  # Overflow-free softplus: exp() only ever sees non-positive arguments.
  softplus <- function(z) pmax(z, 0) + log1p(exp(-abs(z)))
  sum(Y * softplus(-Z) + (1 - Y) * softplus(Z)) / m
}

# One gradient step for the logistic cost.
#
# X: input matrix, one sample per row (m = nrow(X)).
# W: current weight matrix, one column per output.
# Y: 0/1 target matrix with the same shape as X %*% W.
# alpha: learning rate.
# Returns the updated weight matrix W + alpha/m * t(X) %*% (Y - sigmoid(X %*% W)).
#
# This is algebraically the same as the earlier
#   ((Y-1)*e^z + Y) / (e^z + 1)
# expression, but that form evaluated exp(z) directly and turned into NaN
# (0 * Inf or Inf / Inf) once z was large enough for exp(z) to overflow.
Gradient <- function(X, W, Y, alpha) {
  m <- nrow(X)
  # Sigmoid written with exp(-z): saturates to 0 or 1 instead of producing NaN.
  P <- 1 / (1 + exp(-(X %*% W)))
  W + alpha / m * (t(X) %*% (Y - P))
}

# Cost with the initial weights, before any training step.
print(Cost(X, W, Y))

# learning: repeat the gradient step 600 times and report the cost together
# with the first weight column (class A) every 100 iterations.
alpha <- 0.1
for (i in 1:600) {
  W <- Gradient(X, W, Y, alpha)
  if (i %% 100 == 0) {
    print(paste("cnt=", i, " Cost=", Cost(X, W, Y), " W1(b)=", W[1, 1], " W2=", W[2, 1], " W3=", W[3, 1]))
  }
}
# The closing brace of the for loop above was missing, which left the loop
# (and therefore the whole script) unterminated.

# test: classify ten new points with the trained weights.
# Each row is a constant 1 (bias term) followed by the two coordinates.
xmat <- rbind(
  c(1, 1, 1), c(1, 2, 4), c(1, 4, 1), c(1, 9, 2), c(1, 6, 8),
  c(1, 3, 4), c(1, 8, 8), c(1, 6, 6), c(1, 2, 8), c(1, 9, 5)
)

# Per-class sigmoid outputs for every test point.
qy <- G(xmat, W)
print(xmat)
print(qy)

# Keep only the strongest class per point.
qy2 <- Onehot(qy)
print(qy2)

# Add the classified test points to the existing plot in red.
if (PLOTSHOW) {
  pchs <- logical(nrow(xmat))
  pchs[qy2[, 1] == 1] <- "A"
  pchs[qy2[, 2] == 1] <- "B"
  pchs[qy2[, 3] == 1] <- "C"
  points(xmat[, 2], xmat[, 3], pch = pchs, col = "red")
}

#dev.off()

softmax의 확률 함수와 cost를 계산하는 함수는 다음과 같다.

# softmax ; make probability ; $S(y_i) = \dfrac{e^{y_i}}{\sum_j e^{y_j}}$

# cross entropy cost function

# $D(S, L) = -\sum_i L_i \log S(y_i)$

softmax와 cross entropy로 학습한 방식

# Multinomial classification demo trained with softmax and cross entropy.
# Three classes are learned: A, B and C.
# Rough layout of the classes in the plot:
#   A = left side, B = bottom side, C = upper-right side.

# Toggle for the scatter plots drawn further down.
PLOTSHOW <- TRUE

# Training inputs assembled column by column: a constant 1 (bias term) and
# the two feature coordinates, 15 samples in total.
X <- unname(cbind(
  1,
  c(1, 1, 2, 2, 4,   2, 3, 4, 6, 8,   1, 4, 6, 7, 9),
  c(1, 5, 6, 3, 6,   1, 2, 2, 3, 1,   10, 8, 6, 5, 3)
))

# One-hot training labels: samples 1-5 are class A, 6-10 class B and
# 11-15 class C.
Y <- matrix(0, nrow = 15, ncol = 3)
Y[cbind(1:15, rep(1:3, each = 5))] <- 1

# Initial weights to be learned, written row by row; column 1 belongs to
# class A, column 2 to B, column 3 to C.
W <- rbind(c(1, 2, 3),
           c(2, 3, 4),
           c(3, 2, 1))

# Plot the labelled training samples, using the class letter as the symbol.
if (PLOTSHOW) {
  # Blank 10 x 10 frame to draw into.
  plot(1:10, 1:10, type = "n")

  # Fill in the letter for each sample from its one-hot label row.
  pchs <- logical(nrow(X))
  pchs <- replace(pchs, which(Y[, 1] == 1), "A")
  pchs <- replace(pchs, which(Y[, 2] == 1), "B")
  pchs <- replace(pchs, which(Y[, 3] == 1), "C")

  # Second and third columns of X are the plotted coordinates.
  points(X[, 2], X[, 3], pch = pchs)
}

# Softmax: turn linear scores into per-row probabilities.
#   S(y_i) = e^(y_i) / sum_j e^(y_j),  y = X %*% W
#
# X: input matrix, one sample per row.
# W: weight matrix, one column per class.
# Returns a matrix shaped like X %*% W whose rows each sum to 1.
Softmax <- function(X, W) {
  Z <- X %*% W
  # Subtracting each row's maximum leaves the softmax value unchanged but
  # keeps exp() from overflowing to Inf, which previously gave Inf / Inf = NaN.
  # The length-nrow vector recycles down the columns, i.e. once per row.
  E <- exp(Z - apply(Z, 1, max))
  E / rowSums(E)
}

# Pick the most probable class per row and encode it as a one-hot row.
#
# T: numeric matrix, one sample per row and one class per column.
# Returns a matrix with the same dimensions as T that is 1 at each row's
# maximum (first maximum on ties) and 0 everywhere else.
Onehot <- function(T) {
  OH <- matrix(0, nrow = nrow(T), ncol = ncol(T))
  ohw <- apply(T, 1, which.max)
  # (row, column) matrix indexing. The earlier `for (i in seq(ohw))` loop
  # iterated over 1:ohw for a one-row T and indexed rows that do not exist.
  OH[cbind(seq_along(ohw), ohw)] <- 1
  OH
}

# Cross-entropy cost for softmax outputs, averaged over the samples:
#   D(S, L) = -1/m * sum( L * log(S) ),  S = softmax(X %*% W)
#
# X: input matrix, one sample per row (m = nrow(X)).
# W: weight matrix, one column per class.
# Y: one-hot target matrix with the same shape as X %*% W.
# Returns a single number.
#
# log(S) is computed directly as z - max - log(sum(exp(z - max))) per row.
# Taking log() of an already-computed softmax returned NaN when exp()
# overflowed, or when a probability underflowed to 0 and met a 0 label
# (0 * log(0)).
Cost <- function(X, W, Y) {
  m <- nrow(X)
  Z <- X %*% W
  # Shift every row by its own maximum; the vector recycles once per row.
  Zs <- Z - apply(Z, 1, max)
  log_prob <- Zs - log(rowSums(exp(Zs)))
  -sum(Y * log_prob) / m
}

# One gradient-descent step for the softmax cross-entropy cost.
#
# X: input matrix, one sample per row.
# W: current weight matrix, one column per class.
# Y: one-hot target matrix.
# alpha: learning rate.
# Returns the updated weight matrix.
Gradient <- function(X, W, Y, alpha) {
  n_obs <- nrow(X)
  # Difference between predicted probabilities and the one-hot targets.
  residual <- Softmax(X, W) - Y
  step <- alpha / n_obs * (t(X) %*% residual)
  W - step
}

# Cost with the initial weights, before any training step.
print(Cost(X, W, Y))

# learning: repeat the gradient step 2000 times and report the cost together
# with the first weight column (class A) every 100 iterations.
alpha <- 0.1
for (i in 1:2000) {
  W <- Gradient(X, W, Y, alpha)
  if (i %% 100 == 0) {
    print(paste("cnt=", i, " Cost=", Cost(X, W, Y), " W1(b)=", W[1, 1], " W2=", W[2, 1], " W3=", W[3, 1]))
  }
}
# The closing brace of the for loop above was missing, which left the loop
# (and therefore the whole script) unterminated.

# test: run the trained softmax model on ten new points.
# Every row is a constant 1 (bias term) followed by the two coordinates.
xmat <- rbind(
  c(1, 1, 1), c(1, 2, 4), c(1, 4, 1), c(1, 9, 2), c(1, 6, 8),
  c(1, 3, 4), c(1, 8, 8), c(1, 6, 6), c(1, 2, 8), c(1, 9, 5)
)

# Class probabilities for each test point.
qy <- Softmax(xmat, W)
print(xmat)
print(qy)

# Reduce the probabilities to the single most likely class per point.
qy2 <- Onehot(qy)
print(qy2)

# Overlay the predicted classes on the existing plot in red.
if (PLOTSHOW) {
  pchs <- logical(nrow(xmat))
  pchs <- replace(pchs, which(qy2[, 1] == 1), "A")
  pchs <- replace(pchs, which(qy2[, 2] == 1), "B")
  pchs <- replace(pchs, which(qy2[, 3] == 1), "C")
  points(xmat[, 2], xmat[, 3], pch = pchs, col = "red")
}

#dev.off()

'AI(DeepLearning)' 카테고리의 다른 글

[tf] XOR tensorflow로 학습구현 (0)	2017.05.23
[tf] XOR manual solve (0)	2017.05.23
[R] binary classification (0)	2017.05.19
[R] linear regression Normal Equation (0)	2017.05.19
[R] linear regression (multi variable) 더하기 학습 (0)	2017.05.11

크레이지J의 탐구생활

[R] multinomial classification. 다중분류

'AI(DeepLearning)' 카테고리의 다른 글

+ Recent posts

티스토리툴바