# Data analysis & visualization
#
# Deep learning / R

#data generate
library(tensorflow)
library(keras)


# Holdout cross-validation: 70/30 train/test split -------------------------

# Synthetic data: one sorted normal column and one sorted uniform column,
# so the two variables are monotonically related.
set.seed(1)
data <- data.frame(norm = sort(rnorm(100)), uni = sort(runif(100)))

# Reproducible 70% sample of row indices for the training set.
set.seed(1)
idx <- sort(sample(seq_len(nrow(data)), 0.7 * nrow(data)))
trainData <- data[idx, ]
testData <- data[-idx, ]

# Name of the response column used throughout the script.
target <- 'uni'

# Standardization helpers (external; path is machine-specific).
source('/home/ducj/standard.R')

# Min-max normalization fitted on the TRAINING set only --------------------
train <- trainData

# Per-column min/max over the numeric columns.
# NOTE: these shadow base::min/base::max; the names are kept because later
# blocks index min[2]/max[2] directly.
min <- apply(train[, sapply(train, is.numeric)], 2, min)
max <- apply(train[, sapply(train, is.numeric)], 2, max)
xy <- names(which(sapply(train, is.numeric)))

# Scale each observation to [0, 1] column-wise; t() restores the
# row-per-observation orientation that apply(…, 1, …) transposes.
train_data <- t(apply(train[, xy], 1, function(x) (x - min) / (max - min)))
train_data <- as.matrix(train_data[, colnames(train_data) != target])

# NOTE(review): labels come from the RAW (unscaled) training frame while the
# predictors are normalized — confirm this asymmetry is intended.
train_label <- as.matrix(train[, target, drop = FALSE])

# Apply the TRAINING-set scaling parameters to the test set (no leakage) ---
test <- testData

test_data <- t(apply(test[, xy], 1, function(x) (x - min) / (max - min)))
test_data <- as.matrix(test_data[, colnames(test_data) != target])
test_label <- as.matrix(test[, target, drop = FALSE])

# Reshape to 3-D (samples, features, 1) — the layout the LSTM section needs;
# the DNN section flattens these back to 2-D before use.
train_data <- array(train_data, dim = c(nrow(train_data), ncol(train_data), 1))
train_label <- array(train_label, dim = c(nrow(train_data), 1))

test_data <- array(test_data, dim = c(nrow(test_data), ncol(test_data), 1))
test_label <- array(test_label, dim = c(nrow(test_data), 1))

# Custom loss: mean squared error averaged over axis 2, built from Keras
# backend ops so it can run inside the TensorFlow graph.
# (A quantile/pinball variant was prototyped here earlier:
#   q <- 0.5; k_mean(k_maximum(q * e, (q - 1) * e), axis = 2))
loss <- function(y, f) {
  residual <- y - f
  k_mean(residual * residual, axis = 2)
}


sess <- k_get_session()
ys <- k_constant(c(1,2,3,4), shape = c(2,2))
yhats <- k_constant(c(1,3,3,4), shape = c(2,2))
sess$run(loss( ys, yhats))

# DNN ----------------------------------------------------------------------
# Flatten the inputs back to 2-D (samples, features) for the dense network.
dim(train_data) <- c(nrow(train_data), ncol(train_data))
dim(test_data) <- c(nrow(test_data), ncol(test_data))

# One sigmoid hidden layer (3 units) feeding a single linear output unit.
model <- keras_model_sequential() %>%
  layer_dense(
    units = 3,
    input_shape = c(ncol(train_data)),
    activation = 'sigmoid'
  ) %>%
  # layer_activation_leaky_relu() %>%
  layer_dense(units = 1)

# Compile with Adam and the custom backend loss; track MAE for reporting.
model %>% compile(
  optimizer = "adam",
  loss = function(yt, yp) loss(yt, yp),
  metrics = "mae"
)

# history <-model %>% fit(train_data,train_label,epochs = 120,batch_size = 10)
# Long run (500 epochs, batch 5), then one extra large-batch epoch; the
# second fit continues training the SAME model and overwrites `history`.
history <-model %>% fit(train_data,train_label,epochs = 500,batch_size = 5,validation_data=list(test_data,test_label))
history <-model %>% fit(train_data,train_label,epochs = 1,batch_size = 70,validation_data=list(test_data,test_label))

# Predict on the held-out set and score with spTimer validation metrics.
# NOTE(review): zhat applies the FORWARD min-max transform to `pred`, yet
# z = test$uni is on the raw scale and train_label was never normalized —
# the inverse transform would be pred * (max[2] - min[2]) + min[2]; confirm.
pred=predict(model,test_data)
spTimer::spT.validation(z=test$uni,zhat=(pred-min[2])/(max[2]-min[2]))

# LSTM ---------------------------------------------------------------------
# Restore the 3-D layout (samples, timesteps, 1) for the recurrent layer.
dim(train_data) <- c(nrow(train_data), ncol(train_data), 1)
dim(test_data) <- c(nrow(test_data), ncol(test_data), 1)

# Small cuDNN-backed LSTM (3 units) feeding a single linear output unit.
model <- keras_model_sequential() %>%
  layer_cudnn_lstm(units = 3, input_shape = c(ncol(train_data), 1)) %>%
  layer_dense(units = 1)
# Compile the LSTM with the same Adam optimizer and custom loss as the DNN.
model %>% compile(
  optimizer = "adam",
  loss = function(yt, yp) loss(yt, yp),
  metrics = "mae"
)
# Previously tried optimizer/loss combinations:
# model%>%compile(optimizer=optimizer_rmsprop(),loss='mae')
# model%>%compile(optimizer=optimizer_adam(lr=0.003),loss='mae')
# Train the LSTM, keeping per-epoch validation metrics in `history`.
history=model %>% fit(train_data,train_label, epochs=500, batch_size=5,validation_data=list(test_data,test_label))

# Score the LSTM on the held-out set.
# NOTE(review): as in the DNN section, zhat re-applies the forward min-max
# transform instead of inverting it — confirm the intended scale.
pred2=predict(model,test_data)
spTimer::spT.validation(z=test$uni,zhat=(pred2-min[2])/(max[2]-min[2]))

# Overlay: actual uniform series (black), DNN (red, col 2), LSTM (green, col 3).
plot(test$uni,type='l',ylab='unif')
lines((pred-min[2])/(max[2]-min[2]),type='l',col='2')
lines((pred2-min[2])/(max[2]-min[2]),type='l',col='3')