# NOTE: this file is a web export; the lines below are repository metadata,
# not R code.
# Rcpp-based R package; originally a copy of another package (enercast).
# parent 09ed55ef
Package: bmf
Type: Package
Title: Binary Matrix Factorization Tools
Version: 0.1
Date: 2018-09-20
Authors@R: c(person('Ignacio Ramírez Paulino', role = c('aut', 'cre'), email = 'nacho@fing.edu.uy'))
Author: Ignacio Ramírez Paulino
Maintainer: Ignacio Ramírez Paulino <nacho@fing.edu.uy>
Imports: RColorBrewer, Rcpp, Rwave
Description: Binary Matrix Factorization via Dictionary Learning
Suggests:
License: GPL
LinkingTo: Rcpp
exportPattern("^[[:alpha:]]+")
useDynLib(bmf)
importFrom(Rcpp, sourceCpp)
# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# Auto-generated Rcpp wrappers (see the compileAttributes token above).
# NOTE(review): all three .Call symbols and the PACKAGE argument say
# 'enercast', while this package is named 'bmf' -- confirm these wrappers
# were intentionally copied and that the shared library name matches.

# Wavelet-based distance of vector `u`, computed in compiled C++.
DistWav <- function(u) {
.Call('enercast_DistWav', PACKAGE = 'enercast', u)
}
# Functional kernel prediction (compiled): predicts serie_y at serie_x0
# from training pairs (serie_x, serie_y) with bandwidth h and tolerance EPS.
prevkerfon <- function(serie_x, serie_y, serie_x0, h, EPS) {
.Call('enercast_prevkerfon', PACKAGE = 'enercast', serie_x, serie_y, serie_x0, h, EPS)
}
# Cross-validation for the functional kernel predictor (compiled).
CVkerfon <- function(serie_x, serie_y, p, r, h, EPS) {
.Call('enercast_CVkerfon', PACKAGE = 'enercast', serie_x, serie_y, p, r, h, EPS)
}
# Forecast accuracy metrics; each takes observed `y` and predictions `yhat`.

# Mean absolute percentage error (as a fraction, not multiplied by 100).
mape <- function(y, yhat) {
  abs_err <- abs(y - yhat)
  mean(abs_err / abs(y))
}

# Mean absolute error.
mae <- function(y, yhat) {
  mean(abs(y - yhat))
}

# Root mean squared error.
rmse <- function(y, yhat) {
  squared <- (y - yhat)^2
  sqrt(mean(squared))
}

# Mean absolute deviation relative to the prediction, in percent.
pdad <- function(y, yhat) {
  100 * mean(abs(y - yhat) / yhat)
}
#' Calibrate the EWA algorithm
#'
#' \code{ewa} aggregates expert predictions using exponential weights and a
#' learning rate parameter \eqn{\eta}.
#'
#' @param eta : numeric learning rate \eqn{\eta} for the exponential weights
#' @param y : numeric vector containing the observed time series
#' @param experts : matrix of time points x individual predictions (experts)
#' @param EPS : epsilon tolerance below which a weight is zeroed
#' @return A list containing: \code{w} the matrix of fitted weights and
#'   \code{eta} the value used for \eqn{\eta}.
ewa <- function(eta, y, experts, EPS = 1e-5){
  experts <- as.matrix(experts)
  p <- ncol(experts) # number of experts
  n <- nrow(experts) # number of time points
  if (length(y) != n) stop("Length of y must be equal to nrow(experts).")
  regmat <- rep(0, p) # cumulative regret of the mixture vs. each expert
  w <- matrix(0, ncol = p, nrow = n)
  w[1, ] <- rep(1/p, p) # start from uniform weights
  # Recursively predict and update weights from past losses
  for (time in 1:(n - 1)) {
    # mixture prediction at this time point
    pred_n <- experts[time, ] %*% w[time, ]
    loss_pred_n <- as.numeric(loss(pred_n, y[time]))
    loss_experts_n <- as.numeric(loss(experts[time, ], y[time]))
    # accumulate the regret and update the weights.
    # BUG FIX: exp(eta * regmat) can overflow/underflow for long series or
    # large eta; subtracting max(regmat) is mathematically neutral (the
    # shift cancels after normalization) and keeps the largest weight at 1.
    regmat <- regmat + (loss_pred_n - loss_experts_n)
    w[time + 1, ] <- exp(eta * (regmat - max(regmat)))
    w[time + 1, ] <- checkweights(w[time + 1, ], EPS) # normalize + eps-correction
  }
  return(list(w = w, eta = eta))
}
# Zero-out weights below EPS and renormalize so they sum to one.
# BUG FIX: if every weight falls below EPS the normalization divided by
# zero and returned NaN; fall back to uniform weights in that case.
checkweights <- function(w, EPS) {
  w[w < EPS] <- 0
  s <- sum(w)
  if (s == 0) return(rep(1 / length(w), length(w)))
  w / s
}
# Squared-error loss between a prediction x and an observation y (vectorized).
loss <- function(x, y) {
  diff_xy <- x - y
  diff_xy * diff_xy
}
# Hong's vanilla benchmark regression for electric load forecasting:
# load on a linear trend, calendar effects (wday, hour, month) and a cubic
# polynomial in temperature interacted with month and hour.
#
# BUG FIX: in R formula syntax `temp^2` is the crossing operator applied to
# a single term, which collapses back to `temp`, so the quadratic and cubic
# temperature terms were silently dropped; they must be protected with I()
# to be interpreted arithmetically.
hong <- function(data){
  trend <- seq_len(nrow(data))
  if (!is.character(data$hour)) data$hour <- as.character(data$hour)
  if (!is.character(data$month)) data$month <- as.character(data$month)
  hv <- lm(load ~ trend + wday * hour + month + month * temp +
             month * I(temp^2) + month * I(temp^3) + hour * temp +
             hour * I(temp^2) + hour * I(temp^3),
           data = data)
  # keep 'hv_class' at position 2: predict.hong tests
  # inherits(obj, 'hv_class', TRUE) == 2
  class(hv) <- c('lm', 'hv_class')
  return(hv)
}
## File : functional-clustering.r
## Description : Clustering of a functional dataset using wavelets.
## Reference : Clustering functional data using wavelets
## Antoniadis, A., Brossat, X., Cugliari, J. and Poggi, J.-M. (2013)
## Int. J. of Wavelets, Multiresolution and Information Processing,
## Function    : toDWT
## Description : Interpolates x onto a dyadic grid (2^J points) with a
##               natural spline, then transforms it with the Discrete
##               Wavelet Transform (DWT).
## Args        : x  Vector with a discretized time series
##               filter.number, family  Options passed to wd (see wavethresh)
## Returns     : Vector with the detail coefficients of the DWT
toDWT <- function(x, filter.number = 6, family = "DaubLeAsymm"){
  # requireNamespace() instead of require(): fail loudly if the dependency
  # is missing rather than silently continuing after a FALSE return value.
  if (!requireNamespace("wavethresh", quietly = TRUE))
    stop("Package 'wavethresh' is required for toDWT().")
  # pad/resample to the next power of two, as required by the DWT
  x2 <- spline(x, n = 2^ceiling(log(length(x), 2)),
               method = 'natural')$y
  wavethresh::wd(x2, family = family, filter.number = filter.number)$D
}
## Function    : contrib
## Description : Computes the absolute or relative contribution of each
##               scale of the DWT to the global energy of the vector.
## Args        : x      Vector with the details' coefficients of the DWT.
##               rel    Logical. Compute relative contributions?
##               logit  Logical. Apply the log(1/(1-r)) transform
##                      (defaults to the value of rel).
## Returns     : Vector with one (absolute or relative) contribution per
##               scale of the transform.
contrib <- function(x, rel = FALSE, logit = rel) {
  n_scales <- log(length(x) + 1, 2)
  out <- numeric(n_scales)
  lo <- 1
  hi <- 0
  for (scale in seq_len(n_scales)) {
    # scale j holds the next 2^(J - j) consecutive detail coefficients
    hi <- hi + 2^(n_scales - scale)
    out[scale] <- sqrt(sum(x[lo:hi]^2))
    lo <- hi + 1
  }
  if (rel) out <- out / sum(out)
  if (logit) out <- log(1 / (1 - out))
  out
}
## TEST
# Create artificial data (100 trajectories of a unscaled wiener process)
#mat <- matrix(rnorm(20 * 100), nrow = 20)
#mat <- t(apply(mat, 2, cumsum)) # observations are on rows
# Construct the features for the clustering
#mat_dwt <- t(apply(mat, 1, toDWT)) # DWT over the rows
#mat_contr_abs <- t(apply(mat_dwt, 1, contrib))
#mat_contr_rel <- t(apply(mat_dwt, 1, contrib, rel = TRUE))
# Clustering using Partitioning around mediods from library cluster.
# Any other clustering technique can be used since we have a data matrix
# with observations on the rows and variables on the columns.
#library(cluster) # we need the pam function
#mat_pam_abs <- pam(mat_contr_abs, k = 3)
#mat_pam_rel <- pam(mat_contr_rel, k = 3)
# (web-export artifact: "This diff is collapsed." marker commented out)
###
### Combine regular and seasonal (period s) ARMA polynomial coefficients.
### rv : coefficients of the regular polynomial (leading 1 omitted)
### Sv : coefficients of the seasonal polynomial (leading 1 omitted)
### s  : seasonal period
### Returns the coefficients of the product polynomial
### (1 + rv[1] x + ...) * (1 + Sv[1] x^s + ...), with the leading 1 dropped.
comb.coef<-function(rv,Sv,s){
rn<-length(rv);Sn<-length(Sv)
# degenerate case: the regular part is the single coefficient 0, so the
# product reduces to the seasonal polynomial spread at lags s, 2s, ...
if (rn==1&0%in%rv){
v<-rep(0,s*Sn);
for (i in 1:Sn) v[s*i]<-Sv[i]
return(v)
}
# general case: both parts non-trivial -> expand the product term by term.
# v[1] holds the 1*1 constant term and is dropped at the end.
# NOTE(review): cross terms are assigned, not accumulated, so this assumes
# the index j + s*(i-1) never repeats (i.e. rn < s) -- confirm.
if (rn>=1&Sn>=1&sum(rv!=0)!=0&sum(Sv!=0)!=0) {
v<-rep(0,(s*Sn+rn+1))
for (i in 1:(Sn+1)){
for (j in 1:(rn+1)){
v[j+s*(i-1)]<-c(1,rv)[j]*c(1,Sv)[i]
}
}
v<-v[-1]
return(v)
}
}
###
### Invert an MA polynomial so all of its roots lie outside the unit circle,
### making the MA part invertible. Mirrors the maInvert helper used inside
### stats::arima.
maInvert <- function(ma){
q <- length(ma)
# position of the last non-zero coefficient (0 when all are zero)
q0 <- max(which(c(1, ma) != 0)) - 1
if (!q0) return(ma)  # all-zero MA polynomial: nothing to invert
roots <- polyroot(c(1, ma[1:q0]))
ind <- Mod(roots) < 1  # roots strictly inside the unit circle
if (all(!ind)) return(ma)  # already invertible
if (q0 == 1) return(c(1/ma[1], rep(0, q - q0)))  # degree-1 shortcut
roots[ind] <- 1/roots[ind]  # reflect offending roots outside the circle
# rebuild the polynomial coefficients from the (reflected) roots
x <- 1
for (r in roots) x <- c(x, 0) - c(0, x)/r
c(Re(x[-1]), rep(0, q - q0))
}
###
### Theoretical ACF (lags 1..k) of a double-seasonal multiplicative ARMA
### model with seasonal periods s1 (daily) and s2 (weekly).
acft <- function(ar = NULL, AR1 = NULL, AR2 = NULL,
                 ma = NULL, MA1 = NULL, MA2 = NULL,
                 s1 = 24, s2 = 168, k = 500){
  # treat missing components as the zero polynomial
  if (is.null(ar))  ar  <- 0
  if (is.null(AR1)) AR1 <- 0
  if (is.null(AR2)) AR2 <- 0
  if (is.null(ma))  ma  <- 0
  if (is.null(MA1)) MA1 <- 0
  if (is.null(MA2)) MA2 <- 0
  # make every MA component invertible before expanding the products
  ma  <- maInvert(ma)
  MA1 <- maInvert(MA1)
  MA2 <- maInvert(MA2)
  # expand the multiplicative seasonal structure into plain AR/MA vectors
  maT <- ma
  arT <- ar
  if (any(MA1 != 0)) maT <- comb.coef(rv = ma,  Sv = MA1, s = s1)
  if (any(AR1 != 0)) arT <- comb.coef(rv = ar,  Sv = AR1, s = s1)
  if (any(MA2 != 0)) maT <- comb.coef(rv = maT, Sv = MA2, s = s2)
  if (any(AR2 != 0)) arT <- comb.coef(rv = arT, Sv = AR2, s = s2)
  ARMAacf(ar = arT, ma = maT, lag.max = k)[-1]
}
###
### Objective function for fitting a double-seasonal ARMA by ACF matching:
### squared distance between the empirical ACF of the (differenced) series
### and the theoretical ACF implied by the parameter vector `par`.
### `par` concatenates, in order: p regular AR, P1 and P2 seasonal AR,
### q regular MA, Q1 and Q2 seasonal MA coefficients.
invento2 <- function(par, yt, p = 0, d = 0, q = 0, P1 = 0, D1 = 0, Q1 = 0,
                     P2 = 0, D2 = 0, Q2 = 0, s1 = 24, s2 = 168, k = 200){
  # slice m coefficients from `par` starting after `pos` (NULL when m == 0)
  grab <- function(m, pos) if (m > 0) par[pos + seq_len(m)] else NULL
  pos <- 0
  ar  <- grab(p,  pos); pos <- pos + p
  AR1 <- grab(P1, pos); pos <- pos + P1
  AR2 <- grab(P2, pos); pos <- pos + P2
  ma  <- grab(q,  pos); pos <- pos + q
  MA1 <- grab(Q1, pos); pos <- pos + Q1
  MA2 <- grab(Q2, pos)
  # apply regular and both seasonal differencing steps before the ACF
  ytc <- yt
  if (d > 0)  ytc <- diff(ytc, differences = d)
  if (D1 > 0) ytc <- diff(ytc, differences = D1, lag = s1)
  if (D2 > 0) ytc <- diff(ytc, differences = D2, lag = s2)
  acf_emp <- acf(ytc, lag.max = k, plot = FALSE)$acf[-1]
  acf_teo <- acft(ar = ar, AR1 = AR1, AR2 = AR2, ma = ma,
                  MA1 = MA1, MA2 = MA2, s1 = s1, s2 = s2, k = k)
  sum((acf_teo - acf_emp)^2)
}
# Kernel weights for functional prediction: compares each historical
# segment (column of X) with the most recent one (last column) using the
# wavelet distance DistWav, and converts the distances into normalized
# Gaussian kernel weights.
# Args : X   matrix of discretized segments, one segment per column
#        gr  optional group label per column; only columns sharing the
#            last column's group receive a positive weight
#        h   bandwidth (scalar, or one value per group)
# Returns a weight vector of length ncol(X) - 1.
wgt <- function (X, gr = NULL, h){
nr <- nrow(X)
nc <- ncol(X)
# Check groups
if( is.null(gr) ) gr <- rep(1,nc)
if( length(gr) != nc ) stop("Check dimensions: wrong length of gr")
# columns (excluding the last) in the same group as the last column
grE <- which( gr[-nc] == gr[nc] )
# differences to the last segment, then wavelet distance per column
w <- apply( X[,grE ], 2, "-", X[,nc])
w <- apply( w, 2, DistWav)
# Check h for multi-dimensional optiminization
if( length(h) > 1 ) h <- h[gr[nc]]
# Normalization
w <- exp ( - 0.5*( w/h )^2 )
sumw <- sum(w)
# scatter group weights back into a full-length (nc - 1) vector
wc <- numeric(nc-1)
wc[grE] <- w
if( sumw == 0) { # If no similarity is founded
# NOTE(review): this fallback spreads weight uniformly over ALL nc-1
# columns, not only over the group members in grE -- confirm intended.
w <- rep( 1 / (nc - 1), nc-1 )
} else {
w <- wc/sumw
}
w
}
# Build derived regressors for the random-forest load models.
# Args : data  data frame with (at least) columns load and temp
#        type  which variable set to build (see the switch below)
#        t     number of lags / window length
# Returns a matrix of lagged, polynomial, or deviation features.
newvars.rf <- function(data,type,t,...){
# lagged copies of `vec`: column j holds vec shifted by j positions
# (with j-1 leading NAs); indices built via a cumulative-sum trick
xrezaga <- function(vec,t,nom='xtemp'){
n <- length(vec) ; mtz <- matrix(0,n,t) ; diag(mtz) <- 1
AUX <- apply(apply(mtz,2,cumsum),2,cumsum)
M <- matrix(NA,nrow(AUX),ncol(AUX)); colnames(M) <- c(nom,paste(nom,seq(t-1),sep='.'))
for (j in 1:ncol(mtz))
M[,j] <- c(rep(NA,(j-1)),vec[AUX[,j]])
return(M[,-1])
}
# square and cube of the input (temperature polynomial features)
xsqcb <- function(vec){
return(cbind(temp2=vec^2,temp3=vec^3))
}
# "deviation" features relative to the current observation
xdesv <- function(vec,t,nom='dvtemp'){
n <- length(vec) ; mtz <- matrix(0,n,t) ; diag(mtz) <- 1
AUX <- apply(apply(mtz,2,cumsum),2,cumsum)
AUX <- AUX[-seq(t-1), -1]
M <- matrix(NA,nrow(AUX),ncol(AUX))
M.upper <- matrix(NA,(t-1),ncol(AUX))
for (j in 1:ncol(AUX)){
z1 <- AUX[ ,1] ; zj <- AUX[ ,j]
# NOTE(review): this subtracts the INDEX vector z1 from the averaged
# VALUES; possibly intended to be vec[z1] -- confirm against reference.
M[ ,j] <- abs(((vec[z1]+vec[zj])/2)-z1) }
M <- rbind(M.upper,M); colnames(M) <- paste(nom,seq(t-1),sep='.')
return(M)
}
switch(type,
load.lag = xrezaga(data$load,t=t,nom='xload'),
temp.lag = xrezaga(data$temp,t=t,nom='xtemp'),
temp.sqcb = xsqcb(data$temp),
temp.desv = xdesv(data$temp,t=t),
load.desv = xdesv(data$load,t=t,nom='dvload'))
}
# Plot the whole load series; when `dataux` is supplied (its first column is
# taken as the year), the x axis shows year labels centered on each year
# with ticks at the year boundaries.
plot.load.global <- function(yt,dataux=NULL,...){
  if (is.null(dataux)) {
    plot(yt, type = 'l', ylab = 'Load', xlab = 'Time', ...)
  } else {
    names(dataux)[1] <- 'year'
    obs_per_year <- by(dataux, dataux$year, nrow)
    midpoints <- cumsum(obs_per_year) - (obs_per_year / 2)
    plot(yt, type = 'l', ylab = 'Load', xlab = 'Time', axes = FALSE, ...)
    box()
    axis(1, at = midpoints, labels = names(obs_per_year), tick = FALSE)
    axis(1, at = c(0, cumsum(obs_per_year)), labels = FALSE, tick = TRUE)
    axis(2)
  }
}
# Plot the mean daily load profile for each weekday; rows flagged as
# 'special' in data$sdays are excluded from the averages.
plot.load.wday <- function(data,...){
auxwday <- factor(data$wday,levels=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun'),labels=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun'),ordered=TRUE)
auxsday <- factor(data$sdays,labels=c('regular','special'),ordered=TRUE)
days <- unique(auxwday)
mean.d <- NULL
# one row per weekday: mean load by hour, over regular days only
for (d in days){
DAT <- subset(data[(auxsday=='regular' & auxwday==d),])
mean.d <- rbind(mean.d,as.vector(by(DAT$load,DAT$hour,mean, na.rm=TRUE)))
}
row.names(mean.d) <- days
# empty canvas sized to the daily profiles, then one colored curve per day
plot(data$load,type='n',xlim=c(1,ncol(mean.d)),ylim=c(min(mean.d),max(mean.d)),ylab='Load',xlab='Time',main='Mean Daily Load')
# evenly spaced x positions for the weekday labels above the plot
aux <- seq(1,ncol(mean.d),by=ncol(mean.d)/length(days))
for (d in 1:length(days)){
lines(mean.d[d,],col=d,lwd=2)
mtext(row.names(mean.d)[d],col=d,cex=0.7, side=3, line=0.5, at=aux[d])
}
}
# Plot the mean daily load profile for each month; rows flagged as
# 'special' in data$sdays are excluded from the averages.
plot.load.month <- function(data,...){
  auxmonth <- factor(data$month,labels=c('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'),ordered=TRUE)
  auxsday <- factor(data$sdays,labels=c('regular','special'),ordered=TRUE)
  months <- unique(auxmonth)
  mean.m <- NULL
  # one row per month: mean load by hour, over regular days only
  for (m in months){
    DAT <- subset(data[(auxsday=='regular' & auxmonth==m),])
    mean.m <- rbind(mean.m,as.vector(by(DAT$load,DAT$hour,mean, na.rm=TRUE)))
  }
  row.names(mean.m) <- months
  # empty canvas sized to the monthly profiles, then one curve per month
  plot(data$load,type='n',xlim=c(1,ncol(mean.m)),ylim=c(min(mean.m),max(mean.m)),ylab='Load',xlab='Time',main='Mean Monthly Load')
  # x positions for the month labels. BUG FIX: a first, y-based assignment
  # to `aux` was dead code (immediately overwritten) and has been removed.
  aux <- seq(1,ncol(mean.m),by=ncol(mean.m)/length(months))
  for (m in 1:length(months)){
    lines(mean.m[m,],col=m,lwd=2)
    mtext(row.names(mean.m)[m],col=m,cex=0.7, side=3, line=0.5, at=aux[m])
  }
}
# ACF diagnostics for an hourly load series: the raw series, its first
# difference, and the daily-differenced first difference, over `nw` weeks
# of lags. Daily (s1) and weekly (s2) periods are inferred from `data`
# when it is given, otherwise 24 and 168 are assumed.
plot.load.corr <- function(yt,data=NULL,nw=3,...){
  if (is.null(data)) {
    s1 <- 24
    s2 <- 168
  } else {
    frec <- apply(data[,c('wday','hour')],2,function(x)length(unique(x)))
    s1 <- frec[names(frec)=='hour']
    s2 <- prod(frec)
  }
  lag.max <- s2*nw
  # BUG FIX: the week labels hard-coded seq(3) instead of seq(nw), so the
  # axis was mislabeled whenever nw != 3; lag.max was computed but unused.
  week_labels <- c(NA, paste('week', seq(nw)))
  week_at <- seq(0, by = s2, length = nw + 1)
  par(las=1)
  layout(1:3)
  # ACF of the raw series
  acf(yt, lag.max = lag.max, axes = FALSE); box()
  axis(2, seq(-1, 1, 0.2))
  axis(1, week_at, labels = week_labels)
  # ACF of the first difference
  acf(diff(yt, 1), lag.max = lag.max, axes = FALSE); box()
  axis(2, seq(-1, 1, 0.2))
  axis(1, week_at, labels = week_labels)
  # ACF of the daily-differenced first difference
  acf(diff(diff(yt), s1), lag.max = lag.max, axes = FALSE); box()
  axis(2, seq(-1, 1, 0.2))
  axis(1, week_at, labels = week_labels)
}
# Hexbin plot of load against temperature, optionally conditioned on
# `group`. Relies on brewer.pal (RColorBrewer) and hexbinplot (hexbin)
# being available in the search path.
plot.temp <- function(data,group=NULL,...){
  if (is.null(group)) {
    form <- data$load ~ data$temp
  } else {
    form <- data$load ~ data$temp | group
  }
  palette_fun <- colorRampPalette(rev(brewer.pal(11, 'Spectral')))
  hexbinplot(form, colramp = palette_fun, xlab = "temperature",
             ylab = "load", colorkey = FALSE, aspect = 1)
}
# One-step prediction for a single hour-specific state-space model.
# Re-runs the Kalman filter (FKF::fkf) at the fitted parameters, takes the
# last filtered state and projects it onto the prediction regressors.
# Args : ssm.obj.i  list with yt, xa.pred, xf.pred, optim$par and fk
#                   (the argument list for fkf)
# Returns list(predt = point predictions, covt = prediction covariance).
pred.ssm <- function(ssm.obj.i){
  yt <- ssm.obj.i$yt
  xa.pred <- ssm.obj.i$xa.pred
  xf.pred <- ssm.obj.i$xf.pred
  tita <- ssm.obj.i$optim$par
  # NOTE(review): ct is compared against scalar 0; assumes ct is 1x1 when
  # no fixed regressors are present -- confirm upstream construction.
  if (ssm.obj.i$fk$ct!=0){
    # split parameters into fixed-regressor coefficients and variances
    tita.xf <- tita[seq(ncol(ssm.obj.i$fk$ct))]
    tita.xa <- tita[-seq(ncol(ssm.obj.i$fk$ct))]
    ssm.obj.i$fk$ct <- t(ssm.obj.i$fk$ct%*%tita.xf)
  } else {
    tita.xa <- tita
    tita.xf <- NULL
  }
  # variances are optimized on the log scale; the last one is the
  # observation noise, the rest form the diagonal state covariance
  aux <- exp(tita.xa[1:(length(tita.xa)-1)])
  hht <- diag(length(aux))*aux
  ggt <- matrix(exp(tita.xa[length(tita.xa)]),1,1)
  fk1 <- fkf(a0=ssm.obj.i$fk$a0,P0=ssm.obj.i$fk$P0,dt=ssm.obj.i$fk$dt,ct=ssm.obj.i$fk$ct,Tt=ssm.obj.i$fk$Tt,Zt=ssm.obj.i$fk$Zt,yt=yt,HHt=hht,GGt=ggt)
  att <- fk1$att[,ncol(fk1$att)]
  Ptt <- fk1$Ptt[,,ncol(fk1$att)]
  # BUG FIX: class(x) != 'matrix' breaks in R >= 4.0, where matrices have
  # class c("matrix", "array") (length-2 condition); use is.matrix().
  if(!is.matrix(xa.pred)) xa.pred <- matrix(xa.pred,ncol=length(att),byrow=TRUE)
  pred <- xa.pred%*%att
  predcov <- xa.pred%*%Ptt%*%t(xa.pred)+ggt[1,1]
  if(!is.null(xf.pred)){
    if(!is.matrix(xf.pred)) xf.pred <- as.matrix(xf.pred)
    pred <- pred+xf.pred%*%tita.xf
  }
  return(list(predt=pred,covt=predcov))
}
# Predict method for Hong's vanilla model (objects tagged 'hv_class').
# Validates the class tag (expected at position 2, after 'lm'), then
# delegates to predict.lm.
predict.hong <- function(obj.hv,new.data=NULL){
  if (inherits(obj.hv,'hv_class',TRUE)!=2)
    stop('Object is not of hv_class')
  # BUG FIX: the original body ended on an assignment, so the prediction
  # was returned invisibly; return it explicitly.
  predict(object=obj.hv,newdata=new.data)
}
# Predict from a list of per-group models (e.g. one random forest per hour).
# With no newdata, each model predicts on its own training data; otherwise
# every row of newdata is routed to the model whose name matches that row's
# `for.each` column value.
predict.rf <- function(listRF, newdata=NULL, for.each='hour'){
  if (is.null(newdata)) {
    return(lapply(listRF, predict))
  }
  k <- which(names(newdata) %in% for.each)
  # preallocate instead of growing PRED.RF with c() inside the loop
  preds <- vector('list', nrow(newdata))
  for (i in seq_len(nrow(newdata))) {
    wrf <- which(names(listRF) %in% newdata[i, k])
    preds[[i]] <- predict(listRF[[wrf]], newdata[i, ])
  }
  PRED.RF <- do.call(c, preds)
  return(PRED.RF)
}
# DEPENDENCY: Julien Mairal SPAMS for R
library("spams")
# Predict method for sparse dictionary-learning models ('sparse_class').
# Encodes each "day" of new.data on the today-dictionary with a lasso
# (spams.lasso), then reconstructs tomorrow's profile from the
# tomorrow-dictionary.
# Args : sp        fitted object with Dtoday, Dtomorrow and lambda
#        new.data  n x m matrix; n must be a multiple of the period T = 24
# Returns an M x N matrix (M = 24*m, N = n/24) of reconstructed profiles.
predict.sparse <- function(sp,new.data){
if(inherits(sp,'sparse_class',TRUE) != 1)
stop('Object is not of sparse_class')
m = dim(new.data)[2]
n = dim(new.data)[1]
#
# construct "single day samples" as the concatenation of the m daily variables
T = 24 # period, could be a parameter in the future
N <- n / T
M = T*m
#
# normalize this variable to be N(0,Identity)
#
# NOTE(review): the centering/scaling normalization is commented out below,
# so despite the comment above no normalization is applied -- confirm.
X <- matrix(nrow=M,ncol=N)
for (i in 1:m) {
Xi <- new.data[,i]
# dim(Xi) <- c(T,N)
# for (j in 1:N) {
# Xi[,j] <- solve(sp$scaling[[i]],Xi[,j] - sp$centering[[i]])
# }
dim(Xi) <- c(T,N)
X[((i-1)*T+1):(i*T),] <- Xi
}
# sparse coding: approximate columns of X as Dtoday %*% A with an L1 penalty
A <- spams.lasso(X,sp$Dtoday,lambda1=sp$lambda,mode='PENALTY')
X1 <- sp$Dtomorrow %*% A
# reassemble the per-variable blocks (de-normalization left commented out)
for (j in 1:N) {
Xj <- X1[,j]
for (i in 1:m) {
Xji <- Xj[((i-1)*T+1):(i*T)]
X1[((i-1)*T+1):(i*T),j] <- Xji
#X1[((i-1)*T+1):(i*T),j] <- sp$scaling[[i]] %*% Xji + sp$centering[[i]]
}
}
return(X1)
}
# Predict method for the collection of hour-by-hour state-space models:
# builds the prediction design matrices for each hour from `newdata`,
# attaches them to the matching per-hour fitted model, runs pred.ssm on
# each, and reassembles predictions in newdata's original row order.
# Args : ssm.obj   named list of per-hour fitted models plus a `formulas`
#                  element (Xa = state regressors, Xf = fixed regressors)
#        newdata   data frame of regressors including the `for.each` column
#        for.each  name of the grouping column (default 'hour')
# Returns list(pred = point predictions, se.pred = standard errors).
predict.ssm <- function(ssm.obj,newdata,for.each='hour'){
# NOTE(review): h is computed but never used.
h <- length(ssm.obj)-1
auxH <- newdata[,for.each]
newdata <- newdata[,-which(colnames(newdata)%in%for.each)]
# split newdata by hour and build the model matrices for each group
list.dat <- by(newdata,auxH,subset)
MF <- lapply(list.dat, FUN=model.frame, formula=ssm.obj$formulas$Xa)
XA <- lapply(MF, FUN=model.matrix, object=ssm.obj$formulas$Xa)
ct <- matrix(0,1,1)
if (!is.null(ssm.obj$formulas$Xf)){
XF <- lapply(MF, FUN=model.matrix, object=ssm.obj$formulas$Xf)
ct <- lapply(XF, as.matrix)}
k <- length(unique(auxH))
# attach the per-hour design matrices to the matching fitted models
for (i in 1:k){
cual <- names(ssm.obj)%in%names(XA)[i]
cual <- which(cual)
ssm.obj[[cual]]$xa.pred <- XA[[i]]
if (!is.null(ssm.obj$formulas$Xf)){
ssm.obj[[cual]]$xf.pred <- ct[[i]]}
}
cuales <- names(ssm.obj)%in%names(XA)
PRED <- lapply(ssm.obj[cuales],FUN=pred.ssm)
# scatter the grouped predictions back into the original row order
pred <- vector('numeric',length(auxH))
se.pred <- vector('numeric',length(auxH))
aux2 <- unique(auxH)
for (i in 1:length(aux2)){
aux3 <- PRED[[aux2[i]]]
pred[auxH==aux2[i]] <- aux3$predt
se.pred[auxH==aux2[i]] <- sqrt(diag(aux3$covt))}
return(list(pred=pred,se.pred=se.pred))
}
# Forecast `steps` hours ahead from a fitted double-seasonal ARIMA model.
# `mod` is a list with: data (the series), ar/ma (lists with reg, est1,
# est2 coefficient vectors), d (regular/daily/weekly differencing orders)
# and frec (the two seasonal frequencies).
predict.tsb <- function(mod,steps=24){
  dem <- mod$data
  n <- length(dem)
  # expand the multiplicative (regular x daily x weekly) polynomials
  tita <- comb.coef(comb.coef(mod$ma$reg,mod$ma$est1,24),mod$ma$est2,168)
  fi <- comb.coef(mod$ar$reg,mod$ar$est1,24)
  # build the full differencing polynomial by convolving the factors
  delta <- 1
  if(mod$d[1]>0) for(i in 1:mod$d[1]) delta <- convolve(delta,c(-1,1),type='open')
  if(mod$d[2]>0) for(i in 1:mod$d[2]) delta <- convolve(delta,c(-1,rep(0,mod$frec[1]-1),1),type='open')
  if(mod$d[3]>0) for(i in 1:mod$d[3]) delta <- convolve(delta,c(-1,rep(0,mod$frec[2]-1),1),type='open')
  delta[abs(delta)<1e-10] <- 0  # clean convolution round-off
  delta <- delta[-1]
  if(is.null(fi)) fi <- vector('numeric')
  if(is.null(tita)) tita <- vector('numeric')
  # NOTE(review): makeARIMA is an unexported stats internal and normally
  # requires stats:::makeARIMA -- confirm how it is resolved at runtime.
  modelo <- makeARIMA(phi=fi,Delta=-delta,theta=tita)
  # BUG FIX: guard the trailing 1000-observation window so that series
  # shorter than 1001 points do not produce non-positive indices.
  res <- KalmanRun(dem[max(1, n - 1000):n],mod=modelo)
  modelo$a <- res$states[nrow(res$states),]  # seed forecast with last state
  pred <- KalmanForecast(steps,modelo)
  return(pred$pred)
}
predict.wavkerfun <- function(object, ..., newdata=NULL, sd )
{ if ( (!is.null(class(object))) && (class((object)) != "wavkerfun") )
stop("object is not of class wavkerfun")
if ( !is.null(newdata) )
stop("newdata is not used for prediction")