########## R script: curves with random intercepts and slopes
#
## Last modified December 1, 2003 by John Staudenmayer
## email: jstauden@math.umass.edu
##

#######
# Simulate some data
#######

#true function
f1 <- function(x)
  return(dnorm(x,1,.4)-dnorm(x,3,.25))
                               
library("nlme")

# Set parameters
clusters <- 20 # number of "clusters"

# observations per "clustger"
ms <- 8 + round(6*runif(clusters)-3)
# ms vary by cluster
sig <- .1 # std dev of e_ij's
# cov mat of random intercepts / slopes
D <- rbind(cbind(2,-1),c(-1,1)) 

unique.b0 <- rnorm(clusters) # random intercepts
unique.b0 <- unique.b0 - mean(unique.b0)
unique.b1 <- rnorm(clusters) # random slopes
unique.b1 <- unique.b1 - mean(unique.b1)

# correct the covariance
temp <- t(chol(D))%*%rbind(unique.b0,unique.b1)
unique.b0 <- temp[1,]
unique.b1 <- temp[2,]

possible.xs <- seq(from=0.1,to=4,length=max(ms))

x <- rep(-999,length=sum(ms))
b <- rep(-999,length=sum(ms)) 

l.ind <- 0
for (i in 1:clusters)
  {
    s.ind <- 1+l.ind
    l.ind <- s.ind + ms[i] - 1
    # fill in each cluster's xs with
    # a sample from the possible xs
    x[s.ind:l.ind] <-
      sort(sample(possible.xs,ms[i],replace=FALSE))
    b[s.ind:l.ind] <-
      unique.b0[i]+x[s.ind:l.ind]*unique.b1[i]
  }

#jitter the xs a bit
x <- x + rnorm(length(x))*.05

f1s <- f1(x)
       
eps <- rnorm(sum(ms))*sig
y <- f1s + b + eps
# note that b includes a random
# intercept and a random slope

n <- length(y)
num.knots <- min(20,floor(n/3))

#### Fitting ####

# groups.1 defines the "curves" group
# for the u's in the spline
groups.1 <- rep(1,sum(ms))
inds <- 1:length(groups.1)

# groups.2 defines the "cluster" group
# (each cluster has a random intercept
#  and a random slope)
groups.2 <- rep(-999,length=sum(ms))

l.ind <- 0
for (i in 1:clusters)
  {
    s.ind <- 1+l.ind
    l.ind <- s.ind + ms[i] - 1
    groups.2[s.ind:l.ind] <- rep(i,length=ms[i])
  }

# Set up design matrices and random effects
# block structure
knots.1 <-
  quantile(unique(x),
           seq(0,1,
               length=
               (num.knots+2))[-c(1,(num.knots+2))])

X <- cbind(rep(1,n),x)
Z <- outer(x,knots.1,"-")
Z <- Z*(Z>0)
re.block.val <- list(1:num.knots)

# for "curve"
Z.block  <-  list()
for (i in 1:length(re.block.val))
  Z.block[[i]] <-
  as.formula(paste("~Z[,c(",
                   paste(re.block.val[[i]],
                         collapse=","),")]-1"))

# for "cluster specific random intercepts
# and slopes"
Z.block.2  <-  list()
for (i in 1:length(re.block.val))
  Z.block.2[[i]] <-
  as.formula(paste("~X[,c(1,2)]-1"))

# Fit model using lme() and extract
# coefficient estimates

# note groups.2 "within" groups.1
data.fr <-
  groupedData( y ~ X[,-1] | groups.1 / groups.2,
              data =
              data.frame( y,X,Z,groups.1,groups.2))

lme.fit <- lme(y~X[,-1],
               data=data.fr,
               random=
               list(groups.1 = pdMat(Z.block[[1]],
                      pdClass="pdIdent"),
                    groups.2 = pdMat(Z.block.2[[1]],
                      pdClass="pdSymm")))

# The groups.1 random effects are the us from the spline.
# "pdIdent" specifies that they are uncorrelated
# the groups.2 random effects are the random intercepts
# and ranodm slopes. "pdSymm" specifies that they have an
# unstructured (positive definite symmetric) covariance
# matrix.

u.hat <- as.vector(unlist(ranef(lme.fit)$groups.1))

# extract random intercepts and slopes
b0.hat <- as.vector(unlist(ranef(lme.fit)$groups.2[,1]))
b1.hat <- as.vector(unlist(ranef(lme.fit)$groups.2[,2]))

sigusq.hat <-
  (as.numeric(exp(attributes(summary(lme.fit)$apVar)$Pars[4])))^2

temp <- unlist(as.vector( attributes(summary(lme.fit)$apVar)$Pars[1:3]))
D.hat <- matrix(0,2,2)
D.hat[1,1] <- exp(temp[1])^2
D.hat[2,2] <- exp(temp[2])^2
cor <-  2*(exp(temp[3])/(1+exp(temp[3])))-1
D.hat[1,2] <- cor*exp(temp[1])*exp(temp[2])
D.hat[2,1] <- D.hat[1,2]

                                        #D.hat ?

sigesq.hat <- (lme.fit$sigma^2)

beta.hat <- as.vector(unlist(lme.fit$coef$fix))

# Draw fits 

grid.size <- 101
x1.grid <- seq(min(x),max(x),length=grid.size)
ones.grid <- rep(1,grid.size)

X.grid <- cbind(ones.grid,x1.grid)
Z.grid <- outer(x1.grid,knots.1,"-")
Z.grid <- Z.grid*(Z.grid>0)

f1.hat.grid <- as.vector(X.grid%*%beta.hat+ Z.grid%*%u.hat)
f1.grid <- f1(x1.grid)

x11()
par(mfrow=c(1,3),bty="l")
plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1",
     ylab="f(x)",type="n",
     main = "Data",
     ylim=range(c(y,f1.grid,f1.hat.grid)))
l.ind <- 0
for (i in 1:clusters)
  {
    s.ind <- 1+l.ind
    l.ind <- s.ind + ms[i] - 1
    lines(x[s.ind:l.ind],y[s.ind:l.ind])
  }
points(x,y)

plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1",
     ylab="f(x)",type="n",
     main = "Overall Estimate",
     ylim=range(c(y,f1.grid,f1.hat.grid)))

lines(x1.grid,f1.hat.grid,lwd=2,col=2)
lines(x1.grid,f1.grid,lty=2) 
legend(1,5,c("True Curve","Estimate"),col=c(1,2),
       lty=c(1,2),lwd=c(1,2))

plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1",
     ylab="f(x)",type="n",
     main = "3 Cluster Specific Estimates",
     ylim=range(c(y,f1.grid,f1.hat.grid)))

for (i in 1:3)
  {
    points(x[groups.2==i],y[groups.2==i],col=i,pch=16)
    lines(x1.grid,
          b0.hat[i]+b1.hat[i]*x1.grid+
          f1.hat.grid,lwd=2,col=i)
  }