########## R script: curves with random intercepts and slopes ##########
#
## Last modified December 1, 2003 by John Staudenmayer
## email: jstauden@math.umass.edu
##
## Simulates clustered data whose mean is a smooth curve f1(x) plus a
## cluster-specific random intercept and random slope, fits the model as a
## penalized spline with nlme::lme(), and plots the data, the overall
## estimate and three cluster-specific estimates.
#######

library("nlme")

#######
# Simulate some data
#######

# true function
f1 <- function(x) {
  dnorm(x, 1, .4) - dnorm(x, 3, .25)
}

# Set parameters
clusters <- 20  # number of "clusters"

# observations per "cluster"
ms <- 8 + round(6 * runif(clusters) - 3)  # ms vary by cluster

sig <- .1  # std dev of e_ij's

# cov mat of random intercepts / slopes
D <- rbind(cbind(2, -1), c(-1, 1))

unique.b0 <- rnorm(clusters)  # random intercepts
unique.b0 <- unique.b0 - mean(unique.b0)
unique.b1 <- rnorm(clusters)  # random slopes
unique.b1 <- unique.b1 - mean(unique.b1)

# correct the covariance: premultiply by the lower Cholesky factor of D
temp <- t(chol(D)) %*% rbind(unique.b0, unique.b1)
unique.b0 <- temp[1, ]
unique.b1 <- temp[2, ]

possible.xs <- seq(from = 0.1, to = 4, length.out = max(ms))

# -999 is a placeholder that is overwritten for every observation below
x <- rep(-999, length.out = sum(ms))
b <- rep(-999, length.out = sum(ms))

l.ind <- 0
for (i in seq_len(clusters)) {
  s.ind <- 1 + l.ind
  l.ind <- s.ind + ms[i] - 1

  # fill in each cluster's xs with
  # a sample from the possible xs
  x[s.ind:l.ind] <- sort(sample(possible.xs, ms[i], replace = FALSE))
  b[s.ind:l.ind] <- unique.b0[i] + x[s.ind:l.ind] * unique.b1[i]
}

# jitter the xs a bit
x <- x + rnorm(length(x)) * .05

f1s <- f1(x)
eps <- rnorm(sum(ms)) * sig
y <- f1s + b + eps  # note that b includes a random
                    # intercept and a random slope

n <- length(y)
num.knots <- min(20, floor(n / 3))

#### Fitting ####

# groups.1 defines the "curves" group
# for the u's in the spline
groups.1 <- rep(1, sum(ms))
inds <- seq_along(groups.1)

# groups.2 defines the "cluster" group
# (each cluster has a random intercept
# and a random slope); observations are stored cluster by cluster
groups.2 <- rep(seq_len(clusters), times = ms)

# Set up design matrices and random effects
# block structure

# knots at equally spaced interior quantiles of the distinct xs
knot.probs <- seq(0, 1, length.out = num.knots + 2)
knots.1 <- quantile(unique(x), knot.probs[-c(1, num.knots + 2)])

X <- cbind(rep(1, n), x)

# truncated-line basis: (x - knot)_+
Z <- outer(x, knots.1, "-")
Z <- Z * (Z > 0)

re.block.val <- list(1:num.knots)

# for "curve"
Z.block <- list()
for (i in seq_along(re.block.val)) {
  Z.block[[i]] <- as.formula(
    paste("~Z[,c(", paste(re.block.val[[i]], collapse = ","), ")]-1")
  )
}

# for "cluster specific random intercepts
# and slopes"
Z.block.2 <- list()
for (i in seq_along(re.block.val)) {
  Z.block.2[[i]] <- as.formula(paste("~X[,c(1,2)]-1"))
}

# Fit model using lme() and extract
# coefficient estimates

# note groups.2 "within" groups.1
data.fr <- groupedData(
  y ~ X[, -1] | groups.1 / groups.2,
  data = data.frame(y, X, Z, groups.1, groups.2)
)

lme.fit <- lme(
  y ~ X[, -1],
  data = data.fr,
  random = list(
    groups.1 = pdMat(Z.block[[1]], pdClass = "pdIdent"),
    groups.2 = pdMat(Z.block.2[[1]], pdClass = "pdSymm")
  )
)

# The groups.1 random effects are the us from the spline.
# "pdIdent" specifies that they are uncorrelated
# the groups.2 random effects are the random intercepts
# and random slopes.  "pdSymm" specifies that they have an
# unstructured (positive definite symmetric) covariance
# matrix.

re.hat <- ranef(lme.fit)
u.hat <- as.vector(unlist(re.hat$groups.1))

# extract random intercepts and slopes
b0.hat <- as.vector(unlist(re.hat$groups.2[, 1]))
b1.hat <- as.vector(unlist(re.hat$groups.2[, 2]))

# Variance components are read from the "Pars" attribute of apVar.
# If lme() could not compute apVar that attribute is absent, so fail with a
# clear message instead of an obscure error further down.
var.pars <- attr(lme.fit$apVar, "Pars")
if (is.null(var.pars)) {
  stop("lme() did not return an approximate covariance matrix (apVar); ",
       "variance component estimates are unavailable", call. = FALSE)
}

# element 4: log std dev of the spline coefficients (groups.1)
sigusq.hat <- (as.numeric(exp(var.pars[4])))^2

# elements 1:3: log std devs and transformed correlation of the
# random intercept / slope (groups.2)
temp <- unlist(as.vector(var.pars[1:3]))

D.hat <- matrix(0, 2, 2)
D.hat[1, 1] <- exp(temp[1])^2
D.hat[2, 2] <- exp(temp[2])^2
# map the unconstrained parameter back to a correlation in (-1, 1)
cor <- 2 * (exp(temp[3]) / (1 + exp(temp[3]))) - 1
D.hat[1, 2] <- cor * exp(temp[1]) * exp(temp[2])
D.hat[2, 1] <- D.hat[1, 2]

sigesq.hat <- (lme.fit$sigma^2)

beta.hat <- as.vector(unlist(lme.fit$coefficients$fixed))

# Draw fits

grid.size <- 101
x1.grid <- seq(min(x), max(x), length.out = grid.size)
ones.grid <- rep(1, grid.size)
X.grid <- cbind(ones.grid, x1.grid)
Z.grid <- outer(x1.grid, knots.1, "-")
Z.grid <- Z.grid * (Z.grid > 0)

f1.hat.grid <- as.vector(X.grid %*% beta.hat + Z.grid %*% u.hat)
f1.grid <- f1(x1.grid)

# dev.new() opens the platform's default device (x11() is not portable)
dev.new()
par(mfrow = c(1, 3), bty = "l")

# Panel 1: raw data, one connected line per cluster
plot(c(x1.grid), c(f1.hat.grid), bty = "l", xlab = "x1",
     ylab = "f(x)", type = "n", main = "Data",
     ylim = range(c(y, f1.grid, f1.hat.grid)))

for (i in seq_len(clusters)) {
  in.cluster <- groups.2 == i
  lines(x[in.cluster], y[in.cluster])
}
points(x, y)

# Panel 2: overall estimate against the true curve
plot(c(x1.grid), c(f1.hat.grid), bty = "l", xlab = "x1",
     ylab = "f(x)", type = "n", main = "Overall Estimate",
     ylim = range(c(y, f1.grid, f1.hat.grid)))

lines(x1.grid, f1.hat.grid, lwd = 2, col = 2)
lines(x1.grid, f1.grid, lty = 2)

# Legend styles mirror the two lines() calls above; a keyword position keeps
# the legend inside the plot whatever the simulated y range is.
legend("topright", c("True Curve", "Estimate"), col = c(1, 2),
       lty = c(2, 1), lwd = c(1, 2))

# Panel 3: cluster-specific estimates for the first three clusters
plot(c(x1.grid), c(f1.hat.grid), bty = "l", xlab = "x1",
     ylab = "f(x)", type = "n",
     main = "3 Cluster Specific Estimates",
     ylim = range(c(y, f1.grid, f1.hat.grid)))

for (i in 1:3) {
  points(x[groups.2 == i], y[groups.2 == i], col = i, pch = 16)
  lines(x1.grid,
        b0.hat[i] + b1.hat[i] * x1.grid + f1.hat.grid,
        lwd = 2, col = i)
}