########## R script: curves with random intercepts # ## Last modified December 1, 2003 by John Staudenmayer ## email: jstauden@math.umass.edu ## f1 <- function(x) return(dnorm(x,1,.4)-dnorm(x,3,.25)) library("nlme") # Set parameters clusters <- 20 # number of "clusters" # observations per "cluster" ms <- 8 + round(6*runif(clusters)-3) # note that ms vary by cluster sig <- .1 # std dev of e_ij's bsig <- .5 # std dev of random intercepts unique.bs <- rnorm(clusters)*bsig # random intercepts unique.bs <- unique.bs - mean(unique.bs) possible.xs <- seq(from=0.1,to=4,length=max(ms)) x <- rep(-999,length=sum(ms)) b <- rep(-999,length=sum(ms)) l.ind <- 0 for (i in 1:clusters) { s.ind <- 1+l.ind l.ind <- s.ind + ms[i] - 1 # fill in each cluster's xs with sample from possible xs x[s.ind:l.ind] <- sort(sample(possible.xs,ms[i],replace=FALSE)) b[s.ind:l.ind] <- rep(unique.bs[i],length=ms[i]) } #jitter the xs a bit x <- x + rnorm(length(x))*.05 f1s <- f1(x) eps <- rnorm(sum(ms))*sig y <- f1s + b + eps n <- length(y) num.knots <- min(30,floor(n/3)) #### Fitting #### # groups.1 defines the "curves" group # for the u's in the spline groups.1 <- rep(1,sum(ms)) inds <- 1:length(groups.1) # groups.2 defines the "cluster" group # (each cluster has a random intercept) groups.2 <- rep(-999,length=sum(ms)) l.ind <- 0 for (i in 1:clusters) { s.ind <- 1+l.ind l.ind <- s.ind + ms[i] - 1 groups.2[s.ind:l.ind] <- rep(i,length=ms[i]) } # Set up design matrices and random effects block structure knots.1 <- quantile(unique(x), seq(0,1, length= (num.knots+2))[-c(1,(num.knots+2))]) X <- cbind(rep(1,n),x) Z <- outer(x,knots.1,"-") Z <- Z*(Z>0) re.block.val <- list(1:num.knots) # for "curve" Z.block <- list() for (i in 1:length(re.block.val)) Z.block[[i]] <- as.formula(paste("~Z[,c(", paste(re.block.val[[i]], collapse=","),")]-1")) # for "cluster specific random intercpet" Z.block.2 <- list() for (i in 1:length(re.block.val)) Z.block.2[[i]] <- as.formula(paste("~1")) # Fit model using lme() and extract coefficient estimates # note groups.2 "within" groups.1 data.fr <- groupedData( y ~ X[,-1] | groups.1 / groups.2, data = data.frame( y,X,Z,groups.1,groups.2)) lme.fit <- lme(y~X[,-1],data=data.fr, random= list(groups.1 = pdMat(Z.block[[1]], pdClass="pdIdent"), groups.2 = pdMat(Z.block.2[[1]], pdClass="pdIdent"))) # The groups.1 random effects are the us from the spline. # "pdIdent" specifies that they are uncorrelated # the groups.1 random effects are the random intercepts. # Again, "pdIdent" specifies that they are uncorrelated. u.hat <- as.vector(unlist(ranef(lme.fit)\$groups.1)) # random intercepts b.hat <- as.vector(unlist(ranef(lme.fit)\$groups.2)) beta.hat <- as.vector(unlist(lme.fit\$coef\$fix)) # spline variance sigusq.hat <- (as.numeric(exp(attributes(summary(lme.fit)\$apVar)\$Pars[2])))^2 # random intercept variance sigbsq.hat <- (as.numeric(exp(attributes(summary(lme.fit)\$apVar)\$Pars[1])))^2 sigesq.hat <- (lme.fit\$sigma^2) # Draw fits grid.size <- 101 x1.grid <- seq(min(x),max(x),length=grid.size) ones.grid <- rep(1,grid.size) X.grid <- cbind(ones.grid,x1.grid) Z.grid <- outer(x1.grid,knots.1,"-") Z.grid <- Z.grid*(Z.grid>0) f1.hat.grid <- as.vector(X.grid%*%beta.hat+ Z.grid%*%u.hat) f1.grid <- f1(x1.grid) x11() par(mfrow=c(1,3)) plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1", ylab="f(x)",type="n", main = "Data", ylim=range(c(y,f1.grid,f1.hat.grid))) l.ind <- 0 for (i in 1:clusters) { s.ind <- 1+l.ind l.ind <- s.ind + ms[i] - 1 lines(x[s.ind:l.ind],y[s.ind:l.ind]) } points(x,y) plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1", ylab="f(x)",type="n", main = "Overall Estimate", ylim=range(c(y,f1.grid,f1.hat.grid))) lines(x1.grid,f1.hat.grid,lwd=2,col=2) lines(x1.grid,f1.grid,lty=2) plot(c(x1.grid),c(f1.hat.grid),bty="l",xlab="x1", ylab="f(x)",type="n", main = "3 Cluster Specific Estimates", ylim=range(c(y,f1.grid,f1.hat.grid))) for (i in 1:3) { points(x[groups.2==i],y[groups.2==i],col=i,pch=16) lines(x1.grid,b.hat[i]+f1.hat.grid,lwd=2,col=i) }