I use the S-plus (version 2000 for MS Windows) statistical package for many of my statistical calculations. S-plus is highly programmable and this page lists some of my S-plus functions. Most are for simple data manipulation of a kind I have to do over and over again.
Move a function from one frame to another. In my case it is usually to my library with mylib defined by
attach(what = "j:\\sp2000\\mylib\\_data", name = "mylib", pos = 2)You can also put a directory name containing the Splus files as the to parameter.
##begin
move <- function(object, from = 1, to = "mylib") { # copied from page 5.7 of the programmer's manual # move an object between frames objname <- deparse(substitute(object)) assign(objname, get(objname, where = from), where = to) remove(objname, where = from) }
##end
Copy a function from one frame to another (in my, case it is usually to my library).
##begin
copy <- function(object, from = 1, to = "mylib") { # copied from page 5.7 of the programmer's manual # copy an object between frames objname <- deparse(substitute(object)) assign(objname, get(objname, where = from), where = to) }
##end
Change all instances of NA in a vector to false.
##begin
na.is.false <- function(X) { X1 <- X X1[is.na(X)] <- F X1 }
##end
Change all instances of NA in a vector to zero.
##begin
na.is.zero <- function(X) { X1 <- X X1[is.na(X)] <- 0 X1 }
##end
Like S-plus's match, but where we have two arrays.
##begin
bi.match <- function(x1, x2, table1, table2) { # match when we have two arrays to be matched # return array M of integers of same length as x1 and x2 # such that table1[M[i]] = x1[i], table2[M[i]] = x2[i]
table1.unique <- unique(table1) n1 <- length(table1.unique) table2.unique <- unique(table2) mm <- match(x1, table1.unique) + n1 * match(x2, table2.unique) tt <- match(table1, table1.unique) + n1 * match(table2, table2.unique) match(mm, tt) }
##end
##begin
X1 <- c(1,5,4,2,6,1,5,4,2,6,1,5,4,2,6,6,4,2,1,7) X2 <- c(2,2,2,2,2,3,3,3,3,3,1,1,1,1,1,1,2,3,4,1)
T1 <- c(6,5,4,3,2,1,1,2,3,4,5,6,2,3,4) T2 <- c(1,2,3,1,2,3,2,1,3,2,1,3,3,2,1)
MM <- bi.match(X1, X2, T1, T2) print(MM) print(T1[MM]) print(T2[MM])
##end
Like S-plus's match, but where we have three arrays.
##begin
tri.match <- function(x1, x2, x3, table1, table2, table3) { # match when we have three arrays to be matched # return array M of integers of same length as x1, x2, x3 # such that table1[M[i]] = x1[i], table2[M[i]] = x2[i], table3[M[i]] = x3[i]
table1.unique <- unique(table1) n1 <- length(table1.unique) table2.unique <- unique(table2) n2 <- length(table2.unique) table3.unique <- unique(table3) n3 <- length(table3.unique) prod <- as.double(n1) * as.double(n2) * as.double(n3) if (prod >= 2147483648) print(paste("May have overflow of index, n1*n2*n3 =", prod)) mm <- match(x1, table1.unique) + n1 * match(x2, table2.unique) + n1 * n2 * match(x3, table3.unique) tt <- match(table1, table1.unique) + n1 * match(table2, table2.unique) + n1 * n2 * match(table3, table3.unique) match(mm, tt) }
##end
##begin
X1 <- c(1,5,4,2,6,1,5,4,2,6,1,5,4,2,6,6,4,2,1,7) X2 <- c(2,2,2,2,2,3,3,3,3,3,1,1,1,1,1,1,2,3,4,1) X3 <- c(4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5)
T1 <- c(6,5,4,3,2,1,1,2,3,4,5,6,2,3,4) T2 <- c(1,2,3,1,2,3,2,1,3,2,1,3,3,2,1) T3 <- c(4,5,4,5,4,5,4,5,4,5,4,5,4,5,4)
MM <- tri.match(X1, X2, X3, T1, T2, T3) print(MM) print(T1[MM]) print(T2[MM]) print(T3[MM])
##end
Change the level names of a factor. The current names are in the vector OldNames and the new names are in the vector NewNames. Levels can be amalgamated. Levels missing from the OldLevels vector are replaced by NA. NA in original series should remain as NA.
##begin
ChangeLevelNames <- function(Factor, OldLevels, NewLevels) { if (length(OldLevels) != length(NewLevels)) { print("length of levels names vectors don't match") return(Factor) } Match <- match(levels(Factor), OldLevels) NLM <- NewLevels[Match] NLM[is.na(Match)] <- NA Trans <-NLM[as.integer(Factor)] Trans[is.na(Factor)] <- NA Result <- factor(Trans) Result }
##end
##begin
ChangeLevelNames(factor(c("Peter",NA,"Paul","Mary","Jim", "Pauline","Jim","Peter","Joan","Jim", "Joan","Paul","Pauline")), c("Peter","Paul","Joan","Mary","Jim","Queenie"), c("M","M","F","F","M","F") )
levels(ChangeLevelNames(factor(c("Peter",NA,"Paul","Mary","Jim", "Pauline","Jim","Peter","Joan","Jim", "Joan","Paul","Pauline")), c("Peter","Paul","Joan","Mary","Jim","Queenie"), c("M","M","F","F","M","F") ))
ChangeLevelNames(factor(c(1,5,3,6,4,5,2,7,5,2,4,NA,5,6)), c(1,3,5,2,4,6), c(1,1,1,2,2,2))
levels(ChangeLevelNames(factor(c(1,5,3,6,4,5,2,7,5,2,4,NA,5,6)), c(1,3,5,2,4,6), c(1,1,1,2,2,2)))
##end
Alternatively use as.data.frame(XXX,col.dims=0)
##begin
table.to.data.frame <- function(TheTable, DataName, TheList) { # # convert a table to a data.frame containing the same information # # TheTable contains the table # DataName is a string to head the data column # TheList is a list of strings being the headings for the dimensions # of the table # TheData <- as.vector(TheTable) N <- length(TheData) eval(parse(text = paste("TheDataFrame <- data.frame(", DataName, " = TheData, row.names = 1:N)", sep = ""))) DN <- dimnames(TheTable) K <- length(DN) if(K != length(TheList)) { print("List is wrong length") return(NA) } r1 <- 1 r2 <- N for(k in 1:K) { len <- length(DN[[k]]) r2 <- r2/len eval(parse(text = paste("TheDataFrame$", TheList[k], " <- rep(rep(DN[[k]], rep(r1, len)), r2)", sep = ""))) r1 <- r1 * len } TheDataFrame }
##end
You have a list of arrays with identical dimension structure.
Concatenate them into a higher dimensional array. The last subscript in the new array corresponds to the arrays in the original list.
##begin
# AL is a list of arrays with identical dimension structure # concatenate them into a higher dimensional array with the list # being converted into the last subscript # identity of array structures is not checked
ArrayBind <- function(AL) { DN <- dimnames(AL[[1]]) D <- dim(AL[[1]]) DN <- append(DN, list(names(AL))) D <- c(D,length(AL)) array(unlist(AL), dim=D, dimnames=DN) }
TestArray1 <- array(c(11,13,14,15,16,17),dim=c(2,3), dimnames = list(c("A","B"),c("P","Q","R")))
TestArray2 <- array(c(21,23,24,25,26,27),dim=c(2,3), dimnames = list(c("A","B"),c("P","Q","R")))
TestArray3 <- array(c(31,33,34,35,36,37),dim=c(2,3), dimnames = list(c("A","B"),c("P","Q","R")))
TestArray4 <- array(c(41,43,44,45,46,47),dim=c(2,3), dimnames = list(c("A","B"),c("P","Q","R")))
print(ArrayBind(list(TA1=TestArray1, TA2=TestArray2, TA3=TestArray3, TA4=TestArray4)))
##end
Look at the effects of a factor in a GLM analysis. Assume the factor is not involved in any interactions.
This analysis looks at the difference of each treatment effect from their average and at the pair-wise differences of the treatment levels.
GLM is the output from an lm or glm analysis; X is the name of a factor (include the data.frame name if part of a data.frame) and SigmaSq = NULL for an lm fit and either 1 or the fitted dispersion for a GLM fit.
##begin
MainEffects <- function(GLM = NULL, X = NULL, SigmaSq = NULL) { # Analyse effects of factor present only as a main effect # The factor must not be involved in interations # The factor name must not be the same as the beginning of some # other variable # # Set sigmasquare = 1 for binomial and poisson models # leave NULL for the LM # put equal to the dispersion for GLM models # # Assume we are using contrasts given in options under factor # Can't do ordered factors as yet # if(is.null(GLM) | is.null(X)) { print("Call is MainEffects(LM, X, SigmaSq) where LM is a lm") print("or glm object; X is a factor in the model; SigmaSq is") print("the residual variance or dispersion (optional for lm)") return(list(Message = "missing arguments")) } X.name <- deparse(substitute(X)) DollarLoc <- regexpr("\\$", X.name) if(DollarLoc >= 0) X.name <- substring(X.name, DollarLoc + 1) Names <- names(coef(GLM)) N.levels <- length(levels(X)) N.names <- length(Names) Deg <- 0 for(i in 1:N.names) { if(!is.na(pmatch(X.name, Names[i]))) { if(Deg == 0) Loc <- i Deg <- Deg + 1 } } if(Deg == 0) return(list(Message = "Not found")) print(paste("Loc =", Loc, " Deg =", Deg)) if(N.levels != Deg + 1) return(list(Message = "Level with no data")) Contrasts <- options()$contrasts[1] Contrasts.matrix <- eval(call(Contrasts, Deg + 1)) dimnames(Contrasts.matrix) <- list(levels(X), NULL) Coefs <- coef(GLM)[Loc:(Loc + Deg - 1)] Coefs.adjusted <- Contrasts.matrix %*% Coefs Coefs.adjusted <- Coefs.adjusted - mean(Coefs.adjusted) SM <- summary(GLM) if(is.null(SigmaSq)) SigmaSq <- (SM$sigma)^2 if(is.na(SigmaSq)) return(list(Message = "Dispersion required")) VarCov <- SM$cov.unscaled[Loc:(Loc + Deg - 1), Loc:(Loc + Deg - 1)] * SigmaSq F.value <- crossprod(Coefs, solve(VarCov, Coefs))/Deg VarCov.adjusted <- Contrasts.matrix %*% VarCov %*% t(Contrasts.matrix) N <- length(Coefs.adjusted) Ones <- rep(1, N) M <- matrix(outer(Ones, Coefs.adjusted), N, N) dimnames(M) <- list(levels(X), levels(X)) Differences <- t(M) - M M <- matrix(outer(VarCov.adjusted %*% Ones, Ones), N, N) dimnames(M) <- list(levels(X), levels(X)) VarCov.adjusted <- VarCov.adjusted - (M + t(M))/N + sum(VarCov.adjusted)/(N^2) Variances <- diag(VarCov.adjusted) Coefficients <- Coefs.adjusted SEs <- sqrt(Variances) t.values <- Coefficients/SEs Effects <- data.frame(Coefficients, SEs, t.values) M <- matrix(outer(Ones, Variances), N, N) dimnames(M) <- list(levels(X), levels(X)) SEs <- sqrt(M + t(M) - 2 * VarCov.adjusted) t.values <- Differences/SEs return(list( Message = "OK", X.name = X.name, Effects = Effects, F.value = F.value, Contrasts = Contrasts, Differences = Differences, SEs = SEs, t.values = t.values )) }
##end
Version of MainEffects for varcomp analyses.
##begin
MainEffectsVC <- function(GLM = NULL, X = NULL) { # Version of MainEffects for varcomp
# Analyse effects of factor present only as a main effect # The factor must not be involved in interations # The factor name must not be the same as the beginning of some # other variable # # Set sigmasquare = 1 for binomial and poisson models # leave NULL for the LM # put equal to the dispersion for GLM models # # Assume we are using contrasts given in options under factor # Can't do ordered factors as yet # if(is.null(GLM) | is.null(X)) { print("Call is MainEffects(LM, X, SigmaSq) where LM is a lm") print("or glm object; X is a factor in the model") return(list(Message = "missing arguments")) } X.name <- deparse(substitute(X)) DollarLoc <- regexpr("\\$", X.name) if(DollarLoc >= 0) X.name <- substring(X.name, DollarLoc + 1) Names <- names(coef(GLM)) N.levels <- length(levels(X)) N.names <- length(Names) Deg <- 0 for(i in 1:N.names) { if(!is.na(pmatch(X.name, Names[i]))) { if(Deg == 0) Loc <- i Deg <- Deg + 1 } } if(Deg == 0) return(list(Message = "Not found")) print(paste("Loc =", Loc, " Deg =", Deg)) if(N.levels != Deg + 1) return(list(Message = "Level with no data")) Contrasts <- options()$contrasts[1] Contrasts.matrix <- eval(call(Contrasts, Deg + 1)) dimnames(Contrasts.matrix) <- list(levels(X), NULL)
Coefs <- coef(GLM)[Loc:(Loc + Deg - 1)] Coefs.adjusted <- Contrasts.matrix %*% Coefs Coefs.adjusted <- Coefs.adjusted - mean(Coefs.adjusted) VarCov <- GLM$cov.fixed[Loc:(Loc + Deg - 1), Loc:(Loc + Deg - 1)] F.value <- crossprod(Coefs, solve(VarCov, Coefs))/Deg VarCov.adjusted <- Contrasts.matrix %*% VarCov %*% t(Contrasts.matrix) N <- length(Coefs.adjusted) Ones <- rep(1, N) M <- matrix(outer(Ones, Coefs.adjusted), N, N) dimnames(M) <- list(levels(X), levels(X)) Differences <- t(M) - M M <- matrix(outer(VarCov.adjusted %*% Ones, Ones), N, N) dimnames(M) <- list(levels(X), levels(X)) VarCov.adjusted <- VarCov.adjusted - (M + t(M))/N + sum(VarCov.adjusted)/ (N^2) Variances <- diag(VarCov.adjusted) Coefficients <- Coefs.adjusted SEs <- sqrt(Variances) t.values <- Coefficients/SEs Effects <- data.frame(Coefficients, SEs, t.values) M <- matrix(outer(Ones, Variances), N, N) dimnames(M) <- list(levels(X), levels(X)) SEs <- sqrt(M + t(M) - 2 * VarCov.adjusted) t.values <- Differences/SEs return(list(Message = "OK", X.name = X.name, Effects = Effects, F.value = F.value, Contrasts = Contrasts, Differences = Differences, SEs = SEs, t.values = t.values)) }
##end
A version of the S-plus function unique for two arrays.
bi.unique <- function(x1, x2) { # x1 and x2 have the same length # return a data.frame consisting of the unique rows of data.frame(x1,x2)
x1.unique <- unique(x1) n1 <- length(x1.unique) mm <- match(x1, x1.unique) + n1 * match(x2, unique(x2)) tt <- match(unique(mm), mm) return(data.frame(x1=x1[tt],x2=x2[tt])) }
##end
##begin
X1 <- c(1, 1, 5, 4, 6, 7, 10, 5, 7, 10) X2 <- c(8, 8, 7, 7, 7, 7, 7, 7, 6, 7) bi.unique(X2, X1)
##end
##begin
quad.match<- function(x1, x2, x3, x4, table1, table2, table3, table4) { # match when we have four arrays to be matched # return array M of integers of same length as x1, x2, x3 # such that table1[M[i]] = x1[i], table2[M[i]] = x2[i], table3[M[i]] = x3[i], # table4[M[i]] = x4[i]
table1.unique <- unique(table1) n1 <- length(table1.unique)
table2.unique <- unique(table2) n2 <- length(table2.unique)
table3.unique <- unique(table3) n3 <- length(table3.unique)
table4.unique <- unique(table4) n4 <- length(table4.unique)
prod <- as.double(n1) * as.double(n2) * as.double(n3) * as.double(n4) if(prod >= 2147483648) print(paste("May have overflow of index, n1*n2*n3*n4 =", prod))
mm <- match(x1, table1.unique) + n1 * match(x2, table2.unique) + n1 * n2 * match(x3, table3.unique) + n1 * n2 * n3 * match(x4, table4.unique)
tt <- match(table1, table1.unique) + n1 * match(table2, table2.unique) + n1 * n2 * match(table3, table3.unique) + n1 * n2 * n3 * match(table4, table4.unique) match(mm, tt) }
##end