Skip to content

Commit 76ca10a

Browse files
committed
work on generalizing mcode
1 parent c89ce97 commit 76ca10a

File tree

4 files changed

+95
-46
lines changed

4 files changed

+95
-46
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: mcode
2-
Version: 0.1.2
3-
Date: 2015-05-23
2+
Version: 0.1.3
3+
Date: 2015-05-24
44
Title: Functions to Merge and Recode Across Multiple Variables
55
Authors@R: c(person("Thomas J.", "Leeper", role = c("aut", "cre"),
66
email = "thosjleeper@gmail.com"))

R/mcode.r

Lines changed: 80 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,83 @@
1-
mcode <- function(..., recodes, else.val=NA, as.factor.result=NULL, as.numeric.result=TRUE, levels=NULL){
2-
## STILL NEED TO FIGURE OUT A WILDCARD SYNTAX
3-
### AN EXAMPLE TO WORK FROM: recodes <- "c(1,1)=1;c(1,2)=2;c(1,3)=3;c(2,'*')=4"
4-
1+
mcode <- function(..., recodes, .fill = NA, .result = c("numeric", "character", "factor"), .levels = NULL){
52

3+
# process variables
64
vars <- list(...)
7-
lengths <- sapply(vars,FUN=length)
8-
# check variable lengths
9-
if(!identical(rep(lengths[1],length(vars)), lengths))
5+
classes <- sapply(vars, FUN = inherits, what = "factor")
6+
if(any(classes)) {
7+
.factors <- match.arg(.factors)
8+
if(.factors == "character") {
9+
vars[classes] <- lapply(vars[classes], as.character)
10+
} else {
11+
vars[classes] <- lapply(vars[classes], as.numeric)
12+
}
13+
}
14+
lengths <- sapply(vars, FUN = length)
15+
if(any(lengths > lengths[1] | lengths < lengths[1]))
1016
stop("Vectors specified in '...' have different lengths")
11-
splitrecodes <- strsplit(recodes,";")[[1]]
12-
# check number of vars implied by recode vectors
13-
codesplit <- function(a)
14-
eval(parse(text=strsplit(a,"=")[[1]][1]))
15-
ncodes <- lapply(splitrecodes,FUN=function(a) length(codesplit(a)))
16-
for(i in 1:length(ncodes)){
17-
if(!ncodes[[i]]==length(vars))
18-
stop(paste("Recode",i,"has",ncodes[[i]],"items but should have",length(vars)))
17+
oldvar <- mapply(c, vars)
18+
19+
# process recodes
20+
splitrecodes <- strsplit(gsub("\n|\t", "", recodes), split = ";")[[1]]
21+
x <- t(sapply(splitrecodes, function(x) strsplit(x, split = "=")[[1]]))
22+
outval <- unname(x[,2])
23+
inval1 <- gsub("[c()]", "", unname(x[,1])) # ignore bracketing
24+
inval <- unname(sapply(inval1, strsplit, split = ","))
25+
rlengths <- sapply(inval, length)
26+
if(any(rlengths < length(vars))) {
27+
w <- which(rlengths < length(vars))
28+
stop(ngettext(length(w), paste0("'recodes' entry ", w, " has less elements than ",length(vars)),
29+
paste0("'recodes' entries ", paste0(w, sep = ","), " has less element than ",length(vars))))
30+
} else if(any(rlengths > length(vars))) {
31+
w <- which(rlengths > length(vars))
32+
stop(ngettext(length(w), paste0("'recodes' entry ", w, " has more elements than ",length(vars)),
33+
paste0("'recodes' entries ", paste0(w, sep = ","), " has more elements than ",length(vars))))
34+
}
35+
if(any(rlengths > rlengths[1] | rlengths < rlengths[1])) {
36+
stop("'recodes' entries have inconsistent numbers of elements")
37+
}
38+
# parse special symbols
39+
parse_specials <- function(specials, var) {
40+
## * - wildcard
41+
specials[specials == "*"] <- "*"
42+
## NA - NA value
43+
specials[specials == "NA"] <- NA
44+
## min - minimum of that variable
45+
specials[specials == "min"] <- min(var, na.rm = TRUE)
46+
## max - maximum of that variable
47+
specials[specials == "max"] <- max(var, na.rm = TRUE)
48+
## mean - mean of that variable
49+
specials[specials == "mean"] <- mean(var, na.rm = TRUE)
50+
## median - median of that variable
51+
specials[specials == "median"] <- median(var, na.rm = TRUE)
52+
## : - range of values
53+
54+
return(specials)
55+
}
56+
57+
if(FALSE) {
58+
invalmat <- matrix(character(), nrow = length(inval), ncol = rlengths[1])
59+
for(i in 1:ncol(invalmat)) {
60+
invalmat[,i] <- parse_specials(sapply(inval, `[`, i), vars[[i]])
61+
}
62+
63+
64+
sapply(seq_along(newvar), function(x) {
65+
v <- sapply(vars, `[`, x)
66+
67+
68+
#ranged <- grepl(":", , fixed = TRUE)
69+
return(x)
70+
})
71+
72+
73+
apply(oldvar, 1, function(x) {
74+
75+
})
1976
}
20-
# list of old values for each recode combination
21-
oldvals <- lapply(splitrecodes,FUN=codesplit)
22-
# check for duplicate recode combinations
23-
dup <- duplicated(oldvals)
24-
if(sum(dup)>0)
25-
stop("Duplicate recodes in positions:",seq(length(dup))[dup==TRUE])
26-
# list of new values for each recode combination
27-
newval <- lapply(splitrecodes,FUN=function(a) strsplit(a,"=")[[1]][2])
28-
# original variables as list
29-
oldvar <- mapply(FUN=c,vars)
30-
# create new var from original vars, based on splitrecodes
31-
newvar <- vector(length=lengths[1], mode=mode(vars[[1]]))
77+
78+
newvar <- vector(mode = "character", length = lengths[1]) # return vector
3279
for(i in 1:dim(oldvar)[1]){
33-
check <- unlist(lapply(oldvals, FUN=function(a) {
80+
check <- unlist(lapply(inval, FUN=function(a) {
3481
s <- sum(oldvar[i,]==a,na.rm=TRUE)
3582
# sum code to deal with NAs
3683
if(is.na(s) || is.null(s))
@@ -52,20 +99,15 @@ mcode <- function(..., recodes, else.val=NA, as.factor.result=NULL, as.numeric.r
5299
}
53100
# check for no matches
54101
else if(max(check)<length(oldvar[i,]))
55-
newvar[i] <- else.val
102+
newvar[i] <- .fill
56103
else
57-
newvar[i] <- newval[check==length(oldvar[i,])]
104+
newvar[i] <- outval[check==length(oldvar[i,])]
58105
}
59106
}
107+
60108
# format resulting variable
61109
newvar <- unlist(newvar)
62-
if(!is.null(as.numeric.result) && as.numeric.result)
63-
newvar <- as.numeric(newvar)
64-
if(!is.null(as.factor.result) && as.factor.result) {
65-
if(!is.null(levels))
66-
newvar <- factor(newvar, levels = levels)
67-
else
68-
as.factor(newvar)
69-
}
110+
111+
newvar <- eval(call(paste0("as.", match.arg(.result)), newvar))
70112
return(newvar)
71113
}

man/mcode-package.Rd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
\name{mcode-package}
22
\alias{mcode-package}
3-
\title{Functions to Merge and Recode Across Multiple Variables}
4-
\description{}
3+
\title{mcode-package}
4+
\description{Functions to Merge and Recode Across Multiple Variables}
55
\details{
66
The package currently contains the following functions:
77
\itemize{

man/mcode.Rd

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ mcode(..., recodes, else.val=NA,
1515
\item{levels}{}
1616
}
1717
\details{
18-
Recoding is a basic step in any analysis. It's fairly easy to recode a single variable (e.g. by replacing values in the vector or using the \code{recode} function in \bold{car} or \code{mapvalues} in \bold{plyr}), but it can be cumbersome to recode multiple variables into a single vector .This is useful when, for example, a factorial experiment has the group for each factor stored as separate variables, but analysis will be performed across the entire design (rather than factor-by-factor), or when it is necessary to create a factor representing multivariate combinations of demographic groups (e.g., an age-sex-race stratification) from a set of separate input vectors representing each demographic variable. That would normally require a series of \code{\link[base]{ifelse}} statements or complex use of boolean arguments. This function aims to make it simple to create a single vector from multiple input vectors in a manner more useful than \code{\link[base]{interaction}}.
18+
Recoding is a basic step in any analysis. It is fairly easy to recode a single variable (e.g. by replacing values in the vector or using the \code{recode} function in \bold{car} or \code{mapvalues} in \bold{plyr}), but it can be cumbersome to recode multiple variables into a single vector .This is useful when, for example, a factorial experiment has the group for each factor stored as separate variables, but analysis will be performed across the entire design (rather than factor-by-factor), or when it is necessary to create a factor representing multivariate combinations of demographic groups (e.g., an age-sex-race stratification) from a set of separate input vectors representing each demographic variable. That would normally require a series of \code{\link[base]{ifelse}} statements or complex use of boolean arguments. This function aims to make it simple to create a single vector from multiple input vectors in a manner more useful than \code{\link[base]{interaction}}.
1919

2020
The syntax borrows from the \code{recode} function in the \bold{car} package.
2121

@@ -27,6 +27,16 @@ This really only works for categorical variables, but a continuous variable coul
2727
\author{Thomas J. Leeper}
2828
%\seealso{}
2929
\examples{
30+
# RECODE A SINGLE VARIABLE BASED ON A `car::recode`-STYLE SCHEME
31+
r <- mcode(c(1,3,5,4,2), recodes = "5=1;4=2;3=3;2=4;1=5")
32+
stopifnot(identical(r, c(5,3,1,2,4)))
33+
34+
35+
# WORK WITH MISSING VALUES:
36+
mcode(c(1,1,1,1,1,NA), c(1,1,2,2,NA,1), recodes = "c(1,1)=1;c(1,2)=2;c(1,NA)=3")
37+
38+
39+
# COMPARE `mcode` TO VARIOUS ALTERNATIVES
3040
a <- c(1,2,1,2,1,NA,2,NA)
3141
b <- c(1,1,2,2,NA,1,NA,2)
3242

@@ -60,8 +70,5 @@ mcode(c(rep(1,9),0),
6070
c(rep(1,8),0,1),
6171
c(rep(1,5),rep(0,2),rep(1,3)),
6272
recodes = r)
63-
64-
# WORK WITH MISSING VALUES:
65-
mcode(c(1,1,1,1,1,NA), c(1,1,2,2,NA,1), recodes="c(1,1)=1;c(1,2)=2;c(1,NA)=3")
6673
}
6774
%\keyword{}

0 commit comments

Comments
 (0)