11mcode <- function (... , recodes , else .val = NA , as.factor.result = NULL , as.numeric.result = TRUE , levels = NULL ){
2-
3- # # STILL NEED TO FIGURE OUT A WILDCARD SYNTAX
4- # ## AN EXAMPLE TO WORK FROM: recodes <- "c(1,1)=1;c(1,2)=2;c(1,3)=3;c(2,'*')=4"
2+
3+ # # STILL NEED TO FIGURE OUT A WILDCARD SYNTAX
4+ # ## AN EXAMPLE TO WORK FROM: recodes <- "c(1,1)=1;c(1,2)=2;c(1,3)=3;c(2,'*')=4"
55
6-
7- vars <- list (... )
8- lengths <- sapply(vars ,FUN = length )
9- # check variable lengths
10- if (! identical(rep(lengths [1 ],length(vars )), lengths ))
11- stop(" Vectors specified in '...' have different lengths" )
12- splitrecodes <- strsplit(recodes ," ;" )[[1 ]]
13- # check number of vars implied by recode vectors
14- codesplit <- function (a )
15- eval(parse(text = strsplit(a ," =" )[[1 ]][1 ]))
16- ncodes <- lapply(splitrecodes ,FUN = function (a ) length(codesplit(a )))
17- for (i in 1 : length(ncodes )){
18- if (! ncodes [[i ]]== length(vars ))
19- stop(paste(" Recode" ,i ," has" ,ncodes [[i ]]," items but should have" ,length(vars )))
20- }
21- # list of old values for each recode combination
22- oldvals <- lapply(splitrecodes ,FUN = codesplit )
23- # check for duplicate recode combinations
24- dup <- duplicated(oldvals )
25- if (sum(dup )> 0 )
26- stop(" Duplicate recodes in positions:" ,seq(length(dup ))[dup == TRUE ])
27- # list of new values for each recode combination
28- newval <- lapply(splitrecodes ,FUN = function (a ) strsplit(a ," =" )[[1 ]][2 ])
29- # original variables as list
30- oldvar <- mapply(FUN = c ,vars )
31- # create new var from original vars, based on splitrecodes
32- newvar <- vector(length = lengths [1 ], mode = mode(vars [[1 ]]))
33- for (i in 1 : dim(oldvar )[1 ]){
34- check <- unlist(lapply(oldvals , FUN = function (a ) {
35- s <- sum(oldvar [i ,]== a ,na.rm = TRUE )
36- # sum code to deal with NAs
37- if (is.na(s ) || is.null(s ))
38- s <- 0
39- if (TRUE %in% is.na(oldvar [i ,])){
40- for (k in 1 : length(oldvar [i ,])){
41- if (is.na(oldvar [i ,])[k ] & is.na(a )[k ])
42- s <- s + 1
43- }
44- }
45- invisible (s )
46- }
47- ))
48- if (TRUE %in% (check > 0 )){
49- # check for multiple matches (shouldn't happen)
50- if (sum(check == length(oldvar [i ,]))> 1 ){
51- newvar [i ] <- NA
52- warning(" Multiple matches for case " ,i ," so NA used instead" )
53- }
54- # check for no matches
55- else if (max(check )< length(oldvar [i ,]))
56- newvar [i ] <- else .val
57- else
58- newvar [i ] <- newval [check == length(oldvar [i ,])]
59- }
60- }
61- # format resulting variable
62- newvar <- unlist(newvar )
63- if (! is.null(as.numeric.result ) && as.numeric.result )
64- newvar <- as.numeric(newvar )
65- if (! is.null(as.factor.result ) && as.factor.result ) {
6+
7+ vars <- list (... )
8+ lengths <- sapply(vars ,FUN = length )
9+ # check variable lengths
10+ if (! identical(rep(lengths [1 ],length(vars )), lengths ))
11+ stop(" Vectors specified in '...' have different lengths" )
12+ splitrecodes <- strsplit(recodes ," ;" )[[1 ]]
13+ # check number of vars implied by recode vectors
14+ codesplit <- function (a )
15+ eval(parse(text = strsplit(a ," =" )[[1 ]][1 ]))
16+ ncodes <- lapply(splitrecodes ,FUN = function (a ) length(codesplit(a )))
17+ for (i in 1 : length(ncodes )){
18+ if (! ncodes [[i ]]== length(vars ))
19+ stop(paste(" Recode" ,i ," has" ,ncodes [[i ]]," items but should have" ,length(vars )))
20+ }
21+ # list of old values for each recode combination
22+ oldvals <- lapply(splitrecodes ,FUN = codesplit )
23+ # check for duplicate recode combinations
24+ dup <- duplicated(oldvals )
25+ if (sum(dup )> 0 )
26+ stop(" Duplicate recodes in positions:" ,seq(length(dup ))[dup == TRUE ])
27+ # list of new values for each recode combination
28+ newval <- lapply(splitrecodes ,FUN = function (a ) strsplit(a ," =" )[[1 ]][2 ])
29+ # original variables as list
30+ oldvar <- mapply(FUN = c ,vars )
31+ # create new var from original vars, based on splitrecodes
32+ newvar <- vector(length = lengths [1 ], mode = mode(vars [[1 ]]))
33+ for (i in 1 : dim(oldvar )[1 ]){
34+ check <- unlist(lapply(oldvals , FUN = function (a ) {
35+ s <- sum(oldvar [i ,]== a ,na.rm = TRUE )
36+ # sum code to deal with NAs
37+ if (is.na(s ) || is.null(s ))
38+ s <- 0
39+ if (TRUE %in% is.na(oldvar [i ,])){
40+ for (k in 1 : length(oldvar [i ,])){
41+ if (is.na(oldvar [i ,])[k ] & is.na(a )[k ])
42+ s <- s + 1
43+ }
44+ }
45+ invisible (s )
46+ }
47+ ))
48+ if (TRUE %in% (check > 0 )){
49+ # check for multiple matches (shouldn't happen)
50+ if (sum(check == length(oldvar [i ,]))> 1 ){
51+ newvar [i ] <- NA
52+ warning(" Multiple matches for case " ,i ," so NA used instead" )
53+ }
54+ # check for no matches
55+ else if (max(check )< length(oldvar [i ,]))
56+ newvar [i ] <- else .val
57+ else
58+ newvar [i ] <- newval [check == length(oldvar [i ,])]
59+ }
60+ }
61+ # format resulting variable
62+ newvar <- unlist(newvar )
63+ if (! is.null(as.numeric.result ) && as.numeric.result )
64+ newvar <- as.numeric(newvar )
65+ if (! is.null(as.factor.result ) && as.factor.result ) {
6666 if (! is.null(levels ))
67- newvar <- factor (newvar , levels = levels )
67+ newvar <- factor (newvar , levels = levels )
6868 else
69- as.factor(newvar )
69+ as.factor(newvar )
7070 }
71- return (newvar )
72- }
71+ return (newvar )
72+ }
0 commit comments