Find all combinations among levels of a factor and output to list of data frames
I have a data frame like so:
# Build a reproducible 60-row example data set: two sites, each sampled in a
# wet and a dry season, with plots A-P laid out in consecutive runs of rows.
# Plot M has one row in site 2 / wet and one row in site 2 / dry.
set.seed(540)
df <- data.frame(
  site = c(rep(1, 30), rep(2, 30)),
  season = c(rep("wet", 20), rep("dry", 10), rep("wet", 16), rep("dry", 14)),
  plot = rep(
    LETTERS[1:16],
    times = c(5, 3, 6, 6, 2, 3, 4, 1, 3, 10, 1, 1, 2, 3, 6, 4)
  ),
  plantsp = sample(1:100, 60, replace = TRUE),
  lepsp = sample(1:100, 60, replace = TRUE),
  psitsp = sample(1:100, 60, replace = TRUE)
)

# Convert every column to character; `df[]` keeps the data frame structure
# instead of replacing `df` with a plain list.
df[] <- lapply(df, as.character)

# Prefix each species id with its column name, e.g. "plantsp_<id>".
df$plantsp <- paste("plantsp", df$plantsp, sep = "_")
df$lepsp <- paste("lepsp", df$lepsp, sep = "_")
df$psitsp <- paste("psitsp", df$psitsp, sep = "_")

# NOTE(review): the two statements below reference df$varX, df$varY and
# df$varZ, which are not columns of `df`. paste() on those NULL values yields
# a zero-length vector, so the assignment into a 60-row data frame would fail.
# They are left commented out until the intended columns are confirmed.
# df$paste1 <- paste(df$varX, df$varY, sep = "_")
# df$paste3 <- paste(df$varZ, df$varY, df$varZ, sep = "_")
Interactions were sampled within plots (`plot`) within a given location (`site`) and `season`. I need to subset plots nested within each `site` and `season`. Within each `site` and `season` subset, I need to apply a function that will subset a data frame for each plot combination. Each data frame will be put into a list. Combinations are not considered different if the order varies (i.e. 1_2 is the same as 2_1), and 2 levels are the grouping size for the combination. I would prefer a grouping size argument that is adjustable so that I may change how many levels are grouped.
I have tried the following:
## Generate all possible combinations of 2 plots within each `sub`
# NOTE(review): if `df$plot` is a character column (as it is after the
# lapply(df, as.character) step in the setup), levels() returns NULL and
# combn() cannot build any pairs. unique(df$plot) is probably what was meant.
combns <- combn(unique(levels(df$plot)), 2, simplify = FALSE)

## Nest data
# NOTE(review): the pipe hands `combns` to map() as its function argument, so
# the combinations are never used to filter rows of each `sub_group`;
# presumably each combination should subset the nested data frame instead.
myList <-
  df %>%
  group_by(season, site) %>%
  do(sub_group = data.frame(.)) %>%
  select(sub_group) %>%
  map(combns)
Example combinations for this data frame would be:
# Expected output, written out by hand: one data frame per unordered pair of
# plots, grouped by the site/season block the plots belong to.
# NOTE(review): each subset filters the whole `df` by plot only. Plot M occurs
# in both the wet and the dry season of site 2, so every combination that
# contains M returns rows from both seasons -- confirm this is intended.

# All combinations of levels within plot when nesting within site=1 and season=wet
subGroup1_comb1 <- df[df$plot %in% c("A", "B"), ]
subGroup1_comb2 <- df[df$plot %in% c("A", "C"), ]
subGroup1_comb3 <- df[df$plot %in% c("A", "D"), ]
subGroup1_comb4 <- df[df$plot %in% c("B", "C"), ]
subGroup1_comb5 <- df[df$plot %in% c("B", "D"), ]
subGroup1_comb6 <- df[df$plot %in% c("C", "D"), ]

# All combinations of levels within plot when nesting within site=1 and season=dry
subGroup2_comb1 <- df[df$plot %in% c("E", "F"), ]
subGroup2_comb2 <- df[df$plot %in% c("E", "G"), ]
subGroup2_comb3 <- df[df$plot %in% c("E", "H"), ]
subGroup2_comb4 <- df[df$plot %in% c("F", "G"), ]
subGroup2_comb5 <- df[df$plot %in% c("F", "H"), ]
subGroup2_comb6 <- df[df$plot %in% c("G", "H"), ]

# All combinations of levels within plot when nesting within site=2 and season=wet
subGroup3_comb1 <- df[df$plot %in% c("I", "J"), ]
subGroup3_comb2 <- df[df$plot %in% c("I", "K"), ]
subGroup3_comb3 <- df[df$plot %in% c("I", "L"), ]
subGroup3_comb4 <- df[df$plot %in% c("I", "M"), ]
subGroup3_comb5 <- df[df$plot %in% c("J", "K"), ]
subGroup3_comb6 <- df[df$plot %in% c("J", "L"), ]
subGroup3_comb7 <- df[df$plot %in% c("J", "M"), ]
subGroup3_comb8 <- df[df$plot %in% c("K", "L"), ]
subGroup3_comb9 <- df[df$plot %in% c("K", "M"), ]
subGroup3_comb10 <- df[df$plot %in% c("L", "M"), ]

# All combinations of levels within plot when nesting within site=2 and season=dry
subGroup4_comb1 <- df[df$plot %in% c("M", "N"), ]
subGroup4_comb2 <- df[df$plot %in% c("M", "O"), ]
subGroup4_comb3 <- df[df$plot %in% c("M", "P"), ]
subGroup4_comb4 <- df[df$plot %in% c("N", "O"), ]
subGroup4_comb5 <- df[df$plot %in% c("N", "P"), ]
subGroup4_comb6 <- df[df$plot %in% c("O", "P"), ]
The result output for this example would be:
# Collect every per-combination data frame into a single flat list, ordered
# by sub-group (site/season block) and then by combination number.
myList <- list(
  subGroup1_comb1,
  subGroup1_comb2,
  subGroup1_comb3,
  subGroup1_comb4,
  subGroup1_comb5,
  subGroup1_comb6,
  subGroup2_comb1,
  subGroup2_comb2,
  subGroup2_comb3,
  subGroup2_comb4,
  subGroup2_comb5,
  subGroup2_comb6,
  subGroup3_comb1,
  subGroup3_comb2,
  subGroup3_comb3,
  subGroup3_comb4,
  subGroup3_comb5,
  subGroup3_comb6,
  subGroup3_comb7,
  subGroup3_comb8,
  subGroup3_comb9,
  subGroup3_comb10,
  subGroup4_comb1,
  subGroup4_comb2,
  subGroup4_comb3,
  subGroup4_comb4,
  subGroup4_comb5,
  subGroup4_comb6
)
1 answer

This is not the cleanest solution; ideally the code would be less repetitive and iterate through a loop.
# Number of plots in each combination. Change this to group more levels at a
# time; combn() stops with an error if a subset has fewer plots than this.
group_size <- 2

## Subset for each site and season, and record the plots present in each
sub1 <- df[df$site == "1" & df$season == "wet", ]
plotLev1 <- unique(sub1$plot)

sub2 <- df[df$site == "1" & df$season == "dry", ]
plotLev2 <- unique(sub2$plot)

sub3 <- df[df$site == "2" & df$season == "wet", ]
plotLev3 <- unique(sub3$plot)

sub4 <- df[df$site == "2" & df$season == "dry", ]
plotLev4 <- unique(sub4$plot)

## Generate all combinations of plots
# combn() returns unordered combinations, so A/B and B/A are not duplicated.
comb1 <- combn(plotLev1, group_size, simplify = FALSE)
comb2 <- combn(plotLev2, group_size, simplify = FALSE)
comb3 <- combn(plotLev3, group_size, simplify = FALSE)
comb4 <- combn(plotLev4, group_size, simplify = FALSE)

## Get a list of data frames, one per plot combination
# Each subset is taken from the site/season data frame, not from `df`, so
# rows from other site/season blocks are never included.
myList1 <- lapply(comb1, function(x) sub1[sub1[, "plot"] %in% x, ])
myList1

myList2 <- lapply(comb2, function(x) sub2[sub2[, "plot"] %in% x, ])
myList2

myList3 <- lapply(comb3, function(x) sub3[sub3[, "plot"] %in% x, ])
myList3

myList4 <- lapply(comb4, function(x) sub4[sub4[, "plot"] %in% x, ])
myList4