Find all combinations among levels of a factor and output to list of data frames

I have a data frame like so:

 set.seed(540)

# Example data: 2 sites x 2 seasons, with plots A-P nested within each
# site/season subset, and one sampled plant / lepidopteran / parasitoid
# species id per row.
df <- data.frame(
  site = c(rep(1, 30), rep(2, 30)),
  season = c(rep("wet", 20), rep("dry", 10), rep("wet", 16), rep("dry", 14)),
  plot = c(
    rep("A", 5), rep("B", 3), rep("C", 6), rep("D", 6),
    rep("E", 2), rep("F", 3), rep("G", 4), rep("H", 1),
    rep("I", 3), rep("J", 10), rep("K", 1), rep("L", 1),
    rep("M", 2), rep("N", 3), rep("O", 6), rep("P", 4)
  ),
  plantsp = sample(1:100, 60, replace = TRUE),
  lepsp = sample(1:100, 60, replace = TRUE),
  psitsp = sample(1:100, 60, replace = TRUE)
)

# Store every column as character (note: `plot` is therefore not a factor,
# so levels(df$plot) is NULL; use unique(df$plot) instead).
df[] <- lapply(df, as.character)

# Prefix the species ids so that ids from different columns stay distinguishable
df$plantsp <- paste("plantsp", df$plantsp, sep = "_")
df$lepsp <- paste("lepsp", df$lepsp, sep = "_")
df$psitsp <- paste("psitsp", df$psitsp, sep = "_")

# Interaction labels. The original referenced df$varX / df$varY / df$varZ,
# which do not exist in `df` and make the assignment fail; they are assumed
# here to stand for plantsp / lepsp / psitsp -- adjust if a different pairing
# was intended.
df$paste1 <- paste(df$plantsp, df$lepsp, sep = "_")
df$paste3 <- paste(df$plantsp, df$lepsp, df$psitsp, sep = "_")

Interactions are sampled within plots (`plot`) within a given location (`site`) and `season`. I need to subset the plots nested within each site and season. Within each site-and-season subset, I need to apply a function that subsets the data frame for each combination of plots; each resulting data frame will be put into a list. Combinations are not considered different if only the order varies (i.e. 1_2 is the same as 2_1), and the grouping size for a combination is 2 levels. I would prefer an adjustable grouping-size argument so that I can change how many levels are grouped.

I have tried the following:

## Function that returns, for one site x season subset `sub`, one data frame
## per combination of `m` plots (`m` is the adjustable grouping size).
## Returns an empty list when `sub` holds fewer than `m` distinct plots.
combns <- function(sub, m = 2) {
  # `plot` is a character column, so use unique() rather than levels()
  plots <- unique(sub$plot)
  if (length(plots) < m) {
    return(list())
  }
  # Combine positions instead of values; order within a combination is
  # irrelevant because rows are selected with %in%.
  combn(
    length(plots), m,
    FUN = function(i) sub[sub$plot %in% plots[i], ],
    simplify = FALSE
  )
}

## Nest data: split into season x site subsets, build the plot combinations
## inside each subset, then flatten into a single list of data frames
myList <-
  df %>%
  split(list(.$season, .$site), drop = TRUE) %>%
  map(combns, m = 2) %>%
  unlist(recursive = FALSE)

Example combinations for this data frame would be:

# Every pair of plots found in the site=1, season=wet subset (plots A-D)
subGroup1_comb1 <- df[df$plot %in% c("A", "B"), ]
subGroup1_comb2 <- df[df$plot %in% c("A", "C"), ]
subGroup1_comb3 <- df[df$plot %in% c("A", "D"), ]
subGroup1_comb4 <- df[df$plot %in% c("B", "C"), ]
subGroup1_comb5 <- df[df$plot %in% c("B", "D"), ]
subGroup1_comb6 <- df[df$plot %in% c("C", "D"), ]

#All combinations of levels within plot when nesting within site=1 and season=dry
subGroup2_comb1 <- df[df$plot %in% c("E", "F"), ]
subGroup2_comb2 <- df[df$plot %in% c("E", "G"), ]
subGroup2_comb3 <- df[df$plot %in% c("E", "H"), ]
subGroup2_comb4 <- df[df$plot %in% c("F", "G"), ]
subGroup2_comb5 <- df[df$plot %in% c("F", "H"), ]
# Uses the `plot` column for both plots (there is no `var3` column in `df`)
subGroup2_comb6 <- df[df$plot %in% c("G", "H"), ]

#All combinations of levels within plot when nesting within site=2 and season=wet
# Restrict to the site/season subset first: plot "M" also has a row in
# site=2, season=dry, which must not leak into these combinations.
in_grp3 <- df$site == "2" & df$season == "wet"
subGroup3_comb1 <- df[in_grp3 & df$plot %in% c("I", "J"), ]
subGroup3_comb2 <- df[in_grp3 & df$plot %in% c("I", "K"), ]
subGroup3_comb3 <- df[in_grp3 & df$plot %in% c("I", "L"), ]
subGroup3_comb4 <- df[in_grp3 & df$plot %in% c("I", "M"), ]
subGroup3_comb5 <- df[in_grp3 & df$plot %in% c("J", "K"), ]
subGroup3_comb6 <- df[in_grp3 & df$plot %in% c("J", "L"), ]
subGroup3_comb7 <- df[in_grp3 & df$plot %in% c("J", "M"), ]
subGroup3_comb8 <- df[in_grp3 & df$plot %in% c("K", "L"), ]
subGroup3_comb9 <- df[in_grp3 & df$plot %in% c("K", "M"), ]
subGroup3_comb10 <- df[in_grp3 & df$plot %in% c("L", "M"), ]


#All combinations of levels within plot when nesting within site=2 and season=dry
# Restrict to the site/season subset first: plot "M" also has a row in
# site=2, season=wet, which must not leak into these combinations.
in_grp4 <- df$site == "2" & df$season == "dry"
subGroup4_comb1 <- df[in_grp4 & df$plot %in% c("M", "N"), ]
subGroup4_comb2 <- df[in_grp4 & df$plot %in% c("M", "O"), ]
subGroup4_comb3 <- df[in_grp4 & df$plot %in% c("M", "P"), ]
subGroup4_comb4 <- df[in_grp4 & df$plot %in% c("N", "O"), ]
subGroup4_comb5 <- df[in_grp4 & df$plot %in% c("N", "P"), ]
subGroup4_comb6 <- df[in_grp4 & df$plot %in% c("O", "P"), ]

The result output for this example would be:

# Expected result: one flat, unnamed list holding every combination data
# frame, ordered by subgroup and then by combination number.
myList <- list(
  # subGroup1
  subGroup1_comb1, subGroup1_comb2, subGroup1_comb3,
  subGroup1_comb4, subGroup1_comb5, subGroup1_comb6,
  # subGroup2
  subGroup2_comb1, subGroup2_comb2, subGroup2_comb3,
  subGroup2_comb4, subGroup2_comb5, subGroup2_comb6,
  # subGroup3
  subGroup3_comb1, subGroup3_comb2, subGroup3_comb3, subGroup3_comb4,
  subGroup3_comb5, subGroup3_comb6, subGroup3_comb7, subGroup3_comb8,
  subGroup3_comb9, subGroup3_comb10,
  # subGroup4
  subGroup4_comb1, subGroup4_comb2, subGroup4_comb3,
  subGroup4_comb4, subGroup4_comb5, subGroup4_comb6
)

1 answer

  • answered 2019-10-08 16:01 Danielle

    This is not the cleanest solution; ideally the code would be less repetitive and iterate over the subsets in a loop.

    # Return one data frame per combination of `m` plots within a single
    # site x season subset of `data`.
    #   data   : data frame with `site`, `season` and `plot` columns
    #   site   : site value to keep
    #   season : season value to keep
    #   m      : grouping size, i.e. the number of plots per combination
    # Returns a list of data frames; the list is empty when the subset holds
    # fewer than `m` distinct plots (combn() would otherwise stop with an error).
    plot_combos <- function(data, site, season, m = 2) {
      sub <- data[data$site == site & data$season == season, ]
      plots <- unique(sub$plot)
      if (length(plots) < m) {
        return(list())
      }
      # Combine positions rather than values, so a single numeric plot label
      # is never expanded by combn() into seq_len(label).
      combn(
        length(plots), m,
        FUN = function(i) sub[sub$plot %in% plots[i], ],
        simplify = FALSE
      )
    }

    # Site x season pairs, in order of first appearance in `df`
    groups <- unique(df[, c("site", "season")])

    ## get list of data frames for each plot combo, one element per subset
    nested <- Map(
      function(s, z) plot_combos(df, site = s, season = z, m = 2),
      groups$site, groups$season
    )
    names(nested) <- paste(groups$site, groups$season, sep = "_")

    # Per-subset results under their original names
    myList1 <- nested[["1_wet"]]
    myList2 <- nested[["1_dry"]]
    myList3 <- nested[["2_wet"]]
    myList4 <- nested[["2_dry"]]

    # All combinations from all subsets in one flat list of data frames
    myList <- unlist(nested, recursive = FALSE)
    myList