Вот более удачный подход, который не опирается на слепую выборку, а представляет сходство между каждой парой кодов в виде ребра в сети, а затем использует функцию largest_ivs из пакета igraph для поиска наибольших наборов максимально непохожих друг на друга кодов:
## Start from an empty global environment (kept from the original script).
## NOTE(review): this wipes every object in the calling session - confirm
## that this is still wanted when the script is sourced interactively.
rm(list = ls())
library(gtools)
library(igraph)
## Output folder: every file below is read from / written to this directory.
## NOTE(review): "XXXXXXXXXX" looks like a placeholder - set it to the real
## output path before running.
outputfolder <- "XXXXXXXXXX"
## recursive = TRUE lets a nested path be created in one go; showWarnings is
## off so that re-running with an existing folder stays silent.
dir.create(outputfolder, showWarnings = FALSE, recursive = TRUE)
setwd(outputfolder)
## Available colours (one-letter codes)
x <- c("W", "R", "G", "B", "P", "Y")
## Generate all possible colour combinations for 4 positions, colours may
## repeat. n is derived from the colour vector so that adding or removing a
## colour in `x` does not require touching this call.
body <- data.frame(
  permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
## One column per painted body position
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")
## Save the full list; the row number of a combination in this table is the
## identifier used for it in the edge list below.
write.table(
  body,
  file = "Paint_marks_full_list.txt",
  col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
)
## Generate edge list
## Each row of Edge_list.txt describes one pair of combinations (row indices
## into `body`, with comb_1 < comb_2) that share at least one spot, together
## with the number of shared spots. The file is appended to while the pairs
## are being evaluated, so it also acts as a checkpoint: an interrupted run
## resumes right after the last pair that was written.
edge_list <- data.frame(
  comb_1 = integer(), comb_2 = integer(), similarity = integer()
)
if (!file.exists("Edge_list.txt")) {
  ## First run: create the file with its header line only
  write.table(
    edge_list,
    file = "Edge_list.txt",
    col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
  )
} else {
  edge_list <- read.table(
    "Edge_list.txt", header = TRUE, stringsAsFactors = FALSE
  )
}

## Resume point. On a fresh run (no edge written yet) start from the very
## first pair (1, 2); without these defaults `last_i` / `last_j` would be
## undefined below.
last_i <- 1L
last_j <- 1L
if (nrow(edge_list) > 0) {
  last_i <- edge_list[nrow(edge_list), "comb_1"]
  last_j <- edge_list[nrow(edge_list), "comb_2"]
}

n_comb <- nrow(body)
## Character matrix copy of `body`, so that one combination can be compared
## with many others in a single vectorised operation.
body_mat <- as.matrix(body)

## Nothing to do when the last recorded pair is already the final one
if (n_comb >= 2 && !(last_i == (n_comb - 1) && last_j == n_comb)) {
  for (i in last_i:(n_comb - 1)) {
    print(paste("Combination", i))
    ## Partners still to be evaluated for combination i. For the combination
    ## at which the previous run stopped, skip the partners already handled.
    js <- (i + 1):n_comb
    if (i == last_i) {
      js <- js[js > last_j]
    }
    if (length(js) == 0) {
      next
    }
    ## Number of positions with identical colour between combination i and
    ## each partner. rep(..., each = ) lines the colours of combination i up
    ## with the column-major layout of the partner matrix.
    simil <- rowSums(
      body_mat[js, , drop = FALSE] == rep(body_mat[i, ], each = length(js))
    )
    keep <- simil > 0
    ## Only pairs with some overlap are stored (same rows, same order and same
    ## format as writing them one at a time).
    if (any(keep)) {
      write.table(
        data.frame(comb_1 = i, comb_2 = js[keep], similarity = simil[keep]),
        file = "Edge_list.txt",
        col.names = FALSE, row.names = FALSE, quote = FALSE, append = TRUE
      )
    }
  }
  ## Reload the completed edge list: the in-memory copy predates the rows
  ## appended above and must not be used to build the graphs.
  edge_list <- read.table(
    "Edge_list.txt", header = TRUE, stringsAsFactors = FALSE
  )
}
###### Build 3 undirected graphs whose edges represent overlap between
###### combinations
## Vertices are declared explicitly (one per row of `body`, named by row
## index) so that a combination without any edge at a given threshold still
## exists in the graph and remains eligible for the independent sets.
code_vertices <- data.frame(
  name = as.character(seq_len(nrow(body))),
  stringsAsFactors = FALSE
)
## First graph: ANY overlap between two combinations is an edge. Used to
## produce the list of paint combinations with no overlap at all.
net1 <- graph_from_data_frame(
  edge_list[c("comb_1", "comb_2")],
  directed = FALSE, vertices = code_vertices
)
## Second graph: only overlaps of 2 or more spots are edges. Used to produce
## the list of paint combinations with no more than 1 spot in common.
net2 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 2), c("comb_1", "comb_2")],
  directed = FALSE, vertices = code_vertices
)
## Third graph: only overlaps of 3 or more spots are edges. Used to produce
## the list of paint combinations with no more than 2 spots in common.
net3 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 3), c("comb_1", "comb_2")],
  directed = FALSE, vertices = code_vertices
)
####### Extract independent vertex sets from each graph, i.e. sets of
####### combinations in which no two members are joined by an edge.
## NOTE(review): searching for largest independent sets can be very slow on
## big graphs - confirm the run time is acceptable for the full code list.
## Combinations sharing no spot at all
no_overlap_list <- largest_ivs(graph = net1)
## Combinations sharing at most one spot
max_one_spot_overlap_list <- largest_ivs(graph = net2)
## Combinations sharing at most two spots
max_two_spots_overlap_list <- largest_ivs(graph = net3)