У меня есть такой фрейм данных:
Из него я хочу удалить строки, в которых повторяется одна и та же комбинация значений в столбцах assemblyName, qseqid и sseqid. При этом я хочу сохранить такие повторяющиеся строки, если есть другая строка (или строки) с тем же assemblyName, но с другими qseqid и sseqid.
Например, на приведённом выше изображении batch2_21032019_ENT924_assembly.fasta в столбце assemblyName имеет 4 записи (строки 747, 748, 771, 785). Если бы не было строк 771 и 785, я бы удалил строки 747 и 748, в которых совпадают assemblyName, qseqid и sseqid. Но поскольку есть строки 771 и 785 с другими qseqid и sseqid, я хочу сохранить все 4 строки.
Но последние несколько строк, с 1422 по 1503, я не хочу сохранять, потому что они повторяются в столбцах assemblyName, qseqid, sseqid.
По сути, мне нужен фрейм данных со следующим выводом:
Как мне добиться этого в R? Вот фрагмент моих данных (вывод dput):
structure(list(assemblyName = structure(c(5L, 12L, 12L, 24L,
24L, 42L, 48L, 48L, 48L, 48L, 76L, 76L, 76L, 79L, 79L, 79L), .Label = c("batch1_08032019_ENT1252_assembly.fasta",
"batch1_08032019_ENT1350_assembly.fasta", "batch1_08032019_ENT1368_assembly.fasta",
"batch1_08032019_ENT1382_assembly.fasta", "batch1_08032019_ENT1420_assembly.fasta",
"batch1_08032019_ENT1458_assembly.fasta", "batch1_08032019_ENT1489_assembly.fasta",
"batch14_02082019_ENT1646_assembly.fasta", "batch2_21032019_ENT1079_assembly.fasta",
"batch2_21032019_ENT1192_assembly.fasta", "batch2_21032019_ENT1219_assembly.fasta",
"batch2_21032019_ENT1250_assembly.fasta", "batch2_21032019_ENT1357_assembly.fasta",
"batch2_21032019_ENT1440_assembly.fasta", "batch2_21032019_ENT1669_assembly.fasta",
"batch2_21032019_ENT1758_assembly.fasta", "batch2_21032019_ENT1916_assembly.fasta",
"batch2_21032019_ENT1940_assembly.fasta", "batch2_21032019_ENT1968_assembly.fasta",
"batch2_21032019_ENT256_assembly.fasta", "batch2_21032019_ENT264_assembly.fasta",
"batch2_21032019_ENT267_assembly.fasta", "batch2_21032019_ENT268_assembly.fasta",
"batch2_21032019_ENT285_assembly.fasta", "batch2_21032019_ENT3_assembly.fasta",
"batch2_21032019_ENT310_assembly.fasta", "batch2_21032019_ENT312_assembly.fasta",
"batch2_21032019_ENT337_assembly.fasta", "batch2_21032019_ENT341_assembly.fasta",
"batch2_21032019_ENT358_assembly.fasta", "batch2_21032019_ENT360_assembly.fasta",
"batch2_21032019_ENT378_assembly.fasta", "batch2_21032019_ENT385_assembly.fasta",
"batch2_21032019_ENT421_assembly.fasta", "batch2_21032019_ENT422_assembly.fasta",
"batch2_21032019_ENT423_assembly.fasta", "batch2_21032019_ENT454_assembly.fasta",
"batch2_21032019_ENT465_assembly.fasta", "batch2_21032019_ENT466_assembly.fasta",
"batch2_21032019_ENT473_assembly.fasta", "batch2_21032019_ENT497_assembly.fasta",
"batch2_21032019_ENT5_assembly.fasta", "batch2_21032019_ENT50_assembly.fasta",
"batch2_21032019_ENT595_assembly.fasta", "batch2_21032019_ENT607_assembly.fasta",
"batch2_21032019_ENT708_assembly.fasta", "batch2_21032019_ENT807_assembly.fasta",
"batch2_21032019_ENT924_assembly.fasta", "batch20_11102019_ENT1249_assembly.fasta",
"batch20_11102019_ENT783_assembly.fasta", "batch20_11102019_ENT784_assembly.fasta",
"batch20_11102019_ENT785_assembly.fasta", "batch20_11102019_ENT835_assembly.fasta",
"batch20_11102019_ENT849_assembly.fasta", "batch20_11102019_ENT897_assembly.fasta",
"batch20_11102019_ENT901_assembly.fasta", "batch20_11102019_ENT903_assembly.fasta",
"batch20_11102019_ENT912_assembly.fasta", "batch20_11102019_ENT916_assembly.fasta",
"batch20_11102019_ENT938_assembly.fasta", "batch20_11102019_ENT965_assembly.fasta",
"batch20_11102019_ENT981_assembly.fasta", "batch20_11102019_ENT983_assembly.fasta",
"batch20_11102019_ENT990_assembly.fasta", "batch21x_16102019_ENT1251_assembly.fasta",
"batch21x_16102019_ENT1262_assembly.fasta", "batch21x_16102019_ENT1263_assembly.fasta",
"batch21x_16102019_ENT1266_assembly.fasta", "batch21x_16102019_ENT1267_assembly.fasta",
"batch21x_16102019_ENT1271_assembly.fasta", "batch21x_16102019_ENT1274_assembly.fasta",
"batch21x_16102019_ENT1276_assembly.fasta", "batch21x_16102019_ENT1278_assembly.fasta",
"batch21x_16102019_ENT1279_assembly.fasta", "batch21x_16102019_ENT1280_assembly.fasta",
"batch21x_16102019_ENT1288_assembly.fasta", "batch21x_16102019_ENT1296_assembly.fasta",
"batch21x_16102019_ENT1300_assembly.fasta", "batch21x_16102019_ENT1321_assembly.fasta",
"batch21x_16102019_ENT1322_assembly.fasta", "batch21x_16102019_ENT1325_assembly.fasta",
"batch21x_16102019_ENT1330_assembly.fasta", "batch21x_16102019_ENT1384_assembly.fasta",
"batch21x_16102019_ENT1393_assembly.fasta", "batch21x_16102019_ENT1394_assembly.fasta",
"batch21x_16102019_ENT1396_assembly.fasta", "batch21x_16102019_ENT1465_assembly.fasta",
"batch21x_16102019_ENT1502_assembly.fasta", "batch21x_16102019_ENT1570_assembly.fasta",
"batch21x_16102019_ENT1599_assembly.fasta", "batch21x_16102019_ENT1649_assembly.fasta",
"batch21x_16102019_ENT1676_assembly.fasta", "batch21x_16102019_ENT1681_assembly.fasta",
"batch21x_16102019_ENT1691_assembly.fasta", "batch21x_16102019_ENT1837_assembly.fasta",
"batch21x_16102019_ENT1895_assembly.fasta", "batch21x_16102019_ENT1896_assembly.fasta",
"batch21x_16102019_ENT1929_assembly.fasta", "batch21x_16102019_ENT1941_assembly.fasta",
"batch21x_16102019_ENT209_assembly.fasta", "batch21x_16102019_ENT689_assembly.fasta",
"batch21x_16102019_ENT732_assembly.fasta", "batch21x_16102019_ENT790_assembly.fasta",
"batch22_18102019_ENT1331_assembly.fasta", "batch22_18102019_ENT1336_assembly.fasta",
"batch22_18102019_ENT1337_assembly.fasta", "batch22_18102019_ENT1352_assembly.fasta",
"batch22_18102019_ENT1359_assembly.fasta", "batch22_18102019_ENT1413_assembly.fasta",
"batch22_18102019_ENT1475_assembly.fasta", "batch22_18102019_ENT1515_assembly.fasta",
"batch22_18102019_ENT1559_assembly.fasta", "batch22_18102019_ENT1580_assembly.fasta",
"batch22_18102019_ENT1595_assembly.fasta"), class = "factor"),
qseqid = structure(c(107L, 71L, 89L, 109L, 122L, 119L, 19L,
19L, 69L, 117L, 61L, 61L, 61L, 72L, 72L, 72L), .Label = c("",
"1_length=4775743_depth=1.00x_circular=true", "1_length=4782442_depth=1.00x_circular=true",
"1_length=4798941_depth=1.00x_circular=true", "1_length=4811272_depth=1.00x_circular=true",
"1_length=4854518_depth=1.00x_circular=true", "1_length=4870013_depth=1.00x",
"1_length=4877560_depth=1.00x_circular=true", "1_length=4879405_depth=1.00x_circular=true",
"1_length=4880726_depth=1.00x_circular=true", "1_length=4910657_depth=1.00x_circular=true",
"1_length=4945396_depth=1.00x_circular=true", "1_length=4980803_depth=1.00x_circular=true",
"1_length=4995045_depth=1.00x_circular=true", "1_length=4995093_depth=1.00x_circular=true",
"1_length=5004019_depth=1.00x_circular=true", "1_length=5024487_depth=1.00x_circular=true",
"1_length=5386431_depth=1.00x_circular=true", "1_length=5418220_depth=1.00x_circular=true",
"10_length=167596_depth=0.99x_circular=true", "10_length=41259_depth=2.09x_circular=true",
"19_length=13505_depth=0.90x", "2_length=123974_depth=3.35x_circular=true",
"2_length=174608_depth=2.06x_circular=true", "2_length=177751_depth=2.86x_circular=true",
"2_length=258181_depth=1.64x_circular=true", "2_length=278408_depth=1.57x_circular=true",
"2_length=41183_depth=3.34x_circular=true", "2_length=41190_depth=5.16x_circular=true",
"2_length=41215_depth=3.01x_circular=true", "2_length=41217_depth=2.25x_circular=true",
"2_length=71861_depth=0.77x_circular=true", "2_length=71861_depth=2.89x_circular=true",
"2_length=72968_depth=0.51x_circular=true", "2_length=91069_depth=1.21x_circular=true",
"2_length=91643_depth=2.11x_circular=true", "2_length=92072_depth=0.81x_circular=true",
"20_length=5469_depth=1.62x", "22_length=90789_depth=1.44x_circular=true",
"3_length=112875_depth=0.98x_circular=true", "3_length=118064_depth=3.79x_circular=true",
"3_length=127528_depth=1.73x_circular=true", "3_length=164596_depth=1.02x_circular=true",
"3_length=165091_depth=1.16x_circular=true", "3_length=165095_depth=2.12x_circular=true",
"3_length=165543_depth=0.59x_circular=true", "3_length=174323_depth=1.93x_circular=true",
"3_length=174796_depth=0.74x_circular=true", "3_length=180232_depth=1.88x_circular=true",
"3_length=180817_depth=1.81x_circular=true", "3_length=38610_depth=3.37x_circular=true",
"3_length=41182_depth=3.37x_circular=true", "3_length=41182_depth=4.04x_circular=true",
"3_length=41184_depth=4.98x_circular=true", "3_length=41185_depth=5.84x_circular=true",
"3_length=41186_depth=3.26x_circular=true", "3_length=41232_depth=2.49x_circular=true",
"3_length=50138_depth=1.79x_circular=true", "3_length=58175_depth=0.39x",
"3_length=62334_depth=2.76x_circular=true", "3_length=67915_depth=0.42x",
"3_length=71861_depth=2.39x_circular=true", "3_length=71861_depth=2.99x_circular=true",
"3_length=72145_depth=0.97x_circular=true", "3_length=72168_depth=0.80x_circular=true",
"3_length=731673_depth=1.22x", "3_length=74789_depth=2.02x_circular=true",
"3_length=74794_depth=2.26x_circular=true", "3_length=75214_depth=2.77x_circular=true",
"3_length=79594_depth=1.46x_circular=true", "3_length=88353_depth=2.00x_circular=true",
"3_length=89872_depth=0.49x_circular=true", "3_length=90666_depth=2.61x_circular=true",
"3_length=96544_depth=1.98x_circular=true", "38_length=14280_depth=2.50x",
"39_length=41187_depth=6.10x_circular=true", "4_length=129927_depth=0.88x",
"4_length=161129_depth=0.64x_circular=true", "4_length=165104_depth=0.58x_circular=true",
"4_length=170202_depth=0.80x", "4_length=41182_depth=1.27x_circular=true",
"4_length=41186_depth=4.34x_circular=true", "4_length=41188_depth=2.88x_circular=true",
"4_length=41190_depth=2.44x_circular=true", "4_length=41190_depth=3.46x_circular=true",
"4_length=41215_depth=3.66x_circular=true", "4_length=41224_depth=2.50x_circular=true",
"4_length=46161_depth=2.45x_circular=true", "4_length=51479_depth=1.11x_circular=true",
"4_length=71795_depth=2.16x_circular=true", "4_length=71859_depth=1.18x_circular=true",
"4_length=71861_depth=0.80x_circular=true", "4_length=71861_depth=1.56x_circular=true",
"4_length=71861_depth=1.95x_circular=true", "4_length=71861_depth=3.09x_circular=true",
"4_length=71861_depth=3.28x_circular=true", "4_length=71868_depth=0.67x_circular=true",
"4_length=71875_depth=0.43x_circular=true", "4_length=72162_depth=0.61x_circular=true",
"4_length=72162_depth=1.28x_circular=true", "4_length=73397_depth=1.60x_circular=true",
"4_length=73399_depth=2.01x_circular=true", "4_length=88057_depth=1.72x_circular=true",
"46_length=5494_depth=4.49x", "5_length=110787_depth=5.28x_circular=true",
"5_length=41185_depth=3.00x_circular=true", "5_length=41190_depth=2.13x_circular=true",
"5_length=42336_depth=2.31x_circular=true", "5_length=46161_depth=2.20x_circular=true",
"5_length=51479_depth=1.02x_circular=true", "5_length=51479_depth=2.10x_circular=true",
"5_length=55129_depth=3.86x_circular=true", "5_length=6141_depth=16.45x_circular=true",
"5_length=62044_depth=5.10x", "5_length=6211_depth=4.24x_circular=true",
"5_length=65498_depth=0.98x_circular=true", "5_length=70472_depth=2.31x",
"5_length=71861_depth=1.24x_circular=true", "6_length=41190_depth=4.77x_circular=true",
"6_length=46161_depth=0.86x_circular=true", "6_length=71861_depth=2.24x_circular=true",
"6_length=7604_depth=3.49x_circular=true", "6_length=80977_depth=0.65x_circular=true",
"6_length=95567_depth=1.42x_circular=true", "64_length=6420_depth=2.15x"
), class = "factor"), sseqid = c("NDM-1", "NDM-5", "OXA-181",
"NDM-5", "OXA-181", "NDM-1", "OXA-181", "OXA-181", "NDM-5",
"NDM-5", "OXA-181", "OXA-181", "OXA-181", "OXA-181", "OXA-181",
"OXA-181"), qlen = c(41190L, 88353L, 51479L, 46161L, 7604L,
41190L, 5418220L, 5418220L, 75214L, 70472L, 67915L, 67915L,
67915L, 89872L, 89872L, 89872L), qstart = c(23131L, 14408L,
25135L, 25547L, 5873L, 23131L, 5244180L, 4252066L, 36917L,
20047L, 51138L, 44729L, 38320L, 4678L, 11087L, 88141L), qend = c(23943L,
15220L, 25932L, 26359L, 6670L, 23943L, 5244977L, 4252863L,
37729L, 20859L, 51935L, 45526L, 39117L, 5475L, 11884L, 88938L
)), .Names = c("assemblyName", "qseqid", "sseqid", "qlen",
"qstart", "qend"), row.names = c(78L, 209L, 223L, 389L, 403L,
656L, 747L, 748L, 771L, 785L, 1422L, 1423L, 1424L, 1501L, 1502L,
1503L), class = "data.frame")