Я хотел бы извлечь название семейства (таксономический уровень family) и числовой идентификатор (ID) каждого таксона из данных о последовательностях. Для последующего анализа нужны пакеты picante и phyloseq, но для этого вопроса они не требуются. В приведённом ниже примере я хотел бы:
- Сократить значения V1 с «000000001|size:678798» и «000000009|size:1497» до «1» и «9»
- Извлечь названия семейств («Endozoicimonaceae» и «Synechococcaceae») из V2 в новую колонку
# Reproducible example data (dput() output) for the taxonomy-parsing question.
#
# `a` is a 2-row data.frame with four columns:
#   V1 - factor; each label has the form "<zero-padded id>|size:<count>".
#        Only the first two of the 68 levels are used by the rows.
#   V2 - factor; each label is a "; "-separated lineage string with rank
#        prefixes k__, p__, c__, o__, f__, g__, s__ (or "Unassigned").
#        The rows use levels 28 and 11 of the 37 levels.
#   V3 - numeric, V4 - integer.
#
# Every V2 label is kept on a single physical line on purpose: R allows a
# string constant to span lines, but the line break then becomes a literal
# "\n" inside the label, which corrupts the level text.
a <- structure(list(V1 = structure(1:2, .Label =
c("000000001|size:678798", "000000009|size:1497",
"000000019|size:66182", "000000020|size:4043", "000000025|size:549",
"000000030|size:1689", "000000035|size:655", "000000036|size:718",
"000000041|size:431", "000000047|size:3312", "000000054|size:584",
"000000065|size:376", "000000069|size:2341", "000000085|size:771",
"000000091|size:296", "000000095|size:462", "000000107|size:378",
"000000108|size:612", "000000116|size:319", "000000121|size:1355",
"000000126|size:256", "000000270|size:10342", "000000274|size:293",
"000000299|size:154434", "000000301|size:35432",
"000000305|size:440",
"000000311|size:285", "000000342|size:760", "000000344|size:474",
"000000365|size:450", "000000368|size:4805", "000000369|size:337",
"000000398|size:1217", "000000399|size:7457", "000000401|size:1426",
"000000418|size:724", "000000419|size:1201", "000000423|size:1173",
"000000445|size:252", "000000487|size:1142", "000000488|size:5676",
"000000509|size:6883", "000000545|size:345", "000000546|size:419",
"000000586|size:518", "000000606|size:448", "000000607|size:3070",
"000000610|size:894", "000000624|size:52798",
"000000625|size:19114",
"000000628|size:1808", "000000630|size:20151",
"000000668|size:2021",
"000000669|size:1193", "000000680|size:1287", "000000683|size:305",
"000000685|size:265", "000000694|size:6939", "000000695|size:333",
"000000697|size:634", "000000716|size:24392", "000000719|size:760",
"000000760|size:357", "000000762|size:661", "000000788|size:837",
"000000789|size:368", "000000792|size:1075",
"000000793|size:2143"), class = "factor"), V2 = structure(c(28L,
11L), .Label = c(
"k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium; s__",
"k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae; g__Micrococcus; s__",
"k__Bacteria; p__Bacteroidetes; c__Cytophagia; o__Cytophagales; f__[Amoebophilaceae]; g__SGUS912; s__",
"k__Bacteria; p__Bacteroidetes; c__Flavobacteriia; o__Flavobacteriales; f__Flavobacteriaceae; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__; f__; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__CAB-I; f__; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Chlorophyta; f__Ulvophyceae; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Stramenopiles; f__; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Nostocophycideae; o__Stigonematales; f__Rivulariaceae; g__Rivularia; s__",
"k__Bacteria; p__Cyanobacteria; c__Synechococcophycideae; o__Pseudanabaenales; f__Pseudanabaenaceae; g__; s__",
"k__Bacteria; p__Cyanobacteria; c__Synechococcophycideae; o__Synechococcales; f__Synechococcaceae; g__Synechococcus; s__",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Bacillaceae",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Bacillaceae; g__Bacillus; s__",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Bacillaceae; g__Bacillus; s__firmus",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Bacillaceae; g__Geobacillus; s__",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus; s__",
"k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Lactobacillaceae; g__Lactobacillus; s__iners",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Caulobacterales; f__Caulobacteraceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Caulobacterales; f__Caulobacteraceae; g__Brevundimonas; s__diminuta",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Rhizobiaceae; g__Agrobacterium; s__",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Paracoccus; s__",
"k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodospirillales; f__Acetobacteraceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Neisseriales; f__Neisseriaceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Myxococcales; f__; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Alteromonadales; f__Alteromonadaceae; g__Alteromonas; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Alteromonadales; f__Alteromonadaceae; g__Spongiibacter; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Oceanospirillales; f__Endozoicimonaceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Acinetobacter",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Acinetobacter; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Acinetobacter; s__rhizosphaerae",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Enhydrobacter; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Pseudomonadaceae; g__; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Vibrionales; f__Pseudoalteromonadaceae; g__Pseudoalteromonas; s__",
"k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae; g__Luteimonas; s__",
"k__Bacteria; p__Verrucomicrobia; c__Verrucomicrobiae; o__Verrucomicrobiales; f__Verrucomicrobiaceae; g__Rubritalea; s__",
"Unassigned"), class = "factor"), V3 = c(1, 1),
V4 = c(3L, 3L)), row.names = 1:2, class = "data.frame")