Ниже приведено решение на базовом R, использующее ave + gsub:
# Normalise the labels in V5.
# Outer grouping: the "Cluster_<digits>" prefix of every label.
# Inner grouping: inside one prefix group, a new run starts at each label
# containing "_A" (the cumulative count of such labels is the run id).
# Every label is then replaced by the first label of its run.
M$V5 <- ave(
  M$V5,
  gsub("(Cluster_\\d+).*", "\\1", M$V5),
  FUN = function(labels) {
    run_id <- cumsum(grepl("_A", labels))
    ave(labels, run_id, FUN = function(run) head(run, 1))
  }
)
в результате получаем:
> M
V1 V2 V3 V4 V5
1 [1,] 0.4212778 0.6874073 0.1551896 Cluster_1
2 [2,] 0.6874073 0.5610995 0.1779030 Cluster_1
3 [3,] 0.1551896 0.1779030 0.9515304 Cluster_1
4 [4,] 0.4675764 0.5407295 0.7942978 Cluster_1_A
5 [5,] 0.4675764 0.5407295 0.7942978 Cluster_1_A
6 [6,] 0.4675764 0.5407295 0.7942978 Cluster_1_A
7 [7,] 0.4675764 0.5407295 0.7942978 Cluster_2
8 [8,] 0.4675764 0.5407295 0.7942978 Cluster_2_A
9 [9,] 0.4675764 0.5407295 0.7942978 Cluster_2_A
10 [10,] 0.4675764 0.5407295 0.7942978 Cluster_2_A
11 [11,] 0.4675764 0.5407295 0.7942978 Cluster_2_1_A
12 [12,] 0.4675764 0.5407295 0.7942978 Cluster_2_1_A
13 [13,] 0.4675764 0.5407295 0.7942978 Cluster_2_1_A
14 [14,] 0.4675764 0.5407295 0.7942978 Cluster_3
15 [15,] 0.4675764 0.5407295 0.7942978 Cluster_3
16 [15,] 0.4675764 0.5407295 0.7942978 Cluster_3
17 [16,] 0.4675764 0.5407295 0.7942978 Cluster_4
18 [17,] 0.4675764 0.5407295 0.7942978 Cluster_4
ДАННЫЕ
# Sample data frame: 18 rows, 5 columns (V1 and V5 character, V2-V4 numeric).
# Row names are the automatic 1..18 sequence.
M <- data.frame(
  V1 = c(
    "[1,]", "[2,]", "[3,]", "[4,]", "[5,]", "[6,]",
    "[7,]", "[8,]", "[9,]", "[10,]", "[11,]", "[12,]",
    "[13,]", "[14,]", "[15,]", "[15,]", "[16,]", "[17,]"
  ),
  # Three distinct leading values, then the same value repeated 15 times.
  V2 = c(0.4212778, 0.6874073, 0.1551896, rep(0.4675764, 15L)),
  V3 = c(0.6874073, 0.5610995, 0.177903, rep(0.5407295, 15L)),
  V4 = c(0.1551896, 0.177903, 0.9515304, rep(0.7942978, 15L)),
  V5 = c(
    "Cluster_1", "Cluster_1", "Cluster_1",
    "Cluster_1_A", "Cluster_1", "Cluster_1",
    "Cluster_2", "Cluster_2_A", "Cluster_2", "Cluster_2",
    "Cluster_2_1_A", "Cluster_2", "Cluster_2",
    "Cluster_3", "Cluster_3", "Cluster_3",
    "Cluster_4", "Cluster_4"
  ),
  # Keep character columns as character on R versions before 4.0 as well.
  stringsAsFactors = FALSE
)