У меня есть список строк, каждая из которых состоит из четырёх компонентов:
a_b_c_d
, где:
a
имеет 3 шаблона строк: str
, jtp
и mdl
b
имеет 5 шаблонов строк: HBW
, HBS
, HBO
, NHBB
и NHBO
c
имеет 4 шаблона строк: L
, M
, H
и ALL
d
имеет 4 шаблона строк: NMT
, MC
, CAR
и PT
a
, b
, c
и d
связаны друг с другом _
в указанном порядке. Некоторые записи не имеют
b
, c
и d
Мне нужно извлечь c
из списков как новое поле income
. Если c
не существует, его следует заменить на NA
. Ниже приведён фактический набор данных, который я использую:
# Sample names of the form a_b_c_d, generated instead of listed by hand.
# Within each prefix the order is: every a_b_c_d name (income varies fastest,
# then mode, then purpose), then a_b_c, then a_b, then a_c (including "ALL").
df <- local({
  incomes <- c("L", "M", "H")
  modes <- c("NMT", "MC", "CAR", "PT")
  all_purposes <- c("HBW", "HBS", "HBO", "NHBB", "NHBO")
  # "jtp" only occurs together with the first two purposes.
  purposes_by_prefix <- list(
    str = all_purposes,
    jtp = c("HBW", "HBS"),
    mdl = all_purposes
  )

  names_for_prefix <- function(prefix) {
    purposes <- purposes_by_prefix[[prefix]]
    # expand.grid() varies its first argument fastest.
    full <- expand.grid(
      income = incomes, mode = modes, purpose = purposes,
      stringsAsFactors = FALSE
    )
    partial <- expand.grid(
      income = incomes, purpose = purposes,
      stringsAsFactors = FALSE
    )
    c(
      paste(prefix, full$purpose, full$income, full$mode, sep = "_"),
      paste(prefix, partial$purpose, partial$income, sep = "_"),
      paste(prefix, purposes, sep = "_"),
      paste(prefix, c(incomes, "ALL"), sep = "_")
    )
  }

  unlist(lapply(names(purposes_by_prefix), names_for_prefix))
})
Я сделал много попыток, но не смог извлечь его правильно. Ниже приведён пример моего кода:
# Extract the income component (c) of each name into `income`; NA when absent.
# `df` is a plain character vector, so it is wrapped in a data frame with a
# `name` column before mutate() is applied.
# str_extract() (not str_extract_all()) yields one value per row and NA when
# nothing matches, instead of a list-column.
# The income code must be a whole "_"-delimited field:
#   (?<![^_])      preceded by "_" or by the start of the string
#   (?:ALL|L|M|H)  one of the four income codes (alternation, not a char class)
#   (?![^_])       followed by "_" or by the end of the string
# so the "H" in "HBW" or the "M" in "MC"/"NMT" is never picked up.
data.frame(name = df) %>%
  mutate(
    income = str_extract(
      string = name,
      pattern = "(?<![^_])(?:ALL|L|M|H)(?![^_])"
    )
  )
Есть ли у вас какие-либо предложения, как получить желаемый результат, показанный ниже? Я предпочитаю использовать tidyverse
и stringr
, но функции base
также подойдут вместо stringr
.
name income
1 str_HBW_L_NMT L
2 str_HBW_M_NMT M
3 str_HBW_H_NMT H
4 str_HBW_L_MC L
5 str_HBW_M_MC M
6 str_HBW_H_MC H
7 str_HBW_L_CAR L
8 str_HBW_M_CAR M
9 str_HBW_H_CAR H
10 str_HBW_L_PT L
11 str_HBW_M_PT M
12 str_HBW_H_PT H
13 str_HBS_L_NMT L
14 str_HBS_M_NMT M
15 str_HBS_H_NMT H
16 str_HBS_L_MC L
17 str_HBS_M_MC M
18 str_HBS_H_MC H
19 str_HBS_L_CAR L
20 str_HBS_M_CAR M
21 str_HBS_H_CAR H
22 str_HBS_L_PT L
23 str_HBS_M_PT M
24 str_HBS_H_PT H
25 str_HBO_L_NMT L
26 str_HBO_M_NMT M
27 str_HBO_H_NMT H
28 str_HBO_L_MC L
29 str_HBO_M_MC M
30 str_HBO_H_MC H
31 str_HBO_L_CAR L
32 str_HBO_M_CAR M
33 str_HBO_H_CAR H
34 str_HBO_L_PT L
35 str_HBO_M_PT M
36 str_HBO_H_PT H
37 str_NHBB_L_NMT L
38 str_NHBB_M_NMT M
39 str_NHBB_H_NMT H
40 str_NHBB_L_MC L
41 str_NHBB_M_MC M
42 str_NHBB_H_MC H
43 str_NHBB_L_CAR L
44 str_NHBB_M_CAR M
45 str_NHBB_H_CAR H
46 str_NHBB_L_PT L
47 str_NHBB_M_PT M
48 str_NHBB_H_PT H
49 str_NHBO_L_NMT L
50 str_NHBO_M_NMT M
51 str_NHBO_H_NMT H
52 str_NHBO_L_MC L
53 str_NHBO_M_MC M
54 str_NHBO_H_MC H
55 str_NHBO_L_CAR L
56 str_NHBO_M_CAR M
57 str_NHBO_H_CAR H
58 str_NHBO_L_PT L
59 str_NHBO_M_PT M
60 str_NHBO_H_PT H
61 str_HBW_L L
62 str_HBW_M M
63 str_HBW_H H
64 str_HBS_L L
65 str_HBS_M M
66 str_HBS_H H
67 str_HBO_L L
68 str_HBO_M M
69 str_HBO_H H
70 str_NHBB_L L
71 str_NHBB_M M
72 str_NHBB_H H
73 str_NHBO_L L
74 str_NHBO_M M
75 str_NHBO_H H
76 str_HBW <N/A>
77 str_HBS <N/A>
78 str_HBO <N/A>
79 str_NHBB <N/A>
80 str_NHBO <N/A>
81 str_L L
82 str_M M
83 str_H H
84 str_ALL ALL
85 jtp_HBW_L_NMT L
86 jtp_HBW_M_NMT M
87 jtp_HBW_H_NMT H
88 jtp_HBW_L_MC L
89 jtp_HBW_M_MC M
90 jtp_HBW_H_MC H
91 jtp_HBW_L_CAR L
92 jtp_HBW_M_CAR M
93 jtp_HBW_H_CAR H
94 jtp_HBW_L_PT L
95 jtp_HBW_M_PT M
96 jtp_HBW_H_PT H
97 jtp_HBS_L_NMT L
98 jtp_HBS_M_NMT M
99 jtp_HBS_H_NMT H
100 jtp_HBS_L_MC L
101 jtp_HBS_M_MC M
102 jtp_HBS_H_MC H
103 jtp_HBS_L_CAR L
104 jtp_HBS_M_CAR M
105 jtp_HBS_H_CAR H
106 jtp_HBS_L_PT L
107 jtp_HBS_M_PT M
108 jtp_HBS_H_PT H
109 jtp_HBW_L L
110 jtp_HBW_M M
111 jtp_HBW_H H
112 jtp_HBS_L L
113 jtp_HBS_M M
114 jtp_HBS_H H
115 jtp_HBW <N/A>
116 jtp_HBS <N/A>
117 jtp_L L
118 jtp_M M
119 jtp_H H
120 jtp_ALL ALL
121 mdl_HBW_L_NMT L
122 mdl_HBW_M_NMT M
123 mdl_HBW_H_NMT H
124 mdl_HBW_L_MC L
125 mdl_HBW_M_MC M
126 mdl_HBW_H_MC H
127 mdl_HBW_L_CAR L
128 mdl_HBW_M_CAR M
129 mdl_HBW_H_CAR H
130 mdl_HBW_L_PT L
131 mdl_HBW_M_PT M
132 mdl_HBW_H_PT H
133 mdl_HBS_L_NMT L
134 mdl_HBS_M_NMT M
135 mdl_HBS_H_NMT H
136 mdl_HBS_L_MC L
137 mdl_HBS_M_MC M
138 mdl_HBS_H_MC H
139 mdl_HBS_L_CAR L
140 mdl_HBS_M_CAR M
141 mdl_HBS_H_CAR H
142 mdl_HBS_L_PT L
143 mdl_HBS_M_PT M
144 mdl_HBS_H_PT H
145 mdl_HBO_L_NMT L
146 mdl_HBO_M_NMT M
147 mdl_HBO_H_NMT H
148 mdl_HBO_L_MC L
149 mdl_HBO_M_MC M
150 mdl_HBO_H_MC H
151 mdl_HBO_L_CAR L
152 mdl_HBO_M_CAR M
153 mdl_HBO_H_CAR H
154 mdl_HBO_L_PT L
155 mdl_HBO_M_PT M
156 mdl_HBO_H_PT H
157 mdl_NHBB_L_NMT L
158 mdl_NHBB_M_NMT M
159 mdl_NHBB_H_NMT H
160 mdl_NHBB_L_MC L
161 mdl_NHBB_M_MC M
162 mdl_NHBB_H_MC H
163 mdl_NHBB_L_CAR L
164 mdl_NHBB_M_CAR M
165 mdl_NHBB_H_CAR H
166 mdl_NHBB_L_PT L
167 mdl_NHBB_M_PT M
168 mdl_NHBB_H_PT H
169 mdl_NHBO_L_NMT L
170 mdl_NHBO_M_NMT M
171 mdl_NHBO_H_NMT H
172 mdl_NHBO_L_MC L
173 mdl_NHBO_M_MC M
174 mdl_NHBO_H_MC H
175 mdl_NHBO_L_CAR L
176 mdl_NHBO_M_CAR M
177 mdl_NHBO_H_CAR H
178 mdl_NHBO_L_PT L
179 mdl_NHBO_M_PT M
180 mdl_NHBO_H_PT H
181 mdl_HBW_L L
182 mdl_HBW_M M
183 mdl_HBW_H H
184 mdl_HBS_L L
185 mdl_HBS_M M
186 mdl_HBS_H H
187 mdl_HBO_L L
188 mdl_HBO_M M
189 mdl_HBO_H H
190 mdl_NHBB_L L
191 mdl_NHBB_M M
192 mdl_NHBB_H H
193 mdl_NHBO_L L
194 mdl_NHBO_M M
195 mdl_NHBO_H H
196 mdl_HBW <N/A>
197 mdl_HBS <N/A>
198 mdl_HBO <N/A>
199 mdl_NHBB <N/A>
200 mdl_NHBO <N/A>
201 mdl_L L
202 mdl_M M
203 mdl_H H
204 mdl_ALL ALL
============== НОВЫЙ ПРИМЕР ФРЕЙМА ДАННЫХ ========== Добавлены записи, содержащие только c
или d
в начало исходного df.
df <- c(
"NMT","MC","CAR","PT","L","M","H","ALL",
"str_HBW_L_NMT" ,"str_HBW_M_NMT" ,"str_HBW_H_NMT" ,"str_HBW_L_MC" ,"str_HBW_M_MC" ,"str_HBW_H_MC" ,
"str_HBW_L_CAR" ,"str_HBW_M_CAR" ,"str_HBW_H_CAR" ,"str_HBW_L_PT" ,"str_HBW_M_PT" ,"str_HBW_H_PT" ,
"str_HBS_L_NMT" ,"str_HBS_M_NMT" ,"str_HBS_H_NMT" ,"str_HBS_L_MC" ,"str_HBS_M_MC" ,"str_HBS_H_MC" ,
"str_HBS_L_CAR" ,"str_HBS_M_CAR" ,"str_HBS_H_CAR" ,"str_HBS_L_PT" ,"str_HBS_M_PT" ,"str_HBS_H_PT" ,
"str_HBO_L_NMT" ,"str_HBO_M_NMT" ,"str_HBO_H_NMT" ,"str_HBO_L_MC" ,"str_HBO_M_MC" ,"str_HBO_H_MC" ,
"str_HBO_L_CAR" ,"str_HBO_M_CAR" ,"str_HBO_H_CAR" ,"str_HBO_L_PT" ,"str_HBO_M_PT" ,"str_HBO_H_PT" ,
"str_NHBB_L_NMT","str_NHBB_M_NMT","str_NHBB_H_NMT","str_NHBB_L_MC","str_NHBB_M_MC","str_NHBB_H_MC",
"str_NHBB_L_CAR","str_NHBB_M_CAR","str_NHBB_H_CAR","str_NHBB_L_PT","str_NHBB_M_PT","str_NHBB_H_PT",
"str_NHBO_L_NMT","str_NHBO_M_NMT","str_NHBO_H_NMT","str_NHBO_L_MC","str_NHBO_M_MC","str_NHBO_H_MC",
"str_NHBO_L_CAR","str_NHBO_M_CAR","str_NHBO_H_CAR","str_NHBO_L_PT","str_NHBO_M_PT","str_NHBO_H_PT",
"str_HBW_L" ,"str_HBW_M" ,"str_HBW_H" ,"str_HBS_L" ,"str_HBS_M" ,"str_HBS_H" ,
"str_HBO_L" ,"str_HBO_M" ,"str_HBO_H" ,"str_NHBB_L" ,"str_NHBB_M" ,"str_NHBB_H" ,
"str_NHBO_L" ,"str_NHBO_M" ,"str_NHBO_H" ,"str_HBW" ,"str_HBS" ,"str_HBO" ,
"str_NHBB" ,"str_NHBO" ,"str_L" ,"str_M" ,"str_H" ,"str_ALL" ,
"jtp_HBW_L_NMT" ,"jtp_HBW_M_NMT" ,"jtp_HBW_H_NMT" ,"jtp_HBW_L_MC" ,"jtp_HBW_M_MC" ,"jtp_HBW_H_MC" ,
"jtp_HBW_L_CAR" ,"jtp_HBW_M_CAR" ,"jtp_HBW_H_CAR" ,"jtp_HBW_L_PT" ,"jtp_HBW_M_PT" ,"jtp_HBW_H_PT" ,
"jtp_HBS_L_NMT" ,"jtp_HBS_M_NMT" ,"jtp_HBS_H_NMT" ,"jtp_HBS_L_MC" ,"jtp_HBS_M_MC" ,"jtp_HBS_H_MC" ,
"jtp_HBS_L_CAR" ,"jtp_HBS_M_CAR" ,"jtp_HBS_H_CAR" ,"jtp_HBS_L_PT" ,"jtp_HBS_M_PT" ,"jtp_HBS_H_PT" ,
"jtp_HBW_L" ,"jtp_HBW_M" ,"jtp_HBW_H" ,"jtp_HBS_L" ,"jtp_HBS_M" ,"jtp_HBS_H" ,
"jtp_HBW" ,"jtp_HBS" ,"jtp_L" ,"jtp_M" ,"jtp_H" ,"jtp_ALL" ,
"mdl_HBW_L_NMT" ,"mdl_HBW_M_NMT" ,"mdl_HBW_H_NMT" ,"mdl_HBW_L_MC" ,"mdl_HBW_M_MC" ,"mdl_HBW_H_MC" ,
"mdl_HBW_L_CAR" ,"mdl_HBW_M_CAR" ,"mdl_HBW_H_CAR" ,"mdl_HBW_L_PT" ,"mdl_HBW_M_PT" ,"mdl_HBW_H_PT" ,
"mdl_HBS_L_NMT" ,"mdl_HBS_M_NMT" ,"mdl_HBS_H_NMT" ,"mdl_HBS_L_MC" ,"mdl_HBS_M_MC" ,"mdl_HBS_H_MC" ,
"mdl_HBS_L_CAR" ,"mdl_HBS_M_CAR" ,"mdl_HBS_H_CAR" ,"mdl_HBS_L_PT" ,"mdl_HBS_M_PT" ,"mdl_HBS_H_PT" ,
"mdl_HBO_L_NMT" ,"mdl_HBO_M_NMT" ,"mdl_HBO_H_NMT" ,"mdl_HBO_L_MC" ,"mdl_HBO_M_MC" ,"mdl_HBO_H_MC" ,
"mdl_HBO_L_CAR" ,"mdl_HBO_M_CAR" ,"mdl_HBO_H_CAR" ,"mdl_HBO_L_PT" ,"mdl_HBO_M_PT" ,"mdl_HBO_H_PT" ,
"mdl_NHBB_L_NMT","mdl_NHBB_M_NMT","mdl_NHBB_H_NMT","mdl_NHBB_L_MC","mdl_NHBB_M_MC","mdl_NHBB_H_MC",
"mdl_NHBB_L_CAR","mdl_NHBB_M_CAR","mdl_NHBB_H_CAR","mdl_NHBB_L_PT","mdl_NHBB_M_PT","mdl_NHBB_H_PT",
"mdl_NHBO_L_NMT","mdl_NHBO_M_NMT","mdl_NHBO_H_NMT","mdl_NHBO_L_MC","mdl_NHBO_M_MC","mdl_NHBO_H_MC",
"mdl_NHBO_L_CAR","mdl_NHBO_M_CAR","mdl_NHBO_H_CAR","mdl_NHBO_L_PT","mdl_NHBO_M_PT","mdl_NHBO_H_PT",
"mdl_HBW_L" ,"mdl_HBW_M" ,"mdl_HBW_H" ,"mdl_HBS_L" ,"mdl_HBS_M" ,"mdl_HBS_H" ,
"mdl_HBO_L" ,"mdl_HBO_M" ,"mdl_HBO_H" ,"mdl_NHBB_L" ,"mdl_NHBB_M" ,"mdl_NHBB_H" ,
"mdl_NHBO_L" ,"mdl_NHBO_M" ,"mdl_NHBO_H" ,"mdl_HBW" ,"mdl_HBS" ,"mdl_HBO" ,
"mdl_NHBB" ,"mdl_NHBO" ,"mdl_L" ,"mdl_M" ,"mdl_H" ,"mdl_ALL"