Возможный ответ с использованием регулярных выражений и stringr::str_extract_all()
Я предположил, что цифры должны быть в диапазоне 0-9, а не 1-9. Если это не так, замените все [0-9] на [1-9].
Кроме того, если вы ищете определённое количество (скажем, n) повторений букв или цифр, замените + на {n}, как в первом шаблоне в vec.
library( data.table )
library( stringr )
# Identifier formats the patterns below are meant to capture:
#   NA123456 - constant first letter N, one letter A-K, then digits
#   SA123456 - constant prefix SA, then digits
#   MABC1234 - constant first letter M, letters A-Z, then digits
#   QABC1234 - constant first letter Q, letters A-Z, then digits
#   WABC1234 - constant first letter W, letters A-Z, then digits
#   TABC1234 - constant first letter T, letters A-Z, then digits
#   3ABCD123 - constant first digit 3, letters A-Z, then digits
# NOTE: the format description says digits 1-9; the patterns use [0-9] on the
# assumption that zero is allowed as well.

# One regular expression per identifier format. `+` means "one or more";
# `{1}` in the first pattern shows how to demand an exact repetition count.
vec <- c(
  "N[A-K]{1}[0-9]+",
  "SA[0-9]+",
  "M[A-Z]+[0-9]+",
  "Q[A-Z]+[0-9]+",
  "W[A-Z]+[0-9]+",
  "T[A-Z]+[0-9]+",
  "3[A-Z]+[0-9]+"
)

# Join the individual expressions with "|" so that a single regex matches
# any one of the formats.
pattern <- paste0(vec, collapse = "|")
# Extract every substring of the `Address` column that matches `pattern` and
# store the matches in `Aa_reference` (updated by reference via `:=`).
# str_extract_all() yields one character vector of matches per input string.
DT[, Aa_reference := stringr::str_extract_all(
  string = Address,
  pattern = pattern
)]
Вывод
# Property Address Aa_reference
# 1: PIC: 3WABG086 260 SPRINGHURST ROAD
# 2: PIC: 35PSR217 1350 RIVER ROAD
# 3: PIC# NH244157 1038 QUONDONG ROAD
# 4: PIC: 3GMUF425 70 DIGBY ROAD
# 5: PIC# 3GMUF425 70 DIGBY ROAD
# 6: PIC QTIWW0626 REMOLEA
# 7: PIC#EBWSE235 BOX 191
# 8: PIC #3WLKM019 198 MONTGOMERY ROAD
# 9: PIC # 3BWMM021 149 ANDERSONS ROAD
# 10: PIC: 3WCGN034 WERRIBEE
# 11: GARANGULA PIC: NH630488 PO BOX 84
# 12: GARANGULA PIC: NH630488 PO BOX 84
# 13: PIC: 3GMTL320 2980 GLENELG HIGHWAY
# 14: GREENSLOPES PIC: MJKE0261 914 WEST KENTISH ROAD
# 15: PIC: WFZB3246 859 PFEIFFER ROAD
# 16: PIC: WFAY3549 34605 ALBANY HIGHWAY
# 17: PIC: 3CEXK044 2244 LAVERS HILL ROAD
# 18: PIC: QGWW0462 ELDERFIELD
# 19: PIC: 3WCGN034 WERRIBEE
# 20: KAYA DORPER & WHITE DORPER STUD PIC: WABN0262 WABN0262
# 21: SPOTTSWOOD PIC QKDR0078 QKDR0078
# 22: COOMBOONA HOLSTEINS PIC 3SPSR217 3SPSR217
# 23: ROSEVALE PIC: QKEV0169 QKEV0169
# 24: <NA> PIC 3EGON009 3EGON009
# 25: <NA> PIC WFKPO316 WFKPO316
# 26: IVADENE PIC 3WANP0T1 3WANP0
# 27: <NA> PIC ND225813 ND225813
# 28: HEAVENLY VALLEY FARMS PIC #NF538645 NF538645
# 29: C/- CED WISE AB CENTRE PIC: QCST0158 QCST0158
# 30: GARANGULA PIC # NH630488 NH630488
# Property Address Aa_reference
Пример данных
# Sample data: a pipe-delimited table with the columns Property, Address and
# Aa_reference, passed to fread() as inline text.
DT <- fread(
  input = '
Property | Address | Aa_reference
PIC: 3WABG086| 260 SPRINGHURST ROAD| NA
PIC: 35PSR217| 1350 RIVER ROAD | NA
PIC# NH244157| 1038 QUONDONG ROAD |NA
PIC: 3GMUF425| 70 DIGBY ROAD| NA
PIC# 3GMUF425| 70 DIGBY ROAD | NA
PIC QTIWW0626 | REMOLEA | NA
PIC#EBWSE235 | BOX 191 | NA
PIC #3WLKM019 | 198 MONTGOMERY ROAD| NA
PIC # 3BWMM021 | 149 ANDERSONS ROAD | NA
PIC: 3WCGN034 | WERRIBEE | NA
GARANGULA PIC: NH630488| PO BOX 84 |NA
GARANGULA PIC: NH630488 | PO BOX 84| NA
PIC: 3GMTL320| 2980 GLENELG HIGHWAY| NA
GREENSLOPES PIC: MJKE0261| 914 WEST KENTISH ROAD| NA
PIC: WFZB3246 | 859 PFEIFFER ROAD| NA
PIC: WFAY3549| 34605 ALBANY HIGHWAY| NA
PIC: 3CEXK044 | 2244 LAVERS HILL ROAD| NA
PIC: QGWW0462 | ELDERFIELD| NA
PIC: 3WCGN034 | WERRIBEE| NA
KAYA DORPER & WHITE DORPER STUD| PIC: WABN0262| NA
SPOTTSWOOD| PIC QKDR0078 | NA
COOMBOONA HOLSTEINS| PIC 3SPSR217 | NA
ROSEVALE | PIC: QKEV0169 | NA
NA| PIC 3EGON009 | NA
NA | PIC WFKPO316 | NA
IVADENE| PIC 3WANP0T1 | NA
NA | PIC ND225813 | NA
HEAVENLY VALLEY FARMS| PIC #NF538645 | NA
C/- CED WISE AB CENTRE| PIC: QCST0158 |NA
GARANGULA| PIC # NH630488 |NA
',
  sep = "|"
)