Мне нужно импортировать некоторые данные в R. Данные содержат два столбца: code и nestedString. nestedString - это массив PHP, который хранится как одна строка.
Я не могу понять, как:
- преобразовать nestedString в список R.
- А затем сгладить этот список в тиббл / фрейм данных.
Я пробовал использовать REGEX для замены символов, чтобы я мог читать строку как список R, но это не сработало. Например, если nestedString = "[[424,5454], [5,3]]", я попытался заменить символы так, чтобы nestedString = "c (c (424,5454), c (5 , 3)) ». Откуда я планировал установить nestedList = list (nestedString), но я не зашел так далеко.
Кто-нибудь знает, как это решить? Я создал приведенный ниже образец - просто чтобы вы знали, что приведенное ниже не работает.
# Libraries
library(tidyverse)
# I can't create sample.
# Example input: one row whose `nestedString` holds a PHP array serialised as
# a single string of nested "[...]" lists.
# The literal is assembled with paste0() because a line break inside a quoted
# R string is kept verbatim: wrapping the source would embed "\n" characters
# in the data and split values such as 780000 across two lines.
sample <- tibble(
  code = "60446",
  nestedString = paste0(
    "[[890000,845000,828000,727000,795000],[645000,700000,850000,610000],[1310000,836000,835000,1120000,835000],[820000,760000,770000,820000,835000,800000,730000,840000],[660000,1000000],[757000,765000,1206000,901000,695000],[712000,865000,750000,990000,742500],[827000,720000,680000,825000,685000,720000,760000,675000],[753000,765000,841000,800000],[735000,717000,960000,762000,814000,715000,680000,710000,550000,1060000,900000,900000],[865000,705000,840000,705000,820000,830000,750000,680000,735000,710000,1090000],[830000,576000,820000],[790000,830000,575000,770000,680000],[1280000,758000,500000],[715000,680000,900000,763500,750000,800000],[730000,823000,940000,830000,630000,692000,1100000],[],[],[710000,790000,700000,1020000,695000,780000,1175000],[828000,605000,730000,960000,732000],[1250000,1400000,855000],[850000,720000,995000,980000,863000,1100000],[700000,960000,901000,945000,810000],[843000,775000,860000,825000,795000,750000,645000,799000],[858000,975000,705000,1400000,980000],[818500,850000],[935000,765000,1090000,850000,787000,830000,800000,1200000,770000,1100000],[928000,710000,835000,880000,890000,667000,780000],[900000,775000,810000,825000,715000,862500],[1100000,720000,830000,630000],[840000,868000,885000,880000,690000,757000,805000,740000,641000,738000],[865000,730000,675000,865000],[762000,1950000,1950000,1950000,885000,756000,730000,720000],[855000,800000,880000,870000,720000],[615000,875000,670000],[1255000,865000,865000,785000,600000,720000,935000,846800,850000],[840000,775000,885000,810000,766000,762000,970000,690000],[905000,741000,848000,855000],[752500,900000,770000,1900000,1900000,1900000,1900000,850000,1600000,1600000,820000,750000,962000,780000],[1200000,825000,780000,830000,780000],[830000,820000,780000,790000,870000,775000],[760000,1000000,850000,780000,1190000,915000,678000],[870000,738000],[830000,750000,780000,870000,767000,745000,1135000,760000],[568000,620000,920000,650000,695000,780000,970000,695000,880000,780000],[645000,780",
    "000,800000,710000,890000,820000,632000,725000],[865000,820000,800000,1080000],[1350000,907000,760000,770000,750000,745000,390000,845000,700000],[765000,840000,815000,695500,901000,901000,664500,850000,780000,825000,675000,790000,1428000],[850000,781000,790000,755000,980000],[1300000,1412000,1430000,750000],[690000,825000,618000,700000,750000],[730000,658000,890000,710000,690000,770000,630000,815000],[675000,960000,751000,655000,775000,920000],[710000,658000,780000,680000,725000],[960000,1700000,720000,800000,970000,665000,765000],[730000,644000,648000,680000],[795000,662000,806000,645000,718500],[520000,650000,1300000,760000,740000,750000,1400000],[590000,730000,660000,625000,1015000,850000,1405000],[720000,896000,900000,740000,760000,1100000,710000,660000,900000,880000,747000],[807000,727000,1060000,720000,705000,725000],[755500,740000,715000,723000,700000,787000,740000,740000,720000,745000,670000],[708000,1000000,715000,595000,1045000,1051000],[603000,1000000],[916000,600000,547000,648000],[755000,1065000,540000,565000,560000,580000,976000,740000,712500],[607000,640000,578000,600000],[1010000,915000,851000,610000,640000,513000,792000,580000,580500,780000],[871000,675000,550000,620000],[616000,568000,572000,525000,596000,560000,445000],[606500,577500,550000,603000,690000,860000,556000,575000,569000,760000,620000],[725000,630000,438000,505000,1100000,535000,530000,585000,610000,600000,491000,508000,530000],[770000,755000,510000,422000,482500],[1075000,613000,520000,640000],[571500,540000,565000,585000,497000],[510000,530000,588000],[900000,605000,635200,610000],[520000,490000,527000,535000,450000,470000,560000,570000],[470000,595000,550000,629950,583000,385000,565000,520000],[550000],[465500,515000,999000,530000,475000,551000,510000,402000,450000],[590000,600000,450000,515000,440000,710000,545000,522000],[530000,450000,620000],[673000,866000,435000,425000,660000,595000,660000,615000,540000,450000],[530000,639000],[449000,459950,575000],[560000,610000,540000],[595000",
    ",470000],[480000,480000,480000,499950],[373500,373500,398000,430000,600000,523000,530000,400000,405000,810000],[520000],[473000,470000,516000,386000,460000,505000],[475000,487000,393000,516000,465000,490000,610000,601000,465000],[375000,455000,493000,410000,680000],[385000,470000,480000],[562000,480000],[470000,440000,565000,465000,415000,485000],[380000,535000,455000,920000,440000],[440000,422500,435000,490000],[670000],[415000,510000,455000,458000,340000],[481000,445000,435000,517500,420000,440000],[355000,780000,530000,461500,600000,360000,405000,447500,415000],[495000,485000,435000,435000,422000,510000,660000,416000],[410000,495000,430000,560000,399000],[432000,570000,405000,370000,415000,415000],[450000,665000,465000,430000],[485000,417000,365000,298000,385000,510000,405500,380000],[685000,435000,620000,375000,582000,365000],[422000,370000,363000],[385000,400000],[425000],[370000,528000,416000,468000],[339000,403000,392500,425000,446000],[390000,368000,475000,410000],[370000],[378000,410000,375000,472500,375000,425000],[370000,370000,365000,350000,510000,636000,430000,921000],[],[395000,735000],[352500,360000],[410000,589000,370000,370000],[311000,330000,310000,370000,311000,422000,458000],[335000,415000],[585000,357000,382000,565000,460000,382000,448000,280000,415000,400000],[353000,370000,400000,370000,490000,385000,390000,350000,345000,291000],[365000,273000,330000,336000,319000,285000,380000,348000],[390000,305000,375000,387500,385000,460000,350000],[333000,350000,365000,380000,390000],[432500,338000,365000,412000,360000,441000],[405000,425000,330000,265000,368000,320000,300000,355000],[350000,447500,345000,370000,455000,350000],[345000,370000,369000,340700,286000,362000,350000],[390000,362000,290000,395000,510000,322500,335000],[312000,362000,170000,280500,457500,382000],[327000],[290000,367000,515000,431000,332000],[330000,420000,378000,307000,281500],[308000],[],[347500,317000],[350000,379000],[495000,365000,400000,312000,225000],[300000],[329000,270000]",
    ",[325000],[],[310000],[],[],[],[362000],[],[],[],[],[],[332000],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]"
  )
)
# Desired output layout, illustrated with the first two inner lists only.
endGoal <- tibble(
  code = "60446",
  # Zero-based position of the inner list each value was taken from.
  valueIndex = rep(c(0, 1), times = c(5, 4)),
  values = c(
    890000, 845000, 828000, 727000, 795000,
    645000, 700000, 850000, 610000
  )
)
Вот решение для всех, кто найдет этот пост.
#' Flatten a PHP-style nested array string into one row per value
#'
#' Every inner `[...]` list found in the string column is extracted with a
#' regular expression, numbered, and split on commas so that each value ends
#' up in its own row.
#'
#' @param table A data frame / tibble holding an identifier column and a
#'   column with the serialised array, e.g. `"[[424,5454],[5,3]]"`.
#' @param id_col Unquoted name of the identifier column. Defaults to `id`.
#' @param data_col Unquoted name of the column with the array string.
#'   Defaults to `data`. This column is dropped from the result.
#' @return A tibble grouped by `id_col` with the remaining input columns plus
#'   `values` (character; `NA` for an empty inner list) and `valueIndex`
#'   (zero-based position of the inner list, counted within each `id_col`
#'   group). Call `ungroup()` on the result if grouping is not wanted, and
#'   `as.numeric()` on `values` if numbers are needed.
#' @examples
#' convert_php(tibble(id = "a", data = "[[424,5454],[5,3]]"))
#' convert_php(sample, id_col = code, data_col = nestedString)
convert_php <- function(table, id_col = id, data_col = data) {
  table %>%
    # One regex match per inner list; the capture group holds its contents.
    # Backslashes are doubled because "\[" is not a valid escape in an R
    # string literal.
    mutate(values = str_match_all({{ data_col }}, "\\[(.*?)\\]")) %>%
    unnest(values) %>%
    # After unnesting, `values` is a matrix column: column 1 is the full
    # match, column 2 the capture group.
    mutate(values = values[, 2]) %>%
    select(-{{ data_col }}) %>%
    group_by({{ id_col }}) %>%
    mutate(
      valueIndex = row_number() - 1,
      # The first match starts at the outer "[", so its capture group still
      # carries a leading "[". The lazy match stops at the first "]", so no
      # closing bracket can be present.
      values = str_remove_all(values, "\\["),
      # Empty inner lists ("[]") become a single NA row.
      values = na_if(values, ""),
      values = str_split(values, ",")
    ) %>%
    unnest(values)
}