По вашему запросу:
library(rvest)
# Entry page that hosts the state drop-down (#lgdState).
url <- "http://secc.gov.in/lgdStateList"
page <- html_session(url)

## STATE LOOP ##
# Visible labels and submitted values of the state <option> elements.
# The first <option> is dropped from both vectors (presumably a placeholder
# entry rather than a real state -- TODO confirm against the live page).
state <- html_text(html_nodes(page, css = "#lgdState > option"))[-1]
state_id <- html_attr(html_nodes(page, css = "#lgdState > option"), "value")[-1]
# Walk state -> district -> block -> Gram Panchayat (GP) and save every PDF
# the site offers into the current working directory.
#
# Reads `page`, `state` and `state_id` prepared earlier in the script.
# NOTE(review): rvest:::request_POST is a non-exported rvest internal; it is
# kept because the script already depends on it, but it may not exist in
# every rvest version -- confirm against the installed one.

# Attached once here instead of on every block iteration.
library(stringr)

# Text shown in a ".error" element when a GP has no PDF.
error <- "PDF File for this Gram Panchayat is not available."

# Extract the GP code from one `onclick` attribute value.
#
# Takes the first parenthesised group in `onclick`, strips the parentheses,
# splits the remainder on "," and returns the first run of digits found in
# the 4th piece. Returns NA_character_ when there is no parenthesised group,
# no 4th piece, or no digits in it.
extract_gp_code <- function(onclick) {
  call_args <- str_extract_all(onclick, "\\([^()]+\\)")[[1]]
  if (length(call_args) == 0L) {
    return(NA_character_)
  }
  call_args <- substring(call_args[1], 2, nchar(call_args[1]) - 1)
  str_extract(strsplit(call_args, ",")[[1]][4], "[[:digit:]]+")
}

# seq_along() is used for every level: 1:length(x) would iterate over
# c(1, 0) for an empty list and POST NA codes to the server.
for (i in seq_along(state)) {
  page1 <- rvest:::request_POST(
    page,
    url = "http://secc.gov.in/lgdDistrictList",
    body = list("stateCode" = state_id[i]),
    encode = "form"
  )

  ## DISTRICT LOOP ##
  # The first <option> is dropped, as for the state list.
  district <- html_nodes(page1, css = "#lgdDistrict > option") %>% html_text()
  district <- district[-1]
  district_id <- html_nodes(page1, css = "#lgdDistrict > option") %>%
    html_attr("value")
  district_id <- district_id[-1]

  for (j in seq_along(district)) {
    page2 <- rvest:::request_POST(
      page1,
      url = "http://secc.gov.in/lgdBlockList",
      body = list(
        "stateCode" = state_id[i],
        "districtCode" = district_id[j]
      ),
      encode = "form"
    )

    ## BLOCK LOOP ##
    block <- html_nodes(page2, css = "#lgdBlock > option") %>% html_text()
    block <- block[-1]
    block_id <- html_nodes(page2, css = "#lgdBlock > option") %>%
      html_attr("value")
    block_id <- block_id[-1]

    for (k in seq_along(block)) {
      page3 <- rvest:::request_POST(
        page2,
        url = "http://secc.gov.in/lgdGpList",
        body = list(
          "stateCode" = state_id[i],
          "districtCode" = district_id[j],
          "blockCode" = block_id[k]
        ),
        encode = "form"
      )

      # One onclick handler per link inside #example; the GP code is parsed
      # out of each handler's argument list.
      txt <- html_nodes(page3, css = "#example a") %>% html_attr("onclick")
      gpcode <- vapply(txt, extract_gp_code, character(1), USE.NAMES = FALSE)
      # Links without a parsable code cannot be requested; skip them.
      gpcode <- gpcode[!is.na(gpcode)]

      ## GP CODE LOOP to download file ##
      for (l in seq_along(gpcode)) {
        page4 <- rvest:::request_POST(
          page3,
          url = "http://secc.gov.in/downloadLgdwisePdfFile",
          body = list(
            "stateCode" = state_id[i],
            "districtCode" = district_id[j],
            "blockCode" = block_id[k],
            "gpCode" = gpcode[l]
          ),
          encode = "form"
        )

        # A response that cannot be parsed as HTML (presumably the PDF
        # itself -- TODO confirm) is treated as "no error message shown".
        error_displayed <- tryCatch(
          html_nodes(page4, css = ".error") %>% html_text(),
          error = function(e) character(0)
        )
        # %in% copes with zero or several ".error" nodes, where a bare
        # `!=` inside if() would abort the whole run.
        if (error %in% error_displayed) {
          next
        }

        # Without a content-disposition header there is no file name to
        # write to, so the response is skipped instead of crashing writeBin().
        disposition <- page4$response$headers$`content-disposition`
        if (length(disposition) != 1L || is.na(disposition)) {
          next
        }
        filename <- gsub("attachment;filename=", "", disposition, fixed = TRUE)
        filename <- str_replace_all(filename, '"', "")
        # The name comes from the server: keep only its last path component
        # so the file always lands in the working directory.
        filename <- basename(filename)
        if (!nzchar(filename)) {
          next
        }
        writeBin(page4$response$content, filename)
      }
    }
  }
}
Это опять без RSelenium. :)