library(rvest)
# Scrape the contact panel of every KVK listed on the ICAR facilities page by
# walking the cascading State -> District -> KVK drop-downs of the page's form.
# The result is `output_df`, one row per scraped ".panel-body" text with the
# columns STATE, DISTRICT, KVK and CONTACT_TEXT.
#
# NOTE(review): html_session(), set_values() and submit_form() are kept as in
# the original script; newer rvest releases deprecate them in favour of
# session(), html_form_set() and session_submit() - confirm the installed
# rvest version before migrating.
url <- "https://kvk.icar.gov.in/facilities_list.aspx"
page <- html_session(url)
form <- html_form(page)[[1]]

# Option values are what gets submitted; option texts are the display names.
# The first <option> of every drop-down is dropped - presumably a placeholder
# entry such as "Select" (TODO confirm against the live page).
state_options <- html_nodes(page, css = "#ContentPlaceHolder1_ddlState > option")
states <- html_attr(state_options, "value")[-1]
states_name <- html_text(state_options)[-1]

# Results are appended to a list. The previous `final_df[i * j * k]` indexing
# was not unique across (i, j, k) and silently overwrote earlier results.
final_df <- list()

#### STATES LOOP ####
# seq_along() keeps every loop from running when a drop-down has no options
# (1:length(x) would iterate over c(1, 0) and submit NA values).
for (i in seq_along(states)) {
  filled_form <- set_values(
    form,
    "ctl00$ContentPlaceHolder1$ddlState" = states[i]
  )
  page1 <- submit_form(page, filled_form)

  district_options <- html_nodes(
    page1,
    css = "#ContentPlaceHolder1_ddlDistrict > option"
  )
  district <- html_attr(district_options, "value")[-1]
  district_name <- html_text(district_options)[-1]

  #### DISTRICT LOOP ####
  for (j in seq_along(district)) {
    filled_form1 <- set_values(
      html_form(page1)[[1]],
      "ctl00$ContentPlaceHolder1$ddlState" = states[i],
      "ctl00$ContentPlaceHolder1$ddlDistrict" = district[j]
    )
    page2 <- submit_form(page1, filled_form1)

    kvk_options <- html_nodes(page2, css = "#ContentPlaceHolder1_ddlKvk > option")
    kvk <- html_attr(kvk_options, "value")[-1]
    # NOTE(review): kvk_name is collected but the KVK column below stores the
    # option value (kvk[k]), unlike STATE/DISTRICT which store display names -
    # confirm which one is wanted.
    kvk_name <- html_text(kvk_options)[-1]

    #### KVK LOOP ####
    for (k in seq_along(kvk)) {
      filled_form2 <- set_values(
        html_form(page2)[[1]],
        "ctl00$ContentPlaceHolder1$ddlState" = states[i],
        "ctl00$ContentPlaceHolder1$ddlDistrict" = district[j],
        "ctl00$ContentPlaceHolder1$ddlKvk" = kvk[k]
      )
      page3 <- submit_form(page2, filled_form2)

      # Strip line breaks from the text of every ".panel-body" node.
      contact_text <- gsub(
        "[\r\n]", "",
        html_nodes(page3, css = ".panel-body") %>% html_text()
      )
      # Keep a row for this KVK even when the page has no ".panel-body" node.
      if (length(contact_text) == 0) {
        contact_text <- ""
      }

      df <- data.frame(
        STATE = states_name[i],
        DISTRICT = district_name[j],
        KVK = kvk[k],
        CONTACT_TEXT = contact_text,
        stringsAsFactors = FALSE
      )
      final_df[[length(final_df) + 1L]] <- df

      ### WAITTIME TO AVOID HTTP 500 error - So the server is not overloaded
      # Sys.sleep() is the base R pause; a bare sleep() does not exist in R.
      Sys.sleep(5)
    }
  }
}

output_df <- data.table::rbindlist(final_df, fill = TRUE)
# After this perform some string operations to extract the exact information required from the CONTACT_TEXT variable
В приведённом выше ответе пакет RSelenium не используется; на мой взгляд, такой подход надёжнее, чем решение на RSelenium.