Я использую базу данных PostgreSQL в своем приложении. Сначала я запрашиваю из базы данных столбец, в котором хранятся исходные ссылки на веб-сайты.
Затем я использую метод getElement для извлечения каждой ссылки href, доступа к каждой ссылке и извлечения заголовка и текста в базу данных.
Я получаю сообщение об ошибке. Я не могу обойти сообщение о согласии на использование файлов cookie:
URL url2 ;
URL url;
logger.info(href);
try {
url = new URL( elem.webSite );
url2= new URL (href);
String host = url.getHost();
String host2 = url2.getHost();
int level = 1;
//This method deletes all cookies
if ( href.indexOf("special-pages")>0 && href.indexOf("rss-feed")>0)
continue;
if (this.equals(href,host ) ) {
} else if (host.equals(host2)){
WebDriver driverLink = new ChromeDriver();
/*Set <Cookie> cookieList = driver.manage().getCookies();
for (Cookie getCookies: cookieList) {
driver.manage().deleteAllCookies();
logger.info(getCookies);
}
*/
driverLink.get(href);
title = driverLink.getTitle();
text = driverLink.findElement(By.xpath("/html/body")).getText();
driverLink.close();
res = insertIntoDB2(elem.id_progetto,href,title,text,level);
logger.info("insert outcome: " + res);
}
//int siz = allLinks.size();
//System.out.println(title);
//total = link.getSize();
//System.exit(0);
//driverLink.close();
//System.out.println(link.getText() + " - " + link.getAttribute("href"));
level = level+1;
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
System.exit(0);
});
}
/**
 * Tells whether {@code link} is just the bare host, optionally prefixed with
 * {@code https://} or {@code http://}.
 *
 * When {@code link} ends with a slash, a slash is appended to {@code host}
 * before the comparison so both sides carry the same trailing character.
 *
 * @param link the link text to test
 * @param host the host name to compare against
 * @return {@code true} if {@code link} equals the host alone or the host with
 *         one of the two scheme prefixes; {@code false} otherwise
 */
private boolean equals(String link, String host) {
    // Align the trailing slash of the candidate with that of the link.
    final String candidate = link.endsWith("/") ? host + "/" : host;

    return link.equals(candidate)
            || link.equals("https://" + candidate)
            || link.equals("http://" + candidate);
}
//********************************************************************************************************
//*******************************************************************************************************
/**
 * Stores one crawled page by calling the {@code public.InsertLink} stored function.
 *
 * @param id_progetto project identifier, bound as the first function argument
 * @param link        page URL, bound as the second function argument
 * @param title       page title, bound verbatim (may be {@code null})
 * @param text        page text, bound verbatim (may be {@code null})
 * @param level       level value, bound as the last function argument
 * @return {@code true} when the call completed, {@code false} when a
 *         {@link java.sql.SQLException} was raised
 */
private boolean insertIntoDB2(int id_progetto, String link, String title, String text, int level) {
    logger.info("Enter into insertIntoDB {}, {}, {}", id_progetto, link, title);
    // Values are passed as bound parameters, so quotes must NOT be doubled by hand:
    // doing so would store the doubled quotes literally (and threw on null input).
    // NOTE(review): if InsertLink builds dynamic SQL internally, quoting belongs there.
    // try-with-resources guarantees the statement is closed on every path.
    try (CallableStatement stmt = connection.prepareCall("{? = call public.InsertLink(?,?,?,?,?)}")) {
        stmt.registerOutParameter(1, Types.VARCHAR);
        stmt.setInt(2, id_progetto);
        stmt.setString(3, link);
        stmt.setString(4, title);
        stmt.setString(5, text);
        stmt.setInt(6, level);
        stmt.execute();
        // Read the OUT parameter while the statement is still open.
        String r = stmt.getString(1);
        logger.info("return from stored procedure" + r);
        return true;
    } catch (java.sql.SQLException ex) {
        // Route the failure through the logger instead of stderr.
        logger.error("InsertLink failed for " + link, ex);
        return false;
    }
}
/**
 * Plain holder pairing a project id with a web site string.
 */
class ProgettoItem {
    /** Project identifier supplied to the constructor. */
    public int id_progetto;
    /** Web site string supplied to the constructor. */
    public String webSite;

    /**
     * Creates a holder from the two given values.
     *
     * @param id      value stored in {@link #id_progetto}
     * @param webSite value stored in {@link #webSite}
     */
    public ProgettoItem(int id, String webSite) {
        this.webSite = webSite;
        this.id_progetto = id;
    }
}
} // end class SeleniumTest2 if (href.indexOf("#")>=0) {
Here is my Error log: ***********************************************************
[INFO ] 2019-11-03 18:11:58.419 [main] ProgettoPagine - https://plus-resilient.interreg-med.eu/special-pages/rss-feed/
[INFO ] 2019-11-03 18:11:58.435 [main] ProgettoPagine - javascript:tagAnalyticsCNIL.CookieConsent.showInform()
java.net.MalformedURLException: unknown protocol: javascript
at java.net.URL.<init>(Unknown Source)
at java.net.URL.<init>(Unknown Source)
at java.net.URL.<init>(Unknown Source)
at webmining.ProgettoPagine.lambda$0(ProgettoPagine.java:221)
at java.util.ArrayList.forEach(Unknown Source)
at webmining.ProgettoPagine.searchWebSite(ProgettoPagine.java:177)
at webmining.ProgettoPagine.run(ProgettoPagine.java:71)
at webmining.ProgettoPagine.main(ProgettoPagine.java:57)
Here is my code:****************************************************************