package com.samir.XMLParser;
import java.io.*;
import java.net.*;
import java.util.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
/**
 * Downloads an RSS feed, extracts the title, description, link and publication
 * date of every {@code <item>} element, removes tag-like markup from the
 * description and prints the collected entries to standard output.
 *
 * <p>All work is performed by the constructor. Any failure (network, XML
 * parsing, ...) is reported with a stack trace and otherwise swallowed, so
 * constructing the object never throws; {@link #vectParse} then holds whatever
 * items were collected before the failure.
 */
public class HTMLRemoverParser {

    /** Feed read by the no-argument constructor. */
    private static final String DEFAULT_FEED_URL = "http://news.yahoo.com/rss/politics";

    /** Regular expression matching one {@code <...>} run (does not span line breaks). */
    private static final String TAG_REGEX = "\\<.*?\\>";

    /** The most recently created bean. */
    HTMLRemoverBean objBean;

    /** Every bean created from the feed, in document order. */
    Vector<HTMLRemoverBean> vectParse;

    // The three fields below are not used by this class; they are kept because
    // they are package-visible and other classes may refer to them.
    int mediaThumbnailCount;
    boolean urlflag;
    int count = 0;

    /** Reads and prints the default politics feed. */
    public HTMLRemoverParser() {
        this(DEFAULT_FEED_URL);
    }

    /**
     * Reads and prints the feed found at {@code feedUrl}.
     *
     * @param feedUrl absolute URL of an RSS document
     */
    public HTMLRemoverParser(String feedUrl) {
        vectParse = new Vector<HTMLRemoverBean>();
        try {
            Document doc = fetchDocument(feedUrl);
            System.out.println();
            collectItems(doc);
            printItems();
        } catch (Exception e) {
            // Top-level boundary: report the problem and leave the object usable.
            e.printStackTrace();
        }
    }

    /**
     * Opens a single connection to {@code feedUrl} and parses the response into
     * a normalized DOM document. The response stream is always closed.
     */
    private Document fetchDocument(String feedUrl)
            throws IOException, ParserConfigurationException, org.xml.sax.SAXException {
        URL url = new URL(feedUrl);
        URLConnection con = url.openConnection();
        System.out.println("Connection is : " + con);

        DocumentBuilder builder = newHardenedFactory().newDocumentBuilder();
        // The feed used to be downloaded twice (once into an unused string, once
        // for the parser); it is now read exactly once, straight into the parser.
        InputStream istream = con.getInputStream();
        try {
            Document doc = builder.parse(istream);
            doc.getDocumentElement().normalize();
            return doc;
        } finally {
            istream.close();
        }
    }

    /**
     * Creates a parser factory that does not fetch external entities or DTDs,
     * because the document comes from a remote, untrusted source.
     */
    private static DocumentBuilderFactory newHardenedFactory() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        disableFeature(factory, "http://xml.org/sax/features/external-general-entities");
        disableFeature(factory, "http://xml.org/sax/features/external-parameter-entities");
        disableFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd");
        return factory;
    }

    /** Switches {@code feature} off when the underlying parser knows it (best effort). */
    private static void disableFeature(DocumentBuilderFactory factory, String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (ParserConfigurationException ignored) {
            // The installed parser does not recognise this feature; parsing
            // still works, just without this particular restriction.
        }
    }

    /** Turns every {@code <item>} element of {@code doc} into a bean stored in {@link #vectParse}. */
    private void collectItems(Document doc) {
        NodeList items = doc.getElementsByTagName("item");
        for (int i = 0; i < items.getLength(); i++) {
            Node node = items.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element item = (Element) node;

            HTMLRemoverBean bean = new HTMLRemoverBean();
            bean.title = getTagValue("title", item);
            bean.description = getTagValue("description", item).replaceAll(TAG_REGEX, "");
            bean.link = getTagValue("link", item);
            bean.pubdate = getTagValue("pubDate", item);

            // Publish the bean only once it is fully populated.
            objBean = bean;
            vectParse.add(bean);
        }
    }

    /** Prints every collected bean to standard output. */
    private void printItems() {
        for (int index = 0; index < vectParse.size(); index++) {
            HTMLRemoverBean bean = vectParse.get(index);
            System.out.println("Item No : " + index);
            System.out.println();
            System.out.println("Title is : " + bean.title);
            System.out.println("Description is : " + bean.description);
            System.out.println("Link is : " + bean.link);
            System.out.println("Pubdate is : " + bean.pubdate);
            System.out.println();
            System.out
                    .println("-------------------------------------------------------------------------------------------------------------");
        }
    }

    /**
     * Returns the value of the first child node of the first {@code sTag}
     * element found below {@code eElement}.
     *
     * @param sTag     tag name to look up
     * @param eElement element whose descendants are searched
     * @return the node value, or an empty string when the tag is missing, has
     *         no children, or its first child carries no value
     */
    private String getTagValue(String sTag, Element eElement) {
        Node tagNode = eElement.getElementsByTagName(sTag).item(0);
        if (tagNode == null) {
            return "";
        }
        Node firstChild = tagNode.getFirstChild();
        if (firstChild == null) {
            return "";
        }
        String value = firstChild.getNodeValue();
        return value == null ? "" : value;
    }

    /** Command-line entry point: reads and prints the default feed. */
    public static void main(String[] args) {
        new HTMLRemoverParser();
    }
}
And the bean: ::
package com.samir.XMLParser;
/**
 * Plain data holder for a single feed entry.
 *
 * <p>All fields are public, mutable and {@code null} until assigned.
 */
public class HTMLRemoverBean {

    /** Headline of the entry. */
    public String title;

    /** Body text of the entry. */
    public String description;

    /** Link associated with the entry. */
    public String link;

    /** Publication date of the entry, kept as text. */
    public String pubdate;

}