public class HTMLParser
extends java.lang.Object
| Modifier and Type | Field and Description |
|---|---|
| static java.lang.String | DEFAULT_ATTR_LINK_TAG |
| Constructor and Description |
|---|
| HTMLParser(android.content.Context context)<br>Constructor. |
| HTMLParser(java.lang.String document, android.content.Context context)<br>Constructor. |
| Modifier and Type | Method and Description |
|---|---|
| void | filterByTag(java.lang.String tag)<br>Keep only the content that is under the selected tag. |
| java.lang.String | getContent()<br>Get the html document as a string. |
| org.jsoup.nodes.Element | getDocument()<br>Return the document as an org.jsoup.nodes.Element instance. |
| java.lang.String | getURLbyText(java.lang.String text)<br>Recover the URL in the 'href' attribute, matching the passed text. |
| java.lang.String | getURLbyText(java.lang.String text, java.lang.String attribute)<br>Recover the URL attribute matching the passed text. |
| void | removeLinks()<br>Delete all the links (tag 'a') from the document. |
| void | setBaseDocument(java.io.InputStream htmlBase, java.lang.String charsetName, java.lang.String baseUri)<br>Injects the document element into the base document. |
| void | setContent(java.io.InputStream is)<br>Reset the content of the document. |
| void | setContent(java.lang.String document)<br>Reset the content of the document. |
public static final java.lang.String DEFAULT_ATTR_LINK_TAG
public HTMLParser(android.content.Context context)
context - The application context.
java.lang.IllegalArgumentException - if context is null.
public HTMLParser(java.lang.String document,
android.content.Context context)
document - html content to manipulate.
context - The application context.
java.lang.IllegalArgumentException - if context is null.
public void filterByTag(java.lang.String tag)
throws java.lang.Exception
tag - selected tag.
java.lang.IllegalArgumentException - if text or attribute is null.
java.lang.NoSuchFieldException
java.lang.Exception
public java.lang.String getContent()
public org.jsoup.nodes.Element getDocument()
public java.lang.String getURLbyText(java.lang.String text)
throws java.lang.Exception
text - where to search the URL.
java.lang.Exception
public java.lang.String getURLbyText(java.lang.String text,
java.lang.String attribute)
throws java.lang.Exception
text - where to search the URL.
attribute - attribute that owns the URL ('href' in most cases).
java.lang.IllegalArgumentException - if text or attribute is null.
java.lang.NoSuchFieldException
java.lang.Exception
public void removeLinks()
public void setBaseDocument(java.io.InputStream htmlBase,
java.lang.String charsetName,
java.lang.String baseUri)
throws java.lang.Exception
htmlBase - base html document (it must have a basic html structure: html/body).
charsetName - charset to use, for example 'UTF-8'.
baseUri - URI of the base document.
java.lang.IllegalArgumentException - if htmlBase or baseUri is null.
java.lang.Exception
public void setContent(java.io.InputStream is)
throws java.io.IOException
is -
java.io.IOException
public void setContent(java.lang.String document)
document - The HTML document to parse.
java.lang.IllegalArgumentException - if document is null.