public class HtmlModule extends ModuleBase
Modifier and Type | Field and Description |
---|---|
protected ChecksumInputStream |
_cstream
PRIVATE INSTANCE FIELDS.
|
protected String |
_doctype |
protected DataInputStream |
_dstream |
protected TextMDMetadata |
_textMD |
protected boolean |
_withTextMD |
static int |
HTML_3_2 |
static int |
HTML_4_0_FRAMESET |
static int |
HTML_4_0_STRICT |
static int |
HTML_4_0_TRANSITIONAL |
static int |
HTML_4_01_FRAMESET |
static int |
HTML_4_01_STRICT |
static int |
HTML_4_01_TRANSITIONAL |
static int |
XHTML_1_0_FRAMESET |
static int |
XHTML_1_0_STRICT |
static int |
XHTML_1_0_TRANSITIONAL |
static int |
XHTML_1_1 |
_app, _bigEndian, _checksumFinished, _countStream, _coverage, _crc32, _date, _defaultParams, _features, _format, _init, _isRandomAccess, _je, _logger, _md5, _mimeType, _name, _nByte, _note, _param, _release, _repInfoNote, _rights, _sha1, _signature, _specification, _validityNote, _vendor, _verbosity, _wellFormedNote
MAXIMUM_VERBOSITY, MINIMUM_VERBOSITY
Constructor and Description |
---|
HtmlModule()
Instantiate an HtmlModule object.
|
Modifier and Type | Method and Description |
---|---|
protected int |
checkDoctype(List elements) |
void |
checkSignatures(File file,
InputStream stream,
RepInfo info)
Check if the digital object conforms to this Module's internal signature
information.
|
protected static boolean |
isXmlAvailable() |
int |
parse(InputStream stream,
RepInfo info,
int parseIndex)
Parse the content of a purported HTML stream digital object and store the
results in RepInfo.
|
protected int |
seemsToBeXHTML(List elements) |
protected String |
stripQuotes(String str) |
addIntegerProperty, addIntegerProperty, applyDefaultParams, calcRAChecksum, checkSignatures, getApp, getBase, getBufferedDataStream, getCoverage, getCRC32, getDate, getDefaultParams, getFeatures, getFormat, getMimeType, getName, getNByte, getNote, getRelease, getRepInfoNote, getRights, getSignature, getSpecification, getValidityNote, getVendor, getWellFormedNote, hasFeature, init, initFeatures, initParse, isBigEndian, isRandomAccess, param, parse, readByteBuf, readDouble, readDouble, readDouble, readFloat, readFloat, readSignedByte, readSignedByte, readSignedByte, readSignedInt, readSignedInt, readSignedInt, readSignedLong, readSignedRational, readSignedRational, readSignedShort, readSignedShort, readSignedShort, readUnsignedByte, readUnsignedByte, readUnsignedByte, readUnsignedInt, readUnsignedInt, readUnsignedInt, readUnsignedRational, readUnsignedRational, readUnsignedRational, readUnsignedShort, readUnsignedShort, readUnsignedShort, resetParams, setApp, setBase, setChecksums, setCRC32, setDefaultParams, setMD5, setNByte, setSHA1, setValidityNote, setVerbosity, show, skipBytes, skipBytes, vectorToPropArray
protected ChecksumInputStream _cstream
protected DataInputStream _dstream
protected String _doctype
public static final int HTML_3_2
public static final int HTML_4_0_STRICT
public static final int HTML_4_0_FRAMESET
public static final int HTML_4_0_TRANSITIONAL
public static final int HTML_4_01_STRICT
public static final int HTML_4_01_FRAMESET
public static final int HTML_4_01_TRANSITIONAL
public static final int XHTML_1_0_STRICT
public static final int XHTML_1_0_TRANSITIONAL
public static final int XHTML_1_0_FRAMESET
public static final int XHTML_1_1
protected boolean _withTextMD
protected TextMDMetadata _textMD
public int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException
parse
in interface Module
parse
in class ModuleBase
stream
- An InputStream, positioned at its beginning, which is
generated from the object to be parsed. If multiple calls to
`parse` are made on the basis of a nonzero value
being returned, a new InputStream must be provided each time.
info
- A fresh (on the first call) RepInfo object which will be
modified to reflect the results of the parsing. If multiple
calls to `parse` are made on the basis of a nonzero
value being returned, the same RepInfo object should be passed
with each call.
parseIndex
- Must be 0 in the first call to `parse`. If
`parse` returns a nonzero value, it must be called
again with `parseIndex` equal to that return value.
IOException
public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException
checkSignatures
in interface Module
checkSignatures
in class ModuleBase
file
- A File object for the object being parsed
stream
- An InputStream, positioned at its beginning, which is
generated from the object to be parsed
info
- A fresh RepInfo object which will be modified to reflect the
results of the test
IOException
protected int checkDoctype(List elements)
protected int seemsToBeXHTML(List elements)
protected static boolean isXmlAvailable()
Copyright © 2008–2017 The Open Preservation Foundation. All rights reserved.