COSParser.java (pdfbox-2.0.23-src) | : | COSParser.java (pdfbox-2.0.24-src) | ||
---|---|---|---|---|
skipping to change at line 42 | skipping to change at line 42 | |||
import java.util.Map.Entry; | import java.util.Map.Entry; | |||
import java.util.Queue; | import java.util.Queue; | |||
import java.util.Set; | import java.util.Set; | |||
import java.util.TreeMap; | import java.util.TreeMap; | |||
import org.apache.commons.logging.Log; | import org.apache.commons.logging.Log; | |||
import org.apache.commons.logging.LogFactory; | import org.apache.commons.logging.LogFactory; | |||
import org.apache.pdfbox.cos.COSArray; | import org.apache.pdfbox.cos.COSArray; | |||
import org.apache.pdfbox.cos.COSBase; | import org.apache.pdfbox.cos.COSBase; | |||
import org.apache.pdfbox.cos.COSDictionary; | import org.apache.pdfbox.cos.COSDictionary; | |||
import org.apache.pdfbox.cos.COSDocument; | import org.apache.pdfbox.cos.COSDocument; | |||
import org.apache.pdfbox.cos.COSInputStream; | ||||
import org.apache.pdfbox.cos.COSName; | import org.apache.pdfbox.cos.COSName; | |||
import org.apache.pdfbox.cos.COSNull; | import org.apache.pdfbox.cos.COSNull; | |||
import org.apache.pdfbox.cos.COSNumber; | import org.apache.pdfbox.cos.COSNumber; | |||
import org.apache.pdfbox.cos.COSObject; | import org.apache.pdfbox.cos.COSObject; | |||
import org.apache.pdfbox.cos.COSObjectKey; | import org.apache.pdfbox.cos.COSObjectKey; | |||
import org.apache.pdfbox.cos.COSStream; | import org.apache.pdfbox.cos.COSStream; | |||
import org.apache.pdfbox.io.IOUtils; | import org.apache.pdfbox.io.IOUtils; | |||
import org.apache.pdfbox.io.RandomAccessRead; | import org.apache.pdfbox.io.RandomAccessRead; | |||
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; | import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; | |||
import org.apache.pdfbox.pdmodel.encryption.AccessPermission; | import org.apache.pdfbox.pdmodel.encryption.AccessPermission; | |||
skipping to change at line 315 | skipping to change at line 314 | |||
// check the startxref offset | // check the startxref offset | |||
long fixedOffset = checkXRefOffset(startXrefOffset); | long fixedOffset = checkXRefOffset(startXrefOffset); | |||
if (fixedOffset > -1) | if (fixedOffset > -1) | |||
{ | { | |||
startXrefOffset = fixedOffset; | startXrefOffset = fixedOffset; | |||
} | } | |||
document.setStartXref(startXrefOffset); | document.setStartXref(startXrefOffset); | |||
long prev = startXrefOffset; | long prev = startXrefOffset; | |||
// ---- parse whole chain of xref tables/object streams using PREV reference | // ---- parse whole chain of xref tables/object streams using PREV reference | |||
Set<Long> prevSet = new HashSet<Long>(); | Set<Long> prevSet = new HashSet<Long>(); | |||
COSDictionary trailer = null; | ||||
while (prev > 0) | while (prev > 0) | |||
{ | { | |||
// seek to xref table | // seek to xref table | |||
source.seek(prev); | source.seek(prev); | |||
// skip white spaces | // skip white spaces | |||
skipSpaces(); | skipSpaces(); | |||
// save current position instead of prev due to skipped spaces | ||||
prevSet.add(source.getPosition()); | ||||
// -- parse xref | // -- parse xref | |||
if (source.peek() == X) | if (source.peek() == X) | |||
{ | { | |||
// xref table and trailer | // xref table and trailer | |||
// use existing parser to parse xref table | // use existing parser to parse xref table | |||
if (!parseXrefTable(prev) || !parseTrailer()) | if (!parseXrefTable(prev) || !parseTrailer()) | |||
{ | { | |||
throw new IOException("Expected trailer object at offset " | throw new IOException("Expected trailer object at offset " | |||
+ source.getPosition()); | + source.getPosition()); | |||
} | } | |||
COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer(); | trailer = xrefTrailerResolver.getCurrentTrailer(); | |||
// check for a XRef stream, it may contain some object ids of compressed objects | // check for a XRef stream, it may contain some object ids of compressed objects | |||
if(trailer.containsKey(COSName.XREF_STM)) | if(trailer.containsKey(COSName.XREF_STM)) | |||
{ | { | |||
int streamOffset = trailer.getInt(COSName.XREF_STM); | int streamOffset = trailer.getInt(COSName.XREF_STM); | |||
// check the xref stream reference | // check the xref stream reference | |||
fixedOffset = checkXRefOffset(streamOffset); | fixedOffset = checkXRefOffset(streamOffset); | |||
if (fixedOffset > -1 && fixedOffset != streamOffset) | if (fixedOffset > -1 && fixedOffset != streamOffset) | |||
{ | { | |||
LOG.warn("/XRefStm offset " + streamOffset + " is incorrect, corrected to " + fixedOffset); | LOG.warn("/XRefStm offset " + streamOffset + " is incorrect, corrected to " + fixedOffset); | |||
streamOffset = (int)fixedOffset; | streamOffset = (int)fixedOffset; | |||
skipping to change at line 378 | skipping to change at line 379 | |||
{ | { | |||
LOG.error("Skipped XRef stream due to a corrupt offset:"+streamOffset); | LOG.error("Skipped XRef stream due to a corrupt offset:"+streamOffset); | |||
} | } | |||
else | else | |||
{ | { | |||
throw new IOException("Skipped XRef stream due to a corrupt offset:"+streamOffset); | throw new IOException("Skipped XRef stream due to a corrupt offset:"+streamOffset); | |||
} | } | |||
} | } | |||
} | } | |||
prev = trailer.getLong(COSName.PREV); | prev = trailer.getLong(COSName.PREV); | |||
if (prev > 0) | ||||
{ | ||||
// check the xref table reference | ||||
fixedOffset = checkXRefOffset(prev); | ||||
if (fixedOffset > -1 && fixedOffset != prev) | ||||
{ | ||||
prev = fixedOffset; | ||||
trailer.setLong(COSName.PREV, prev); | ||||
} | ||||
} | ||||
} | } | |||
else | else | |||
{ | { | |||
// parse xref stream | // parse xref stream | |||
prev = parseXrefObjStream(prev, true); | prev = parseXrefObjStream(prev, true); | |||
if (prev > 0) | trailer = xrefTrailerResolver.getCurrentTrailer(); | |||
} | ||||
if (prev > 0) | ||||
{ | ||||
// check the xref table reference | ||||
fixedOffset = checkXRefOffset(prev); | ||||
if (fixedOffset > -1 && fixedOffset != prev) | ||||
{ | { | |||
// check the xref table reference | prev = fixedOffset; | |||
fixedOffset = checkXRefOffset(prev); | trailer.setLong(COSName.PREV, prev); | |||
if (fixedOffset > -1 && fixedOffset != prev) | ||||
{ | ||||
prev = fixedOffset; | ||||
COSDictionary trailer = xrefTrailerResolver.getCurrentTr | ||||
ailer(); | ||||
trailer.setLong(COSName.PREV, prev); | ||||
} | ||||
} | } | |||
} | } | |||
if (prevSet.contains(prev)) | if (prevSet.contains(prev)) | |||
{ | { | |||
throw new IOException("/Prev loop at offset " + prev); | throw new IOException("/Prev loop at offset " + prev); | |||
} | } | |||
prevSet.add(prev); | ||||
} | } | |||
// ---- build valid xrefs out of the xref chain | // ---- build valid xrefs out of the xref chain | |||
xrefTrailerResolver.setStartxref(startXrefOffset); | xrefTrailerResolver.setStartxref(startXrefOffset); | |||
COSDictionary trailer = xrefTrailerResolver.getTrailer(); | trailer = xrefTrailerResolver.getTrailer(); | |||
document.setTrailer(trailer); | document.setTrailer(trailer); | |||
document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType()); | document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType()); | |||
// check the offsets of all referenced objects | // check the offsets of all referenced objects | |||
checkXrefOffsets(); | checkXrefOffsets(); | |||
// copy xref table | // copy xref table | |||
document.addXRefTable(xrefTrailerResolver.getXrefTable()); | document.addXRefTable(xrefTrailerResolver.getXrefTable()); | |||
return trailer; | return trailer; | |||
} | } | |||
/** | /** | |||
skipping to change at line 1172 | skipping to change at line 1162 | |||
} | } | |||
} | } | |||
String endStream = readString(); | String endStream = readString(); | |||
if (endStream.equals("endobj") && isLenient) | if (endStream.equals("endobj") && isLenient) | |||
{ | { | |||
LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset " | LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset " | |||
+ source.getPosition()); | + source.getPosition()); | |||
// avoid follow-up warning about missing endobj | // avoid follow-up warning about missing endobj | |||
source.rewind(ENDOBJ.length); | source.rewind(ENDOBJ.length); | |||
} | } | |||
else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING)) | else if (endStream.length() > 9 && isLenient && endStream.startsWith(ENDSTREAM_STRING)) | |||
{ | { | |||
LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " | LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset " | |||
+ source.getPosition()); | + source.getPosition()); | |||
// unread the "extra" bytes | // unread the "extra" bytes | |||
source.rewind(endStream.substring(9).getBytes(ISO_8859_1).length); | source.rewind(endStream.substring(9).getBytes(ISO_8859_1).length); | |||
} | } | |||
else if (!endStream.equals(ENDSTREAM_STRING)) | else if (!endStream.equals(ENDSTREAM_STRING)) | |||
{ | { | |||
throw new IOException( | throw new IOException( | |||
"Error reading stream, expected='endstream' actual='" | "Error reading stream, expected='endstream' actual='" | |||
skipping to change at line 1458 | skipping to change at line 1448 | |||
LOG.error("Can't find the object xref table/stream at offset " + objectOffset); | LOG.error("Can't find the object xref table/stream at offset " + objectOffset); | |||
return 0; | return 0; | |||
} | } | |||
private boolean validateXrefOffsets(Map<COSObjectKey, Long> xrefOffset) throws IOException | private boolean validateXrefOffsets(Map<COSObjectKey, Long> xrefOffset) throws IOException | |||
{ | { | |||
if (xrefOffset == null) | if (xrefOffset == null) | |||
{ | { | |||
return true; | return true; | |||
} | } | |||
Map<COSObjectKey, COSObjectKey> correctedKeys = new HashMap<COSObjectKey , COSObjectKey>(); | ||||
for (Entry<COSObjectKey, Long> objectEntry : xrefOffset.entrySet()) | for (Entry<COSObjectKey, Long> objectEntry : xrefOffset.entrySet()) | |||
{ | { | |||
COSObjectKey objectKey = objectEntry.getKey(); | COSObjectKey objectKey = objectEntry.getKey(); | |||
Long objectOffset = objectEntry.getValue(); | Long objectOffset = objectEntry.getValue(); | |||
// a negative offset number represents an object number itself | // a negative offset number represents an object number itself | |||
// see type 2 entry in xref stream | // see type 2 entry in xref stream | |||
if (objectOffset != null && objectOffset >= 0 | if (objectOffset != null && objectOffset >= 0) | |||
&& !checkObjectKey(objectKey, objectOffset)) | ||||
{ | { | |||
LOG.debug("Stop checking xref offsets as at least one (" + objec | COSObjectKey foundObjectKey = findObjectKey(objectKey, objectOff | |||
tKey | set); | |||
+ ") couldn't be dereferenced"); | if (foundObjectKey == null) | |||
return false; | { | |||
LOG.debug("Stop checking xref offsets as at least one (" + o | ||||
bjectKey | ||||
+ ") couldn't be dereferenced"); | ||||
return false; | ||||
} | ||||
else if (foundObjectKey != objectKey) | ||||
{ | ||||
// Generation was fixed - need to update map later, after it | ||||
eration | ||||
correctedKeys.put(objectKey, foundObjectKey); | ||||
} | ||||
} | } | |||
} | } | |||
for (Entry<COSObjectKey, COSObjectKey> correctedKeyEntry : correctedKeys | ||||
.entrySet()) | ||||
{ | ||||
xrefOffset.put(correctedKeyEntry.getValue(), | ||||
xrefOffset.remove(correctedKeyEntry.getKey())); | ||||
} | ||||
return true; | return true; | |||
} | } | |||
/** | /** | |||
* Check the XRef table by dereferencing all objects and fixing the offset if necessary. | * Check the XRef table by dereferencing all objects and fixing the offset if necessary. | |||
* | * | |||
* @throws IOException if something went wrong. | * @throws IOException if something went wrong. | |||
*/ | */ | |||
private void checkXrefOffsets() throws IOException | private void checkXrefOffsets() throws IOException | |||
{ | { | |||
skipping to change at line 1501 | skipping to change at line 1505 | |||
if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffs ets.isEmpty()) | if (bfSearchCOSObjectKeyOffsets != null && !bfSearchCOSObjectKeyOffs ets.isEmpty()) | |||
{ | { | |||
LOG.debug("Replaced read xref table with the results of a brute force search"); | LOG.debug("Replaced read xref table with the results of a brute force search"); | |||
xrefOffset.clear(); | xrefOffset.clear(); | |||
xrefOffset.putAll(bfSearchCOSObjectKeyOffsets); | xrefOffset.putAll(bfSearchCOSObjectKeyOffsets); | |||
} | } | |||
} | } | |||
} | } | |||
/** | /** | |||
* Check if the given object can be found at the given offset. | * Check if the given object can be found at the given offset. Returns the provided object key if everything is ok. | |||
* If the generation number differs it will be fixed and a new object key is returned. | ||||
* | * | |||
* @param objectKey the object we are looking for | * @param objectKey the key of object we are looking for | |||
* @param offset the offset where to look | * @param offset the offset where to look | |||
* @return returns true if the given object can be dereferenced at the given | * @return returns the found/fixed object key | |||
offset | * | |||
* @throws IOException if something went wrong | * @throws IOException if something went wrong | |||
*/ | */ | |||
private boolean checkObjectKey(COSObjectKey objectKey, long offset) throws I OException | private COSObjectKey findObjectKey(COSObjectKey objectKey, long offset) thro ws IOException | |||
{ | { | |||
// there can't be any object at the very beginning of a pdf | // there can't be any object at the very beginning of a pdf | |||
if (offset < MINIMUM_SEARCH_OFFSET) | if (offset < MINIMUM_SEARCH_OFFSET) | |||
{ | { | |||
return false; | return null; | |||
} | } | |||
boolean objectKeyFound = false; | ||||
try | try | |||
{ | { | |||
source.seek(offset); | source.seek(offset); | |||
// try to read the given object/generation number | // try to read the given object/generation number | |||
if (objectKey.getNumber() == readObjectNumber()) | if (objectKey.getNumber() == readObjectNumber()) | |||
{ | { | |||
int genNumber = readGenerationNumber(); | int genNumber = readGenerationNumber(); | |||
// finally try to read the object marker | ||||
readExpectedString(OBJ_MARKER, true); | ||||
if (genNumber == objectKey.getGeneration()) | if (genNumber == objectKey.getGeneration()) | |||
{ | { | |||
// finally try to read the object marker | return objectKey; | |||
readExpectedString(OBJ_MARKER, true); | ||||
objectKeyFound = true; | ||||
} | } | |||
else if (isLenient && genNumber > objectKey.getGeneration()) | else if (isLenient && genNumber > objectKey.getGeneration()) | |||
{ | { | |||
// finally try to read the object marker | return new COSObjectKey(objectKey.getNumber(), genNumber); | |||
readExpectedString(OBJ_MARKER, true); | ||||
objectKeyFound = true; | ||||
objectKey.fixGeneration(genNumber); | ||||
} | } | |||
} | } | |||
} | } | |||
catch (IOException exception) | catch (IOException exception) | |||
{ | { | |||
// Swallow the exception, obviously there isn't any valid object num ber | // Swallow the exception, obviously there isn't any valid object num ber | |||
LOG.debug("No valid object at given location " + offset + " - ignori ng", exception); | ||||
} | } | |||
// return resulting value | return null; | |||
return objectKeyFound; | ||||
} | } | |||
/** | /** | |||
* Brute force search for every object in the pdf. | * Brute force search for every object in the pdf. | |||
* | * | |||
* @throws IOException if something went wrong | * @throws IOException if something went wrong | |||
*/ | */ | |||
private void bfSearchForObjects() throws IOException | private void bfSearchForObjects() throws IOException | |||
{ | { | |||
if (bfSearchCOSObjectKeyOffsets == null) | if (bfSearchCOSObjectKeyOffsets == null) | |||
skipping to change at line 1735 | skipping to change at line 1737 | |||
return newValue; | return newValue; | |||
} | } | |||
/** | /** | |||
* Brute force search for all trailer marker. | * Brute force search for all trailer marker. | |||
* | * | |||
* @throws IOException if something went wrong | * @throws IOException if something went wrong | |||
*/ | */ | |||
private boolean bfSearchForTrailer(COSDictionary trailer) throws IOException | private boolean bfSearchForTrailer(COSDictionary trailer) throws IOException | |||
{ | { | |||
Map<String, COSDictionary> trailerDicts = new HashMap<String, COSDiction ary>(); | ||||
long originOffset = source.getPosition(); | long originOffset = source.getPosition(); | |||
source.seek(MINIMUM_SEARCH_OFFSET); | source.seek(MINIMUM_SEARCH_OFFSET); | |||
while (!source.isEOF()) | while (!source.isEOF()) | |||
{ | { | |||
// search for trailer marker | // search for trailer marker | |||
if (isString(TRAILER_MARKER)) | if (isString(TRAILER_MARKER)) | |||
{ | { | |||
source.seek(source.getPosition() + TRAILER_MARKER.length); | source.seek(source.getPosition() + TRAILER_MARKER.length); | |||
try | try | |||
{ | { | |||
boolean rootFound = false; | boolean rootFound = false; | |||
boolean infoFound = false; | boolean infoFound = false; | |||
skipSpaces(); | skipSpaces(); | |||
COSDictionary trailerDict = parseCOSDictionary(); | COSDictionary trailerDict = parseCOSDictionary(); | |||
StringBuilder trailerKeys = new StringBuilder(); | ||||
COSObject rootObj = trailerDict.getCOSObject(COSName.ROOT); | COSObject rootObj = trailerDict.getCOSObject(COSName.ROOT); | |||
if (rootObj != null) | if (rootObj != null) | |||
{ | { | |||
long objNumber = rootObj.getObjectNumber(); | // check if the dictionary can be dereferenced and is th | |||
int genNumber = rootObj.getGenerationNumber(); | e one we are looking for | |||
trailerKeys.append(objNumber).append(" "); | COSDictionary rootDict = retrieveCOSDictionary(rootObj); | |||
trailerKeys.append(genNumber).append(" "); | if (rootDict != null && isCatalog(rootDict)) | |||
rootFound = true; | { | |||
rootFound = true; | ||||
} | ||||
} | } | |||
COSObject infoObj = trailerDict.getCOSObject(COSName.INFO); | COSObject infoObj = trailerDict.getCOSObject(COSName.INFO); | |||
if (infoObj != null) | if (infoObj != null) | |||
{ | { | |||
long objNumber = infoObj.getObjectNumber(); | // check if the dictionary can be dereferenced and is th | |||
int genNumber = infoObj.getGenerationNumber(); | e one we are looking for | |||
trailerKeys.append(objNumber).append(" "); | COSDictionary infoDict = retrieveCOSDictionary(infoObj); | |||
trailerKeys.append(genNumber).append(" "); | if (infoDict != null && isInfo(infoDict)) | |||
infoFound = true; | { | |||
infoFound = true; | ||||
} | ||||
} | } | |||
if (rootFound && infoFound) | if (rootFound && infoFound) | |||
{ | { | |||
trailerDicts.put(trailerKeys.toString(), trailerDict); | trailer.setItem(COSName.ROOT, rootObj); | |||
trailer.setItem(COSName.INFO, infoObj); | ||||
if (trailerDict.containsKey(COSName.ENCRYPT)) | ||||
{ | ||||
COSObject encObj = trailerDict.getCOSObject(COSName. | ||||
ENCRYPT); | ||||
if (encObj != null) | ||||
{ | ||||
// check if the dictionary can be dereferenced | ||||
// TODO check if the dictionary is an encryption | ||||
dictionary? | ||||
COSDictionary encDict = retrieveCOSDictionary(en | ||||
cObj); | ||||
if (encDict != null) | ||||
{ | ||||
trailer.setItem(COSName.ENCRYPT, encObj); | ||||
} | ||||
} | ||||
} | ||||
if (trailerDict.containsKey(COSName.ID)) | ||||
{ | ||||
COSBase idObj = trailerDict.getItem(COSName.ID); | ||||
if (idObj instanceof COSArray) | ||||
{ | ||||
trailer.setItem(COSName.ID, idObj); | ||||
} | ||||
} | ||||
return true; | ||||
} | } | |||
} | } | |||
catch (IOException exception) | catch (IOException exception) | |||
{ | { | |||
continue; | continue; | |||
} | } | |||
} | } | |||
source.read(); | source.read(); | |||
} | } | |||
source.seek(originOffset); | source.seek(originOffset); | |||
// eliminate double entries | ||||
int trailerdictsSize = trailerDicts.size(); | ||||
String firstEntry = null; | ||||
if (trailerdictsSize > 0) | ||||
{ | ||||
String[] keys = new String[trailerdictsSize]; | ||||
trailerDicts.keySet().toArray(keys); | ||||
firstEntry = keys[0]; | ||||
for (int i = 1; i < trailerdictsSize; i++) | ||||
{ | ||||
if (firstEntry.equals(keys[i])) | ||||
{ | ||||
trailerDicts.remove(keys[i]); | ||||
} | ||||
} | ||||
} | ||||
// continue if one entry is left only | ||||
if (trailerDicts.size() == 1) | ||||
{ | ||||
boolean rootFound = false; | ||||
boolean infoFound = false; | ||||
COSDictionary trailerDict = trailerDicts.get(firstEntry); | ||||
COSBase rootObj = trailerDict.getItem(COSName.ROOT); | ||||
if (rootObj instanceof COSObject) | ||||
{ | ||||
// check if the dictionary can be dereferenced and is the one we | ||||
are looking for | ||||
COSDictionary rootDict = retrieveCOSDictionary((COSObject) rootO | ||||
bj); | ||||
if (rootDict != null && isCatalog(rootDict)) | ||||
{ | ||||
rootFound = true; | ||||
} | ||||
} | ||||
COSBase infoObj = trailerDict.getItem(COSName.INFO); | ||||
if (infoObj instanceof COSObject) | ||||
{ | ||||
// check if the dictionary can be dereferenced and is the one we | ||||
are looking for | ||||
COSDictionary infoDict = retrieveCOSDictionary((COSObject) infoO | ||||
bj); | ||||
if (infoDict != null && isInfo(infoDict)) | ||||
{ | ||||
infoFound = true; | ||||
} | ||||
} | ||||
if (rootFound && infoFound) | ||||
{ | ||||
trailer.setItem(COSName.ROOT, rootObj); | ||||
trailer.setItem(COSName.INFO, infoObj); | ||||
if (trailerDict.containsKey(COSName.ENCRYPT)) | ||||
{ | ||||
COSBase encObj = trailerDict.getItem(COSName.ENCRYPT); | ||||
if (encObj instanceof COSObject) | ||||
{ | ||||
// check if the dictionary can be dereferenced | ||||
// TODO check if the dictionary is an encryption diction | ||||
ary? | ||||
COSDictionary encDict = retrieveCOSDictionary((COSObject | ||||
) encObj); | ||||
if (encDict != null) | ||||
{ | ||||
trailer.setItem(COSName.ENCRYPT, encObj); | ||||
} | ||||
} | ||||
} | ||||
if (trailerDict.containsKey(COSName.ID)) | ||||
{ | ||||
COSBase idObj = trailerDict.getItem(COSName.ID); | ||||
if (idObj instanceof COSArray) | ||||
{ | ||||
trailer.setItem(COSName.ID, idObj); | ||||
} | ||||
} | ||||
return true; | ||||
} | ||||
} | ||||
return false; | return false; | |||
} | } | |||
/** | /** | |||
* Brute force search for the last EOF marker. | * Brute force search for the last EOF marker. | |||
* | * | |||
* @throws IOException if something went wrong | * @throws IOException if something went wrong | |||
*/ | */ | |||
private void bfSearchForLastEOFMarker() throws IOException | private void bfSearchForLastEOFMarker() throws IOException | |||
{ | { | |||
skipping to change at line 1999 | skipping to change at line 1954 | |||
} | } | |||
// check if the object was overwritten | // check if the object was overwritten | |||
if (offset.equals(bfOffset)) | if (offset.equals(bfOffset)) | |||
{ | { | |||
source.seek(offset); | source.seek(offset); | |||
long stmObjNumber = readObjectNumber(); | long stmObjNumber = readObjectNumber(); | |||
int stmGenNumber = readGenerationNumber(); | int stmGenNumber = readGenerationNumber(); | |||
readExpectedString(OBJ_MARKER, true); | readExpectedString(OBJ_MARKER, true); | |||
int nrOfObjects = 0; | int nrOfObjects = 0; | |||
COSStream stream = null; | COSStream stream = null; | |||
COSInputStream is = null; | ||||
List<Long> objectNumbers = null; | List<Long> objectNumbers = null; | |||
try | try | |||
{ | { | |||
COSDictionary dict = parseCOSDictionary(); | COSDictionary dict = parseCOSDictionary(); | |||
int offsetFirstStream = dict.getInt(COSName.FIRST); | int offsetFirstStream = dict.getInt(COSName.FIRST); | |||
nrOfObjects = dict.getInt(COSName.N); | nrOfObjects = dict.getInt(COSName.N); | |||
// skip the stream if required values are missing | // skip the stream if required values are missing | |||
if (offsetFirstStream == -1 || nrOfObjects == -1) | if (offsetFirstStream == -1 || nrOfObjects == -1) | |||
{ | { | |||
continue; | continue; | |||
skipping to change at line 2032 | skipping to change at line 1986 | |||
} | } | |||
} | } | |||
catch (IOException exception) | catch (IOException exception) | |||
{ | { | |||
LOG.debug( | LOG.debug( | |||
"Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); | "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); | |||
continue; | continue; | |||
} | } | |||
finally | finally | |||
{ | { | |||
if (is != null) | ||||
{ | ||||
is.close(); | ||||
} | ||||
if (stream != null) | if (stream != null) | |||
{ | { | |||
stream.close(); | stream.close(); | |||
} | } | |||
} | } | |||
if (objectNumbers.size() < nrOfObjects) | if (objectNumbers.size() < nrOfObjects) | |||
{ | { | |||
LOG.debug( | LOG.debug( | |||
"Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); | "Skipped corrupt stream: (" + stmObjNumber + " 0 at offset " + offset); | |||
continue; | continue; | |||
skipping to change at line 2238 | skipping to change at line 2188 | |||
/** | /** | |||
* Search for the different parts of the trailer dictionary. | * Search for the different parts of the trailer dictionary. | |||
* | * | |||
* @param trailer | * @param trailer | |||
* @return true if the root was found, false if not. | * @return true if the root was found, false if not. | |||
* @throws IOException | * @throws IOException | |||
*/ | */ | |||
private boolean searchForTrailerItems(COSDictionary trailer) throws IOExcept ion | private boolean searchForTrailerItems(COSDictionary trailer) throws IOExcept ion | |||
{ | { | |||
boolean rootFound = false; | COSObject rootObject = null; | |||
Long rootOffset = null; | ||||
COSObject infoObject = null; | ||||
Long infoOffset = null; | ||||
for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entry Set()) | for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entry Set()) | |||
{ | { | |||
COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), ent ry.getValue()); | COSDictionary dictionary = retrieveCOSDictionary(entry.getKey(), ent ry.getValue()); | |||
if (dictionary == null) | if (dictionary == null) | |||
{ | { | |||
continue; | continue; | |||
} | } | |||
// document catalog | // document catalog | |||
if (isCatalog(dictionary)) | if (isCatalog(dictionary)) | |||
{ | { | |||
trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.g | COSObject cosObject = document.getObjectFromPool(entry.getKey()) | |||
etKey())); | ; | |||
rootFound = true; | rootObject = compareCOSObjects(cosObject, entry.getValue(), root | |||
Object, rootOffset); | ||||
if (rootObject == cosObject) | ||||
{ | ||||
rootOffset = entry.getValue(); | ||||
} | ||||
} | } | |||
// info dictionary | // info dictionary | |||
else if (isInfo(dictionary)) | else if (isInfo(dictionary)) | |||
{ | { | |||
trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.g | COSObject cosObject = document.getObjectFromPool(entry.getKey()) | |||
etKey())); | ; | |||
infoObject = compareCOSObjects(cosObject, entry.getValue(), info | ||||
Object, infoOffset); | ||||
if (infoObject == cosObject) | ||||
{ | ||||
infoOffset = entry.getValue(); | ||||
} | ||||
} | } | |||
// encryption dictionary, if existing, is lost | // encryption dictionary, if existing, is lost | |||
// We can't run "Algorithm 2" from PDF specification because of miss ing ID | // We can't run "Algorithm 2" from PDF specification because of miss ing ID | |||
} | } | |||
return rootFound; | if (rootObject != null) | |||
{ | ||||
trailer.setItem(COSName.ROOT, rootObject); | ||||
} | ||||
if (infoObject != null) | ||||
{ | ||||
trailer.setItem(COSName.INFO, infoObject); | ||||
} | ||||
return rootObject != null; | ||||
} | ||||
private COSObject compareCOSObjects(COSObject newObject, Long newOffset, | ||||
COSObject currentObject, Long currentOffset) | ||||
{ | ||||
if (currentObject != null) | ||||
{ | ||||
// check if the current object is an updated version of the previous | ||||
found object | ||||
if (currentObject.getObjectNumber() == newObject.getObjectNumber()) | ||||
{ | ||||
return currentObject.getGenerationNumber() < newObject.getGenera | ||||
tionNumber() | ||||
? newObject | ||||
: currentObject; | ||||
} | ||||
// most likely the object with the bigger offset is the newer one | ||||
return currentOffset != null && newOffset > currentOffset ? newObjec | ||||
t : currentObject; | ||||
} | ||||
return newObject; | ||||
} | } | |||
private COSDictionary retrieveCOSDictionary(COSObject object) throws IOExcep tion | private COSDictionary retrieveCOSDictionary(COSObject object) throws IOExcep tion | |||
{ | { | |||
COSObjectKey key = new COSObjectKey(object); | COSObjectKey key = new COSObjectKey(object); | |||
Long offset = bfSearchCOSObjectKeyOffsets.get(key); | Long offset = bfSearchCOSObjectKeyOffsets.get(key); | |||
if (offset != null) | if (offset != null) | |||
{ | { | |||
return retrieveCOSDictionary(key, offset); | long currentPosition = source.getPosition(); | |||
COSDictionary dictionary = retrieveCOSDictionary(key, offset); | ||||
source.seek(currentPosition); | ||||
return dictionary; | ||||
} | } | |||
return null; | return null; | |||
} | } | |||
private COSDictionary retrieveCOSDictionary(COSObjectKey key, long offset) t hrows IOException | private COSDictionary retrieveCOSDictionary(COSObjectKey key, long offset) t hrows IOException | |||
{ | { | |||
COSDictionary dictionary = null; | COSDictionary dictionary = null; | |||
// handle compressed objects | // handle compressed objects | |||
if (offset < 0) | if (offset < 0) | |||
{ | { | |||
skipping to change at line 2607 | skipping to change at line 2598 | |||
} | } | |||
//sometimes there is some garbage in the header before the header | //sometimes there is some garbage in the header before the header | |||
//actually starts, so lets try to find the header first. | //actually starts, so lets try to find the header first. | |||
int headerStart = header.indexOf( headerMarker ); | int headerStart = header.indexOf( headerMarker ); | |||
// greater than zero because if it is zero then there is no point of trimming | // greater than zero because if it is zero then there is no point of trimming | |||
if ( headerStart > 0 ) | if ( headerStart > 0 ) | |||
{ | { | |||
//trim off any leading characters | //trim off any leading characters | |||
header = header.substring( headerStart, header.length() ); | header = header.substring(headerStart); | |||
} | } | |||
// This is used if there is garbage after the header on the same line | // This is used if there is garbage after the header on the same line | |||
if (header.startsWith(headerMarker) && !header.matches(headerMarker + "\\d.\\d")) | if (header.startsWith(headerMarker) && !header.matches(headerMarker + "\\d.\\d")) | |||
{ | { | |||
if (header.length() < headerMarker.length() + 3) | if (header.length() < headerMarker.length() + 3) | |||
{ | { | |||
// No version number at all, set to 1.4 as default | // No version number at all, set to 1.4 as default | |||
header = headerMarker + defaultVersion; | header = headerMarker + defaultVersion; | |||
LOG.debug("No version found, set to " + defaultVersion + " as default."); | LOG.debug("No version found, set to " + defaultVersion + " as default."); | |||
End of changes. 40 change blocks. | ||||
152 lines changed or deleted | 150 lines changed or added |