BaseParser.java (pdfbox-2.0.23-src) | : | BaseParser.java (pdfbox-2.0.24-src) | ||
---|---|---|---|---|
skipping to change at line 156 | skipping to change at line 156 | |||
* @return The parsed Dictionary object. | * @return The parsed Dictionary object. | |||
* | * | |||
* @throws IOException If there is an error parsing the dictionary object. | * @throws IOException If there is an error parsing the dictionary object. | |||
*/ | */ | |||
private COSBase parseCOSDictionaryValue() throws IOException | private COSBase parseCOSDictionaryValue() throws IOException | |||
{ | { | |||
long numOffset = seqSource.getPosition(); | long numOffset = seqSource.getPosition(); | |||
COSBase value = parseDirObject(); | COSBase value = parseDirObject(); | |||
skipSpaces(); | skipSpaces(); | |||
// proceed if the given object is a number and the following is a number as well | // proceed if the given object is a number and the following is a number as well | |||
if (!(value instanceof COSNumber) || !isDigit()) | if ((!(value instanceof COSNumber) || !isDigit())) | |||
{ | { | |||
return value; | return value; | |||
} | } | |||
// read the remaining information of the object number | // read the remaining information of the object number | |||
long genOffset = seqSource.getPosition(); | long genOffset = seqSource.getPosition(); | |||
COSBase generationNumber = parseDirObject(); | COSBase generationNumber = parseDirObject(); | |||
skipSpaces(); | skipSpaces(); | |||
readExpectedChar('R'); | readExpectedChar('R'); | |||
if (!(value instanceof COSInteger)) | if (!(value instanceof COSInteger)) | |||
{ | { | |||
skipping to change at line 215 | skipping to change at line 215 | |||
while (!done) | while (!done) | |||
{ | { | |||
skipSpaces(); | skipSpaces(); | |||
char c = (char) seqSource.peek(); | char c = (char) seqSource.peek(); | |||
if (c == '>') | if (c == '>') | |||
{ | { | |||
done = true; | done = true; | |||
} | } | |||
else if (c == '/') | else if (c == '/') | |||
{ | { | |||
parseCOSDictionaryNameValuePair(obj); | // something went wrong, most likely the dictionary is corrupted | |||
// stop immediately and return everything read so far | ||||
if (!parseCOSDictionaryNameValuePair(obj)) | ||||
{ | ||||
return obj; | ||||
} | ||||
} | } | |||
else | else | |||
{ | { | |||
// invalid dictionary, we were expecting a /Name, read until the end or until we can recover | // invalid dictionary, we were expecting a /Name, read until the end or until we can recover | |||
LOG.warn("Invalid dictionary, found: '" + c + "' but expected: ' /' at offset " + seqSource.getPosition()); | LOG.warn("Invalid dictionary, found: '" + c + "' but expected: ' /' at offset " + seqSource.getPosition()); | |||
if (readUntilEndOfCOSDictionary()) | if (readUntilEndOfCOSDictionary()) | |||
{ | { | |||
// we couldn't recover | // we couldn't recover | |||
return obj; | return obj; | |||
} | } | |||
skipping to change at line 279 | skipping to change at line 284 | |||
c = seqSource.read(); | c = seqSource.read(); | |||
} | } | |||
if (c == -1) | if (c == -1) | |||
{ | { | |||
return true; | return true; | |||
} | } | |||
seqSource.unread(c); | seqSource.unread(c); | |||
return false; | return false; | |||
} | } | |||
private void parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException | private boolean parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException | |||
{ | { | |||
COSName key = parseCOSName(); | COSName key = parseCOSName(); | |||
COSBase value = parseCOSDictionaryValue(); | COSBase value = parseCOSDictionaryValue(); | |||
skipSpaces(); | skipSpaces(); | |||
if (((char) seqSource.peek()) == 'd') | ||||
{ | ||||
// if the next string is 'def' then we are parsing a cmap stream | ||||
// and want to ignore it, otherwise throw an exception. | ||||
String potentialDEF = readString(); | ||||
if (!potentialDEF.equals(DEF)) | ||||
{ | ||||
seqSource.unread(potentialDEF.getBytes(ISO_8859_1)); | ||||
} | ||||
else | ||||
{ | ||||
skipSpaces(); | ||||
} | ||||
} | ||||
if (value == null) | if (value == null) | |||
{ | { | |||
LOG.warn("Bad dictionary declaration at offset " + seqSource.getPosition()); | LOG.warn("Bad dictionary declaration at offset " + seqSource.getPosition()); | |||
return false; | ||||
} | ||||
else if (value instanceof COSInteger && !((COSInteger)value).isValid()) | ||||
{ | ||||
LOG.warn("Skipped out of range number value at offset " + seqSource.getPosition()); | ||||
} | } | |||
else | else | |||
{ | { | |||
// label this item as direct, to avoid signature problems. | // label this item as direct, to avoid signature problems. | |||
value.setDirect(true); | value.setDirect(true); | |||
obj.setItem(key, value); | obj.setItem(key, value); | |||
} | } | |||
return true; | ||||
} | } | |||
protected void skipWhiteSpaces() throws IOException | protected void skipWhiteSpaces() throws IOException | |||
{ | { | |||
//PDF Ref 3.2.7 A stream must be followed by either | //PDF Ref 3.2.7 A stream must be followed by either | |||
//a CRLF or LF but nothing else. | //a CRLF or LF but nothing else. | |||
int whitespace = seqSource.read(); | int whitespace = seqSource.read(); | |||
//see brother_scan_cover.pdf, it adds whitespaces | //see brother_scan_cover.pdf, it adds whitespaces | |||
skipping to change at line 662 | skipping to change at line 658 | |||
pbo = null; | pbo = null; | |||
} | } | |||
} | } | |||
if( pbo != null ) | if( pbo != null ) | |||
{ | { | |||
po.add( pbo ); | po.add( pbo ); | |||
} | } | |||
else | else | |||
{ | { | |||
//it could be a bad object in the array which is just skipped | //it could be a bad object in the array which is just skipped | |||
LOG.warn("Corrupt object reference at offset " + | LOG.warn("Corrupt array element at offset " | |||
seqSource.getPosition() + ", start offset: " + startPosition); | + seqSource.getPosition() + ", start offset: " + startPosition); | |||
// This could also be an "endobj" or "endstream" which means we can assume that | ||||
// the array has ended. | ||||
String isThisTheEnd = readString(); | String isThisTheEnd = readString(); | |||
// return immediately if a corrupt element is followed by another array | ||||
// to avoid a possible infinite recursion as most likely the whole array is corrupted | ||||
if (isThisTheEnd.isEmpty() && seqSource.peek() == '[') | ||||
{ | ||||
return po; | ||||
} | ||||
seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1)); | seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1)); | |||
// This could also be an "endobj" or "endstream" which means we can assume that | ||||
// the array has ended. | ||||
if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd)) | if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd)) | |||
{ | { | |||
return po; | return po; | |||
} | } | |||
} | } | |||
skipSpaces(); | skipSpaces(); | |||
} | } | |||
// read ']' | // read ']' | |||
seqSource.read(); | seqSource.read(); | |||
skipSpaces(); | skipSpaces(); | |||
skipping to change at line 692 | skipping to change at line 693 | |||
/** | /** | |||
* Determine if a character terminates a PDF name. | * Determine if a character terminates a PDF name. | |||
* | * | |||
* @param ch The character | * @param ch The character | |||
* @return true if the character terminates a PDF name, otherwise false. | * @return true if the character terminates a PDF name, otherwise false. | |||
*/ | */ | |||
protected boolean isEndOfName(int ch) | protected boolean isEndOfName(int ch) | |||
{ | { | |||
return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>' || | return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>' || | |||
ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='(' || | ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='(' || | |||
ch == 0 || ch == '\f'; | ch == 0 || ch == '\f' || ch == '%'; | |||
} | } | |||
/** | /** | |||
* This will parse a PDF name from the stream. | * This will parse a PDF name from the stream. | |||
* | * | |||
* @return The parsed PDF name. | * @return The parsed PDF name. | |||
* @throws IOException If there is an error reading from the stream. | * @throws IOException If there is an error reading from the stream. | |||
*/ | */ | |||
protected COSName parseCOSName() throws IOException | protected COSName parseCOSName() throws IOException | |||
{ | { | |||
skipping to change at line 855 | skipping to change at line 856 | |||
skipSpaces(); | skipSpaces(); | |||
char c = (char)seqSource.peek(); | char c = (char)seqSource.peek(); | |||
switch(c) | switch(c) | |||
{ | { | |||
case '<': | case '<': | |||
// pull off first left bracket | // pull off first left bracket | |||
int leftBracket = seqSource.read(); | int leftBracket = seqSource.read(); | |||
// check for second left bracket | // check for second left bracket | |||
c = (char) seqSource.peek(); | c = (char) seqSource.peek(); | |||
seqSource.unread(leftBracket); | seqSource.unread(leftBracket); | |||
if(c == '<') | return c == '<' ? parseCOSDictionary() : parseCOSString(); | |||
{ | ||||
COSDictionary retval = parseCOSDictionary(); | ||||
skipSpaces(); | ||||
return retval; | ||||
} | ||||
else | ||||
{ | ||||
return parseCOSString(); | ||||
} | ||||
case '[': | case '[': | |||
// array | // array | |||
return parseCOSArray(); | return parseCOSArray(); | |||
case '(': | case '(': | |||
return parseCOSString(); | return parseCOSString(); | |||
case '/': | case '/': | |||
// name | // name | |||
return parseCOSName(); | return parseCOSName(); | |||
case 'n': | case 'n': | |||
// null | // null | |||
skipping to change at line 910 | skipping to change at line 901 | |||
case 'R': | case 'R': | |||
seqSource.read(); | seqSource.read(); | |||
return new COSObject(null); | return new COSObject(null); | |||
case (char)-1: | case (char)-1: | |||
return null; | return null; | |||
default: | default: | |||
if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') | if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') | |||
{ | { | |||
return parseCOSNumber(); | return parseCOSNumber(); | |||
} | } | |||
else | // This is not suppose to happen, but we will allow for it | |||
// so we are more compatible with POS writers that don't | ||||
// follow the spec | ||||
long startOffset = seqSource.getPosition(); | ||||
String badString = readString(); | ||||
if (badString.isEmpty()) | ||||
{ | { | |||
//This is not suppose to happen, but we will allow for it | int peek = seqSource.peek(); | |||
//so we are more compatible with POS writers that don't | // we can end up in an infinite loop otherwise | |||
//follow the spec | throw new IOException( | |||
String badString = readString(); | "Unknown dir object c='" + c + "' cInt=" + (int) c + " p | |||
if (badString.isEmpty()) | eek='" + (char) peek | |||
{ | + "' peekInt=" + peek + " at offset " + seqSource.getPos | |||
int peek = seqSource.peek(); | ition() | |||
// we can end up in an infinite loop otherwise | + " (start offset: " + startOffset + ")"); | |||
throw new IOException( "Unknown dir object c='" + c + | } | |||
"' cInt=" + (int)c + " peek='" + (char)peek | ||||
+ "' peekInt=" + peek + " at offset " + seqSource.ge | ||||
tPosition() ); | ||||
} | ||||
// if it's an endstream/endobj, we want to put it back so the caller will see it | // if it's an endstream/endobj, we want to put it back so the caller will see it | |||
if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString)) | if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString)) | |||
{ | { | |||
seqSource.unread(badString.getBytes(ISO_8859_1)); | seqSource.unread(badString.getBytes(ISO_8859_1)); | |||
} | } | |||
else | else | |||
{ | { | |||
LOG.warn("Skipped unexpected dir object = '" + badString + "' at offset " | LOG.warn("Skipped unexpected dir object = '" + badString + "' at offset " | |||
+ seqSource.getPosition()); | + seqSource.getPosition() + " (start offset: " + startOf | |||
} | fset + ")"); | |||
} | } | |||
} | } | |||
return null; | return null; | |||
} | } | |||
private COSNumber parseCOSNumber() throws IOException | private COSNumber parseCOSNumber() throws IOException | |||
{ | { | |||
StringBuilder buf = new StringBuilder(); | StringBuilder buf = new StringBuilder(); | |||
int ic = seqSource.read(); | int ic = seqSource.read(); | |||
char c = (char) ic; | char c = (char) ic; | |||
End of changes. 14 change blocks. | ||||
64 lines changed or deleted | 59 lines changed or added |