"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java" between
pdfbox-2.0.23-src.zip and pdfbox-2.0.24-src.zip

About: Apache PDFBox is a Java PDF library that allows creating new PDF documents, manipulating existing documents, and extracting content from documents.

BaseParser.java  (pdfbox-2.0.23-src):BaseParser.java  (pdfbox-2.0.24-src)
skipping to change at line 156 skipping to change at line 156
* @return The parsed Dictionary object. * @return The parsed Dictionary object.
* *
* @throws IOException If there is an error parsing the dictionary object. * @throws IOException If there is an error parsing the dictionary object.
*/ */
private COSBase parseCOSDictionaryValue() throws IOException private COSBase parseCOSDictionaryValue() throws IOException
{ {
long numOffset = seqSource.getPosition(); long numOffset = seqSource.getPosition();
COSBase value = parseDirObject(); COSBase value = parseDirObject();
skipSpaces(); skipSpaces();
// proceed if the given object is a number and the following is a number as well // proceed if the given object is a number and the following is a number as well
if (!(value instanceof COSNumber) || !isDigit()) if ((!(value instanceof COSNumber) || !isDigit()))
{ {
return value; return value;
} }
// read the remaining information of the object number // read the remaining information of the object number
long genOffset = seqSource.getPosition(); long genOffset = seqSource.getPosition();
COSBase generationNumber = parseDirObject(); COSBase generationNumber = parseDirObject();
skipSpaces(); skipSpaces();
readExpectedChar('R'); readExpectedChar('R');
if (!(value instanceof COSInteger)) if (!(value instanceof COSInteger))
{ {
skipping to change at line 215 skipping to change at line 215
while (!done) while (!done)
{ {
skipSpaces(); skipSpaces();
char c = (char) seqSource.peek(); char c = (char) seqSource.peek();
if (c == '>') if (c == '>')
{ {
done = true; done = true;
} }
else if (c == '/') else if (c == '/')
{ {
parseCOSDictionaryNameValuePair(obj); // something went wrong, most likely the dictionary is corrupted
// stop immediately and return everything read so far
if (!parseCOSDictionaryNameValuePair(obj))
{
return obj;
}
} }
else else
{ {
// invalid dictionary, we were expecting a /Name, read until the end or until we can recover // invalid dictionary, we were expecting a /Name, read until the end or until we can recover
LOG.warn("Invalid dictionary, found: '" + c + "' but expected: ' /' at offset " + seqSource.getPosition()); LOG.warn("Invalid dictionary, found: '" + c + "' but expected: ' /' at offset " + seqSource.getPosition());
if (readUntilEndOfCOSDictionary()) if (readUntilEndOfCOSDictionary())
{ {
// we couldn't recover // we couldn't recover
return obj; return obj;
} }
skipping to change at line 279 skipping to change at line 284
c = seqSource.read(); c = seqSource.read();
} }
if (c == -1) if (c == -1)
{ {
return true; return true;
} }
seqSource.unread(c); seqSource.unread(c);
return false; return false;
} }
private void parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOExc eption private boolean parseCOSDictionaryNameValuePair(COSDictionary obj) throws IO Exception
{ {
COSName key = parseCOSName(); COSName key = parseCOSName();
COSBase value = parseCOSDictionaryValue(); COSBase value = parseCOSDictionaryValue();
skipSpaces(); skipSpaces();
if (((char) seqSource.peek()) == 'd')
{
// if the next string is 'def' then we are parsing a cmap stream
// and want to ignore it, otherwise throw an exception.
String potentialDEF = readString();
if (!potentialDEF.equals(DEF))
{
seqSource.unread(potentialDEF.getBytes(ISO_8859_1));
}
else
{
skipSpaces();
}
}
if (value == null) if (value == null)
{ {
LOG.warn("Bad dictionary declaration at offset " + seqSource.getPosi tion()); LOG.warn("Bad dictionary declaration at offset " + seqSource.getPosi tion());
return false;
}
else if (value instanceof COSInteger && !((COSInteger)value).isValid())
{
LOG.warn("Skipped out of range number value at offset " + seqSource.
getPosition());
} }
else else
{ {
// label this item as direct, to avoid signature problems. // label this item as direct, to avoid signature problems.
value.setDirect(true); value.setDirect(true);
obj.setItem(key, value); obj.setItem(key, value);
} }
return true;
} }
protected void skipWhiteSpaces() throws IOException protected void skipWhiteSpaces() throws IOException
{ {
//PDF Ref 3.2.7 A stream must be followed by either //PDF Ref 3.2.7 A stream must be followed by either
//a CRLF or LF but nothing else. //a CRLF or LF but nothing else.
int whitespace = seqSource.read(); int whitespace = seqSource.read();
//see brother_scan_cover.pdf, it adds whitespaces //see brother_scan_cover.pdf, it adds whitespaces
skipping to change at line 662 skipping to change at line 658
pbo = null; pbo = null;
} }
} }
if( pbo != null ) if( pbo != null )
{ {
po.add( pbo ); po.add( pbo );
} }
else else
{ {
//it could be a bad object in the array which is just skipped //it could be a bad object in the array which is just skipped
LOG.warn("Corrupt object reference at offset " + LOG.warn("Corrupt array element at offset "
seqSource.getPosition() + ", start offset: " + startPosi + seqSource.getPosition() + ", start offset: " + startPo
tion); sition);
// This could also be an "endobj" or "endstream" which means we
can assume that
// the array has ended.
String isThisTheEnd = readString(); String isThisTheEnd = readString();
// return immediately if a corrupt element is followed by anothe
r array
// to avoid a possible infinite recursion as most likely the who
le array is corrupted
if (isThisTheEnd.isEmpty() && seqSource.peek() == '[')
{
return po;
}
seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1)); seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1));
// This could also be an "endobj" or "endstream" which means we
can assume that
// the array has ended.
if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals (isThisTheEnd)) if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals (isThisTheEnd))
{ {
return po; return po;
} }
} }
skipSpaces(); skipSpaces();
} }
// read ']' // read ']'
seqSource.read(); seqSource.read();
skipSpaces(); skipSpaces();
skipping to change at line 692 skipping to change at line 693
/** /**
* Determine if a character terminates a PDF name. * Determine if a character terminates a PDF name.
* *
* @param ch The character * @param ch The character
* @return true if the character terminates a PDF name, otherwise false. * @return true if the character terminates a PDF name, otherwise false.
*/ */
protected boolean isEndOfName(int ch) protected boolean isEndOfName(int ch)
{ {
return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>' || return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>' ||
ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='(' || ch == '<' || ch == '[' || ch =='/' || ch ==']' || ch ==')' || ch =='(' ||
ch == 0 || ch == '\f'; ch == 0 || ch == '\f' || ch == '%';
} }
/** /**
* This will parse a PDF name from the stream. * This will parse a PDF name from the stream.
* *
* @return The parsed PDF name. * @return The parsed PDF name.
* @throws IOException If there is an error reading from the stream. * @throws IOException If there is an error reading from the stream.
*/ */
protected COSName parseCOSName() throws IOException protected COSName parseCOSName() throws IOException
{ {
skipping to change at line 855 skipping to change at line 856
skipSpaces(); skipSpaces();
char c = (char)seqSource.peek(); char c = (char)seqSource.peek();
switch(c) switch(c)
{ {
case '<': case '<':
// pull off first left bracket // pull off first left bracket
int leftBracket = seqSource.read(); int leftBracket = seqSource.read();
// check for second left bracket // check for second left bracket
c = (char) seqSource.peek(); c = (char) seqSource.peek();
seqSource.unread(leftBracket); seqSource.unread(leftBracket);
if(c == '<') return c == '<' ? parseCOSDictionary() : parseCOSString();
{
COSDictionary retval = parseCOSDictionary();
skipSpaces();
return retval;
}
else
{
return parseCOSString();
}
case '[': case '[':
// array // array
return parseCOSArray(); return parseCOSArray();
case '(': case '(':
return parseCOSString(); return parseCOSString();
case '/': case '/':
// name // name
return parseCOSName(); return parseCOSName();
case 'n': case 'n':
// null // null
skipping to change at line 910 skipping to change at line 901
case 'R': case 'R':
seqSource.read(); seqSource.read();
return new COSObject(null); return new COSObject(null);
case (char)-1: case (char)-1:
return null; return null;
default: default:
if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
{ {
return parseCOSNumber(); return parseCOSNumber();
} }
else // This is not suppose to happen, but we will allow for it
// so we are more compatible with POS writers that don't
// follow the spec
long startOffset = seqSource.getPosition();
String badString = readString();
if (badString.isEmpty())
{ {
//This is not suppose to happen, but we will allow for it int peek = seqSource.peek();
//so we are more compatible with POS writers that don't // we can end up in an infinite loop otherwise
//follow the spec throw new IOException(
String badString = readString(); "Unknown dir object c='" + c + "' cInt=" + (int) c + " p
if (badString.isEmpty()) eek='" + (char) peek
{ + "' peekInt=" + peek + " at offset " + seqSource.getPos
int peek = seqSource.peek(); ition()
// we can end up in an infinite loop otherwise + " (start offset: " + startOffset + ")");
throw new IOException( "Unknown dir object c='" + c + }
"' cInt=" + (int)c + " peek='" + (char)peek
+ "' peekInt=" + peek + " at offset " + seqSource.ge
tPosition() );
}
// if it's an endstream/endobj, we want to put it back so the ca // if it's an endstream/endobj, we want to put it back so the caller
ller will see it will see it
if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(ba if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badSt
dString)) ring))
{ {
seqSource.unread(badString.getBytes(ISO_8859_1)); seqSource.unread(badString.getBytes(ISO_8859_1));
} }
else else
{ {
LOG.warn("Skipped unexpected dir object = '" + badString + " LOG.warn("Skipped unexpected dir object = '" + badString + "' at
' at offset " offset "
+ seqSource.getPosition()); + seqSource.getPosition() + " (start offset: " + startOf
} fset + ")");
} }
} }
return null; return null;
} }
private COSNumber parseCOSNumber() throws IOException private COSNumber parseCOSNumber() throws IOException
{ {
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();
int ic = seqSource.read(); int ic = seqSource.read();
char c = (char) ic; char c = (char) ic;
 End of changes. 14 change blocks. 
64 lines changed or deleted 59 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)