PDFTextStripper.java (pdfbox-2.0.23-src) | : | PDFTextStripper.java (pdfbox-2.0.24-src) | ||
---|---|---|---|---|
skipping to change at line 1962 | skipping to change at line 1962 | |||
// confusion. For example, it converts the micro symbol in | // confusion. For example, it converts the micro symbol in | |||
// extended Latin to the value in the Greek script. We normalize | // extended Latin to the value in the Greek script. We normalize | |||
// the Unicode Alphabetic and Arabic A&B Presentation forms. | // the Unicode Alphabetic and Arabic A&B Presentation forms. | |||
char c = word.charAt(q); | char c = word.charAt(q); | |||
if (0xFB00 <= c && c <= 0xFDFF || 0xFE70 <= c && c <= 0xFEFF) | if (0xFB00 <= c && c <= 0xFDFF || 0xFE70 <= c && c <= 0xFEFF) | |||
{ | { | |||
if (builder == null) | if (builder == null) | |||
{ | { | |||
builder = new StringBuilder(strLength * 2); | builder = new StringBuilder(strLength * 2); | |||
} | } | |||
builder.append(word.substring(p, q)); | builder.append(word, p, q); | |||
// Some fonts map U+FDF2 differently than the Unicode spec. | // Some fonts map U+FDF2 differently than the Unicode spec. | |||
// They add an extra U+0627 character to compensate. | // They add an extra U+0627 character to compensate. | |||
// This removes the extra character for those fonts. | // This removes the extra character for those fonts. | |||
if (c == 0xFDF2 && q > 0 | if (c == 0xFDF2 && q > 0 | |||
&& (word.charAt(q - 1) == 0x0627 || word.charAt(q - 1) == 0xFE8D)) | && (word.charAt(q - 1) == 0x0627 || word.charAt(q - 1) == 0xFE8D)) | |||
{ | { | |||
builder.append("\u0644\u0644\u0647"); | builder.append("\u0644\u0644\u0647"); | |||
} | } | |||
else | else | |||
{ | { | |||
skipping to change at line 1986 | skipping to change at line 1986 | |||
} | } | |||
p = q + 1; | p = q + 1; | |||
} | } | |||
} | } | |||
if (builder == null) | if (builder == null) | |||
{ | { | |||
return handleDirection(word); | return handleDirection(word); | |||
} | } | |||
else | else | |||
{ | { | |||
builder.append(word.substring(p, q)); | builder.append(word, p, q); | |||
return handleDirection(builder.toString()); | return handleDirection(builder.toString()); | |||
} | } | |||
} | } | |||
/** | /** | |||
* Used within {@link #normalize(List)} to handle a {@link TextPosition}. | * Used within {@link #normalize(List)} to handle a {@link TextPosition}. | |||
* | * | |||
* @return The StringBuilder that must be used when calling this method. | * @return The StringBuilder that must be used when calling this method. | |||
*/ | */ | |||
private StringBuilder normalizeAdd(List<WordWithTextPositions> normalized, | private StringBuilder normalizeAdd(List<WordWithTextPositions> normalized, | |||
End of changes. 2 change blocks. | ||||
2 lines changed or deleted | 2 lines changed or added |