"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/main/java/com/openkm/extractor/OpenOfficeTextExtractor.java" between
OpenKM-document-management-system-6.3.10.tar.gz and OpenKM-document-management-system-6.3.11.tar.gz

About: OpenKM (Knowledge Management) is a document management system that allows easy management of documents, users, roles and finding your enterprise documents and records. Community version (source code).

OpenOfficeTextExtractor.java  (OpenKM-document-management-system-6.3.10):OpenOfficeTextExtractor.java  (OpenKM-document-management-system-6.3.11)
skipping to change at line 24 skipping to change at line 24
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public License for more details.
* <p> * <p>
* You should have received a copy of the GNU General Public License along * You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc., * with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
package com.openkm.extractor; package com.openkm.extractor;
import net.xeoh.plugins.base.annotations.PluginImplementation;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes; import org.xml.sax.Attributes;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.xml.sax.XMLReader; import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.SAXParserFactory;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream; import java.util.zip.ZipInputStream;
/** /**
* Text extractor for OpenOffice documents. * Text extractor for OpenOffice documents.
*/ */
@PluginImplementation
public class OpenOfficeTextExtractor extends AbstractTextExtractor { public class OpenOfficeTextExtractor extends AbstractTextExtractor {
/** /**
* Logger instance. * Logger instance.
*/ */
private static final Logger logger = LoggerFactory.getLogger(OpenOfficeTextExtractor.class); private static final Logger log = LoggerFactory.getLogger(OpenOfficeTextExtractor.class);
/** /**
* Creates a new <code>OpenOfficeTextExtractor</code> instance. * Creates a new <code>OpenOfficeTextExtractor</code> instance.
*/ */
public OpenOfficeTextExtractor() { public OpenOfficeTextExtractor() {
super(new String[]{"application/vnd.oasis.opendocument.database", "application/vnd.oasis.opendocument.formula", super(new String[]{"application/vnd.oasis.opendocument.database", "application/vnd.oasis.opendocument.formula",
"application/vnd.oasis.opendocument.graphics", "application/vnd.oasis.opendocument.presentation", "application/vnd.oasis.opendocument.graphics", "application/vnd.oasis.opendocument.presentation",
"application/vnd.oasis.opendocument.spreadsheet", "application/vnd.oasis.opendocument.text", "application/vnd.oasis.opendocument.spreadsheet", "application/vnd.oasis.opendocument.text",
"application/vnd.sun.xml.calc", "application/vnd.sun.xml.draw", "application/vnd.sun.xml.impress", "application/vnd.sun.xml.calc", "application/vnd.sun.xml.draw", "application/vnd.sun.xml.impress",
"application/vnd.sun.xml.writer"}); "application/vnd.sun.xml.writer"});
skipping to change at line 72 skipping to change at line 74
/** /**
* {@inheritDoc} * {@inheritDoc}
*/ */
public String extractText(InputStream stream, String type, String encoding) throws IOException { public String extractText(InputStream stream, String type, String encoding) throws IOException {
try { try {
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
saxParserFactory.setValidating(false); saxParserFactory.setValidating(false);
SAXParser saxParser = saxParserFactory.newSAXParser(); SAXParser saxParser = saxParserFactory.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader(); XMLReader xmlReader = saxParser.getXMLReader();
xmlReader.setFeature("http://xml.org/sax/features/validation", false);
xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
xmlReader.setFeature("http://xml.org/sax/features/validation", false);
ZipInputStream zis = new ZipInputStream(stream); ZipInputStream zis = new ZipInputStream(stream);
ZipEntry ze = zis.getNextEntry(); ZipEntry ze = zis.getNextEntry();
while (ze != null && !ze.getName().equals("content.xml")) { while (ze != null && !ze.getName().equals("content.xml")) {
ze = zis.getNextEntry(); ze = zis.getNextEntry();
} }
OpenOfficeContentHandler contentHandler = new OpenOfficeContentHandler(); OpenOfficeContentHandler contentHandler = new OpenOfficeContentHandler();
xmlReader.setContentHandler(contentHandler); xmlReader.setContentHandler(contentHandler);
try { try {
xmlReader.parse(new InputSource(zis)); xmlReader.parse(new InputSource(zis));
} finally { } finally {
zis.close(); zis.close();
} }
return contentHandler.getContent(); return contentHandler.getContent();
} catch (ParserConfigurationException | SAXException e) { } catch (ParserConfigurationException | SAXException e) {
logger.warn("Failed to extract OpenOffice text content", e); log.warn("Failed to extract OpenOffice text content", e);
throw new IOException(e.getMessage(), e); throw new IOException(e.getMessage(), e);
} finally { } finally {
stream.close(); stream.close();
} }
} }
// --------------------------------------------< OpenOfficeContentHandler > // --------------------------------------------< OpenOfficeContentHandler >
private static class OpenOfficeContentHandler extends DefaultHandler { private static class OpenOfficeContentHandler extends DefaultHandler {
private StringBuffer content; private StringBuffer content;
 End of changes. 6 change blocks. 
3 lines changed or deleted 10 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)