"Fossies" - the Fresh Open Source Software Archive

Member "recoll-1.26.3/filters/rclkwd" (31 May 2018, 5537 Bytes) of package /linux/privat/recoll-1.26.3.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML using (guessed) Bash source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 #!/bin/sh
    2 # @(#$Id: rclkwd,v 1.2 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
    3 # Parts taken from Estraier:
    4 #================================================================
    5 # Estraier: a personal full-text search system
    6 # Copyright (C) 2003-2004 Mikio Hirabayashi
    7 #================================================================
    8 #================================================================
    9 # Extract text from a kword file
   10 #================================================================
   11 
   12 # set variables
   13 LANG=C ; export LANG
   14 LC_ALL=C ; export LC_ALL
   15 progname="rclkwd"
   16 filetype=kword
   17 
   18 
   19 #RECFILTCOMMONCODE
   20 ##############################################################################
   21 # !! Leave the previous line unmodified!! Code imported from the
   22 # recfiltcommon file
   23 
   24 # Utility code common to all shell filters. This could be sourced at run
   25 # time, but it's slightly more efficient to include the code in the
   26 # filters at build time (with a sed script).
   27 
   28 # Describe error in a way that can be interpreted by our caller
   29 senderror()
   30 {
   31     echo RECFILTERROR $*
   32     # Also alert on stderr just in case
   33     echo ":2:$progname::: $*" 1>&2
   34     exit 1
   35 }
   36 
   37 iscmd()
   38 {
   39     cmd=$1
   40     case $cmd in
   41     */*)
   42     if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
   43     *)
   44       oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
   45       for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
   46       return 1 ;;
   47     esac
   48 }
   49 
   50 checkcmds()
   51 {
   52     for cmd in $*;do
   53       if iscmd $cmd 
   54       then 
   55         a=1
   56       else 
   57         senderror HELPERNOTFOUND $cmd
   58       fi
   59     done
   60 }
   61 
   62 # show help message
   63 if test $# -ne 1 -o "$1" = "--help" 
   64 then
   65   echo "Convert a $filetype file to HTML text for Recoll indexing."
   66   echo "Usage: $progname [infile]"
   67   exit 1
   68 fi
   69 
   70 infile="$1"
   71 
   72 # check the input file existence (may be '-' for stdin)
   73 if test "X$infile" != X- -a ! -f "$infile"
   74 then
   75   senderror INPUTNOSUCHFILE "$infile"
   76 fi
   77 
   78 # protect access to our temp files and directories
   79 umask 77
   80 
   81 ##############################################################################
   82 # !! Leave the following line unmodified !
   83 #ENDRECFILTCOMMONCODE
   84 
   85 checkcmds unzip gunzip tar xsltproc
   86 
   87 # We need a temporary directory
   88 if test z"$RECOLL_TMPDIR" != z; then
   89    ttdir=$RECOLL_TMPDIR
   90 elif test z"$TMPDIR" != z ; then
   91    ttdir=$TMPDIR
   92 else
   93    ttdir=/tmp
   94 fi
   95 tmpdir=$ttdir/rclkwd_tmp$$
   96 mkdir $tmpdir || exit 1
   97 mkdir $tmpdir/rclkwdtmp || exit 1
   98 
   99 cleanup()
  100 {
  101     # Note that we're using a constant part (rclkwdtmp), that hopefully
  102     # guarantees that we can't do big mistakes here.
  103     rm -rf $tmpdir/rclkwdtmp
  104     rmdir $tmpdir
  105 }
  106     
  107 trap cleanup EXIT HUP QUIT INT TERM
  108 
  109 # Old kwd files are gzip/tar archibes. Newer ones are zip archives.
  110 if file "$infile" | grep -qi gzip ; then
  111    # Unzip the input file and change to the unzipped directory
  112    gunzip < "$infile" | (cd $tmpdir/rclkwdtmp;tar xf -)
  113 else
  114    # Unzip the input file and change to the unzipped directory
  115    unzip -q -d $tmpdir/rclkwdtmp "$infile"
  116 fi
  117 cd $tmpdir/rclkwdtmp
  118 
  119 metafile=documentinfo.xml
  120 contentfile=maindoc.xml
  121 
  122 echo '<html><head>
  123 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">'
  124 
  125 if test -f $metafile ; then
  126   xsltproc --nonet --novalid - $metafile <<EOF
  127 <?xml version="1.0"?>
  128 <xsl:stylesheet version="1.0"
  129   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  130   xmlns:kw="http://www.koffice.org/DTD/document-info"
  131   exclude-result-prefixes="kw"
  132   >
  133 
  134 <xsl:output method="html" encoding="UTF-8"/>
  135 
  136 <xsl:template match="/">
  137   <xsl:apply-templates select="kw:document-info|document-info"/>
  138 </xsl:template>
  139 <xsl:template match="/kw:document-info|/document-info">
  140   <xsl:apply-templates select="kw:author|author"/>
  141   <xsl:apply-templates select="kw:about/kw:abstract|abstract"/>
  142   <xsl:apply-templates select="kw:about/kw:title|title"/>
  143   <xsl:apply-templates select="kw:about/kw:keyword|keyword"/>
  144   <xsl:apply-templates select="kw:about/kw:subject|subject"/>
  145 </xsl:template>
  146 
  147 <xsl:template match="kw:author|author">
  148   <meta>
  149     <xsl:attribute name="name">author</xsl:attribute>
  150     <xsl:attribute name="content">
  151     <xsl:value-of select="kw:full-name|full-name"/>
  152     </xsl:attribute>
  153          </meta><xsl:text>
  154     </xsl:text>
  155 </xsl:template>
  156 
  157 <xsl:template match="kw:abstract|abstract">
  158   <meta>
  159     <xsl:attribute name="name">abtract</xsl:attribute>
  160     <xsl:attribute name="content">
  161     <xsl:value-of select="."/>
  162     </xsl:attribute>
  163          </meta><xsl:text>
  164     </xsl:text>
  165 </xsl:template>
  166 
  167 <xsl:template match="kw:keyword|kw:subject|keyword|subject">
  168   <meta>
  169     <xsl:attribute name="name">keywords</xsl:attribute>
  170     <xsl:attribute name="content">
  171     <xsl:value-of select="."/>
  172     </xsl:attribute>
  173          </meta><xsl:text>
  174     </xsl:text>
  175 </xsl:template>
  176 
  177 <xsl:template match="kw:title|title">
  178   <title><xsl:value-of select="."/></title><xsl:text>
  179     </xsl:text>
  180 </xsl:template>
  181 
  182 </xsl:stylesheet>
  183 
  184 EOF
  185 fi
  186 
  187 echo '</head><body>'
  188 xsltproc --nonet --novalid - $contentfile <<EOF
  189 <?xml version="1.0"?>
  190 <xsl:stylesheet version="1.0"
  191   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  192   xmlns:kw="http://www.koffice.org/DTD/kword"
  193   exclude-result-prefixes="kw"
  194   >
  195 
  196 <xsl:output method="html" encoding="UTF-8"/>
  197 
  198 <xsl:template match="/">
  199   <xsl:apply-templates select="//kw:TEXT|//TEXT"/>
  200 </xsl:template>
  201 
  202 <xsl:template match="kw:TEXT|TEXT">
  203   <xsl:if test="normalize-space(.) != ''">
  204     <p><xsl:value-of select="."/></p><xsl:text>
  205     </xsl:text>
  206   </xsl:if>
  207 </xsl:template>
  208 
  209 </xsl:stylesheet>
  210 
  211 EOF
  212 
  213 echo '</body></html>'
  214 
  215 cd /
  216 # exit normally
  217 exit 0