"Fossies" - the Fresh Open Source Software Archive

Member "recoll-1.26.3/filters/rclimg.py" (4 Sep 2019, 3063 Bytes) of package /linux/privat/recoll-1.26.3.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "rclimg.py", see the Fossies "Dox" file reference documentation.

    1 #!/usr/bin/env python3
    2 
    3 # Python-based Image Tag extractor for Recoll. This is less thorough
    4 # than the Perl-based rclimg script, but useful if you don't want to
    5 # have to install Perl (e.g. on Windows).
    6 #
    7 # Uses pyexiv2. Also tried Pillow, found it useless for tags.
    8 #
    9 from __future__ import print_function
   10 
   11 import sys
   12 import os
   13 import rclexecm
   14 import re
   15 from rclbasehandler import RclBaseHandler
   16 
   17 try:
   18     import pyexiv2
   19 except:
   20     print("RECFILTERROR HELPERNOTFOUND python3:pyexiv2")
   21     sys.exit(1);
   22 
   23 khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$')
   24 
   25 pyexiv2_titles = {
   26     'Xmp.dc.subject',
   27     'Xmp.lr.hierarchicalSubject',
   28     'Xmp.MicrosoftPhoto.LastKeywordXMP',
   29     }
   30 
   31 # Keys for which we set meta tags
   32 meta_pyexiv2_keys = {
   33     'Xmp.dc.subject',
   34     'Xmp.lr.hierarchicalSubject',
   35     'Xmp.MicrosoftPhoto.LastKeywordXMP',
   36     'Xmp.digiKam.TagsList',
   37     'Exif.Photo.DateTimeDigitized',
   38     'Exif.Photo.DateTimeOriginal',
   39     'Exif.Image.DateTime',
   40     }
   41 
   42 exiv2_dates = ['Exif.Photo.DateTimeOriginal',
   43                'Exif.Image.DateTime', 'Exif.Photo.DateTimeDigitized']
   44 
   45 class ImgTagExtractor(RclBaseHandler):
   46     def __init__(self, em):
   47         super(ImgTagExtractor, self).__init__(em)
   48 
   49     def html_text(self, filename):
   50         ok = False
   51 
   52         metadata = pyexiv2.ImageMetadata(filename)
   53         metadata.read()
   54         keys = metadata.exif_keys + metadata.iptc_keys + metadata.xmp_keys
   55         mdic = {}
   56         for k in keys:
   57             # we skip numeric keys and undecoded makernote data
   58             if k != 'Exif.Photo.MakerNote' and not khexre.match(k):
   59                 mdic[k] = str(metadata[k].raw_value)
   60 
   61         docdata = b'<html><head>\n'
   62 
   63         ttdata = set()
   64         for k in pyexiv2_titles:
   65             if k in mdic:
   66                 ttdata.add(self.em.htmlescape(mdic[k]))
   67         if ttdata:
   68             title = ""
   69             for v in ttdata:
   70                 v = v.replace('[', '').replace(']', '').replace("'", "")
   71                 title += v + " "
   72             docdata += rclexecm.makebytes("<title>" + title + "</title>\n")
   73 
   74         for k in exiv2_dates:
   75             if k in mdic:
   76                 # Recoll wants: %Y-%m-%d %H:%M:%S.
   77                 # We get 2014:06:27 14:58:47
   78                 dt = mdic[k].replace(":", "-", 2)
   79                 docdata += b'<meta name="date" content="' + \
   80                            rclexecm.makebytes(dt) + b'">\n'
   81                 break
   82 
   83         for k,v in mdic.items():
   84             if k ==  'Xmp.digiKam.TagsList':
   85                 docdata += b'<meta name="keywords" content="' + \
   86                            rclexecm.makebytes(self.em.htmlescape(mdic[k])) + \
   87                            b'">\n'
   88 
   89         docdata += b'</head><body>\n'
   90         for k,v in mdic.items():
   91             docdata += rclexecm.makebytes(k + " : " + \
   92                                      self.em.htmlescape(mdic[k]) + "<br />\n")
   93         docdata += b'</body></html>'
   94 
   95         return docdata
   96 
   97 
   98 if __name__ == '__main__':
   99     proto = rclexecm.RclExecM()
  100     extract = ImgTagExtractor(proto)
  101     rclexecm.main(proto, extract)