6 years ago · b39fe8231e
--- a/better_figures_and_images/better_figures_and_images.py
+++ b/better_figures_and_images/better_figures_and_images.py
@@ -22,6 +22,7 @@ from pelican import signals
 
				 from bs4 import BeautifulSoup
			
 
				 from PIL import Image
			
 
				 import pysvg.parser
			
 
				+import cssutils
			
 
				 
			
 
				 import logging
			
 
				 logger = logging.getLogger(__name__)
			
@@ -43,24 +44,37 @@ def content_object_init(instance):
 
				             logger.debug('Better Fig. img_path: %s', img_path)
			
 
				             logger.debug('Better Fig. img_fname: %s', img_filename)
			
 
				 
			
 
				+            # If the image already has attributes... then we can skip it. Assuming it's already optimised
			
 
				+            if 'style' in img.attrs:
			
 
				+                sheet = cssutils.parseStyle(img['style'])
			
 
				+                if len(sheet.width) > 0 or len(sheet.height) > 0:
			
 
				+                    continue
			
 
				+
			
 
				             # Pelican 3.5+ supports {attach} macro for auto copy, in this use case the content does not exist in output
			
 
				             # due to the fact it has not been copied, hence we take it from the source (same as current document)
			
 
				             if img_filename.startswith('{attach}'):
			
 
				                 img_path = os.path.dirname(instance.source_path)
			
 
				                 img_filename = img_filename[8:]
			
 
				                 src = os.path.join(img_path, img_filename)
			
 
				-            else:
			
 
				+            elif img_path.startswith(('{filename}', '|filename|')):
			
 
				                 # Strip off {filename}, |filename| or /static
			
 
				-                if img_path.startswith(('{filename}', '|filename|')):
			
 
				-                    img_path = img_path[10:]
			
 
				-                elif img_path.startswith('/static'):
			
 
				-                    img_path = img_path[7:]
			
 
				-                elif img_path.startswith('data:image'):
			
 
				-                    # Image is encoded in-line (not a file).
			
 
				-                    continue
			
 
				+                img_path = img_path[10:]
			
 
				+            elif img_path.startswith('/static'):
			
 
				+                img_path = img_path[7:]
			
 
				+            elif img_path.startswith('data:image'):
			
 
				+                # Image is encoded in-line (not a file).
			
 
				+                continue
			
 
				+            else:
			
 
				+                # Check the location in the output as some plugins create them there.
			
 
				+                output_path = path.dirname(instance.save_as)
			
 
				+                image_output_location = path.join(instance.settings['OUTPUT_PATH'], output_path, img_filename)
			
 
				+                if path.isfile(image_output_location):
			
 
				+                    src = image_output_location
			
 
				+                    logger.info('{src} located in output, missing from content.'.format(src=img_filename))
			
 
				                 else:
			
 
				-                    logger.warning('Better Fig. Error: img_path should start with either {filename}, |filename| or /static')
			
 
				+                    logger.warning('Better Fig. Error: img_path should start with either {attach}, {filename}, |filename| or /static')
			
 
				 
			
 
				+            if src is None:
			
 
				                 # search src path list
			
 
				                 # 1. Build the source image filename from PATH
			
 
				                 # 2. Build the source image filename from STATIC_PATHS
			
--- a/jpeg_reader/.gitignore
+++ b/jpeg_reader/.gitignore
@@ -0,0 +1 @@
 
				+.cache
			
--- a/jpeg_reader/README.md
+++ b/jpeg_reader/README.md
@@ -0,0 +1,45 @@
 
				+### Pelican JPEG Reader.
			
 
				+
			
 
				+Original author: [Mitchell Currie](https://github.com/mitchins)
			
 
				+
			
 
				+##### Requirements:
			
 
				+
			
 
				+* Python3
			
 
				+* Pelican
			
 
				+* Pillow library (PIL for python3)
			
 
				+* Exiv2 binary accessible by $PATH
			
 
				+
			
 
				+To avoid undesired creation of content, the file extension must be `jpeg_article`, e.g. `myPhoto.jpeg_article`. The file itself is a regular JPEG image; the special extension only keeps your other JPEG images from being picked up. It works for both pages and blog articles: which one is produced depends on whether the file lives under `content/blog` or `content/pages` (or whatever you use for content).
			
 
				+
			
 
				+#### Most relevant EXIF/IPTC flags from Exiv2 that are used
			
 
				+
			
 
				+|  Page/Article Field | Exiv2 Key  |  Description |
			
 
				+|---|---|---|
			
 
				+| title  | `Exif.Image.ImageDescription`  |  Defaults to 'Untitled' |
			
 
				+| author  | `Exif.Image.Artist`  |  Defaults to 'Unknown'. Currently a scalar (single author) |
			
 
				+| date  |  `Exif.Photo.DateTimeOriginal` |  Undefined behaviour if not present as required |
			
 
				+|  slug |  `Iptc.Application2.Headline` |  Defaults to title's value |
			
 
				+|  body |  `Exif.Photo.UserComment` |  This goes under image in page/article, blank default |
			
 
				+|  summary |  `Iptc.Application2.Caption` |  Used for article index, defaults to first 140 characters of the body |
			
 
				+|  category |  `Iptc.Application2.SuppCategory` |  Specifies the category of page/article if `USE_FOLDER_AS_CATEGORY` not set  |
			
 
				+|  template |  `Iptc.Application2.ObjectName` |  If specified will set the template metadata property to tell pelican where to look  |
			
 
				+|  tags |  `Iptc.Application2.Keywords` |  For each entry found with this key, a tag is created with the value of the entry |
			
 
				+|  `metadata['exiv2']` | ***Everything***|  All exiv2 fields from the image are shoved into the metadata dictionary of the item, under `exiv2` key for template usage |
			
 
				+
			
 
				+
			
 
				+
			
 
				+#### Pelican Settings Added or Honoured:
			
 
				+
			
 
				+|  Key in pelicanconf.py |  Description |
			
 
				+|---|---|
			
 
				+| `PATH`  |  **Content Path** |
			
 
				+| `OUTPUT_PATH` |  **Output Path** |
			
 
				+| `USE_FOLDER_AS_CATEGORY` | **Category from folder name** If enabled, takes the category from the name of the folder the file is in. Otherwise the category will attempt to be read from `Iptc.Application2.SuppCategory` |
			
 
				+|  `SITEURL` | **Site Url** The optional absolute Url for the site, defaults to '' usually. |
			
 
				+|  `PAGE_URL` | **Page Url** The format string to specify where page html files are saved to |
			
 
				+|  `PAGE_SAVE_AS` | **Page Save Path** The format string to specify where page html files are physically written to disk |
			
 
				+|  `ARTICLE_URL` | **Article Url** The format string to specify where article html files are saved to |
			
 
				+|  `ARTICLE_SAVE_AS` | **Article Save Path** The format string to specify where article html files are physically written to disk |
			
 
				+
			
 
				+
			
 
				+
			
--- a/jpeg_reader/__init__.py
+++ b/jpeg_reader/__init__.py
@@ -0,0 +1 @@
 
				+from .jpeg_reader import *
			
--- a/jpeg_reader/constants.py
+++ b/jpeg_reader/constants.py
@@ -0,0 +1,42 @@
 
				+from enum import Enum
			
 
				+
			
 
				+
			
 
				+class Exiv(Enum):
			
 
				+    DESCRIPTION = 'Exif.Image.ImageDescription'
			
 
				+    ARTIST = 'Exif.Image.Artist'
			
 
				+    DATETIME = 'Exif.Photo.DateTimeOriginal'
			
 
				+    HEADLINE = 'Iptc.Application2.Headline'
			
 
				+    COMMENT = 'Exif.Photo.UserComment'
			
 
				+    CAPTION = 'Iptc.Application2.Caption'
			
 
				+    KEYWORDS = 'Iptc.Application2.Keywords'
			
 
				+    CATEGORY = 'Iptc.Application2.SuppCategory'
			
 
				+    OBJECT_NAME = 'Iptc.Application2.ObjectName'
			
 
				+
			
 
				+
			
 
				+class PelicanConfig(Enum):
			
 
				+    PATH = 'PATH'
			
 
				+    OUTPUT_PATH = 'OUTPUT_PATH'
			
 
				+    USE_FOLDER_AS_CATEGORY = 'USE_FOLDER_AS_CATEGORY'
			
 
				+    SITE_URL = 'SITEURL'
			
 
				+    PAGE_URL = 'PAGE_URL'
			
 
				+    PAGE_SAVE_AS = 'PAGE_SAVE_AS'
			
 
				+    ARTICLE_URL = 'ARTICLE_URL'
			
 
				+    ARTICLE_SAVE_AS = 'ARTICLE_SAVE_AS'
			
 
				+
			
 
				+
			
 
				+class PelicanMetadata(Enum):
			
 
				+    TITLE = 'title'
			
 
				+    AUTHORS = 'authors'
			
 
				+    DATE = 'date'
			
 
				+    SLUG = 'slug'
			
 
				+    TAGS = 'tags'
			
 
				+    CATEGORY = 'category'
			
 
				+    SUMMARY = 'summary'
			
 
				+    FEATURED_IMAGE = 'featured_image'  # Acts as a thumbnail
			
 
				+    TEMPLATE = 'template'
			
 
				+    CUSTOM_ALL = 'exiv2'  # Not officially part of metadata, but we add it ourselves
			
 
				+
			
 
				+
			
 
				+class PelicanClass(Enum):
			
 
				+    BLOG = 'blog'
			
 
				+    PAGES = 'pages'
			
--- a/jpeg_reader/exiv2_parser.py
+++ b/jpeg_reader/exiv2_parser.py
@@ -0,0 +1,61 @@
 
import re
import subprocess
from typing import List, Optional, Tuple

from . import util
			
 
				+
			
 
				+
			
 
				+class Keyword:
			
 
				+    def __init__(self, *, keyword:str, kind: str, count: int):
			
 
				+        self.keyword = keyword
			
 
				+        self.kind = kind
			
 
				+        self.count = count
			
 
				+
			
 
				+
			
 
				+class Exiv2Parser:
			
 
				+    @classmethod
			
 
				+    def get_exiv2_version(cls) -> Tuple[str, str]:
			
 
				+        commands = ['exiv2', '--version']
			
 
				+        process = subprocess.Popen(commands, stdout=subprocess.PIPE)
			
 
				+        output = util.to_str(process.communicate()[0])
			
 
				+        match = re.search(r'exiv2 ([\d.]+) (\w+)', output)
			
 
				+        if match is not None:
			
 
				+            return match.groups()
			
 
				+        return None
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_values(cls, file_path: str) -> dict:
			
 
				+        keywords = cls.__get_keys(file_path)
			
 
				+        result = dict()
			
 
				+        for key in keywords:
			
 
				+            commands = ['exiv2', '-K', key.keyword, '-P', 't', 'print', file_path]
			
 
				+            process = subprocess.Popen(commands, stdout=subprocess.PIPE)
			
 
				+            output = util.to_str(process.communicate()[0]).rstrip('\n')
			
 
				+            # Check if the key is a list or scalar
			
 
				+            if key.count > 1:
			
 
				+                result[key.keyword] = output.split('\n')  # Assume the output is like keywords, one per line
			
 
				+            else:
			
 
				+                result[key.keyword] = output  # Assume the whole input is the value
			
 
				+        return result
			
 
				+
			
 
				+    @classmethod
			
 
				+    def __get_keys(cls, file_path: str) -> List[Keyword]:
			
 
				+        found_keywords = dict()
			
 
				+        commands = ['exiv2', '-P', 'ky', 'print', file_path]
			
 
				+        process = subprocess.Popen(commands, stdout=subprocess.PIPE)
			
 
				+        output = util.to_str(process.communicate()[0])
			
 
				+        for match in re.finditer(r'([\w.]+)\W+(\w+)\W*\n?', output):
			
 
				+            code, kind = match.groups()
			
 
				+            keyword = found_keywords.get(code, Keyword(keyword=code, kind=kind, count=0))
			
 
				+            keyword.count += 1
			
 
				+            found_keywords[code] = keyword
			
 
				+
			
 
				+        return list(found_keywords.values())
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    #data = Exiv2Parser.get_values('content/blog/terms2.jpeg')
			
 
				+    #print(data)
			
 
				+    version_info = Exiv2Parser.get_exiv2_version()
			
 
				+    print(version_info)
			
 
				+
			
 
				+
			
--- a/jpeg_reader/jpeg_reader.py
+++ b/jpeg_reader/jpeg_reader.py
@@ -0,0 +1,128 @@
 
				+"""
			
 
				+This plugin uses the metadata from JPEG images (EXIF and IPTC) to construct a meaningful page or gallery.
			
 
				+Possible uses are gallery pages or a blog article that's mainly about an image.
			
 
				+With this tool, it's possible to just dump an image without any extra data/linkage to create coherent output.
			
 
				+The note here is that the extension is `jpeg_article` so it doesn't pick up {attach} or other static resources.
			
 
				+"""
			
 
				+
			
 
				+import logging
			
 
				+from datetime import datetime
			
 
				+from os import makedirs, sep
			
 
				+from os.path import join, dirname, isdir, splitext
			
 
				+from typing import Tuple
			
 
				+
			
 
				+from PIL import Image
			
 
				+from pelican import signals
			
 
				+from pelican.readers import BaseReader
			
 
				+from pelican.urlwrappers import URLWrapper, Category, Author, Tag
			
 
				+
			
 
				+from .constants import Exiv, PelicanConfig, PelicanMetadata, PelicanClass
			
 
				+from .exiv2_parser import Exiv2Parser
			
 
				+
			
 
				+
			
 
				+class JpegReader(BaseReader):
			
 
				+    logger = logging.getLogger('JpegReader')
			
 
				+    enabled = True
			
 
				+    file_extensions = ('jpeg_article')
			
 
				+    thumb_size = 250, 250
			
 
				+
			
 
				+    def __init__(self, settings):
			
 
				+        super(JpegReader, self).__init__(settings)
			
 
				+
			
 
				+    def read(self, source_path):
			
 
				+        try:
			
 
				+            if Exiv2Parser.get_exiv2_version() is not None:
			
 
				+                content, metadata = self.parse_jpeg(source_path=source_path)
			
 
				+
			
 
				+        except ValueError:      # if file can't be parsed, ignore it
			
 
				+            pass
			
 
				+        else:
			
 
				+            return content, metadata
			
 
				+
			
 
				+    def parse_jpeg(self, *, source_path: str) -> Tuple[str, dict]:
			
 
				+        JpegReader.logger.info(source_path)
			
 
				+
			
 
				+        img = Image.open(source_path)
			
 
				+
			
 
				+        image_data = Exiv2Parser.get_values(source_path)
			
 
				+
			
 
				+        title = image_data.get(Exiv.DESCRIPTION.value, 'Untitled')
			
 
				+        author = image_data.get(Exiv.ARTIST.value, 'Unknown')
			
 
				+        date_string = image_data.get(Exiv.DATETIME.value, '')
			
 
				+
			
 
				+        date = datetime.strptime(date_string, "%Y:%m:%d %H:%M:%S")
			
 
				+        slug = URLWrapper(image_data.get(Exiv.HEADLINE.value, title), self.settings).slug
			
 
				+        description_long = image_data.get(Exiv.COMMENT.value, '')
			
 
				+        summary = image_data.get(Exiv.CAPTION.value, description_long[:140])
			
 
				+
			
 
				+        tags = [Tag(tag, self.settings) for tag in image_data.get(Exiv.KEYWORDS.value, list())]
			
 
				+
			
 
				+        content_root = self.settings[PelicanConfig.PATH.value]
			
 
				+        path_output = self.settings[PelicanConfig.OUTPUT_PATH.value]
			
 
				+        relative_source = dirname(source_path[len(content_root):]).lstrip(sep)
			
 
				+        if self.settings[PelicanConfig.USE_FOLDER_AS_CATEGORY.value]:
			
 
				+            category = relative_source.split(sep)[-1]
			
 
				+        else:
			
 
				+            category = image_data.get(Exiv.CATEGORY.value, None)
			
 
				+
			
 
				+        type_of_content = relative_source.split(sep)[0]  # either 'blog' or 'pages' as far as I know.
			
 
				+        url_site = self.settings[PelicanConfig.SITE_URL.value]
			
 
				+
			
 
				+        if type_of_content.lower() == PelicanClass.PAGES.value:
			
 
				+            url_document = self.settings[PelicanConfig.PAGE_URL.value]
			
 
				+            document_save_as = self.settings[PelicanConfig.PAGE_SAVE_AS.value]
			
 
				+        else:  # Assume PelicanClass.BLOG
			
 
				+            url_document = self.settings[PelicanConfig.ARTICLE_URL.value]
			
 
				+            document_save_as = self.settings[PelicanConfig.ARTICLE_SAVE_AS.value]
			
 
				+
			
 
				+        page_url_complete = join(url_site, url_document)
			
 
				+
			
 
				+        author_wrapper = Author(author, self.settings)
			
 
				+
			
 
				+        # Move image in place:
			
 
				+        metadata = {PelicanMetadata.TITLE.value: title, PelicanMetadata.AUTHORS.value: [author_wrapper],
			
 
				+                    PelicanMetadata.DATE.value: date, PelicanMetadata.SLUG.value: slug,
			
 
				+                    PelicanMetadata.TAGS.value: tags,
			
 
				+                    PelicanMetadata.CUSTOM_ALL.value: image_data}
			
 
				+        if category is not None:
			
 
				+            metadata[PelicanMetadata.CATEGORY.value] = Category(category, self.settings)
			
 
				+
			
 
				+        thumb_name = '{0}_thumb.jpg'.format(slug)
			
 
				+        original_name = '{0}.jpg'.format(slug)
			
 
				+
			
 
				+        path_output_html = join(path_output, document_save_as).format(**metadata)
			
 
				+        path_output_dir = dirname(path_output_html)
			
 
				+        path_output_original = join(path_output_dir, original_name)
			
 
				+        path_output_thumb = join(path_output_dir, thumb_name)
			
 
				+
			
 
				+        # Here we generate the summary info incase this is used for articles we get nice thumbnails and summary
			
 
				+        metadata[PelicanMetadata.SUMMARY.value] = summary
			
 
				+        metadata[PelicanMetadata.FEATURED_IMAGE.value] = join(url_site, path_output_thumb[len(path_output):])
			
 
				+        if Exiv.OBJECT_NAME.value in image_data:
			
 
				+            metadata[PelicanMetadata.TEMPLATE.value] = image_data[Exiv.OBJECT_NAME.value]
			
 
				+
			
 
				+        # Write the size/HTML out before we reduce the image to a thumb
			
 
				+        content = "<img src='{src}' alt='{alt}' style='width: {width}px; height: auto; max-width: 100%;'></img><p>{body}</p>" \
			
 
				+            .format(src=original_name, alt=title, width=img.width, height=img.height, body=description_long)
			
 
				+
			
 
				+        # Ensure the directory levels exist
			
 
				+        if not isdir(path_output_dir):
			
 
				+            makedirs(path_output_dir)
			
 
				+        img.save(path_output_original)
			
 
				+        img.thumbnail(self.thumb_size)
			
 
				+        img.save(path_output_thumb)
			
 
				+
			
 
				+        # Debug info if we need it
			
 
				+        JpegReader.logger.debug(content)
			
 
				+        JpegReader.logger.debug(str(metadata))
			
 
				+        JpegReader.logger.debug(path_output_html)
			
 
				+
			
 
				+        return content, metadata
			
 
				+
			
 
				+
			
 
				+def add_reader(readers):
			
 
				+    readers.reader_classes['jpeg_article'] = JpegReader
			
 
				+
			
 
				+
			
 
				+def register():
			
 
				+    signals.readers_init.connect(add_reader)
			
--- a/jpeg_reader/test_exiv2_parser.py
+++ b/jpeg_reader/test_exiv2_parser.py
@@ -0,0 +1,38 @@
 
				+import subprocess
			
 
				+
			
 
				+from .exiv2_parser import Exiv2Parser
			
 
				+
			
 
				+
			
 
				+class MockPopen(object):
			
 
				+    """Mock Popen method"""
			
 
				+    def __init__(self, cmd, *, stdout):
			
 
				+        pass
			
 
				+
			
 
				+    def communicate(self):
			
 
				+        """Mock communicate method of Popen"""
			
 
				+        return b'bash: command not found: exiv2', b''
			
 
				+
			
 
				+
			
 
				+class MockPopenSuccess(MockPopen):
			
 
				+    def __init__(self, cmd, *, stdout):
			
 
				+        MockPopen.__init__(self, cmd, stdout=stdout)
			
 
				+
			
 
				+    def communicate(self):
			
 
				+        """Mock communicate method of Popen"""
			
 
				+        return b'exiv2 0.26 001a00 (64 bit build)', b''
			
 
				+
			
 
				+
			
 
				+def test_get_version_fail(mocker, monkeypatch):
			
 
				+    monkeypatch.setattr(subprocess, 'Popen', MockPopen)
			
 
				+    version_info = Exiv2Parser.get_exiv2_version()
			
 
				+    assert version_info is None
			
 
				+
			
 
				+
			
 
				+def test_get_version_success(mocker, monkeypatch):
			
 
				+    monkeypatch.setattr(subprocess, 'Popen', MockPopenSuccess)
			
 
				+    version, commit = Exiv2Parser.get_exiv2_version()
			
 
				+    assert version == '0.26'
			
 
				+    assert commit == '001a00'
			
 
				+
			
 
				+
			
 
				+
			
--- a/jpeg_reader/util.py
+++ b/jpeg_reader/util.py
@@ -0,0 +1,14 @@
 
				+def to_str(bytes_or_str):
			
 
				+    if isinstance(bytes_or_str, bytes):
			
 
				+        value = bytes_or_str.decode('utf-8')
			
 
				+    else:
			
 
				+        value = bytes_or_str
			
 
				+    return value  # Instance of Str
			
 
				+
			
 
				+
			
 
				+def to_bytes(bytes_or_str):
			
 
				+    if isinstance(bytes_or_str, str):
			
 
				+        value = bytes_or_str.encode('utf-8')
			
 
				+    else:
			
 
				+        value = bytes_or_str
			
 
				+    return value  # Instance of Bytes