Forráskód Böngészése

Merge pull request #13 from abackstrom/import-sitemap

Import the sitemap plugin from getpelican/pelican@024ecb30
Justin Mayer 12 éve
szülő
commit
5ec7de70e2
2 módosított fájl, 195 hozzáadás és 0 törlés
  1. 0 0
      pelicanext/sitemap/__init__.py
  2. 195 0
      pelicanext/sitemap/sitemap.py

+ 0 - 0
pelicanext/sitemap/__init__.py


+ 195 - 0
pelicanext/sitemap/sitemap.py

@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import collections
+import os.path
+
+from datetime import datetime
+from logging import warning, info
+from codecs import open
+
+from pelican import signals, contents
+
+TXT_HEADER = """{0}/index.html
+{0}/archives.html
+{0}/tags.html
+{0}/categories.html
+"""
+
+XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+"""
+
+XML_URL = """
+<url>
+<loc>{0}/{1}</loc>
+<lastmod>{2}</lastmod>
+<changefreq>{3}</changefreq>
+<priority>{4}</priority>
+</url>
+"""
+
+XML_FOOTER = """
+</urlset>
+"""
+
+
+def format_date(date):
+    if date.tzinfo:
+        tz = date.strftime('%s')
+        tz = tz[:-2] + ':' + tz[-2:]
+    else:
+        tz = "-00:00"
+    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
+
+
+class SitemapGenerator(object):
+
+    def __init__(self, context, settings, path, theme, output_path, *null):
+
+        self.output_path = output_path
+        self.context = context
+        self.now = datetime.now()
+        self.siteurl = settings.get('SITEURL')
+
+        self.format = 'xml'
+
+        self.changefreqs = {
+            'articles': 'monthly',
+            'indexes': 'daily',
+            'pages': 'monthly'
+        }
+
+        self.priorities = {
+            'articles': 0.5,
+            'indexes': 0.5,
+            'pages': 0.5
+        }
+
+        config = settings.get('SITEMAP', {})
+
+        if not isinstance(config, dict):
+            warning("sitemap plugin: the SITEMAP setting must be a dict")
+        else:
+            fmt = config.get('format')
+            pris = config.get('priorities')
+            chfreqs = config.get('changefreqs')
+
+            if fmt not in ('xml', 'txt'):
+                warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
+                warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
+            elif fmt == 'txt':
+                self.format = fmt
+                return
+
+            valid_keys = ('articles', 'indexes', 'pages')
+            valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
+                    'yearly', 'never')
+
+            if isinstance(pris, dict):
+                # We use items for Py3k compat. .iteritems() otherwise
+                for k, v in pris.items():
+                    if k in valid_keys and not isinstance(v, (int, float)):
+                        default = self.priorities[k]
+                        warning("sitemap plugin: priorities must be numbers")
+                        warning("sitemap plugin: setting SITEMAP['priorities']"
+                                "['{0}'] on {1}".format(k, default))
+                        pris[k] = default
+                self.priorities.update(pris)
+            elif pris is not None:
+                warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
+                warning("sitemap plugin: using the default values")
+
+            if isinstance(chfreqs, dict):
+                # .items() for py3k compat.
+                for k, v in chfreqs.items():
+                    if k in valid_keys and v not in valid_chfreqs:
+                        default = self.changefreqs[k]
+                        warning("sitemap plugin: invalid changefreq `{0}'".format(v))
+                        warning("sitemap plugin: setting SITEMAP['changefreqs']"
+                                "['{0}'] on '{1}'".format(k, default))
+                        chfreqs[k] = default
+                self.changefreqs.update(chfreqs)
+            elif chfreqs is not None:
+                warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
+                warning("sitemap plugin: using the default values")
+
+
+
+    def write_url(self, page, fd):
+
+        if getattr(page, 'status', 'published') != 'published':
+            return
+
+        page_path = os.path.join(self.output_path, page.url)
+        if not os.path.exists(page_path):
+            return
+
+        lastmod = format_date(getattr(page, 'date', self.now))
+
+        if isinstance(page, contents.Article):
+            pri = self.priorities['articles']
+            chfreq = self.changefreqs['articles']
+        elif isinstance(page, contents.Page):
+            pri = self.priorities['pages']
+            chfreq = self.changefreqs['pages']
+        else:
+            pri = self.priorities['indexes']
+            chfreq = self.changefreqs['indexes']
+
+
+        if self.format == 'xml':
+            fd.write(XML_URL.format(self.siteurl, page.url, lastmod, chfreq, pri))
+        else:
+            fd.write(self.siteurl + '/' + loc + '\n')
+
+
+    def generate_output(self, writer):
+        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))
+
+        pages = self.context['pages'] + self.context['articles'] \
+                + [ c for (c, a) in self.context['categories']] \
+                + [ t for (t, a) in self.context['tags']] \
+                + [ a for (a, b) in self.context['authors']]
+
+        for article in self.context['articles']:
+            pages += article.translations
+
+        info('writing {0}'.format(path))
+
+        with open(path, 'w', encoding='utf-8') as fd:
+
+            if self.format == 'xml':
+                fd.write(XML_HEADER)
+            else:
+                fd.write(TXT_HEADER.format(self.siteurl))
+
+            FakePage = collections.namedtuple('FakePage',
+                                              ['status',
+                                               'date',
+                                               'url'])
+
+            for standard_page_url in ['index.html',
+                                      'archives.html',
+                                      'tags.html',
+                                      'categories.html']:
+                fake = FakePage(status='published',
+                                date=self.now,
+                                url=standard_page_url)
+                self.write_url(fake, fd)
+
+            for page in pages:
+                self.write_url(page, fd)
+
+            if self.format == 'xml':
+                fd.write(XML_FOOTER)
+
+
+def get_generators(generators):
+    return SitemapGenerator
+
+
+def register():
+    signals.get_generators.connect(get_generators)