pdf.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # -*- coding: utf-8 -*-
  2. '''
  3. PDF Generator
  4. -------
  5. The pdf plugin generates PDF files from reStructuredText and Markdown sources.
  6. '''
  7. from __future__ import unicode_literals, print_function
  8. from io import open
  9. from pelican import signals
  10. from pelican.generators import Generator
  11. from pelican.readers import MarkdownReader
  12. import os
  13. import logging
  14. logger = logging.getLogger(__name__)
  15. import xhtml2pdf.util
  16. if 'pyPdf' not in dir(xhtml2pdf.util):
  17. try:
  18. from xhtml2pdf.util import PyPDF2
  19. xhtml2pdf.util.pyPdf = PyPDF2
  20. except ImportError:
  21. logger.error('Failed to monkeypatch xhtml2pdf. ' +
  22. 'You have missing dependencies')
  23. raise
  24. from rst2pdf.createpdf import RstToPdf
  25. class PdfGenerator(Generator):
  26. "Generate PDFs on the output dir, for all articles and pages"
  27. supported_md_fields = ['date']
  28. def __init__(self, *args, **kwargs):
  29. super(PdfGenerator, self).__init__(*args, **kwargs)
  30. if 'PDF_STYLE_PATH' in self.settings:
  31. pdf_style_path = [self.settings['PDF_STYLE_PATH']]
  32. else:
  33. pdf_style_path = []
  34. if 'PDF_STYLE' in self.settings:
  35. pdf_style = [self.settings['PDF_STYLE']]
  36. else:
  37. pdf_style = []
  38. self.pdfcreator = RstToPdf(breakside=0,
  39. stylesheets=pdf_style,
  40. style_path=pdf_style_path,
  41. raw_html=True)
  42. def _create_pdf(self, obj, output_path):
  43. filename = obj.slug + '.pdf'
  44. output_pdf = os.path.join(output_path, filename)
  45. mdreader = MarkdownReader(self.settings)
  46. _, ext = os.path.splitext(obj.source_path)
  47. if ext == '.rst':
  48. with open(obj.source_path, encoding='utf-8') as f:
  49. text = f.read()
  50. header = ''
  51. elif ext[1:] in mdreader.file_extensions and mdreader.enabled:
  52. text, meta = mdreader.read(obj.source_path)
  53. header = ''
  54. if 'title' in meta:
  55. title = meta['title']
  56. header = title + '\n' + '#' * len(title) + '\n\n'
  57. del meta['title']
  58. for k in meta.keys():
  59. # We can't support all fields, so we strip the ones that won't
  60. # look good
  61. if k not in self.supported_md_fields:
  62. del meta[k]
  63. header += '\n'.join([':%s: %s' % (k, meta[k]) for k in meta])
  64. header += '\n\n.. raw:: html\n\n\t'
  65. text = text.replace('\n', '\n\t')
  66. # rst2pdf casts the text to str and will break if it finds
  67. # non-escaped characters. Here we nicely escape them to XML/HTML
  68. # entities before proceeding
  69. text = text.encode('ascii', 'xmlcharrefreplace')
  70. else:
  71. # We don't support this format
  72. logger.warn('Ignoring unsupported file ' + obj.source_path)
  73. return
  74. logger.info(' [ok] writing %s' % output_pdf)
  75. self.pdfcreator.createPdf(text=(header+text),
  76. output=output_pdf)
  77. def generate_context(self):
  78. pass
  79. def generate_output(self, writer=None):
  80. # we don't use the writer passed as argument here
  81. # since we write our own files
  82. logger.info(' Generating PDF files...')
  83. pdf_path = os.path.join(self.output_path, 'pdf')
  84. if not os.path.exists(pdf_path):
  85. try:
  86. os.mkdir(pdf_path)
  87. except OSError:
  88. logger.error("Couldn't create the pdf output folder in " +
  89. pdf_path)
  90. for article in self.context['articles']:
  91. self._create_pdf(article, pdf_path)
  92. for page in self.context['pages']:
  93. self._create_pdf(page, pdf_path)
  94. def get_generators(generators):
  95. return PdfGenerator
  96. def register():
  97. signals.get_generators.connect(get_generators)