1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- import re
- import math
- from pelican import signals
- from html.parser import HTMLParser
# Words-per-minute figure that calculate_readtime divides the word count by
# to turn it into minutes.
WPM = 230.0
class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text it sees.

    Every piece of character data reported by the parser is appended to
    ``fed``; ``get_data`` concatenates the pieces into a single string.
    """

    def __init__(self) -> None:
        # HTMLParser.__init__ already resets the parser state, so only the
        # text accumulator has to be set up here.
        super().__init__()
        self.fed = []

    def handle_data(self, d) -> None:
        """Record one chunk of text found between tags."""
        self.fed.append(d)

    def get_data(self) -> str:
        """Return all text collected so far as one string."""
        collected = "".join(self.fed)
        return collected
def strip_tags(html):
    """Return the text of *html* with the markup removed.

    Args:
        html: HTML source as a string.

    Returns:
        The concatenation of all text chunks the parser reported.
    """
    stripper = MLStripper()
    stripper.feed(html)
    # feed() may keep trailing text buffered while it waits for more input
    # (for example when the text ends in an unterminated "&..." sequence such
    # as "AT&T"). close() forces that buffered text to be processed so it is
    # not silently lost.
    stripper.close()
    return stripper.get_data()
def calculate_readtime(content_object):
    """Attach an estimated reading time to *content_object*.

    The HTML in ``content_object._content`` is stripped of tags, its words
    are counted, and the count is divided by ``WPM`` and rounded up to whole
    minutes, with a minimum of one. The result is stored as
    ``content_object.readtime = {"minutes": <int>}``.

    If ``_content`` is ``None`` the object is left untouched.

    Args:
        content_object: Object exposing a ``_content`` attribute that holds
            HTML text or ``None``.
    """
    content = content_object._content
    if content is None:
        return

    text = strip_tags(content)
    # Count runs of alphanumeric characters instead of splitting on
    # separators: splitting produces empty strings at the text boundaries
    # (so empty text counted as one word), and an ASCII-only character class
    # chops non-ASCII words into fragments. ``[^\W_]`` is "a word character
    # that is not an underscore", so underscores still act as separators.
    num_words = len(re.findall(r"[^\W_]+", text))

    # Always report at least one minute, even for empty content.
    minutes = max(1, math.ceil(num_words / WPM))

    content_object.readtime = {
        "minutes": minutes,
    }
def register():
    """Connect calculate_readtime to pelican's content_object_init signal."""
    init_signal = signals.content_object_init
    init_signal.connect(calculate_readtime)
|