123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- # -*- coding: utf-8 -*-
- """
- Post Statistics
- ========================
- This plugin calculates various statistics about a post and stores them in an article.stats dictionary:
- wc: how many words
- read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
- word_counts: frequency count of all the words in the article; can be used for tag/word clouds
- fi: Flesch-Kincaid Index / Reading Ease
- fk: Flesch-Kincaid Grade Level
- """
- from pelican import signals
- from bs4 import BeautifulSoup
- import re
- from collections import Counter
- from .readability import *
# Assumed average reading speed, in words per minute, used for `read_mins`.
_WORDS_PER_MINUTE = 250

# Matches only well-formed entity references (`&name;`, `&#123;`, `&#x1F;`).
# A looser pattern such as `&#?.+?;` would delete everything between a
# literal ampersand and the next semicolon in ordinary prose.
_ENTITY_RE = re.compile(r'&#?\w+;')

# Punctuation stripped before counting words. The backslash is escaped
# explicitly so the literal contains no invalid escape sequence.
_PUNCTUATION = u'.,?!@#$%^&*()_+-=\\|/[]{}`~:;\'"‘’—…“”'
_PUNCTUATION_TABLE = {ord(char): None for char in _PUNCTUATION}


def calculate_stats(instance):
    """Compute reading statistics for a content object and attach them.

    Does nothing when ``instance._content`` is ``None``. Otherwise sets
    ``instance.stats`` to a dict with these keys:

    * ``word_counts`` -- ``Counter`` of lower-cased words with punctuation
      removed.
    * ``wc`` -- total number of words.
    * ``read_mins`` -- estimated reading time in whole minutes, rounded up,
      never less than 1.
    * ``fi`` -- result of ``flesch_index`` formatted with two decimals.
    * ``fk`` -- result of ``flesch_kincaid_level`` formatted with two
      decimals.

    ``text_stats``, ``flesch_index`` and ``flesch_kincaid_level`` are not
    defined in this module; they are expected to come from the package's
    ``readability`` helpers.

    :param instance: object exposing a ``_content`` attribute holding HTML
        (or ``None``); it receives the ``stats`` attribute.
    :returns: ``None``; the result is stored on ``instance``.
    """
    content = instance._content
    if content is None:
        return

    # Use BeautifulSoup to keep only the readable/visible text.
    visible_text = BeautifulSoup(content, 'html.parser').getText()

    # Turn non-breaking spaces (already decoded, or still in entity form)
    # into plain spaces *before* stripping entities, so the words on either
    # side are not glued together.
    visible_text = visible_text.replace(u'&nbsp;', u' ').replace(u'\xa0', u' ')
    visible_text = _ENTITY_RE.sub(u'', visible_text)

    # The readability helpers receive the text with punctuation intact,
    # so keep this version before stripping punctuation.
    sentence_text = visible_text

    # Strip punctuation, then count case-insensitively.
    words = visible_text.translate(_PUNCTUATION_TABLE).lower().split()
    word_counts = Counter(words)
    total_words = len(words)

    # Ceiling division; even an empty post reports one minute.
    read_mins = max(
        1, (total_words + _WORDS_PER_MINUTE - 1) // _WORDS_PER_MINUTE)

    # Flesch-Kincaid readability scores.
    readability_stats = text_stats(sentence_text, total_words)

    instance.stats = {
        'word_counts': word_counts,
        'wc': total_words,
        'read_mins': read_mins,
        'fi': "{:.2f}".format(flesch_index(readability_stats)),
        'fk': "{:.2f}".format(flesch_kincaid_level(readability_stats)),
    }
def register():
    """Plugin entry point: subscribe ``calculate_stats`` to Pelican's
    ``content_object_init`` signal."""
    content_init_signal = signals.content_object_init
    content_init_signal.connect(calculate_stats)
|