12 vuotta sitten · 69f7b1fb0a
--- a/post_stats/post_stats.py
+++ b/post_stats/post_stats.py
@@ -3,11 +3,13 @@
 
				 Post Statistics
			
 
				 ========================
			
 
				 
			
 
				-This plugin calculates various Statistics about a post and stores them in an article.stats disctionary:
			
 
				+This plugin calculates various statistics about a post and stores them in an article.stats dictionary:
			
 
				 
			
 
				 wc: how many words
			
 
				 read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
			
 
				 word_counts: frequency count of all the words in the article; can be used for tag/word clouds
			
 
				+fi: Flesch-Kincaid Index / Reading Ease
			
 
				+fk: Flesch-Kincaid Grade Level
			
 
				 
			
 
				 """
			
 
				 
			
@@ -16,6 +18,8 @@ from bs4 import BeautifulSoup
 
				 import re
			
 
				 from collections import Counter
			
 
				 
			
 
				+from .readability import *
			
 
				+
			
 
				 
			
 
				 def calculate_stats(instance):
			
 
				 
			
@@ -26,17 +30,21 @@ def calculate_stats(instance):
 
				         # How fast do average people read?
			
 
				         WPM = 250
			
 
				 
			
 
				-        # Pre-process the text to remove entities
			
 
				+        # Use BeautifulSoup to get readable/visible text
			
 
				+        raw_text = BeautifulSoup(content).getText()
			
 
				+
			
 
				+        # Process the text to remove entities
			
 
				         entities = r'\&\#?.+?;'
			
 
				-        content = content.replace('&nbsp;', ' ')
			
 
				-        content = re.sub(entities, '', content)
			
 
				+        raw_text = raw_text.replace('&nbsp;', ' ')
			
 
				+        raw_text = re.sub(entities, '', raw_text)
			
 
				 
			
 
				-        # Pre-process the text to remove punctuation
			
 
				-        drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”'
			
 
				-        content = content.translate(dict((ord(c), u'') for c in drop))
			
 
				+        # Flesch-Kincaid readability stats count sentences,
			
 
				+        # so save before removing punctuation
			
 
				+        tmp = raw_text
			
 
				 
			
 
				-        # Use BeautifulSoup to get readable/visible text
			
 
				-        raw_text = BeautifulSoup(content).getText()
			
 
				+        # Process the text to remove punctuation
			
 
				+        drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”'
			
 
				+        raw_text = raw_text.translate(dict((ord(c), u'') for c in drop))
			
 
				 
			
 
				         # Count the words in the text
			
 
				         words = raw_text.lower().split()
			
@@ -45,11 +53,17 @@ def calculate_stats(instance):
 
				         # Return the stats
			
 
				         stats['word_counts'] = word_count
			
 
				         stats['wc'] = sum(word_count.values())
			
 
				+
			
 
				         # Calculate how long it'll take to read, rounding up
			
 
				         stats['read_mins'] = (stats['wc'] + WPM - 1) // WPM
			
 
				         if stats['read_mins'] == 0:
			
 
				             stats['read_mins'] = 1
			
 
				 
			
 
				+        # Calculate Flesch-Kincaid readability stats
			
 
				+        readability_stats = stcs, words, sbls = text_stats(tmp, stats['wc'])
			
 
				+        stats['fi'] = "{:.2f}".format(flesch_index(readability_stats))
			
 
				+        stats['fk'] = "{:.2f}".format(flesch_kincaid_level(readability_stats))
			
 
				+
			
 
				         instance.stats = stats
			
 
				 
			
 
				 
			
--- a/post_stats/readability.py
+++ b/post_stats/readability.py
@@ -0,0 +1,56 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# Adapted from here: http://acdx.net/calculating-the-flesch-kincaid-level-in-python/
			
 
				+# See here for details: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test
			
 
				+
			
 
				+from __future__ import division
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+def mean(seq):
			
 
				+    return sum(seq) / len(seq)
			
 
				+
			
 
				+
			
 
				+def syllables(word):
			
 
				+    if len(word) <= 3:
			
 
				+        return 1
			
 
				+
			
 
				+    word = re.sub(r"(es|ed|(?<!l)e)$", "", word)
			
 
				+    return len(re.findall(r"[aeiouy]+", word))
			
 
				+
			
 
				+
			
 
				+def normalize(text):
			
 
				+    terminators = ".!?:;"
			
 
				+    term = re.escape(terminators)
			
 
				+    text = re.sub(r"[^%s\sA-Za-z]+" % term, "", text)
			
 
				+    text = re.sub(r"\s*([%s]+\s*)+" % term, ". ", text)
			
 
				+    return re.sub(r"\s+", " ", text)
			
 
				+
			
 
				+
			
 
				+def text_stats(text, wc):
			
 
				+    text = normalize(text)
			
 
				+    stcs = [s.split(" ") for s in text.split(". ")]
			
 
				+    stcs = filter(lambda s: len(s) >= 2, stcs)
			
 
				+
			
 
				+    if wc:
			
 
				+        words = wc
			
 
				+    else:
			
 
				+        words = sum(len(s) for s in stcs)
			
 
				+
			
 
				+    sbls = sum(syllables(w) for s in stcs for w in s)
			
 
				+
			
 
				+    return len(stcs), words, sbls
			
 
				+
			
 
				+
			
 
				+def flesch_index(stats):
			
 
				+    stcs, words, sbls = stats
			
 
				+    if stcs == 0 or words == 0:
			
 
				+        return 0
			
 
				+    return 206.835 - 1.015 * (words / stcs) - 84.6 * (sbls / words)
			
 
				+
			
 
				+
			
 
				+def flesch_kincaid_level(stats):
			
 
				+    stcs, words, sbls = stats
			
 
				+    if stcs == 0 or words == 0:
			
 
				+        return 0
			
 
				+    return 0.39 * (words / stcs) + 11.8 * (sbls / words) - 15.59
			
--- a/post_stats/readme.rst
+++ b/post_stats/readme.rst
@@ -0,0 +1,49 @@
 
				+Post Statistics
			
 
				+==================
			
 
				+
			
 
				+A Pelican plugin to calculate various statistics about a post and store them in an article.stats dictionary:
			
 
				+
			
 
				+- ``wc``: how many words
			
 
				+- ``read_mins``: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
			
 
				+- ``word_counts``: frequency count of all the words in the article; can be used for tag/word clouds
			
 
				+- ``fi``: Flesch-Kincaid Index / Reading Ease (see: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests)
			
 
				+- ``fk``: Flesch-Kincaid Grade Level
			
 
				+
			
 
				+Example:
			
 
				+
			
 
				+.. code-block:: python
			
 
				+
			
 
				+    {
			
 
				+        'wc': 2760,
			
 
				+        'fi': '65.94',
			
 
				+        'fk': '7.65',
			
 
				+        'word_counts': Counter({u'to': 98, u'a': 90, u'the': 83, u'of': 50, ...}),
			
 
				+        'read_mins': 12
			
 
				+    }
			
 
				+
			
 
				+This allows you to output these values in your templates, like this, for example:
			
 
				+
			
 
				+.. code-block:: html+jinja
			
 
				+
			
 
				+	<p title="~{{ article.stats['wc'] }} words">~{{ article.stats['read_mins'] }} min read</p>
			
 
				+	<ul>
			
 
				+	    <li>Flesch-Kincaid Index / Reading Ease: {{ article.stats['fi'] }}</li>
			
 
				+	    <li>Flesch-Kincaid Grade Level: {{ article.stats['fk'] }}</li>
			
 
				+	</ul>
			
 
				+
			
 
				+The ``word_counts`` variable is a Counter dictionary and looks like this, with each unique word and its frequency:
			
 
				+
			
 
				+.. code-block:: python
			
 
				+
			
 
				+	Counter({u'to': 98, u'a': 90, u'the': 83, u'of': 50, u'karma': 50, .....
			
 
				+
			
 
				+and could be used to create a tag/word cloud for a post.
			
 
				+
			
 
				+Requirements
			
 
				+============
			
 
				+
			
 
				+`post_stats` requires BeautifulSoup.
			
 
				+
			
 
				+.. code-block:: console
			
 
				+
			
 
				+    $ pip install beautifulsoup4