|
@@ -3,11 +3,13 @@
|
|
Post Statistics
|
|
Post Statistics
|
|
========================
|
|
========================
|
|
|
|
|
|
-This plugin calculates various Statistics about a post and stores them in an article.stats disctionary:
|
|
|
|
|
|
+This plugin calculates various statistics about a post and stores them in an article.stats dictionary:
|
|
|
|
|
|
wc: how many words
|
|
wc: how many words
|
|
read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
|
|
read_mins: how many minutes to read this article, based on 250 wpm (http://en.wikipedia.org/wiki/Words_per_minute#Reading_and_comprehension)
|
|
word_counts: frequency count of all the words in the article; can be used for tag/word clouds
|
|
word_counts: frequency count of all the words in the article; can be used for tag/word clouds
|
|
|
|
+fi: Flesch-Kincaid Index / Reading Ease
|
|
|
|
+fk: Flesch-Kincaid Grade Level
|
|
|
|
|
|
"""
|
|
"""
|
|
|
|
|
|
@@ -16,6 +18,8 @@ from bs4 import BeautifulSoup
|
|
import re
|
|
import re
|
|
from collections import Counter
|
|
from collections import Counter
|
|
|
|
|
|
|
|
+from .readability import *
|
|
|
|
+
|
|
|
|
|
|
def calculate_stats(instance):
|
|
def calculate_stats(instance):
|
|
|
|
|
|
@@ -26,17 +30,21 @@ def calculate_stats(instance):
|
|
# How fast do average people read?
|
|
# How fast do average people read?
|
|
WPM = 250
|
|
WPM = 250
|
|
|
|
|
|
- # Pre-process the text to remove entities
|
|
|
|
|
|
+ # Use BeautifulSoup to get readable/visible text
|
|
|
|
+ raw_text = BeautifulSoup(content).getText()
|
|
|
|
+
|
|
|
|
+ # Process the text to remove entities
|
|
entities = r'\&\#?.+?;'
|
|
entities = r'\&\#?.+?;'
|
|
- content = content.replace(' ', ' ')
|
|
|
|
- content = re.sub(entities, '', content)
|
|
|
|
|
|
+ raw_text = raw_text.replace(' ', ' ')
|
|
|
|
+ raw_text = re.sub(entities, '', raw_text)
|
|
|
|
|
|
- # Pre-process the text to remove punctuation
|
|
|
|
- drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”'
|
|
|
|
- content = content.translate(dict((ord(c), u'') for c in drop))
|
|
|
|
|
|
+ # Flesch-Kincaid readability stats count sentences,
|
|
|
|
+ # so save before removing punctuation
|
|
|
|
+ tmp = raw_text
|
|
|
|
|
|
- # Use BeautifulSoup to get readable/visible text
|
|
|
|
- raw_text = BeautifulSoup(content).getText()
|
|
|
|
|
|
+ # Process the text to remove punctuation
|
|
|
|
+ drop = u'.,?!@#$%^&*()_+-=\|/[]{}`~:;\'\"‘’—…“”'
|
|
|
|
+ raw_text = raw_text.translate(dict((ord(c), u'') for c in drop))
|
|
|
|
|
|
# Count the words in the text
|
|
# Count the words in the text
|
|
words = raw_text.lower().split()
|
|
words = raw_text.lower().split()
|
|
@@ -45,11 +53,17 @@ def calculate_stats(instance):
|
|
# Return the stats
|
|
# Return the stats
|
|
stats['word_counts'] = word_count
|
|
stats['word_counts'] = word_count
|
|
stats['wc'] = sum(word_count.values())
|
|
stats['wc'] = sum(word_count.values())
|
|
|
|
+
|
|
# Calculate how long it'll take to read, rounding up
|
|
# Calculate how long it'll take to read, rounding up
|
|
stats['read_mins'] = (stats['wc'] + WPM - 1) // WPM
|
|
stats['read_mins'] = (stats['wc'] + WPM - 1) // WPM
|
|
if stats['read_mins'] == 0:
|
|
if stats['read_mins'] == 0:
|
|
stats['read_mins'] = 1
|
|
stats['read_mins'] = 1
|
|
|
|
|
|
|
|
+ # Calculate Flesch-Kincaid readability stats
|
|
|
|
+ readability_stats = stcs, words, sbls = text_stats(tmp, stats['wc'])
|
|
|
|
+ stats['fi'] = "{:.2f}".format(flesch_index(readability_stats))
|
|
|
|
+ stats['fk'] = "{:.2f}".format(flesch_kincaid_level(readability_stats))
|
|
|
|
+
|
|
instance.stats = stats
|
|
instance.stats = stats
|
|
|
|
|
|
|
|
|