readability.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # -*- coding: utf-8 -*-
  2. # Adapted from here: http://acdx.net/calculating-the-flesch-kincaid-level-in-python/
  3. # See here for details: http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test
  4. from __future__ import division
  5. import re
  6. def mean(seq):
  7. return sum(seq) / len(seq)
  8. def syllables(word):
  9. if len(word) <= 3:
  10. return 1
  11. word = re.sub(r"(es|ed|(?<!l)e)$", "", word)
  12. return len(re.findall(r"[aeiouy]+", word))
  13. def normalize(text):
  14. terminators = ".!?:;"
  15. term = re.escape(terminators)
  16. text = re.sub(r"[^%s\sA-Za-z]+" % term, "", text)
  17. text = re.sub(r"\s*([%s]+\s*)+" % term, ". ", text)
  18. return re.sub(r"\s+", " ", text)
  19. def text_stats(text, wc):
  20. text = normalize(text)
  21. stcs = [s.split(" ") for s in text.split(". ")]
  22. stcs = [s for s in stcs if len(s) >= 2]
  23. if wc:
  24. words = wc
  25. else:
  26. words = sum(len(s) for s in stcs)
  27. sbls = sum(syllables(w) for s in stcs for w in s)
  28. return len(stcs), words, sbls
  29. def flesch_index(stats):
  30. stcs, words, sbls = stats
  31. if stcs == 0 or words == 0:
  32. return 0
  33. return 206.835 - 1.015 * (words / stcs) - 84.6 * (sbls / words)
  34. def flesch_kincaid_level(stats):
  35. stcs, words, sbls = stats
  36. if stcs == 0 or words == 0:
  37. return 0
  38. return 0.39 * (words / stcs) + 11.8 * (sbls / words) - 15.59