Browse Source

Merge pull request #473 from cscutcher/master

Refactor filetime_from_git and add GIT_FILETIME_FOLLOW option
Justin Mayer 10 years ago
parent
commit
c570d986cd
3 changed files with 181 additions and 27 deletions
  1. 12 0
      filetime_from_git/README.rst
  2. 34 27
      filetime_from_git/filetime_from_git.py
  3. 135 0
      filetime_from_git/git_wrapper.py

+ 12 - 0
filetime_from_git/README.rst

@@ -26,6 +26,18 @@ operations like copy, move will not affect the generated results.
 If some article or page doesn't like to use git time, set a ``gittime: off``
 metadata to disable it.
 
+You can also set GIT_FILETIME_FOLLOW to True in your pelican config to
+make the plugin follow file renames, i.e. ensure the creation date matches
+the original file creation date, not the date it was renamed.
+
+FAQ
+---
+
+### Q. I get a GitCommandError: 'git rev-list ...' when I run the plugin. What's up?
+Be sure to use the correct gitpython module for your distro's git binary.
+Setting the GIT_FILETIME_FOLLOW option to True may also make your problem go away as it uses
+a different method to find commits.
+
 Some notes on git
 ~~~~~~~~~~~~~~~~~~
 

+ 34 - 27
filetime_from_git/filetime_from_git.py

@@ -1,18 +1,11 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 import os
-from git import Git, Repo, InvalidGitRepositoryError
 from pelican import signals, contents
-from datetime import datetime
-from time import mktime, altzone
 from pelican.utils import strftime, set_date_tzinfo
+from datetime import datetime
+from git_wrapper import git_wrapper
 
-try:
-    repo = Repo(os.path.abspath('.'))
-    git = Git(os.path.abspath('.'))
-except InvalidGitRepositoryError as e:
-    repo = None
 
 def datetime_from_timestamp(timestamp, content):
     """
@@ -20,15 +13,23 @@ def datetime_from_timestamp(timestamp, content):
     so that datetime is comparable to other datetime objects in recent versions
     that now also have timezone information.
     """
-    return set_date_tzinfo(datetime.fromtimestamp(timestamp), tz_name=content.settings.get('TIMEZONE', None))
+    return set_date_tzinfo(
+        datetime.fromtimestamp(timestamp),
+        tz_name=content.settings.get('TIMEZONE', None))
+
 
 def filetime_from_git(content):
-    if isinstance(content, contents.Static) or repo is None:
+    if isinstance(content, contents.Static):
         return
+
+    git = git_wrapper('.')
+    tz_name = content.settings.get('TIMEZONE', None)
+
     gittime = content.metadata.get('gittime', 'yes').lower()
     gittime = gittime.replace("false", "no").replace("off", "no")
     if gittime == "no":
         return
+
     # 1. file is not managed by git
     #    date: fs time
     # 2. file is staged, but has no commits
@@ -38,36 +39,42 @@ def filetime_from_git(content):
     # 4. file is managed, but dirty
     #    date: first commit time, update: fs time
     path = content.source_path
-    status, stdout, stderr = git.execute(['git', 'ls-files', path, '--error-unmatch'],
-            with_extended_output=True, with_exceptions=False)
-    if status != 0:
-        # file is not managed by git
-        content.date = datetime_from_timestamp(os.stat(path).st_ctime, content)
-    else:
-        # file is managed by git
-        commits = repo.commits(path=path)
+    if git.is_file_managed_by_git(path):
+        commits = git.get_commits(
+            path, follow=content.settings.get('GIT_FILETIME_FOLLOW', False))
+
         if len(commits) == 0:
             # never commited, but staged
-            content.date = datetime_from_timestamp(os.stat(path).st_ctime, content)
+            content.date = git.datetime_from_timestamp(
+                os.stat(path).st_ctime, content)
         else:
             # has commited
-            content.date = datetime_from_timestamp(mktime(commits[-1].committed_date) - altzone, content)
+            content.date = git.get_commit_date(
+                commits[-1], tz_name)
 
-            status, stdout, stderr = git.execute(['git', 'diff', '--quiet', 'HEAD', path],
-                    with_extended_output=True, with_exceptions=False)
-            if status != 0:
+            if git.is_file_modified(path):
                 # file has changed
-                content.modified = datetime_from_timestamp(os.stat(path).st_ctime, content)
+                content.modified = datetime_from_timestamp(
+                    os.stat(path).st_ctime, content)
             else:
                 # file is not changed
                 if len(commits) > 1:
-                    content.modified = datetime_from_timestamp(mktime(commits[0].committed_date) - altzone, content)
+                    content.modified = git.get_commit_date(
+                        commits[0], tz_name)
+    else:
+        # file is not managed by git
+        content.date = datetime_from_timestamp(os.stat(path).st_ctime, content)
+
     if not hasattr(content, 'modified'):
         content.modified = content.date
+
     if hasattr(content, 'date'):
         content.locale_date = strftime(content.date, content.date_format)
+
     if hasattr(content, 'modified'):
-        content.locale_modified = strftime(content.modified, content.date_format)
+        content.locale_modified = strftime(
+            content.modified, content.date_format)
+
 
 def register():
     signals.content_object_init.connect(filetime_from_git)

+ 135 - 0
filetime_from_git/git_wrapper.py

@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+"""
+Wrap python git interface for compatibility with older/newer version
+"""
+import logging
+import os
+from time import mktime, altzone
+from datetime import datetime
+from pelican.utils import set_date_tzinfo
+from git import Git, Repo
+
+# Module-level logger, named after this module via ``__name__``.
+DEV_LOGGER = logging.getLogger(__name__)
+
+
+class _GitWrapperCommon(object):
+    '''
+    Wrap git module to provide a more stable interface across versions.
+
+    Subclasses are expected to provide ``_get_commits(path)``, which
+    :meth:`get_commits` calls when renames are not followed.
+    '''
+    def __init__(self, repo_path):
+        '''
+        :param repo_path: Path of the git repository to wrap. Git commands
+            issued through this wrapper use it as their working directory.
+        '''
+        # Honour repo_path instead of always opening the current directory,
+        # so a wrapper cached under one path really operates on that path.
+        repo_path = os.path.abspath(repo_path)
+        self.git = Git(repo_path)
+        self.repo = Repo(repo_path)
+
+    def is_file_managed_by_git(self, path):
+        '''
+        :param path: Path to check
+        :returns: True if path is managed by git, i.e. ``git ls-files
+            --error-unmatch`` exits with status 0 for it
+        '''
+        # with_exceptions=False: a non-zero exit is reported through the
+        # returned status instead of being raised.
+        status, _stdout, _stderr = self.git.execute(
+            ['git', 'ls-files', path, '--error-unmatch'],
+            with_extended_output=True,
+            with_exceptions=False)
+        return status == 0
+
+    def is_file_modified(self, path):
+        '''
+        Does a file have local changes not yet committed
+
+        :param path: Path to compare against ``HEAD``
+        :returns: True if file has local changes, i.e. ``git diff --quiet
+            HEAD`` exits with a non-zero status for it
+        '''
+        status, _stdout, _stderr = self.git.execute(
+            ['git', 'diff', '--quiet', 'HEAD', path],
+            with_extended_output=True,
+            with_exceptions=False)
+        return status != 0
+
+    def get_commits_following(self, path):
+        '''
+        Get all commits including path following the file through
+        renames
+
+        :param path: Path which we will find commits for
+        :returns: List of commit objects. Newest to oldest
+        '''
+        commit_shas = self.git.log(
+            '--pretty=%H', '--follow', '--', path).splitlines()
+        # Build a real list: callers take len() of the result and index it,
+        # which a lazy map object (Python 3) does not support.
+        return [self.repo.commit(sha) for sha in commit_shas]
+
+    def get_commits(self, path, follow=False):
+        '''
+        Get all commits including path
+
+        :param path: Path which we will find commits for
+        :param bool follow: If True we will follow path through renames
+
+        :returns: Sequence of commit objects. Newest to oldest
+        '''
+        if follow:
+            return self.get_commits_following(path)
+        # _get_commits is supplied by the version-specific subclass.
+        return self._get_commits(path)
+
+
+class _GitWrapperLegacy(_GitWrapperCommon):
+    '''
+    Wrapper for gitpython versions whose ``Repo`` exposes ``commits``.
+    '''
+    def _get_commits(self, path):
+        '''
+        Get all commits including path without following renames
+
+        :param path: Path which we will find commits for
+
+        :returns: Sequence of commit objects. Newest to oldest
+        '''
+        # NOTE(review): legacy ``Repo.commits`` may cap the number of
+        # commits it returns by default -- confirm against the gitpython
+        # version in use.
+        return self.repo.commits(path=path)
+
+    @staticmethod
+    def get_commit_date(commit, tz_name):
+        '''
+        Get datetime of commit committed_date
+
+        :param commit: Commit object whose ``committed_date`` is a
+            ``time.struct_time`` (assumed to be expressed in UTC, as legacy
+            gitpython is understood to produce -- TODO confirm)
+        :param tz_name: Timezone name handed to ``set_date_tzinfo``
+        :returns: Result of ``set_date_tzinfo`` for the commit time
+        '''
+        # Local import: calendar is not among this module's imports.
+        from calendar import timegm
+
+        # timegm converts a UTC struct_time straight to an epoch timestamp.
+        # ``mktime(...) - altzone`` always applied the DST offset, which is
+        # an hour off whenever the standard and DST offsets differ.
+        return set_date_tzinfo(
+            datetime.fromtimestamp(timegm(commit.committed_date)),
+            tz_name=tz_name)
+
+
+class _GitWrapper(_GitWrapperCommon):
+    '''
+    Wrapper used when ``Repo`` does not expose ``commits``.
+    '''
+    def _get_commits(self, path):
+        '''
+        Collect every commit touching path; renames are not followed.
+
+        :param path: Path which we will find commits for
+
+        :returns: List of commit objects. Newest to oldest
+
+        .. NOTE ::
+            If this fails it could be that your gitpython version is out of sync with the git
+            binary on your distro. Make sure you use the correct gitpython version.
+
+            Alternatively enabling GIT_FILETIME_FOLLOW may also make your problem go away.
+        '''
+        commit_iter = self.repo.iter_commits(path=path)
+        return list(commit_iter)
+
+    @staticmethod
+    def get_commit_date(commit, tz_name):
+        '''
+        Build a datetime from the commit's committed_date timestamp and
+        pass it through ``set_date_tzinfo`` with tz_name.
+        '''
+        naive_date = datetime.fromtimestamp(commit.committed_date)
+        return set_date_tzinfo(naive_date, tz_name=tz_name)
+
+
+# Wrapper instances keyed by absolute repository path.
+_wrapper_cache = {}
+
+
+def git_wrapper(path):
+    '''
+    Return the wrapper instance for path, creating and caching it on
+    first use.
+
+    The legacy wrapper is chosen when ``Repo`` has a ``commits``
+    attribute; otherwise the newer wrapper is used.
+    '''
+    key = os.path.abspath(path)
+    try:
+        return _wrapper_cache[key]
+    except KeyError:
+        pass
+
+    if hasattr(Repo, 'commits'):
+        wrapper_cls = _GitWrapperLegacy
+    else:
+        wrapper_cls = _GitWrapper
+
+    instance = wrapper_cls(key)
+    _wrapper_cache[key] = instance
+    return instance