Implement git ref reading purely in Python

It's much faster to read the refs from 114 projects when the reader
is pure Python and just doing file IO than forking 114 git commands
and parsing their output.

The reader caches refs based upon file mtimes.  If any single ref
file has been modified since the last read, we re-read the entire
repository's ref namespace.  This simplifies the code as we don't
need to worry about shooting down symbolic-refs, but it may cause
more IO than is necessary if only one ref gets updated.

This change drops `repo branches` in Android from 1.658s to 0.206s.
Likewise, `repo sync` improves dramatically as well.

Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
Shawn O. Pearce 2009-04-17 18:49:50 -07:00
parent 5b23f24881
commit d237b69865
2 changed files with 137 additions and 26 deletions

133
git_refs.py Normal file
View File

@ -0,0 +1,133 @@
#
# Copyright (C) 2009 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
# Ref names and ref-name prefixes shared with the rest of the project.

# Name of the HEAD file at the top of the git directory; GitRefs reads it
# as <gitdir>/HEAD and stores its value under this key.
HEAD = 'HEAD'
# Prefix of local branch refs.
R_HEADS = 'refs/heads/'
# Prefix of tag refs.
R_TAGS = 'refs/tags/'
# NOTE(review): presumably where the tool records the published (uploaded)
# state of a branch -- confirm against the callers in project.py.
R_PUB = 'refs/published/'
# NOTE(review): looks like the prefix for refs of a remote named "m" --
# confirm against the callers in project.py.
R_M = 'refs/remotes/m/'
class GitRefs(object):
    """Read a git directory's refs straight from disk, cached by mtime.

    Refs are collected from the ``packed-refs`` file, from the loose files
    below ``refs/`` and from ``HEAD``.  The modification time of every file
    and directory consulted is remembered; when any of them differs on a
    later access, the whole ref namespace is read again.  Re-reading
    everything keeps symbolic-ref handling simple at the cost of some
    extra IO.
    """

    # Upper bound on symbolic-ref resolution passes.  It lets short chains
    # of symbolic refs resolve while guaranteeing termination on cycles or
    # dangling targets.
    _MAX_SYMREF_PASSES = 5

    def __init__(self, gitdir):
        """Create a reader for the git directory at *gitdir*.

        Nothing is read from disk until ``all`` or ``get`` is first used.
        """
        self._gitdir = gitdir
        self._phyref = None  # ref name -> value; None until first load
        self._symref = None  # symbolic ref name -> name of its target
        self._mtime = {}     # path relative to gitdir -> mtime when read

    @property
    def all(self):
        """Dict mapping every known ref name to its value.

        The dict is (re)loaded from disk when nothing has been loaded yet
        or when a tracked file or directory has changed.  Raises OSError
        if the ``refs/`` directory cannot be listed.
        """
        if self._phyref is None or self._NeedUpdate():
            self._LoadAll()
        return self._phyref

    def get(self, name):
        """Return the value of ref *name*, or '' when it does not exist."""
        return self.all.get(name, '')

    def _NeedUpdate(self):
        """Return True if any tracked path changed or vanished."""
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for name, mtime in self._mtime.items():
            try:
                if mtime != os.path.getmtime(os.path.join(self._gitdir, name)):
                    return True
            except OSError:
                # The path can no longer be stat'ed; treat as changed.
                return True
        return False

    def _LoadAll(self):
        """Discard the cache and read the complete ref namespace again."""
        self._phyref = {}
        self._symref = {}
        self._mtime = {}
        try:
            # Packed refs go first so loose refs of the same name, read
            # afterwards, overwrite them.
            self._ReadPackedRefs()
            self._ReadLoose('refs/')
            self._ReadLoose1(os.path.join(self._gitdir, HEAD), HEAD)
            self._ResolveSymRefs()
        except Exception:
            # Never leave a half-populated cache behind: the next access
            # must retry the load instead of returning partial data.
            self._phyref = None
            self._symref = None
            self._mtime = {}
            raise

    def _ResolveSymRefs(self):
        """Copy the value of each symbolic ref's target into ``_phyref``.

        Symbolic refs whose target is still unknown after
        ``_MAX_SYMREF_PASSES`` passes are left out of ``_phyref``.
        """
        pending = self._symref
        for _ in range(self._MAX_SYMREF_PASSES):
            if not pending:
                break
            unresolved = {}
            for name, dest in pending.items():
                if dest in self._phyref:
                    self._phyref[name] = self._phyref[dest]
                else:
                    # The target may itself be a symbolic ref that gets
                    # resolved during this pass; retry on the next one.
                    unresolved[name] = dest
            pending = unresolved

    def _ReadPackedRefs(self):
        """Load ``<value> <name>`` entries from the ``packed-refs`` file.

        A missing or unreadable file is silently ignored.
        """
        path = os.path.join(self._gitdir, 'packed-refs')
        try:
            fd = open(path, 'r')
        except (IOError, OSError):
            return
        # 'with' guarantees the handle is closed on every exit path,
        # including the early return when the stat fails.
        with fd:
            try:
                mtime = os.path.getmtime(path)
            except OSError:
                return
            for line in fd:
                # Strip only the newline so a final line without one does
                # not lose its last character.
                line = line.rstrip('\n')
                # Skip blank lines, '#' header lines and '^' lines.
                if not line or line.startswith(('#', '^')):
                    continue
                fields = line.split(' ')
                if len(fields) < 2:
                    # Malformed entry: no ref name after the value.
                    continue
                self._phyref[fields[1]] = fields[0]
        self._mtime['packed-refs'] = mtime

    def _ReadLoose(self, prefix):
        """Recursively read the loose ref files under ``<gitdir>/<prefix>``.

        *prefix* is a '/'-terminated path relative to the git directory,
        for example ``'refs/'``.  Raises OSError if the directory cannot be
        listed.
        """
        base = os.path.join(self._gitdir, prefix)
        names = os.listdir(base)
        # Track every scanned directory, not only those that contain
        # sub-directories: creating or deleting a ref file changes the
        # mtime of its directory but of no existing ref file.
        self._mtime[prefix] = os.path.getmtime(base)
        for name in names:
            path = os.path.join(base, name)
            if os.path.isdir(path):
                self._ReadLoose(prefix + name + '/')
            elif name.endswith('.lock'):
                # Not a ref: skip '*.lock' files.
                pass
            else:
                self._ReadLoose1(path, prefix + name)

    def _ReadLoose1(self, path, name):
        """Read the single ref file at *path* and store it under *name*.

        A first line starting with ``ref: `` makes *name* a symbolic ref
        to the rest of that line; anything else is stored as the ref's
        value.  Unreadable or empty files are ignored.
        """
        try:
            fd = open(path, 'r')
        except (IOError, OSError):
            return
        with fd:
            try:
                mtime = os.path.getmtime(path)
            except OSError:
                return
            first_line = fd.readline()
        if not first_line:
            return
        # Strip only the newline so a file without a trailing newline does
        # not lose its last character.
        value = first_line.rstrip('\n')
        if value.startswith('ref: '):
            self._symref[name] = value[5:]
        else:
            self._phyref[name] = value
        self._mtime[name] = mtime

View File

@ -28,11 +28,7 @@ from error import GitError, ImportError, UploadError
from error import ManifestInvalidRevisionError from error import ManifestInvalidRevisionError
from remote import Remote from remote import Remote
HEAD = 'HEAD' from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
R_HEADS = 'refs/heads/'
R_TAGS = 'refs/tags/'
R_PUB = 'refs/published/'
R_M = 'refs/remotes/m/'
def _error(fmt, *args): def _error(fmt, *args):
msg = fmt % args msg = fmt % args
@ -226,6 +222,7 @@ class Project(object):
else: else:
self.work_git = None self.work_git = None
self.bare_git = self._GitGetByExec(self, bare=True) self.bare_git = self._GitGetByExec(self, bare=True)
self.bare_ref = GitRefs(gitdir)
@property @property
def Exists(self): def Exists(self):
@ -301,7 +298,7 @@ class Project(object):
"""Get all existing local branches. """Get all existing local branches.
""" """
current = self.CurrentBranch current = self.CurrentBranch
all = self.bare_git.ListRefs() all = self._allrefs
heads = {} heads = {}
pubd = {} pubd = {}
@ -1030,32 +1027,13 @@ class Project(object):
@property @property
def _allrefs(self): def _allrefs(self):
return self.bare_git.ListRefs() return self.bare_ref.all
class _GitGetByExec(object): class _GitGetByExec(object):
def __init__(self, project, bare): def __init__(self, project, bare):
self._project = project self._project = project
self._bare = bare self._bare = bare
def ListRefs(self, *args):
cmdv = ['for-each-ref', '--format=%(objectname) %(refname)']
cmdv.extend(args)
p = GitCommand(self._project,
cmdv,
bare = self._bare,
capture_stdout = True,
capture_stderr = True)
r = {}
for line in p.process.stdout:
id, name = line[:-1].split(' ', 2)
r[name] = id
if p.Wait() != 0:
raise GitError('%s for-each-ref %s: %s' % (
self._project.name,
str(args),
p.stderr))
return r
def LsOthers(self): def LsOthers(self):
p = GitCommand(self._project, p = GitCommand(self._project,
['ls-files', ['ls-files',