From d237b698652120f4d859b6f9e12e3aa15aa7b2d5 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Fri, 17 Apr 2009 18:49:50 -0700 Subject: [PATCH] Implement git ref reading purely in Python It's much faster to read the refs from 114 projects when the reader is pure Python and just doing file IO than forking 114 git commands and parsing their output. The reader caches refs based upon file mtimes. If any single ref file has been modified since the last read, we re-read the entire repository's ref namespace. This simplifies the code as we don't need to worry about shooting down symbolic-refs, but it may cause more IO than is necessary if only one ref gets updated. This change drops `repo branches` in Android from 1.658s to 0.206s. Likewise, `repo sync` improves dramatically as well. Signed-off-by: Shawn O. Pearce --- git_refs.py | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++ project.py | 30 ++---------- 2 files changed, 137 insertions(+), 26 deletions(-) create mode 100644 git_refs.py diff --git a/git_refs.py b/git_refs.py new file mode 100644 index 00000000..9851e78b --- /dev/null +++ b/git_refs.py @@ -0,0 +1,133 @@ +# +# Copyright (C) 2009 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os

# Well-known ref names and namespace prefixes.
HEAD = 'HEAD'
R_HEADS = 'refs/heads/'
R_TAGS = 'refs/tags/'
R_PUB = 'refs/published/'
R_M = 'refs/remotes/m/'


class GitRefs(object):
    """Read a git directory's refs straight from its files.

    The refs are gathered from ``packed-refs``, the loose files below
    ``refs/`` and ``HEAD``.  The result is cached together with the mtime
    of every file and directory that was consulted; when any of those
    mtimes changes (or a tracked path disappears) the whole ref namespace
    is read again on the next access.
    """

    def __init__(self, gitdir):
        """Create a reader for the git directory at ``gitdir``.

        Nothing is read until ``all`` or ``get`` is first used.
        """
        self._gitdir = gitdir
        self._phyref = None   # ref name -> object id; None until loaded
        self._symref = None   # symbolic ref name -> name it points at
        self._mtime = {}      # path relative to gitdir -> mtime at load

    @property
    def all(self):
        """Dict mapping every known ref name to its object id.

        Symbolic refs (such as ``HEAD``) appear under their own name with
        the id of the ref they resolve to.  Raises ``OSError`` if the
        ``refs/`` directory cannot be scanned.
        """
        if self._phyref is None or self._NeedUpdate():
            self._LoadAll()
        return self._phyref

    def get(self, name):
        """Return the object id for ``name``, or ``''`` if it is unknown."""
        try:
            return self.all[name]
        except KeyError:
            return ''

    def _NeedUpdate(self):
        """Return True if any tracked path changed mtime or vanished."""
        # .items() rather than .iteritems() so this runs on Python 2 and 3.
        for name, mtime in self._mtime.items():
            try:
                if mtime != os.path.getmtime(os.path.join(self._gitdir, name)):
                    return True
            except OSError:
                # A tracked path is gone (e.g. the ref was deleted or packed).
                return True
        return False

    def _LoadAll(self):
        """Discard the cache and re-read every ref from disk."""
        self._phyref = {}
        self._symref = {}
        self._mtime = {}

        # Packed refs first so that loose refs, read next, override them.
        self._ReadPackedRefs()
        self._ReadLoose('refs/')
        self._ReadLoose1(os.path.join(self._gitdir, HEAD), HEAD)

        # Resolve symbolic refs.  A symref may point at another symref, so
        # retry the unresolved ones a bounded number of times; anything
        # still unresolved after that (dangling or cyclic) is left out.
        scan = self._symref
        attempts = 0
        while scan and attempts < 5:
            scan_next = {}
            for name, dest in scan.items():
                if dest in self._phyref:
                    self._phyref[name] = self._phyref[dest]
                else:
                    scan_next[name] = dest
            scan = scan_next
            attempts += 1

    def _ReadPackedRefs(self):
        """Load ``<id> <name>`` entries from the ``packed-refs`` file.

        A missing or unreadable file is silently ignored.  Header lines
        (``#``), peeled-tag lines (``^``), blank lines and lines without a
        ref name are skipped.
        """
        path = os.path.join(self._gitdir, 'packed-refs')
        try:
            fd = open(path, 'r')
        except (IOError, OSError):
            return
        try:
            # Stat inside the try/finally so the handle is closed even if
            # the file vanishes between open() and getmtime().
            try:
                mtime = os.path.getmtime(path)
            except OSError:
                return
            for line in fd:
                # Strip only the newline; slicing off the last character
                # would truncate a final line that lacks one.
                line = line.rstrip('\n')
                if not line or line[0] in '#^':
                    continue
                p = line.split(' ')
                if len(p) < 2:
                    continue
                self._phyref[p[1]] = p[0]
        finally:
            fd.close()
        self._mtime['packed-refs'] = mtime

    def _ReadLoose(self, prefix):
        """Recursively load the loose refs stored below ``prefix``.

        ``prefix`` is relative to the git directory and ends with ``/``.
        Raises ``OSError`` if the directory cannot be read.
        """
        base = os.path.join(self._gitdir, prefix)
        # Track every scanned directory, not only those that contain
        # subdirectories: creating a ref in a leaf directory changes that
        # directory's mtime and must invalidate the cache.
        self._mtime[prefix] = os.path.getmtime(base)
        for name in os.listdir(base):
            p = os.path.join(base, name)
            if os.path.isdir(p):
                self._ReadLoose(prefix + name + '/')
            elif name.endswith('.lock'):
                # Lock file left by an in-progress ref update; not a ref.
                pass
            else:
                self._ReadLoose1(p, prefix + name)

    def _ReadLoose1(self, path, name):
        """Load the single loose ref file ``path`` as ref ``name``.

        A file whose first line starts with ``ref: `` is recorded as a
        symbolic ref; anything else is recorded as an object id.
        Unreadable and empty files are ignored.
        """
        try:
            fd = open(path, 'r')
        except (IOError, OSError):
            return
        try:
            try:
                mtime = os.path.getmtime(path)
            except OSError:
                return
            ref_id = fd.readline()
        finally:
            fd.close()

        if not ref_id:
            return
        # Strip only the newline so a file without one keeps its last
        # character.
        ref_id = ref_id.rstrip('\n')

        if ref_id.startswith('ref: '):
            self._symref[name] = ref_id[5:]
        else:
            self._phyref[name] = ref_id
        self._mtime[name] = mtime