diff --git a/git_command.py b/git_command.py index 54db4d16..f5352ea0 100644 --- a/git_command.py +++ b/git_command.py @@ -98,13 +98,15 @@ class _GitCall(object): return fun git = _GitCall() -def git_require(min_version, fail=False): +def git_require(min_version, fail=False, msg=''): git_version = git.version_tuple() if min_version <= git_version: return True if fail: need = '.'.join(map(str, min_version)) - print('fatal: git %s or later required' % need, file=sys.stderr) + if msg: + msg = ' for ' + msg + print('fatal: git %s or later required%s' % (need, msg), file=sys.stderr) sys.exit(1) return False diff --git a/manifest_xml.py b/manifest_xml.py index 94d22b3c..3814a25a 100644 --- a/manifest_xml.py +++ b/manifest_xml.py @@ -413,6 +413,12 @@ class XmlManifest(object): self._Load() return self._manifest_server + @property + def CloneFilter(self): + if self.manifestProject.config.GetBoolean('repo.partialclone'): + return self.manifestProject.config.GetString('repo.clonefilter') + return None + @property def IsMirror(self): return self.manifestProject.config.GetBoolean('repo.mirror') diff --git a/project.py b/project.py index 67d3bb20..9702e9da 100755 --- a/project.py +++ b/project.py @@ -1226,7 +1226,8 @@ class Project(object): archive=False, optimized_fetch=False, prune=False, - submodules=False): + submodules=False, + clone_filter=None): """Perform only the network IO portion of the sync process. Local working directory/branch state is not affected. 
""" @@ -1309,7 +1310,8 @@ class Project(object): not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir, current_branch_only=current_branch_only, no_tags=no_tags, prune=prune, depth=depth, - submodules=submodules, force_sync=force_sync)): + submodules=submodules, force_sync=force_sync, + clone_filter=clone_filter)): return False mp = self.manifest.manifestProject @@ -1959,7 +1961,8 @@ class Project(object): prune=False, depth=None, submodules=False, - force_sync=False): + force_sync=False, + clone_filter=None): is_sha1 = False tag_name = None @@ -2050,6 +2053,11 @@ class Project(object): cmd = ['fetch'] + if clone_filter: + git_require((2, 19, 0), fail=True, msg='partial clones') + cmd.append('--filter=%s' % clone_filter) + self.config.SetString('extensions.partialclone', self.remote.name) + if depth: cmd.append('--depth=%s' % depth) else: @@ -2150,12 +2158,12 @@ class Project(object): return self._RemoteFetch(name=name, current_branch_only=current_branch_only, initial=False, quiet=quiet, alt_dir=alt_dir, - depth=None) + depth=None, clone_filter=clone_filter) else: # Avoid infinite recursion: sync all branches with depth set to None return self._RemoteFetch(name=name, current_branch_only=False, initial=False, quiet=quiet, alt_dir=alt_dir, - depth=None) + depth=None, clone_filter=clone_filter) return ok diff --git a/repo b/repo index 8ed147df..649c4e48 100755 --- a/repo +++ b/repo @@ -199,6 +199,13 @@ group.add_option('--dissociate', group.add_option('--depth', type='int', default=None, dest='depth', help='create a shallow clone with given depth; see git clone') +group.add_option('--partial-clone', action='store_true', + dest='partial_clone', + help='perform partial clone (https://git-scm.com/' + 'docs/gitrepository-layout#_code_partialclone_code)') +group.add_option('--clone-filter', action='store', default='blob:none', + dest='clone_filter', + help='filter for use with --partial-clone [default: %default]') group.add_option('--archive', dest='archive', 
action='store_true', help='checkout an archive instead of a git repository for ' diff --git a/subcmds/init.py b/subcmds/init.py index 1c809ab4..eaa6da50 100644 --- a/subcmds/init.py +++ b/subcmds/init.py @@ -115,6 +115,13 @@ to update the working directory files. g.add_option('--depth', type='int', default=None, dest='depth', help='create a shallow clone with given depth; see git clone') + g.add_option('--partial-clone', action='store_true', + dest='partial_clone', + help='perform partial clone (https://git-scm.com/' + 'docs/gitrepository-layout#_code_partialclone_code)') + g.add_option('--clone-filter', action='store', default='blob:none', + dest='clone_filter', + help='filter for use with --partial-clone [default: %default]') g.add_option('--archive', dest='archive', action='store_true', help='checkout an archive instead of a git repository for ' @@ -253,13 +260,25 @@ to update the working directory files. 'in another location.', file=sys.stderr) sys.exit(1) + if opt.partial_clone: + if opt.mirror: + print('fatal: --mirror and --partial-clone are mutually exclusive', + file=sys.stderr) + sys.exit(1) + m.config.SetString('repo.partialclone', 'true') + if opt.clone_filter: + m.config.SetString('repo.clonefilter', opt.clone_filter) + else: + opt.clone_filter = None + if opt.submodules: m.config.SetString('repo.submodules', 'true') if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet, clone_bundle=not opt.no_clone_bundle, current_branch_only=opt.current_branch_only, - no_tags=opt.no_tags, submodules=opt.submodules): + no_tags=opt.no_tags, submodules=opt.submodules, + clone_filter=opt.clone_filter): r = m.GetRemote(m.remote.name) print('fatal: cannot obtain manifest %s' % r.url, file=sys.stderr) diff --git a/subcmds/sync.py b/subcmds/sync.py index 02cd3879..b752cfbe 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -85,6 +85,9 @@ class _FetchError(Exception): """Internal error thrown in _FetchHelper() when we don't want stack trace.""" pass +class 
_CheckoutError(Exception): + """Internal error thrown in _CheckoutOne() when we don't want stack trace.""" + class Sync(Command, MirrorSafeCommand): jobs = 1 common = True @@ -266,7 +269,7 @@ later is required to fix a server side protocol bug. help=SUPPRESS_HELP) def _FetchProjectList(self, opt, projects, sem, *args, **kwargs): - """Main function of the fetch threads when jobs are > 1. + """Main function of the fetch threads. Delegates most of the work to _FetchHelper. @@ -286,7 +289,8 @@ later is required to fix a server side protocol bug. finally: sem.release() - def _FetchHelper(self, opt, project, lock, fetched, pm, err_event): + def _FetchHelper(self, opt, project, lock, fetched, pm, err_event, + clone_filter): """Fetch git objects for a single project. Args: @@ -300,6 +304,7 @@ later is required to fix a server side protocol bug. lock held). err_event: We'll set this event in the case of an error (after printing out info about the error). + clone_filter: Filter for use in a partial clone. Returns: Whether the fetch was successful. @@ -312,7 +317,6 @@ later is required to fix a server side protocol bug. # Encapsulate everything in a try/except/finally so that: # - We always set err_event in the case of an exception. - # - We always make sure we call sem.release(). # - We always make sure we unlock the lock if we locked it. start = time.time() success = False @@ -325,7 +329,8 @@ later is required to fix a server side protocol bug. clone_bundle=not opt.no_clone_bundle, no_tags=opt.no_tags, archive=self.manifest.IsArchive, optimized_fetch=opt.optimized_fetch, - prune=opt.prune) + prune=opt.prune, + clone_filter=clone_filter) self._fetch_times.Set(project, time.time() - start) # Lock around all the rest of the code, since printing, updating a set @@ -389,7 +394,8 @@ later is required to fix a server side protocol bug. 
lock=lock, fetched=fetched, pm=pm, - err_event=err_event) + err_event=err_event, + clone_filter=self.manifest.CloneFilter) if self.jobs > 1: t = _threading.Thread(target = self._FetchProjectList, kwargs = kwargs) @@ -416,6 +422,148 @@ later is required to fix a server side protocol bug. return fetched + def _CheckoutWorker(self, opt, sem, project, *args, **kwargs): + """Main function of the checkout threads. + + Delegates most of the work to _CheckoutOne. + + Args: + opt: Program options returned from optparse. See _Options(). + project: Project object for the project to checkout. + sem: We'll release() this semaphore when we exit so that another thread + can be started up. + *args, **kwargs: Remaining arguments to pass to _CheckoutOne. See the + _CheckoutOne docstring for details. + """ + try: + success = self._CheckoutOne(opt, project, *args, **kwargs) + if not success: + sys.exit(1) + finally: + sem.release() + + def _CheckoutOne(self, opt, project, lock, pm, err_event): + """Checkout work tree for one project + + Args: + opt: Program options returned from optparse. See _Options(). + project: Project object for the project to checkout. + lock: Lock for accessing objects that are shared amongst multiple + _CheckoutWorker() threads. + pm: Instance of a Progress object. We will call pm.update() (with our + lock held). + err_event: We'll set this event in the case of an error (after printing + out info about the error). + + Returns: + Whether the checkout was successful. + """ + # We'll set to true once we've locked the lock. + did_lock = False + + if not opt.quiet: + print('Checking out project %s' % project.name) + + # Encapsulate everything in a try/except/finally so that: + # - We always set err_event in the case of an exception. + # - We always make sure we unlock the lock if we locked it. 
+ start = time.time() + syncbuf = SyncBuffer(self.manifest.manifestProject.config, + detach_head=opt.detach_head) + success = False + try: + try: + project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync) + success = syncbuf.Finish() + + # Lock around all the rest of the code, since printing and + # Progress.update() are not thread safe. + lock.acquire() + did_lock = True + + if not success: + err_event.set() + print('error: Cannot checkout %s' % (project.name), + file=sys.stderr) + raise _CheckoutError() + + pm.update() + except _CheckoutError: + pass + except Exception as e: + print('error: Cannot checkout %s: %s: %s' % + (project.name, type(e).__name__, str(e)), + file=sys.stderr) + err_event.set() + raise + finally: + if did_lock: + lock.release() + finish = time.time() + self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL, + start, finish, success) + + return success + + def _Checkout(self, all_projects, opt): + """Checkout projects listed in all_projects + + Args: + all_projects: List of all projects that should be checked out. + opt: Program options returned from optparse. See _Options(). + """ + + # Perform checkouts in multiple threads when we are using partial clone. + # Without partial clone, all needed git objects are already downloaded, + # in this situation it's better to use only one thread because the checkout + # would be mostly disk I/O; with partial clone, the objects are only + # downloaded when demanded (at checkout time), which is similar to the + # Sync_NetworkHalf case and parallelism would be helpful. + if self.manifest.CloneFilter: + syncjobs = self.jobs + else: + syncjobs = 1 + + lock = _threading.Lock() + pm = Progress('Syncing work tree', len(all_projects)) + + threads = set() + sem = _threading.Semaphore(syncjobs) + err_event = _threading.Event() + + for project in all_projects: + # Check for any errors before running any more tasks. + # ...we'll let existing threads finish, though. 
+ if err_event.isSet() and not opt.force_broken: + break + + sem.acquire() + if project.worktree: + kwargs = dict(opt=opt, + sem=sem, + project=project, + lock=lock, + pm=pm, + err_event=err_event) + if syncjobs > 1: + t = _threading.Thread(target=self._CheckoutWorker, + kwargs=kwargs) + # Ensure that Ctrl-C will not freeze the repo process. + t.daemon = True + threads.add(t) + t.start() + else: + self._CheckoutWorker(**kwargs) + + for t in threads: + t.join() + + pm.end() + # If we saw an error, exit with code 1 so that other scripts can check. + if err_event.isSet(): + print('\nerror: Exited sync due to checkout errors', file=sys.stderr) + sys.exit(1) + def _GCProjects(self, projects): gc_gitdirs = {} for project in projects: @@ -746,7 +894,8 @@ later is required to fix a server side protocol bug. current_branch_only=opt.current_branch_only, no_tags=opt.no_tags, optimized_fetch=opt.optimized_fetch, - submodules=self.manifest.HasSubmodules) + submodules=self.manifest.HasSubmodules, + clone_filter=self.manifest.CloneFilter) finish = time.time() self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK, start, finish, success) @@ -846,20 +995,7 @@ later is required to fix a server side protocol bug. if self.UpdateProjectList(opt): sys.exit(1) - syncbuf = SyncBuffer(mp.config, - detach_head = opt.detach_head) - pm = Progress('Syncing work tree', len(all_projects)) - for project in all_projects: - pm.update() - if project.worktree: - start = time.time() - project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync) - self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL, - start, time.time(), syncbuf.Recently()) - pm.end() - print(file=sys.stderr) - if not syncbuf.Finish(): - sys.exit(1) + self._Checkout(all_projects, opt) # If there's a notice that's supposed to print at the end of the sync, print # it now...