From ab2d3211043e2cb42a55f56e5abf69d23103c105 Mon Sep 17 00:00:00 2001 From: Kuang-che Wu Date: Wed, 6 Nov 2024 13:03:42 +0800 Subject: [PATCH] sync: fix connection error on macOS With a large number of sync workers, the sync process may fail on macOS due to connection errors. The root cause is that multiple workers may attempt to connect to the multiprocessing manager server at the same time when handling the first job. This can lead to connection failures if there are too many pending connections, exceeding the socket listening backlog. Bug: 377538810 Change-Id: I1924d318d076ca3be61d75daa37bfa8d7dc23ed7 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/441541 Tested-by: Josip Sokcevic Commit-Queue: Josip Sokcevic Reviewed-by: Josip Sokcevic --- subcmds/sync.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/subcmds/sync.py b/subcmds/sync.py index decf559b..8e4dde6b 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -821,6 +821,16 @@ later is required to fix a server side protocol bug. jobs = jobs_str(len(items)) return f"{jobs} | {elapsed_str(elapsed)} {earliest_proj}" + @classmethod + def InitWorker(cls): + # Force connect to the manager server now. + # This is good because workers are initialized one by one. Without this, + # multiple workers may connect to the manager when handling the first + # job at the same time. Then the connection may fail if too many + # connections are pending and exceed the socket listening backlog, + # especially on macOS. + len(cls.get_parallel_context()["sync_dict"]) + def _Fetch(self, projects, opt, err_event, ssh_proxy, errors): ret = True @@ -913,6 +923,7 @@ later is required to fix a server side protocol bug. # idle while other workers still have more than one job in # their chunk queue. chunksize=1, + initializer=self.InitWorker, ) finally: sync_event.set()