sync: Added logging of repo sync state and config options for analysis.

git_config.py:
+ Added SyncAnalysisState class, which saves the following data
  into the config object.
  ++ sys.argv, options, superproject's logging data.
  ++ repo.*, branch.* and remote.* parameters from config object.
  ++ current time as synctime.
  ++ Version number of the object.
+ All the keys for the above data are prepended with 'repo.syncstate.'
+ Added GetSyncAnalysisStateData and UpdateSyncAnalysisState methods
  to GitConfig object to save/get the above data.

git_trace2_event_log.py:
+ Added LogConfigEvents method with code from DefParamRepoEvents
  to log events.

sync.py:
+ superproject_logging_data is a dictionary that collects all the
  superproject data that is to be logged as trace2 event.
+ At the end of a sync, the previously saved syncstate.* parameters are
  logged as previous_sync_state. The config's UpdateSyncAnalysisState is
  then called to save the current options and superproject logging data,
  which are logged as current_sync_state.

docs/internal-fs-layout.md:
+ Added doc string explaining [repo.syncstate ...] sections of
  .repo/manifests.git/config file.

test_git_config.py:
+ Added unit test for the new methods of GitConfig object.

Tested:
$ ./run_tests

$ repo_dev init --use-superproject -u https://android.googlesource.com/platform/manifest

Tested it by running the following command multiple times.
$ repo_dev sync -j 20
  repo sync has finished successfully

  Verified that the config file has the [repo "syncstate.*"] data saved.

Bug: [google internal] b/188573450
Change-Id: I1f914ce50f3382111b72940ca56de7c41b53d460
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/313123
Tested-by: Raman Tenneti <rtenneti@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Xin Li <delphij@google.com>
This commit is contained in:
Raman Tenneti 2021-07-28 14:36:49 -07:00
parent ae86a46022
commit 7954de13b7
6 changed files with 152 additions and 9 deletions

View File

@ -146,7 +146,12 @@ Instead, you should use standard Git workflows like [git worktree] or
The `.repo/manifests.git/config` file is used to track settings for the entire The `.repo/manifests.git/config` file is used to track settings for the entire
repo client checkout. repo client checkout.
Most settings use the `[repo]` section to avoid conflicts with git. Most settings use the `[repo]` section to avoid conflicts with git.
Everything under `[repo.syncstate.*]` is used to keep track of sync details for logging
purposes.
User controlled settings are initialized when running `repo init`. User controlled settings are initialized when running `repo init`.
| Setting | `repo init` Option | Use/Meaning | | Setting | `repo init` Option | Use/Meaning |

View File

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import contextlib import contextlib
import datetime
import errno import errno
from http.client import HTTPException from http.client import HTTPException
import json import json
@ -30,6 +31,10 @@ from repo_trace import Trace
from git_command import GitCommand from git_command import GitCommand
from git_refs import R_CHANGES, R_HEADS, R_TAGS from git_refs import R_CHANGES, R_HEADS, R_TAGS
# Prefix that is prepended to all the keys of SyncAnalysisState's data
# that is saved in the config.
SYNC_STATE_PREFIX = 'repo.syncstate.'
ID_RE = re.compile(r'^[0-9a-f]{40}$') ID_RE = re.compile(r'^[0-9a-f]{40}$')
REVIEW_CACHE = dict() REVIEW_CACHE = dict()
@ -262,6 +267,22 @@ class GitConfig(object):
self._branches[b.name] = b self._branches[b.name] = b
return b return b
def GetSyncAnalysisStateData(self):
  """Collects the saved sync analysis state entries from this config.

  Returns:
    A dictionary holding every DumpConfigDict() item whose key starts with
    SYNC_STATE_PREFIX.
  """
  sync_state = {}
  for name, value in self.DumpConfigDict().items():
    if name.startswith(SYNC_STATE_PREFIX):
      sync_state[name] = value
  return sync_state
def UpdateSyncAnalysisState(self, options, superproject_logging_data):
  """Refresh the SYNC_STATE_PREFIX* entries of this config with the latest sync data.

  Args:
    options: Options passed to sync returned from optparse. See _Options().
    superproject_logging_data: A dictionary of superproject data that is to be logged.

  Returns:
    The SyncAnalysisState object constructed for this config.
  """
  sync_state = SyncAnalysisState(self, options, superproject_logging_data)
  return sync_state
def GetSubSections(self, section): def GetSubSections(self, section):
"""List all subsection names matching $section.*.* """List all subsection names matching $section.*.*
""" """
@ -717,3 +738,69 @@ class Branch(object):
def _Get(self, key, all_keys=False): def _Get(self, key, all_keys=False):
key = 'branch.%s.%s' % (self.name, key) key = 'branch.%s.%s' % (self.name, key)
return self._config.GetString(key, all_keys=all_keys) return self._config.GetString(key, all_keys=all_keys)
class SyncAnalysisState:
  """Configuration options related to logging of sync state for analysis.

  This object is versioned.
  """

  def __init__(self, config, options, superproject_logging_data):
    """Initializes SyncAnalysisState.

    Saves the following data into the |config| object.
    - sys.argv, options, superproject's logging data.
    - repo.*, branch.* and remote.* parameters from config object.
    - Current time as synctime.
    - Version number of the object.

    All the keys saved by this object are prepended with SYNC_STATE_PREFIX.
    Entries whose value is None are not saved.

    Args:
      config: GitConfig object to store all options.
      options: Options passed to sync returned from optparse. See _Options().
      superproject_logging_data: A dictionary of superproject data that is to be logged.
    """
    self._config = config
    # Take an aware UTC "now" and drop the tzinfo so isoformat() emits no
    # offset; the explicit 'Z' suffix then marks the value as UTC. This
    # yields the same text as the deprecated datetime.utcnow().
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    self._Set('main.synctime', now.isoformat() + 'Z')
    self._Set('main.version', '1')
    self._Set('sys.argv', sys.argv)
    for key, value in superproject_logging_data.items():
      self._Set(f'superproject.{key}', value)
    for key, value in options.__dict__.items():
      self._Set(f'options.{key}', value)
    config_items = config.DumpConfigDict().items()
    extract_namespaces = {'repo', 'branch', 'remote'}
    # Skip entries that are already sync state so previously saved data is
    # not nested under the prefix again.
    self._SetDictionary({k: v for k, v in config_items
                         if not k.startswith(SYNC_STATE_PREFIX) and
                         k.split('.', 1)[0] in extract_namespaces})

  def _SetDictionary(self, data):
    """Save all key/value pairs of |data| dictionary.

    Args:
      data: A dictionary whose key/value are to be saved.
    """
    for key, value in data.items():
      self._Set(key, value)

  def _Set(self, key, value):
    """Set the |value| for a |key| in the |_config| member.

    |key| is prepended with the value of SYNC_STATE_PREFIX constant. Any
    underscores in the final dot-separated component of the resulting key
    are removed, because git does not accept '_' in a config variable name.

    Args:
      key: Name of the key.
      value: |value| could be of any type. If it is 'bool', it will be saved
             as a Boolean and for all other types, it will be saved as a String.
             A value of None is ignored and nothing is saved.
    """
    if value is None:
      return
    # Only the variable name (the text after the last dot) is restricted to
    # alphanumerics and '-', so option names such as 'current_branch_only'
    # must be sanitized. Subsection text (e.g. a branch name) is left alone.
    section, dot, name = f'{SYNC_STATE_PREFIX}{key}'.rpartition('.')
    sync_key = section + dot + name.replace('_', '')
    if isinstance(value, str):
      self._config.SetString(sync_key, value)
    elif isinstance(value, bool):
      self._config.SetBoolean(sync_key, value)
    else:
      self._config.SetString(sync_key, str(value))

View File

@ -144,6 +144,19 @@ class EventLog(object):
command_event['subcommands'] = subcommands command_event['subcommands'] = subcommands
self._log.append(command_event) self._log.append(command_event)
def LogConfigEvents(self, config, event_dict_name):
  """Log one |event_dict_name| event per entry of |config|.

  Each event is created with _CreateEventDict(), gets the config key stored
  under 'param' and the config value under 'value', and is appended to the
  current log.

  Args:
    config: Configuration dictionary.
    event_dict_name: Name of the event dictionary for items to be logged under.
  """
  for name, setting in config.items():
    record = self._CreateEventDict(event_dict_name)
    record['param'] = name
    record['value'] = setting
    self._log.append(record)
def DefParamRepoEvents(self, config): def DefParamRepoEvents(self, config):
"""Append a 'def_param' event for each repo.* config key to the current log. """Append a 'def_param' event for each repo.* config key to the current log.
@ -152,12 +165,7 @@ class EventLog(object):
""" """
# Only output the repo.* config parameters. # Only output the repo.* config parameters.
repo_config = {k: v for k, v in config.items() if k.startswith('repo.')} repo_config = {k: v for k, v in config.items() if k.startswith('repo.')}
self.LogConfigEvents(repo_config, 'def_param')
for param, value in repo_config.items():
def_param_event = self._CreateEventDict('def_param')
def_param_event['param'] = param
def_param_event['value'] = value
self._log.append(def_param_event)
def ErrorEvent(self, msg, fmt): def ErrorEvent(self, msg, fmt):
"""Append a 'error' event to the current log.""" """Append a 'error' event to the current log."""

View File

@ -282,7 +282,7 @@ later is required to fix a server side protocol bug.
"""Returns True if current-branch or use-superproject options are enabled.""" """Returns True if current-branch or use-superproject options are enabled."""
return opt.current_branch_only or git_superproject.UseSuperproject(opt, self.manifest) return opt.current_branch_only or git_superproject.UseSuperproject(opt, self.manifest)
def _UpdateProjectsRevisionId(self, opt, args, load_local_manifests): def _UpdateProjectsRevisionId(self, opt, args, load_local_manifests, superproject_logging_data):
"""Update revisionId of every project with the SHA from superproject. """Update revisionId of every project with the SHA from superproject.
This function updates each project's revisionId with SHA from superproject. This function updates each project's revisionId with SHA from superproject.
@ -293,6 +293,7 @@ later is required to fix a server side protocol bug.
args: Arguments to pass to GetProjects. See the GetProjects args: Arguments to pass to GetProjects. See the GetProjects
docstring for details. docstring for details.
load_local_manifests: Whether to load local manifests. load_local_manifests: Whether to load local manifests.
superproject_logging_data: A dictionary of superproject data that is to be logged.
Returns: Returns:
Returns path to the overriding manifest file instead of None. Returns path to the overriding manifest file instead of None.
@ -312,6 +313,7 @@ later is required to fix a server side protocol bug.
submodules_ok=opt.fetch_submodules) submodules_ok=opt.fetch_submodules)
update_result = superproject.UpdateProjectsRevisionId(all_projects) update_result = superproject.UpdateProjectsRevisionId(all_projects)
manifest_path = update_result.manifest_path manifest_path = update_result.manifest_path
superproject_logging_data['updatedrevisionid'] = bool(manifest_path)
if manifest_path: if manifest_path:
self._ReloadManifest(manifest_path, load_local_manifests) self._ReloadManifest(manifest_path, load_local_manifests)
else: else:
@ -964,8 +966,14 @@ later is required to fix a server side protocol bug.
self._UpdateManifestProject(opt, mp, manifest_name) self._UpdateManifestProject(opt, mp, manifest_name)
load_local_manifests = not self.manifest.HasLocalManifests load_local_manifests = not self.manifest.HasLocalManifests
if git_superproject.UseSuperproject(opt, self.manifest): use_superproject = git_superproject.UseSuperproject(opt, self.manifest)
manifest_name = self._UpdateProjectsRevisionId(opt, args, load_local_manifests) or opt.manifest_name superproject_logging_data = {
'superproject': use_superproject,
'haslocalmanifests': bool(self.manifest.HasLocalManifests),
}
if use_superproject:
manifest_name = self._UpdateProjectsRevisionId(
opt, args, load_local_manifests, superproject_logging_data) or opt.manifest_name
if self.gitc_manifest: if self.gitc_manifest:
gitc_manifest_projects = self.GetProjects(args, gitc_manifest_projects = self.GetProjects(args,
@ -1079,6 +1087,15 @@ later is required to fix a server side protocol bug.
file=sys.stderr) file=sys.stderr)
sys.exit(1) sys.exit(1)
# Log the previous sync analysis state from the config.
self.git_event_log.LogConfigEvents(mp.config.GetSyncAnalysisStateData(),
'previous_sync_state')
# Update and log with the new sync analysis state.
mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data)
self.git_event_log.LogConfigEvents(mp.config.GetSyncAnalysisStateData(),
'current_sync_state')
if not opt.quiet: if not opt.quiet:
print('repo sync has finished successfully.') print('repo sync has finished successfully.')

View File

@ -11,3 +11,12 @@
intk = 10k intk = 10k
intm = 10m intm = 10m
intg = 10g intg = 10g
[repo "syncstate.main"]
synctime = 2021-07-29T19:18:53.201328Z
version = 1
[repo "syncstate.sys"]
argv = ['/usr/bin/pytest-3']
[repo "syncstate.superproject"]
test = false
[repo "syncstate.options"]
verbose = true

View File

@ -104,6 +104,23 @@ class GitConfigReadOnlyTests(unittest.TestCase):
for key, value in TESTS: for key, value in TESTS:
self.assertEqual(value, self.config.GetInt('section.%s' % (key,))) self.assertEqual(value, self.config.GetInt('section.%s' % (key,)))
def test_GetSyncAnalysisStateData(self):
  """Check that saved sync analysis state can be read back from the config."""
  options = type('options', (object,), {})()
  options.verbose = 'true'
  superproject_logging_data = {'test': False}
  self.config.UpdateSyncAnalysisState(options, superproject_logging_data)
  sync_data = self.config.GetSyncAnalysisStateData()
  prefix = git_config.SYNC_STATE_PREFIX
  # Expected string forms of the saved values, keyed without the prefix.
  expected = {
      'superproject.test': 'false',
      'options.verbose': 'true',
      'main.version': '1',
  }
  for key, value in expected.items():
    self.assertEqual(sync_data[prefix + key], value)
  # The sync time changes on every run, so only check that it is present.
  self.assertTrue(sync_data[prefix + 'main.synctime'])
class GitConfigReadWriteTests(unittest.TestCase): class GitConfigReadWriteTests(unittest.TestCase):
"""Read/write tests of the GitConfig class.""" """Read/write tests of the GitConfig class."""