sync: Added logging of repo sync state and config options for analysis.

git_config.py:
+ Added SyncAnalysisState class, which saves the following data
  into the config object.
  ++ sys.argv, options, superproject's logging data.
  ++ repo.*, branch.* and remote.* parameters from config object.
  ++ current time as synctime.
  ++ Version number of the object.
+ All the keys for the above data are prepended with 'repo.syncstate.'
+ Added GetSyncAnalysisStateData and UpdateSyncAnalysisState methods
  to GitConfig object to save/get the above data.

git_trace2_event_log.py:
+ Added LogConfigEvents method with code from DefParamRepoEvents
  to log events.

sync.py:
+ superproject_logging_data is a dictionary that collects all the
  superproject data that is to be logged as trace2 event.
+ At the end of a sync, the previously saved repo.syncstate.* parameters
  are logged as previous_sync_state. Sync then calls config's
  UpdateSyncAnalysisState to save the current options and superproject
  logging data, and logs the result as current_sync_state.

docs/internal-fs-layout.md:
+ Added documentation explaining the [repo.syncstate ...] sections of
  the .repo/manifests.git/config file.

test_git_config.py:
+ Added unit test for the new methods of GitConfig object.

Tested:
$ ./run_tests

$ repo_dev init --use-superproject -u https://android.googlesource.com/platform/manifest

Tested it by running the following command multiple times.
$ repo_dev sync -j 20
  repo sync has finished successfully

  Verified config file has [repo "syncstate.*"] data saved.

Bug: [google internal] b/188573450
Change-Id: I1f914ce50f3382111b72940ca56de7c41b53d460
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/313123
Tested-by: Raman Tenneti <rtenneti@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Xin Li <delphij@google.com>
This commit is contained in:
Raman Tenneti 2021-07-28 14:36:49 -07:00
parent ae86a46022
commit 7954de13b7
6 changed files with 152 additions and 9 deletions

View File

@ -146,7 +146,12 @@ Instead, you should use standard Git workflows like [git worktree] or
The `.repo/manifests.git/config` file is used to track settings for the entire
repo client checkout.
Most settings use the `[repo]` section to avoid conflicts with git.
Everything under `[repo.syncstate.*]` is used to keep track of sync details for logging
purposes.
User controlled settings are initialized when running `repo init`.
| Setting | `repo init` Option | Use/Meaning |

View File

@ -13,6 +13,7 @@
# limitations under the License.
import contextlib
import datetime
import errno
from http.client import HTTPException
import json
@ -30,6 +31,10 @@ from repo_trace import Trace
from git_command import GitCommand
from git_refs import R_CHANGES, R_HEADS, R_TAGS
# Prefix that is prepended to all the keys of SyncAnalysisState's data
# that is saved in the config.
SYNC_STATE_PREFIX = 'repo.syncstate.'
ID_RE = re.compile(r'^[0-9a-f]{40}$')
REVIEW_CACHE = dict()
@ -262,6 +267,22 @@ class GitConfig(object):
self._branches[b.name] = b
return b
def GetSyncAnalysisStateData(self):
    """Collects the saved sync analysis entries of this config.

    Returns:
      A dictionary with every key/value pair of DumpConfigDict() whose key
      starts with SYNC_STATE_PREFIX.
    """
    sync_state = {}
    for name, setting in self.DumpConfigDict().items():
        if name.startswith(SYNC_STATE_PREFIX):
            sync_state[name] = setting
    return sync_state
def UpdateSyncAnalysisState(self, options, superproject_logging_data):
    """Refreshes the SYNC_STATE_PREFIX* entries with the latest sync data.

    The work is done by constructing a SyncAnalysisState bound to this
    config object.

    Args:
      options: Options passed to sync returned from optparse. See _Options().
      superproject_logging_data: A dictionary of superproject data that is to
          be logged.

    Returns:
      SyncAnalysisState object.
    """
    sync_state = SyncAnalysisState(
        config=self,
        options=options,
        superproject_logging_data=superproject_logging_data,
    )
    return sync_state
def GetSubSections(self, section):
"""List all subsection names matching $section.*.*
"""
@ -717,3 +738,69 @@ class Branch(object):
def _Get(self, key, all_keys=False):
    """Looks up one setting of this branch in the backing config.

    Args:
      key: Setting name; it is qualified as 'branch.<self.name>.<key>'.
      all_keys: Forwarded unchanged to GetString() as |all_keys|.

    Returns:
      Whatever self._config.GetString() returns for the qualified key.
    """
    qualified = 'branch.%s.%s' % (self.name, key)
    return self._config.GetString(qualified, all_keys=all_keys)
class SyncAnalysisState:
    """Configuration options related to logging of sync state for analysis.

    This object is versioned.
    """

    def __init__(self, config, options, superproject_logging_data):
        """Initializes SyncAnalysisState.

        Saves the following data into the |config| object.
        - sys.argv, options, superproject's logging data.
        - repo.*, branch.* and remote.* parameters from config object.
        - Current time as synctime.
        - Version number of the object.

        All the keys saved by this object are prepended with SYNC_STATE_PREFIX.

        Args:
          config: GitConfig object to store all options.
          options: Options passed to sync returned from optparse. See
              _Options().
          superproject_logging_data: A dictionary of superproject data that is
              to be logged.
        """
        self._config = config
        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # timestamp and drop its tzinfo so isoformat() still yields the same
        # '<naive ISO time>Z' text that was saved before.
        now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        self._Set('main.synctime', now.isoformat() + 'Z')
        self._Set('main.version', '1')
        self._Set('sys.argv', sys.argv)
        for key, value in superproject_logging_data.items():
            self._Set(f'superproject.{key}', value)
        for key, value in vars(options).items():
            self._Set(f'options.{key}', value)
        config_items = config.DumpConfigDict().items()
        EXTRACT_NAMESPACES = {'repo', 'branch', 'remote'}
        # Skip keys that already carry the prefix so that the state saved by
        # an earlier sync is not copied into itself.
        self._SetDictionary({k: v for k, v in config_items
                             if not k.startswith(SYNC_STATE_PREFIX) and
                             k.split('.', 1)[0] in EXTRACT_NAMESPACES})

    def _SetDictionary(self, data):
        """Save all key/value pairs of |data| dictionary.

        Args:
          data: A dictionary whose key/value are to be saved.
        """
        for key, value in data.items():
            self._Set(key, value)

    def _Set(self, key, value):
        """Set the |value| for a |key| in the |_config| member.

        |key| is prepended with the value of SYNC_STATE_PREFIX constant.
        Underscores are removed from the last dot-separated component of the
        key (the git variable name), e.g. 'options.mp_update' is saved as
        'options.mpupdate'.

        Args:
          key: Name of the key.
          value: |value| could be of any type. If it is None, nothing is
              written. If it is 'bool', it will be saved as a Boolean and for
              all other types, it will be saved as a String.
        """
        if value is None:
            return
        sync_key = f'{SYNC_STATE_PREFIX}{key}'
        # git variable names may only contain alphanumeric characters and '-',
        # while optparse attribute names (e.g. 'current_branch_only') contain
        # underscores. Only the final component is the variable name; the
        # middle part is a subsection and is left untouched.
        section, _, name = sync_key.rpartition('.')
        sync_key = f"{section}.{name.replace('_', '')}"
        if isinstance(value, str):
            self._config.SetString(sync_key, value)
        elif isinstance(value, bool):
            self._config.SetBoolean(sync_key, value)
        else:
            self._config.SetString(sync_key, str(value))

View File

@ -144,6 +144,19 @@ class EventLog(object):
command_event['subcommands'] = subcommands
self._log.append(command_event)
def LogConfigEvents(self, config, event_dict_name):
    """Append one |event_dict_name| event per entry of |config| to the log.

    Each event is created with _CreateEventDict(event_dict_name) and then
    gets the entry's key stored under 'param' and its value under 'value'.

    Args:
      config: Configuration dictionary.
      event_dict_name: Name of the event dictionary for items to be logged
          under.
    """
    for name, setting in config.items():
        entry = self._CreateEventDict(event_dict_name)
        entry.update(param=name, value=setting)
        self._log.append(entry)
def DefParamRepoEvents(self, config):
"""Append a 'def_param' event for each repo.* config key to the current log.
@ -152,12 +165,7 @@ class EventLog(object):
"""
# Only output the repo.* config parameters.
repo_config = {k: v for k, v in config.items() if k.startswith('repo.')}
for param, value in repo_config.items():
def_param_event = self._CreateEventDict('def_param')
def_param_event['param'] = param
def_param_event['value'] = value
self._log.append(def_param_event)
self.LogConfigEvents(repo_config, 'def_param')
def ErrorEvent(self, msg, fmt):
"""Append a 'error' event to the current log."""

View File

@ -282,7 +282,7 @@ later is required to fix a server side protocol bug.
"""Returns True if current-branch or use-superproject options are enabled."""
return opt.current_branch_only or git_superproject.UseSuperproject(opt, self.manifest)
def _UpdateProjectsRevisionId(self, opt, args, load_local_manifests):
def _UpdateProjectsRevisionId(self, opt, args, load_local_manifests, superproject_logging_data):
"""Update revisionId of every project with the SHA from superproject.
This function updates each project's revisionId with SHA from superproject.
@ -293,6 +293,7 @@ later is required to fix a server side protocol bug.
args: Arguments to pass to GetProjects. See the GetProjects
docstring for details.
load_local_manifests: Whether to load local manifests.
superproject_logging_data: A dictionary of superproject data that is to be logged.
Returns:
Returns path to the overriding manifest file instead of None.
@ -312,6 +313,7 @@ later is required to fix a server side protocol bug.
submodules_ok=opt.fetch_submodules)
update_result = superproject.UpdateProjectsRevisionId(all_projects)
manifest_path = update_result.manifest_path
superproject_logging_data['updatedrevisionid'] = bool(manifest_path)
if manifest_path:
self._ReloadManifest(manifest_path, load_local_manifests)
else:
@ -964,8 +966,14 @@ later is required to fix a server side protocol bug.
self._UpdateManifestProject(opt, mp, manifest_name)
load_local_manifests = not self.manifest.HasLocalManifests
if git_superproject.UseSuperproject(opt, self.manifest):
manifest_name = self._UpdateProjectsRevisionId(opt, args, load_local_manifests) or opt.manifest_name
use_superproject = git_superproject.UseSuperproject(opt, self.manifest)
superproject_logging_data = {
'superproject': use_superproject,
'haslocalmanifests': bool(self.manifest.HasLocalManifests),
}
if use_superproject:
manifest_name = self._UpdateProjectsRevisionId(
opt, args, load_local_manifests, superproject_logging_data) or opt.manifest_name
if self.gitc_manifest:
gitc_manifest_projects = self.GetProjects(args,
@ -1079,6 +1087,15 @@ later is required to fix a server side protocol bug.
file=sys.stderr)
sys.exit(1)
# Log the previous sync analysis state from the config.
self.git_event_log.LogConfigEvents(mp.config.GetSyncAnalysisStateData(),
'previous_sync_state')
# Update and log with the new sync analysis state.
mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data)
self.git_event_log.LogConfigEvents(mp.config.GetSyncAnalysisStateData(),
'current_sync_state')
if not opt.quiet:
print('repo sync has finished successfully.')

View File

@ -11,3 +11,12 @@
intk = 10k
intm = 10m
intg = 10g
[repo "syncstate.main"]
synctime = 2021-07-29T19:18:53.201328Z
version = 1
[repo "syncstate.sys"]
argv = ['/usr/bin/pytest-3']
[repo "syncstate.superproject"]
test = false
[repo "syncstate.options"]
verbose = true

View File

@ -104,6 +104,23 @@ class GitConfigReadOnlyTests(unittest.TestCase):
for key, value in TESTS:
self.assertEqual(value, self.config.GetInt('section.%s' % (key,)))
def test_GetSyncAnalysisStateData(self):
    """Checks that data saved by UpdateSyncAnalysisState() can be read back."""
    options = type('options', (object,), {})()
    options.verbose = 'true'
    superproject_logging_data = {'test': False}
    self.config.UpdateSyncAnalysisState(options, superproject_logging_data)
    sync_data = self.config.GetSyncAnalysisStateData()

    prefix = git_config.SYNC_STATE_PREFIX
    # Values are compared as the strings that the config dump returns.
    expected = {
        'superproject.test': 'false',
        'options.verbose': 'true',
        'main.version': '1',
    }
    for key, value in expected.items():
        self.assertEqual(sync_data[prefix + key], value)
    # The sync time changes on every run, so only its presence is checked.
    self.assertTrue(sync_data[prefix + 'main.synctime'])
class GitConfigReadWriteTests(unittest.TestCase):
"""Read/write tests of the GitConfig class."""