Enable git trace2 event format logging.

Ways to enable logging:
1) Set git's trace2.eventtarget config variable to desired logging path
2) Specify path via --git_trace2_event_log option

A unique logfile name is generated per repo execution (based on the repo session-id).

Testing:
1) Verified git config 'trace2.eventtarget' and flag enable logging.
2) Verified version/start/end events are expected format:
  https://git-scm.com/docs/api-trace2#_event_format
3) Unit tests

Bug: https://crbug.com/gerrit/13706
Change-Id: I335eba68124055321c4149979bec36ac16ef81eb
Tested-by: Ian Kasprzak <iankaz@google.com>
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/292262
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Jonathan Nieder <jrn@google.com>
This commit is contained in:
Ian Kasprzak 2020-12-23 10:08:20 -08:00
parent ce9b6c43b2
commit 30bc354e25
3 changed files with 375 additions and 0 deletions

197
git_trace2_event_log.py Normal file
View File

@ -0,0 +1,197 @@
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provide event logging in the git trace2 EVENT format.
The git trace2 EVENT format is defined at:
https://www.kernel.org/pub/software/scm/git/docs/technical/api-trace2.html#_event_format
https://git-scm.com/docs/api-trace2#_the_event_format_target
Usage:
git_trace_log = EventLog()
git_trace_log.StartEvent()
...
git_trace_log.ExitEvent()
git_trace_log.Write()
"""
import datetime
import json
import os
import sys
import tempfile
import threading
from git_command import GitCommand, RepoSourceVersion
class EventLog(object):
"""Event log that records events that occurred during a repo invocation.
Events are written to the log as a consecutive JSON entries, one per line.
Entries follow the git trace2 EVENT format.
Each entry contains the following common keys:
- event: The event name
- sid: session-id - Unique string to allow process instance to be identified.
- thread: The thread name.
- time: is the UTC time of the event.
Valid 'event' names and event specific fields are documented here:
https://git-scm.com/docs/api-trace2#_event_format
"""
def __init__(self, env=None):
"""Initializes the event log."""
self._log = []
# Try to get session-id (sid) from environment (setup in repo launcher).
KEY = 'GIT_TRACE2_PARENT_SID'
if env is None:
env = os.environ
now = datetime.datetime.utcnow()
# Save both our sid component and the complete sid.
# We use our sid component (self._sid) as the unique filename prefix and
# the full sid (self._full_sid) in the log itself.
self._sid = 'repo-%s-P%08x' % (now.strftime('%Y%m%dT%H%M%SZ'), os.getpid())
parent_sid = env.get(KEY)
# Append our sid component to the parent sid (if it exists).
if parent_sid is not None:
self._full_sid = parent_sid + '/' + self._sid
else:
self._full_sid = self._sid
# Set/update the environment variable.
# Environment handling across systems is messy.
try:
env[KEY] = self._full_sid
except UnicodeEncodeError:
env[KEY] = self._full_sid.encode()
# Add a version event to front of the log.
self._AddVersionEvent()
@property
def full_sid(self):
return self._full_sid
def _AddVersionEvent(self):
"""Adds a 'version' event at the beginning of current log."""
version_event = self._CreateEventDict('version')
version_event['evt'] = 2
version_event['exe'] = RepoSourceVersion()
self._log.insert(0, version_event)
def _CreateEventDict(self, event_name):
"""Returns a dictionary with the common keys/values for git trace2 events.
Args:
event_name: The event name.
Returns:
Dictionary with the common event fields populated.
"""
return {
'event': event_name,
'sid': self._full_sid,
'thread': threading.currentThread().getName(),
'time': datetime.datetime.utcnow().isoformat() + 'Z',
}
def StartEvent(self):
"""Append a 'start' event to the current log."""
start_event = self._CreateEventDict('start')
start_event['argv'] = sys.argv
self._log.append(start_event)
def ExitEvent(self, result):
"""Append an 'exit' event to the current log.
Args:
result: Exit code of the event
"""
exit_event = self._CreateEventDict('exit')
# Consider 'None' success (consistent with event_log result handling).
if result is None:
result = 0
exit_event['code'] = result
self._log.append(exit_event)
def Write(self, path=None):
"""Writes the log out to a file.
Log is only written if 'path' or 'git config --get trace2.eventtarget'
provide a valid path to write logs to.
Logging filename format follows the git trace2 style of being a unique
(exclusive writable) file.
Args:
path: Path to where logs should be written.
Returns:
log_path: Path to the log file if log is written, otherwise None
"""
log_path = None
# If no logging path is specified, get the path from 'trace2.eventtarget'.
if path is None:
cmd = ['config', '--get', 'trace2.eventtarget']
# TODO(https://crbug.com/gerrit/13706): Use GitConfig when it supports
# system git config variables.
p = GitCommand(None, cmd, capture_stdout=True, capture_stderr=True,
bare=True)
retval = p.Wait()
if retval == 0:
# Strip trailing carriage-return in path.
path = p.stdout.rstrip('\n')
elif retval != 1:
# `git config --get` is documented to produce an exit status of `1` if
# the requested variable is not present in the configuration. Report any
# other return value as an error.
print("repo: error: 'git config --get' call failed with return code: %r, stderr: %r" % (
retval, p.stderr), file=sys.stderr)
if isinstance(path, str):
# Get absolute path.
path = os.path.abspath(os.path.expanduser(path))
else:
raise TypeError('path: str required but got %s.' % type(path))
# Git trace2 requires a directory to write log to.
# TODO(https://crbug.com/gerrit/13706): Support file (append) mode also.
if not os.path.isdir(path):
return None
# Use NamedTemporaryFile to generate a unique filename as required by git trace2.
try:
with tempfile.NamedTemporaryFile(mode='x', prefix=self._sid, dir=path,
delete=False) as f:
# TODO(https://crbug.com/gerrit/13706): Support writing events as they
# occur.
for e in self._log:
# Dump in compact encoding mode.
# See 'Compact encoding' in Python docs:
# https://docs.python.org/3/library/json.html#module-json
json.dump(e, f, indent=None, separators=(',', ':'))
f.write('\n')
log_path = f.name
except FileExistsError as err:
print('repo: warning: git trace2 logging failed: %r' % err,
file=sys.stderr)
return None
return log_path

View File

@ -50,6 +50,7 @@ import event_log
from repo_trace import SetTrace
from git_command import user_agent
from git_config import init_ssh, close_ssh, RepoConfig
from git_trace2_event_log import EventLog
from command import InteractiveCommand
from command import MirrorSafeCommand
from command import GitcAvailableCommand, GitcClientCommand
@ -130,6 +131,8 @@ global_options.add_option('--version',
global_options.add_option('--event-log',
dest='event_log', action='store',
help='filename of event log to append timeline to')
global_options.add_option('--git-trace2-event-log', action='store',
help='directory to write git trace2 event log to')
class _Repo(object):
@ -211,6 +214,7 @@ class _Repo(object):
file=sys.stderr)
return 1
git_trace2_event_log = EventLog()
cmd.repodir = self.repodir
cmd.client = RepoClient(cmd.repodir)
cmd.manifest = cmd.client.manifest
@ -261,6 +265,8 @@ class _Repo(object):
start = time.time()
cmd_event = cmd.event_log.Add(name, event_log.TASK_COMMAND, start)
cmd.event_log.SetParent(cmd_event)
git_trace2_event_log.StartEvent()
try:
cmd.ValidateOptions(copts, cargs)
result = cmd.Execute(copts, cargs)
@ -303,10 +309,13 @@ class _Repo(object):
cmd.event_log.FinishEvent(cmd_event, finish,
result is None or result == 0)
git_trace2_event_log.ExitEvent(result)
if gopts.event_log:
cmd.event_log.Write(os.path.abspath(
os.path.expanduser(gopts.event_log)))
git_trace2_event_log.Write(gopts.git_trace2_event_log)
return result

View File

@ -0,0 +1,169 @@
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unittests for the git_trace2_event_log.py module."""
import json
import tempfile
import unittest
import git_trace2_event_log
class EventLogTestCase(unittest.TestCase):
"""TestCase for the EventLog module."""
PARENT_SID_KEY = 'GIT_TRACE2_PARENT_SID'
PARENT_SID_VALUE = 'parent_sid'
SELF_SID_REGEX = r'repo-\d+T\d+Z-.*'
FULL_SID_REGEX = r'^%s/%s' % (PARENT_SID_VALUE, SELF_SID_REGEX)
def setUp(self):
"""Load the event_log module every time."""
self._event_log_module = None
# By default we initialize with the expected case where
# repo launches us (so GIT_TRACE2_PARENT_SID is set).
env = {
self.PARENT_SID_KEY: self.PARENT_SID_VALUE,
}
self._event_log_module = git_trace2_event_log.EventLog(env=env)
self._log_data = None
def verifyCommonKeys(self, log_entry, expected_event_name, full_sid=True):
"""Helper function to verify common event log keys."""
self.assertIn('event', log_entry)
self.assertIn('sid', log_entry)
self.assertIn('thread', log_entry)
self.assertIn('time', log_entry)
# Do basic data format validation.
self.assertEqual(expected_event_name, log_entry['event'])
if full_sid:
self.assertRegex(log_entry['sid'], self.FULL_SID_REGEX)
else:
self.assertRegex(log_entry['sid'], self.SELF_SID_REGEX)
self.assertRegex(log_entry['time'], r'^\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z$')
def readLog(self, log_path):
"""Helper function to read log data into a list."""
log_data = []
with open(log_path, mode='rb') as f:
for line in f:
log_data.append(json.loads(line))
return log_data
def test_initial_state_with_parent_sid(self):
"""Test initial state when 'GIT_TRACE2_PARENT_SID' is set by parent."""
self.assertRegex(self._event_log_module.full_sid, self.FULL_SID_REGEX)
def test_initial_state_no_parent_sid(self):
"""Test initial state when 'GIT_TRACE2_PARENT_SID' is not set."""
# Setup an empty environment dict (no parent sid).
self._event_log_module = git_trace2_event_log.EventLog(env={})
self.assertRegex(self._event_log_module.full_sid, self.SELF_SID_REGEX)
def test_version_event(self):
"""Test 'version' event data is valid.
Verify that the 'version' event is written even when no other
events are addded.
Expected event log:
<version event>
"""
with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
log_path = self._event_log_module.Write(path=tempdir)
self._log_data = self.readLog(log_path)
# A log with no added events should only have the version entry.
self.assertEqual(len(self._log_data), 1)
version_event = self._log_data[0]
self.verifyCommonKeys(version_event, expected_event_name='version')
# Check for 'version' event specific fields.
self.assertIn('evt', version_event)
self.assertIn('exe', version_event)
def test_start_event(self):
"""Test and validate 'start' event data is valid.
Expected event log:
<version event>
<start event>
"""
self._event_log_module.StartEvent()
with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
log_path = self._event_log_module.Write(path=tempdir)
self._log_data = self.readLog(log_path)
self.assertEqual(len(self._log_data), 2)
start_event = self._log_data[1]
self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
self.verifyCommonKeys(start_event, expected_event_name='start')
# Check for 'start' event specific fields.
self.assertIn('argv', start_event)
self.assertTrue(isinstance(start_event['argv'], list))
def test_exit_event_result_none(self):
"""Test 'exit' event data is valid when result is None.
We expect None result to be converted to 0 in the exit event data.
Expected event log:
<version event>
<exit event>
"""
self._event_log_module.ExitEvent(None)
with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
log_path = self._event_log_module.Write(path=tempdir)
self._log_data = self.readLog(log_path)
self.assertEqual(len(self._log_data), 2)
exit_event = self._log_data[1]
self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
self.verifyCommonKeys(exit_event, expected_event_name='exit')
# Check for 'exit' event specific fields.
self.assertIn('code', exit_event)
# 'None' result should convert to 0 (successful) return code.
self.assertEqual(exit_event['code'], 0)
def test_exit_event_result_integer(self):
"""Test 'exit' event data is valid when result is an integer.
Expected event log:
<version event>
<exit event>
"""
self._event_log_module.ExitEvent(2)
with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir:
log_path = self._event_log_module.Write(path=tempdir)
self._log_data = self.readLog(log_path)
self.assertEqual(len(self._log_data), 2)
exit_event = self._log_data[1]
self.verifyCommonKeys(self._log_data[0], expected_event_name='version')
self.verifyCommonKeys(exit_event, expected_event_name='exit')
# Check for 'exit' event specific fields.
self.assertIn('code', exit_event)
self.assertEqual(exit_event['code'], 2)
# TODO(https://crbug.com/gerrit/13706): Add additional test coverage for
# Write() where:
# - path=None (using git config call)
# - path=<Non-String type> (raises TypeError)
# - path=<Non-Directory> (should return None)
# - tempfile.NamedTemporaryFile errors with FileExistsError (should return None)
if __name__ == '__main__':
unittest.main()