From 30bc354e25cb5ccfbbb276a93ec54195e6b966a4 Mon Sep 17 00:00:00 2001 From: Ian Kasprzak Date: Wed, 23 Dec 2020 10:08:20 -0800 Subject: [PATCH] Enable git trace2 event format logging. Ways to enable logging: 1) Set git's trace2.eventtarget config variable to desired logging path 2) Specify path via --git_trace2_event_log option A unique logfile name is generated per repo execution (based on the repo session-id). Testing: 1) Verified git config 'trace2.eventtarget' and flag enable logging. 2) Verified version/start/end events are expected format: https://git-scm.com/docs/api-trace2#_event_format 3) Unit tests Bug: https://crbug.com/gerrit/13706 Change-Id: I335eba68124055321c4149979bec36ac16ef81eb Tested-by: Ian Kasprzak Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/292262 Reviewed-by: Mike Frysinger Reviewed-by: Jonathan Nieder --- git_trace2_event_log.py | 197 +++++++++++++++++++++++++++++ main.py | 9 ++ tests/test_git_trace2_event_log.py | 169 +++++++++++++++++++++++++ 3 files changed, 375 insertions(+) create mode 100644 git_trace2_event_log.py create mode 100644 tests/test_git_trace2_event_log.py diff --git a/git_trace2_event_log.py b/git_trace2_event_log.py new file mode 100644 index 00000000..4a8e0347 --- /dev/null +++ b/git_trace2_event_log.py @@ -0,0 +1,197 @@ +# Copyright (C) 2020 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provide event logging in the git trace2 EVENT format. + +The git trace2 EVENT format is defined at: +https://www.kernel.org/pub/software/scm/git/docs/technical/api-trace2.html#_event_format +https://git-scm.com/docs/api-trace2#_the_event_format_target + + Usage: + + git_trace_log = EventLog() + git_trace_log.StartEvent() + ... + git_trace_log.ExitEvent() + git_trace_log.Write() +""" + + +import datetime +import json +import os +import sys +import tempfile +import threading + +from git_command import GitCommand, RepoSourceVersion + + +class EventLog(object): + """Event log that records events that occurred during a repo invocation. + + Events are written to the log as a consecutive JSON entries, one per line. + Entries follow the git trace2 EVENT format. + + Each entry contains the following common keys: + - event: The event name + - sid: session-id - Unique string to allow process instance to be identified. + - thread: The thread name. + - time: is the UTC time of the event. + + Valid 'event' names and event specific fields are documented here: + https://git-scm.com/docs/api-trace2#_event_format + """ + + def __init__(self, env=None): + """Initializes the event log.""" + self._log = [] + # Try to get session-id (sid) from environment (setup in repo launcher). + KEY = 'GIT_TRACE2_PARENT_SID' + if env is None: + env = os.environ + + now = datetime.datetime.utcnow() + + # Save both our sid component and the complete sid. + # We use our sid component (self._sid) as the unique filename prefix and + # the full sid (self._full_sid) in the log itself. + self._sid = 'repo-%s-P%08x' % (now.strftime('%Y%m%dT%H%M%SZ'), os.getpid()) + parent_sid = env.get(KEY) + # Append our sid component to the parent sid (if it exists). + if parent_sid is not None: + self._full_sid = parent_sid + '/' + self._sid + else: + self._full_sid = self._sid + + # Set/update the environment variable. + # Environment handling across systems is messy. + try: + env[KEY] = self._full_sid + except UnicodeEncodeError: + env[KEY] = self._full_sid.encode() + + # Add a version event to front of the log. + self._AddVersionEvent() + + @property + def full_sid(self): + return self._full_sid + + def _AddVersionEvent(self): + """Adds a 'version' event at the beginning of current log.""" + version_event = self._CreateEventDict('version') + version_event['evt'] = 2 + version_event['exe'] = RepoSourceVersion() + self._log.insert(0, version_event) + + def _CreateEventDict(self, event_name): + """Returns a dictionary with the common keys/values for git trace2 events. + + Args: + event_name: The event name. + + Returns: + Dictionary with the common event fields populated. + """ + return { + 'event': event_name, + 'sid': self._full_sid, + 'thread': threading.currentThread().getName(), + 'time': datetime.datetime.utcnow().isoformat() + 'Z', + } + + def StartEvent(self): + """Append a 'start' event to the current log.""" + start_event = self._CreateEventDict('start') + start_event['argv'] = sys.argv + self._log.append(start_event) + + def ExitEvent(self, result): + """Append an 'exit' event to the current log. + + Args: + result: Exit code of the event + """ + exit_event = self._CreateEventDict('exit') + + # Consider 'None' success (consistent with event_log result handling). + if result is None: + result = 0 + exit_event['code'] = result + self._log.append(exit_event) + + def Write(self, path=None): + """Writes the log out to a file. + + Log is only written if 'path' or 'git config --get trace2.eventtarget' + provide a valid path to write logs to. + + Logging filename format follows the git trace2 style of being a unique + (exclusive writable) file. + + Args: + path: Path to where logs should be written. + + Returns: + log_path: Path to the log file if log is written, otherwise None + """ + log_path = None + # If no logging path is specified, get the path from 'trace2.eventtarget'. + if path is None: + cmd = ['config', '--get', 'trace2.eventtarget'] + # TODO(https://crbug.com/gerrit/13706): Use GitConfig when it supports + # system git config variables. + p = GitCommand(None, cmd, capture_stdout=True, capture_stderr=True, + bare=True) + retval = p.Wait() + if retval == 0: + # Strip trailing carriage-return in path. + path = p.stdout.rstrip('\n') + elif retval != 1: + # `git config --get` is documented to produce an exit status of `1` if + # the requested variable is not present in the configuration. Report any + # other return value as an error. + print("repo: error: 'git config --get' call failed with return code: %r, stderr: %r" % ( + retval, p.stderr), file=sys.stderr) + + if isinstance(path, str): + # Get absolute path. + path = os.path.abspath(os.path.expanduser(path)) + else: + raise TypeError('path: str required but got %s.' % type(path)) + + # Git trace2 requires a directory to write log to. + + # TODO(https://crbug.com/gerrit/13706): Support file (append) mode also. + if not os.path.isdir(path): + return None + # Use NamedTemporaryFile to generate a unique filename as required by git trace2. + try: + with tempfile.NamedTemporaryFile(mode='x', prefix=self._sid, dir=path, + delete=False) as f: + # TODO(https://crbug.com/gerrit/13706): Support writing events as they + # occur. + for e in self._log: + # Dump in compact encoding mode. + # See 'Compact encoding' in Python docs: + # https://docs.python.org/3/library/json.html#module-json + json.dump(e, f, indent=None, separators=(',', ':')) + f.write('\n') + log_path = f.name + except FileExistsError as err: + print('repo: warning: git trace2 logging failed: %r' % err, + file=sys.stderr) + return None + return log_path diff --git a/main.py b/main.py index e152de4f..a7c3c684 100755 --- a/main.py +++ b/main.py @@ -50,6 +50,7 @@ import event_log from repo_trace import SetTrace from git_command import user_agent from git_config import init_ssh, close_ssh, RepoConfig +from git_trace2_event_log import EventLog from command import InteractiveCommand from command import MirrorSafeCommand from command import GitcAvailableCommand, GitcClientCommand @@ -130,6 +131,8 @@ global_options.add_option('--version', global_options.add_option('--event-log', dest='event_log', action='store', help='filename of event log to append timeline to') +global_options.add_option('--git-trace2-event-log', action='store', + help='directory to write git trace2 event log to') class _Repo(object): @@ -211,6 +214,7 @@ class _Repo(object): file=sys.stderr) return 1 + git_trace2_event_log = EventLog() cmd.repodir = self.repodir cmd.client = RepoClient(cmd.repodir) cmd.manifest = cmd.client.manifest @@ -261,6 +265,8 @@ class _Repo(object): start = time.time() cmd_event = cmd.event_log.Add(name, event_log.TASK_COMMAND, start) cmd.event_log.SetParent(cmd_event) + git_trace2_event_log.StartEvent() + try: cmd.ValidateOptions(copts, cargs) result = cmd.Execute(copts, cargs) @@ -303,10 +309,13 @@ class _Repo(object): cmd.event_log.FinishEvent(cmd_event, finish, result is None or result == 0) + git_trace2_event_log.ExitEvent(result) + if gopts.event_log: cmd.event_log.Write(os.path.abspath( os.path.expanduser(gopts.event_log))) + git_trace2_event_log.Write(gopts.git_trace2_event_log) return result diff --git a/tests/test_git_trace2_event_log.py b/tests/test_git_trace2_event_log.py new file mode 100644 index 00000000..3905630f --- /dev/null +++ b/tests/test_git_trace2_event_log.py @@ -0,0 +1,169 @@ +# Copyright (C) 2020 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unittests for the git_trace2_event_log.py module.""" + +import json +import tempfile +import unittest + +import git_trace2_event_log + + +class EventLogTestCase(unittest.TestCase): + """TestCase for the EventLog module.""" + + PARENT_SID_KEY = 'GIT_TRACE2_PARENT_SID' + PARENT_SID_VALUE = 'parent_sid' + SELF_SID_REGEX = r'repo-\d+T\d+Z-.*' + FULL_SID_REGEX = r'^%s/%s' % (PARENT_SID_VALUE, SELF_SID_REGEX) + + def setUp(self): + """Load the event_log module every time.""" + self._event_log_module = None + # By default we initialize with the expected case where + # repo launches us (so GIT_TRACE2_PARENT_SID is set). + env = { + self.PARENT_SID_KEY: self.PARENT_SID_VALUE, + } + self._event_log_module = git_trace2_event_log.EventLog(env=env) + self._log_data = None + + def verifyCommonKeys(self, log_entry, expected_event_name, full_sid=True): + """Helper function to verify common event log keys.""" + self.assertIn('event', log_entry) + self.assertIn('sid', log_entry) + self.assertIn('thread', log_entry) + self.assertIn('time', log_entry) + + # Do basic data format validation. + self.assertEqual(expected_event_name, log_entry['event']) + if full_sid: + self.assertRegex(log_entry['sid'], self.FULL_SID_REGEX) + else: + self.assertRegex(log_entry['sid'], self.SELF_SID_REGEX) + self.assertRegex(log_entry['time'], r'^\d+-\d+-\d+T\d+:\d+:\d+\.\d+Z$') + + def readLog(self, log_path): + """Helper function to read log data into a list.""" + log_data = [] + with open(log_path, mode='rb') as f: + for line in f: + log_data.append(json.loads(line)) + return log_data + + def test_initial_state_with_parent_sid(self): + """Test initial state when 'GIT_TRACE2_PARENT_SID' is set by parent.""" + self.assertRegex(self._event_log_module.full_sid, self.FULL_SID_REGEX) + + def test_initial_state_no_parent_sid(self): + """Test initial state when 'GIT_TRACE2_PARENT_SID' is not set.""" + # Setup an empty environment dict (no parent sid). + self._event_log_module = git_trace2_event_log.EventLog(env={}) + self.assertRegex(self._event_log_module.full_sid, self.SELF_SID_REGEX) + + def test_version_event(self): + """Test 'version' event data is valid. + + Verify that the 'version' event is written even when no other + events are addded. + + Expected event log: + + """ + with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir: + log_path = self._event_log_module.Write(path=tempdir) + self._log_data = self.readLog(log_path) + + # A log with no added events should only have the version entry. + self.assertEqual(len(self._log_data), 1) + version_event = self._log_data[0] + self.verifyCommonKeys(version_event, expected_event_name='version') + # Check for 'version' event specific fields. + self.assertIn('evt', version_event) + self.assertIn('exe', version_event) + + def test_start_event(self): + """Test and validate 'start' event data is valid. + + Expected event log: + + + """ + self._event_log_module.StartEvent() + with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir: + log_path = self._event_log_module.Write(path=tempdir) + self._log_data = self.readLog(log_path) + + self.assertEqual(len(self._log_data), 2) + start_event = self._log_data[1] + self.verifyCommonKeys(self._log_data[0], expected_event_name='version') + self.verifyCommonKeys(start_event, expected_event_name='start') + # Check for 'start' event specific fields. + self.assertIn('argv', start_event) + self.assertTrue(isinstance(start_event['argv'], list)) + + def test_exit_event_result_none(self): + """Test 'exit' event data is valid when result is None. + + We expect None result to be converted to 0 in the exit event data. + + Expected event log: + + + """ + self._event_log_module.ExitEvent(None) + with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir: + log_path = self._event_log_module.Write(path=tempdir) + self._log_data = self.readLog(log_path) + + self.assertEqual(len(self._log_data), 2) + exit_event = self._log_data[1] + self.verifyCommonKeys(self._log_data[0], expected_event_name='version') + self.verifyCommonKeys(exit_event, expected_event_name='exit') + # Check for 'exit' event specific fields. + self.assertIn('code', exit_event) + # 'None' result should convert to 0 (successful) return code. + self.assertEqual(exit_event['code'], 0) + + def test_exit_event_result_integer(self): + """Test 'exit' event data is valid when result is an integer. + + Expected event log: + + + """ + self._event_log_module.ExitEvent(2) + with tempfile.TemporaryDirectory(prefix='event_log_tests') as tempdir: + log_path = self._event_log_module.Write(path=tempdir) + self._log_data = self.readLog(log_path) + + self.assertEqual(len(self._log_data), 2) + exit_event = self._log_data[1] + self.verifyCommonKeys(self._log_data[0], expected_event_name='version') + self.verifyCommonKeys(exit_event, expected_event_name='exit') + # Check for 'exit' event specific fields. + self.assertIn('code', exit_event) + self.assertEqual(exit_event['code'], 2) + + # TODO(https://crbug.com/gerrit/13706): Add additional test coverage for + # Write() where: + # - path=None (using git config call) + # - path= (raises TypeError) + # - path= (should return None) + # - tempfile.NamedTemporaryFile errors with FileExistsError (should return None) + + +if __name__ == '__main__': + unittest.main()