Added changed_lines.py: extracts changed lines from git diff
Added changed_lines.py, which takes `git diff` output as input and
extracts, for every modified file, the lines that were added or changed.
We're hoping to use this in conjunction with lcov_parser.py
(see https://kunit-review.googlesource.com/c/prow-presubmit/+/2269)
to compute incremental coverage for KUnit.
Signed-off-by: Darya Verzhbinsky <daryaver@google.com>
Change-Id: Iaff0a6089f0f020bda0918f5bdcb29a8c3b7d922
diff --git a/BUILD.bazel b/BUILD.bazel
index a6af0ed..5de783c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -64,3 +64,15 @@
python_version = "PY3",
deps = ["lcov_parser/lcov_parser.py"],
)
+
+py_library(
+    name = "changed_lines",
+    srcs = ["lcov_parser/changed_lines.py"],  # the module added by this change
+)
+
+py_test(  # no `main` is given, so the name must match the test file in srcs
+    name = "changed_lines_test",
+    srcs = ["lcov_parser/changed_lines_test.py"],
+    python_version = "PY3",
+    deps = [":changed_lines"],
+)
diff --git a/lcov_parser/changed_lines.py b/lcov_parser/changed_lines.py
new file mode 100644
index 0000000..1281248
--- /dev/null
+++ b/lcov_parser/changed_lines.py
@@ -0,0 +1,102 @@
+"""This module parses `git diff` output to determine the added/changed
+lines per file.
+"""
+from typing import Tuple, Text, List, Dict
+import re
+
+
+class GitDiffSyntaxError(Exception):
+    """Signals that `git diff` command output could not be parsed.
+
+    Raised when a file header or a hunk header is not in the expected format.
+    """
+
+
+def from_diff(git_diff: Text) -> Dict[Text, List[int]]:
+    """Converts `git diff` output into a map of files to their changed lines
+    (in the new version).
+
+    Args:
+        git_diff: the complete text printed by `git diff`.
+
+    Returns:
+        A dict from file name to the sorted line numbers that were added or
+        modified. Files with no added lines (e.g. deleted files) are omitted.
+
+    Raises:
+        GitDiffSyntaxError if a file header or hunk header cannot be parsed.
+    """
+    # Only a `diff --git ` at the start of a line begins a new file section; a
+    # plain substring split would also cut through changed lines that merely
+    # mention that text (such as this module's own source or test fixtures).
+    # The first piece is whatever precedes the first header and is dropped.
+    sections = re.split(r'^diff --git ', git_diff.strip(), flags=re.MULTILINE)[1:]
+
+    file_to_lines = {}  # type: Dict[Text, List[int]]
+    for section in sections:
+        file_name, changed_lines = _parse_file_diff(section)
+        # Skip files where lines were only deleted and none were added.
+        if changed_lines:
+            file_to_lines[file_name] = sorted(changed_lines)
+
+    return file_to_lines
+
+
+def _parse_file_diff(file_diff: Text) -> Tuple[Text, List[int]]:
+    """Parses the section of `git diff` output that describes a single file.
+
+    Args:
+        file_diff: one file's section, starting right after `diff --git `.
+
+    Returns:
+        The file's path in the new version (the `b/` side of the header, so a
+        renamed file is reported under its new name) and the line numbers
+        added or modified in it, gathered from every hunk.
+
+    Raises:
+        GitDiffSyntaxError if the first line or any hunk header is malformed.
+    """
+    first_line = file_diff.split('\n')[0]
+
+    m = re.search(r'a/\S+ b/(\S+)', first_line)
+    if not m:
+        raise GitDiffSyntaxError('Expected first line to contain `a/fileName b/fileName`,' + \
+                                 'got: ' + first_line)
+
+    # Text before the first line starting with `@@ ` is the file header, not a hunk.
+    changed_lines = []  # type: List[int]
+    for hunk in file_diff.split('\n@@ ')[1:]:
+        changed_lines += _changed_lines_in_hunk(hunk)
+    return (m.group(1), changed_lines)
+
+
+def _changed_lines_in_hunk(hunk: Text) -> List[int]:
+    """Returns the added/modified line numbers in a hunk (in the new version).
+
+    Args:
+        hunk: one hunk of a file's diff, without its leading `@@ `.
+
+    Raises:
+        GitDiffSyntaxError if there are formatting issues with the first line
+        of `hunk`.
+    """
+    header, *body = hunk.split('\n')
+    # git leaves out `,<count>` when a range covers exactly one line (for
+    # example `@@ -1 +1 @@`), so both counts are optional.
+    m = re.search(r'-\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@', header)
+    if not m:
+        raise GitDiffSyntaxError('Expected first line in hunk to be in the format:' + \
+                                 '\n\t-#,# +#,# @@\ngot: ' + header)
+
+    changed_lines = []
+    curr_line = int(m.group(1))  # new-version number of the next line seen
+    for line in body:
+        # Removed lines and `\ No newline at end of file` markers do not exist
+        # in the new version, so they must not advance the counter.
+        if line.startswith(('-', '\\')):
+            continue
+        # An empty line is a blank context line whose leading space was dropped
+        # (e.g. by diff.suppressBlankEmpty), so it is counted like any other.
+        if line.startswith('+'):
+            changed_lines.append(curr_line)
+        curr_line += 1
+    return changed_lines
\ No newline at end of file
diff --git a/lcov_parser/changed_lines_test.py b/lcov_parser/changed_lines_test.py
new file mode 100755
index 0000000..9c2612b
--- /dev/null
+++ b/lcov_parser/changed_lines_test.py
@@ -0,0 +1,94 @@
+#!/usr/bin/python3
+
+import unittest
+import changed_lines
+from io import StringIO
+
+
+class GetChangedLinesTest(unittest.TestCase):
+    """Checks changed_lines.from_diff against canned `git diff` output."""
+
+    def test_normal_output(self):
+        """Two modified files; the first one contributes two hunks."""
+        expected = {
+            'file1.txt': [22, 23, 46, 47, 48],
+            'file2.txt': [6],
+        }
+
+        actual = changed_lines.from_diff(_TEST_FILE_DATA.strip())
+        self.assertEqual(actual, expected)
+
+    def test_empty_input(self):
+        """An empty diff gives an empty (falsy) result."""
+        self.assertFalse(changed_lines.from_diff(''))
+
+    def test_added_file(self):
+        """Every line of a newly added file counts as changed."""
+        expected = {'addedFile': [1, 2, 3, 4]}
+        actual = changed_lines.from_diff(_TEST_FILE_DATA_ADDED.strip())
+        self.assertEqual(actual, expected)
+
+    def test_removed_file(self):
+        """A deleted file has no added lines, so the result is empty."""
+        actual = changed_lines.from_diff(_TEST_FILE_DATA_DELETED.strip())
+        self.assertFalse(actual)
+
+
+_TEST_FILE_DATA = """
+diff --git a/file1.txt b/file1.txt
+index 170f11f..041325e 100755
+--- a/file1.txt
++++ b/file1.txt
+@@ -19,10 +19,10 @@
+ line 19
+ line 20
+ line 21
+-line 22
++#line 22
+-line 23
++#line 23
+
+@@ -45,8 +45,15 @@
+
+-echo
++#echo
++omg
++wow
+diff --git a/file2.txt b/file2.txt
+index 6675c3b..e69b66c 100644
+--- a/file2.txt
++++ b/file2.txt
+@@ -4,25 +4,28 @@
+'''some comment went here'''
+
+-from typing import Dict
++from typing import Dict, Text,
+"""  # fixture: two modified files; file1.txt contributes two hunks
+
+_TEST_FILE_DATA_DELETED = """
+diff --git a/deletedFile b/deletedFile
+deleted file mode 100644
+index 005304205482..000000000000
+--- a/deletedFile
++++ /dev/null
+@@ -1,84 +0,0 @@
+-deleted
+-lines
+"""  # fixture: a deleted file, whose hunk contains only removed lines
+
+_TEST_FILE_DATA_ADDED = """
+diff --git a/addedFile b/addedFile
+new file mode 100644
+index 000000000000..d906e785c4bd
+--- /dev/null
++++ b/addedFile
+@@ -0,0 +1,5 @@
++extra
++lines
++are
++added
+"""  # fixture: a newly added file, whose hunk contains only added lines
+
+
+if __name__ == '__main__':  # run the suite only when executed as a script
+ unittest.main()