Added lcov_parser.py to parse LCOV ".info" files.
Added a file lcov_parser.py to parse LCOV ".info" files
and convert them into a simplified form.
This parser discards function-related data and only
records, for each instrumented line, whether that line
was executed or not.
We're hoping to use this to compute incremental coverage
for kunit.
Signed-off-by: Darya Verzhbinsky <daryaver@google.com>
Change-Id: Ic3527c8d9c646a280e20a5bee54cb264402622c0
diff --git a/BUILD.bazel b/BUILD.bazel
index 887a663..94e75a4 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -52,3 +52,15 @@
tars = ["@kunit_repo//:files"],
workdir = "/kunit/linux",
)
+
+py_library(
+    name = "lcov_parser",
+    srcs = ["lcov_parser/lcov_parser.py"],
+)
+
+py_test(
+    name = "lcov_parser_test",
+    srcs = ["lcov_parser/lcov_parser_test.py"],
+    python_version = "PY3",
+    deps = [":lcov_parser"],  # depend on the py_library target, not on its source file
+)
diff --git a/lcov_parser/lcov_parser.py b/lcov_parser/lcov_parser.py
new file mode 100644
index 0000000..c623efc
--- /dev/null
+++ b/lcov_parser/lcov_parser.py
@@ -0,0 +1,137 @@
+"""This file provides a parse() method to extract the per-file line coverage
+from an LCOV ".info" file.
+"""
+
+from typing import Dict, Text, Tuple, Iterable, IO, Optional
+
+
+Lines = Dict[int, bool]  # line number -> whether its execution count was non-zero
+
+def _merge_lines(dst: Lines, src: Lines) -> Lines:
+    """_merge_lines folds `src` into `dst` in place and returns `dst`.
+
+    A line present in both maps ends up covered if either side covered it.
+    """
+    for line_no, covered in src.items():
+        dst[line_no] = dst.get(line_no, False) or covered
+    return dst
+
+
+class Record:
+    """Record holds the file name and line coverage of one LCOV record."""
+    def __init__(self):
+        self.file = ''
+        self.lines = {}  # type: Lines
+
+
+class LcovSyntaxError(Exception):
+    """LcovSyntaxError reports an ".info" file that does not follow the
+    LCOV format, e.g. a line without a ":" or a malformed DA entry."""
+
+
+def _parse_line(line: Text) -> Tuple[Text, Text]:
+    """_parse_line splits an LCOV line into its field name and its data.
+
+    LCOV lines are of the form <field>:<data>, eg: "TN:<test name>".
+
+    Args:
+        line: line from the input file.
+
+    Returns:
+        a tuple of the text before the first ":" and the stripped text after it.
+
+    Raises:
+        LcovSyntaxError: if `line` does not contain a ":".
+    """
+    try:
+        field, value = line.split(':', 1)
+    except ValueError:  # no ":" in the line, so the two-way unpacking failed
+        raise LcovSyntaxError('invalid data line, needs to be of the form' +
+                              '\n\tDA:<line #>,<execution count>[,<checksum>]\n ' +
+                              'or\n \tTN:<test name>\ngot: ' + line)
+    return (field, value.strip())
+
+
+def _read_record(input_file: Iterable[Text]) -> Optional[Record]:
+    """_read_record reads the next LCOV record from `input_file`.
+
+    LCOV records are of the form
+        SF:<file name>
+        ...
+        end_of_record
+
+    Args:
+        input_file: the lines of the ".info" file that is being read.
+
+    Returns:
+        the next Record in the file, or None if the input runs out first.
+
+    Raises:
+        LcovSyntaxError: if the format of the input file doesn't match correct
+            LCOV format.
+    """
+    rec = Record()
+
+    for line in input_file:
+
+        line = line.strip()
+
+        if line == 'end_of_record':
+            return rec
+
+        field, value = _parse_line(line)
+        # Only SF and DA are kept; every other field is ignored.
+        if field == 'SF':
+            rec.file = value
+
+        elif field == 'DA':
+            parts = value.split(',', 2)
+            if len(parts) < 2:
+                raise LcovSyntaxError('invalid data line, needs to be of the form ' +
+                                      '\n\tDA:<line #>,<execution count>[,<checksum>]\ngot: ' + line)
+
+            try:
+                line_num = int(parts[0])
+            except ValueError:
+                raise LcovSyntaxError('invalid data line, needs to be of the form ' +
+                                      '\n\tDA:<line #>,<execution count>[,<checksum>]\ngot: ' + line)
+
+            # LCOV explicitly reports a '0' execution count for instrumented lines that don't get run.
+            rec.lines[line_num] = parts[1] != '0'
+
+    return None  # ran out of input before reaching an "end_of_record" line
+
+
+def parse(input_file: IO) -> Tuple[Text, Dict[Text, Lines]]:
+    """parse extracts the per-file line coverage from an LCOV ".info" file.
+
+    Args:
+        input_file: the file that is being read; its first line must be the TN line.
+
+    Returns:
+        a tuple of the test name and a dict mapping each source file name to
+        its Lines; records that share a file name are merged.
+
+    Raises:
+        LcovSyntaxError: if the first line is not "TN:<name>" or a later line
+            is malformed.
+    """
+    # An LCOV report opens with the name of the test that produced it.
+    first_field, test_name = _parse_line(input_file.readline())
+    if first_field != 'TN':
+        raise LcovSyntaxError(
+            'first line in LCOV report should be "TN:<name>", got: ' +
+            first_field + ':' + test_name)
+
+    files = {}  # type: Dict[Text, Lines]
+
+    # _read_record returns None once the input has no complete record left.
+    record = _read_record(input_file)
+    while record is not None:
+        if record.file not in files:
+            files[record.file] = record.lines
+        else:
+            files[record.file] = _merge_lines(files[record.file], record.lines)
+        record = _read_record(input_file)
+
+    return (test_name, files)
diff --git a/lcov_parser/lcov_parser_test.py b/lcov_parser/lcov_parser_test.py
new file mode 100755
index 0000000..c6f2155
--- /dev/null
+++ b/lcov_parser/lcov_parser_test.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+import unittest
+import lcov_parser
+from io import StringIO
+
+
+def expected_coverage(covered, uncovered):
+    """Build the line-coverage dict the tests expect for one source file.
+
+    Every line number in `covered` maps to True and every one in
+    `uncovered` maps to False (`uncovered` wins if a line is in both).
+    """
+    lines = dict.fromkeys(covered, True)
+    lines.update(dict.fromkeys(uncovered, False))
+    return lines
+
+
+class LcovParserTest(unittest.TestCase):
+    """Exercises lcov_parser.parse() on a valid report and on malformed ones."""
+
+    def test_small_dir_passes(self):
+        # One entry per SF record in _TEST_FILE_DATA.
+        want_data = {
+            '/kunit/test/strerror.c': expected_coverage([156], [154]),
+            '/kunit/test/strerror-test.c': expected_coverage([14], [16]),
+        }
+        with StringIO(_TEST_FILE_DATA.strip()) as input_data:
+            test_name, coverage_data = lcov_parser.parse(input_data)
+            self.assertEqual(test_name, 'kunit_presubmit_tests')
+            self.assertEqual(coverage_data, want_data)
+
+    def test_raises_errors(self):
+        malformed_reports = [
+            '',  # no TN line at all
+            'invalid_first_line\n',  # first line is not <field>:<data>
+            'TN:valid_line\nDA:invalid_second,str\n',  # non-numeric line number
+        ]
+        for report in malformed_reports:
+            # subTest reports each malformed input separately on failure.
+            with self.subTest(report=report):
+                with self.assertRaises(lcov_parser.LcovSyntaxError):
+                    with StringIO(report) as input_data:
+                        lcov_parser.parse(input_data)
+
+# LCOV report fixture: a TN line followed by two SF ... end_of_record records.
+_TEST_FILE_DATA = """
+TN:kunit_presubmit_tests
+SF:/kunit/test/strerror.c
+DA:154,0
+DA:156,10
+end_of_record
+SF:/kunit/test/strerror-test.c
+DA:14,1
+DA:16,0
+end_of_record
+"""
+
+if __name__ == '__main__':
+ unittest.main()  # run the tests in this module when it is executed directly