| """This module parses `git diff` output to determine the added/changed |
| lines per file. |
| """ |
| from typing import Tuple, Text, List, Dict |
| import re |
| |
| |
class GitDiffSyntaxError(Exception):
    """Signals that `git diff` command output is not formatted as expected."""
| |
| |
def from_diff(git_diff: Text) -> Dict[Text, List[int]]:
    """Converts `git diff` output into a map of files to their changed lines
    (in the new version).

    Args:
        git_diff: The raw text produced by `git diff`.

    Returns:
        A dict mapping each file name to the sorted list of its added/modified
        line numbers. Files for which no added lines were found (for example
        when lines were only deleted) are left out. Empty or whitespace-only
        input yields an empty dict.

    Raises:
        GitDiffSyntaxError: propagated from `_parse_file_diff` when a per-file
            section cannot be parsed.
    """
    git_diff = git_diff.strip()

    # `git diff` printed nothing, meaning no files had modified lines
    if not git_diff:
        return {}

    # Split into per-file sections. The marker only counts as a file header
    # when it starts a line: inside hunk bodies every line is prefixed with
    # '+', '-' or ' ', so a file whose *content* mentions "diff --git " must
    # not be mistaken for the beginning of a new file section.
    file_breakdown = re.split(r'^diff --git ', git_diff, flags=re.MULTILINE)

    # The first element is whatever precedes the first header (an empty string
    # when the input starts with the marker); it is never a file section.
    file_breakdown = file_breakdown[1:]

    file_to_lines = {}  # type: Dict[Text, List[int]]
    for file_diff in file_breakdown:
        file_name, changed_lines = _parse_file_diff(file_diff)

        # this might happen if only lines were deleted but none were added
        if not changed_lines:
            continue

        file_to_lines[file_name] = sorted(changed_lines)

    return file_to_lines
| |
| |
| def _parse_file_diff(file_diff: Text) -> Tuple[Text, List[int]]: |
| """Takes in a section of the `git diff` command that contains |
| all the information about a given file and returns the file's name and it's associated |
| changed lines. |
| |
| Raises: |
| GitDiffSyntaxError if there are formatting issues parsing file name in `file_diff`. |
| """ |
| |
| first_line = file_diff.split('\n')[0] |
| |
| m = re.search(r'a/(\S+) b/\S+', first_line) |
| if not m: |
| raise GitDiffSyntaxError('Expected first line to contain `a/fileName b/fileName`,' + \ |
| 'got: ' + first_line) |
| |
| file_name = m.group(1) |
| |
| hunks = file_diff.split('\n@@ ')[1:] |
| |
| changed_lines = [] |
| |
| for hunk in hunks: |
| changed_lines += _changed_lines_in_hunk(hunk) |
| |
| return (file_name, changed_lines) |
| |
| |
| def _changed_lines_in_hunk(hunk: Text) -> List[int]: |
| """Returns the added/modified line numbers in a hunk (in the new version). |
| |
| Raises: |
| GitDiffSyntaxError if there are formatting issues with the first line |
| of `hunk`. |
| """ |
| lines = hunk.split('\n') |
| changed_lines = [] |
| |
| m = re.search(r'-\d+,\d+ \+(\d+),\d+ @@', lines[0]) |
| if not m: |
| raise GitDiffSyntaxError('Expected first line in hunk to be in the format:' + \ |
| '\n\t-#,# +#,# @@\ngot: ' + lines[0]) |
| |
| starting_line_number = int(m.group(1)) |
| |
| curr_line = starting_line_number |
| |
| for line in lines[1:]: |
| if line == '' or line[0] == '-': |
| continue |
| |
| if line[0] == '+': |
| changed_lines.append(curr_line) |
| |
| curr_line += 1 |
| |
| |
| return sorted(changed_lines) |