* [dpdk-ci] [PATCH] add script to decide best tree match for patches
@ 2019-02-04 14:19 Ali Alnubani
0 siblings, 0 replies; only message in thread
From: Ali Alnubani @ 2019-02-04 14:19 UTC (permalink / raw)
To: ci; +Cc: Thomas Monjalon, ferruh.yigit, Ori Kam
The script can be used to get the trees that best match
a patch or a series.
Signed-off-by: Ali Alnubani <alialnu@mellanox.com>
Signed-off-by: Ori Kam <orika@mellanox.com>
---
tools/get-tree.py | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 245 insertions(+)
create mode 100755 tools/get-tree.py
diff --git a/tools/get-tree.py b/tools/get-tree.py
new file mode 100755
index 0000000..3727439
--- /dev/null
+++ b/tools/get-tree.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python
+
+# SPDX-License-Identifier: (BSD-3-Clause AND GPL-2.0-or-later AND MIT)
+# Copyright 2019 6WIND S.A.
+# Copyright 2019 Mellanox Technologies, Ltd
+
+import os
+import sys
+import re
+import argparse
+import copy
+import fnmatch
+
+from requests.exceptions import HTTPError
+
+from git_pw import config
+from git_pw import api
+from git_pw import utils
+
+"""
+This script uses the git-pw API to retrieve Patchwork's series/patches,
+and find a list of trees/repos that best match the series/patch.
+
+The rules on which matches are based are taken from the MAINTAINERS file,
+and are currently based only on the paths of the changed files. Results can
+be improved by adding more information to the MAINTAINERS file.
+
+TODO:
+ - Match using the subject of the patch/series.
+ - Add a configuration file to specify the priority of each tree.
+
+Configurations:
+The script uses tokens for authentication.
+If the arguments pw_{server,project,token} aren't passed, the environment
+variables PW_{SERVER,PROJECT,TOKEN} should be set. If not, the script will try
+to load the git configurations pw.{server,project,token}.
+
+Example usage:
+ ./get-tree.py --command list_trees_for_series 2054
+ ./get-tree.py --command list_trees_for_patch 2054
+
+The output will be a list of trees sorted based on number of matches,
+with the first line having the highest count.
+"""
+
+# Shared git-pw configuration object; its debug flag is switched off.
+CONF = config.CONF
+CONF.debug = False
+
+# The matching rules are parsed from the file named by this variable.
+MAINTAINERS_FILE_PATH = os.environ.get('MAINTAINERS_FILE_PATH')
+if MAINTAINERS_FILE_PATH is None or MAINTAINERS_FILE_PATH == '':
+    print('MAINTAINERS_FILE_PATH is not set.')
+    sys.exit(1)
+
+# Tree URL -> {'paths': [...]}; replaced in main() by construct_rules().
+RULES = {}
+
+# Paths under doc/ and shell/Python scripts are ignored when matching.
+ignored_files_re = re.compile(r'^doc/|\.sh$|\.py$')
+
+def configure_git_pw(args=None):
+    """Copy the git-pw settings from the parsed arguments into CONF.
+
+    args is the namespace returned by parse_args(); it must provide the
+    attributes pw_server, pw_project and pw_token. If any of them is
+    empty or None, a message naming the missing option is printed and the
+    script exits with status 1.
+    """
+    for key in ('server', 'project', 'token'):
+        option = 'pw_{}'.format(key)
+        value = getattr(args, option)
+        if not value:
+            # Name the exact command-line option so the user knows what
+            # to pass (or which PW_* variable / git config key to set).
+            print('--{} is a required git-pw configuration'.format(option))
+            sys.exit(1)
+        setattr(CONF, key, value)
+
+def find_filenames(diff):
+    """Return the sorted, de-duplicated paths of the files a diff touches.
+
+    Adapted from Patchwork's parser:
+    https://github.com/getpatchwork/patchwork/blob/master/patchwork/parser.py
+    The header regex is defined inside the function here.
+
+    Only '---' / '+++' header lines are inspected. '/dev/null' entries are
+    skipped and the first path component (e.g. 'a/' or 'b/') is dropped.
+    """
+    header_re = re.compile(r'^(---|\+\+\+) (\S+)')
+    # Normalise line endings and make sure the text ends with one newline.
+    text = diff.replace('\r', '').strip() + '\n'
+    seen = set()
+    for line in text.split('\n'):
+        if not line:
+            continue
+        header = header_re.match(line)
+        if header is None:
+            continue
+        path = header.group(2)
+        if path.startswith('/dev/null'):
+            continue
+        # Keep only what follows the first '/'.
+        seen.add(path.partition('/')[2])
+    return sorted(seen)
+
+def construct_rules():
+    """Parse the MAINTAINERS file into a {tree: {'paths': [...]}} mapping.
+
+    The file is split on blank lines into blocks. Only blocks that name a
+    git://dpdk.org tree ('T:' line) and at least one file pattern ('F:'
+    line) contribute; patterns matching ignored_files_re are left out.
+    """
+    with open(MAINTAINERS_FILE_PATH) as fd:
+        blocks = fd.read().split('\n\n')
+
+    rules = {}
+    for block in blocks:
+        if 'T: git://dpdk.org' not in block or 'F: ' not in block:
+            continue
+        tree_match = re.search(r'T: (git://dpdk\.org[^\n]+)', block)
+        if not tree_match:
+            continue
+        # Several blocks may point at the same tree; merge their paths.
+        entry = rules.setdefault(tree_match.group(1), {'paths': []})
+        entry['paths'].extend(
+            path for path in re.findall(r'F: ([^\n]+)', block)
+            if not ignored_files_re.search(path))
+    return rules
+
+def get_subject(resource):
+    """Return the name of a patch/series resource without its tag prefix.
+
+    resource is a mapping with a 'name' entry. One or more leading
+    '[...]' tags are removed and the remainder is stripped of surrounding
+    whitespace. Brackets that appear later in the subject are kept.
+    """
+    name = resource['name']
+    # Match each leading tag individually; a greedy '.*' would run up to
+    # the last ']' in the line and swallow part of the real subject.
+    return re.sub(r'^\[[^\]]*\](?:\s*\[[^\]]*\])*', '', name).strip()
+
+def find_matches(files):
+    """Return, for each changed file, the first tree whose rules match it.
+
+    Files matching ignored_files_re are skipped, and a file that matches
+    no rule contributes nothing. The result holds one tree per matched
+    file, so a tree appears once for every file it matched.
+    """
+    def _first_tree_for(path):
+        # Stop at the first rule of the first tree that matches.
+        for tree in RULES.keys():
+            for pattern in RULES[tree]['paths']:
+                if pattern.endswith('/'):
+                    # A directory rule covers everything below it.
+                    pattern = '{}*'.format(pattern)
+                if fnmatch.fnmatch(path, pattern):
+                    return tree
+        return None
+
+    found = []
+    for path in files:
+        if ignored_files_re.search(path):
+            continue
+        tree = _first_tree_for(path)
+        if tree is not None:
+            found.append(tree)
+    return found
+
+def get_ordered_matches(matches):
+    """Return the distinct trees in matches, most frequent first.
+
+    matches is a list with one entry per matched file. On Python versions
+    whose dicts preserve insertion order, trees with equal counts keep the
+    order in which they first appear.
+    """
+    # Function-scope import keeps this block self-contained. Counter
+    # tallies in one pass instead of calling list.count() per element.
+    from collections import Counter
+    return [tree for tree, _count in Counter(matches).most_common()]
+
+def list_trees_for_patch(patch):
+    """Find matching trees for a specific patch.
+
+    patch is a patch resource with a 'diff' entry, which is scanned for
+    changed files. Matching is based on the changed paths only; the
+    subject of the patch is not consulted.
+
+    Returns one tree per matched file (duplicates included), so callers
+    can rank trees by number of occurrences.
+    """
+    return find_matches(find_filenames(patch['diff']))
+
+def list_trees_for_series(series):
+    """Collect the matching trees of every patch in a series.
+
+    Each entry of series['patches'] is fetched in full through api_get()
+    and its matches are appended in series order.
+    """
+    collected = []
+    for entry in series['patches']:
+        full_patch = api_get('patches', entry['id'])
+        collected.extend(list_trees_for_patch(full_patch))
+    return collected
+
+def parse_args():
+    """Build the command-line parser and return the parsed arguments."""
+    parser = argparse.ArgumentParser()
+    pw_group = parser.add_argument_group('git-pw configurations')
+    options_group = parser.add_argument_group('optional arguments')
+
+    options_group.add_argument(
+        '--command',
+        choices=('list_trees_for_patch', 'list_trees_for_series'),
+        required=True,
+        help='command to perform on patch/series')
+
+    # (option, environment variable, git config key, help text)
+    pw_options = (
+        ('--pw_server', 'PW_SERVER', 'pw.server', 'PW.SERVER'),
+        ('--pw_project', 'PW_PROJECT', 'pw.project', 'PW.PROJECT'),
+        ('--pw_token', 'PW_TOKEN', 'pw.token', 'PW.TOKEN'),
+    )
+    for option, env_name, git_key, help_text in pw_options:
+        # The environment variable takes precedence over the git
+        # configuration when choosing the default value.
+        fallback = utils.git_config(git_key)
+        pw_group.add_argument(
+            option, type=str,
+            default=os.environ.get(env_name, fallback),
+            help=help_text)
+
+    parser.add_argument('id', type=int, help='patch/series id')
+
+    return parser.parse_args()
+
+def main():
+    """Parse the arguments, load the rules and print the matching trees.
+
+    The trees are printed one per line, the most frequently matched first.
+    """
+    global RULES
+
+    args = parse_args()
+    configure_git_pw(args)
+
+    # The rules are loaded once and read by find_matches() via the global.
+    RULES = construct_rules()
+
+    if args.command == 'list_trees_for_patch':
+        trees = list_trees_for_patch(api_get('patches', args.id))
+    elif args.command == 'list_trees_for_series':
+        trees = list_trees_for_series(api_get('series', args.id))
+    else:
+        trees = []
+
+    ranked = get_ordered_matches(trees)
+    print('{}'.format('\n'.join(ranked)))
+
+def api_get(resource_type, resource_id):
+    """Retrieve an API resource through git-pw.
+
+    resource_type is the collection name (this script uses 'patches' and
+    'series') and resource_id is the id within it. When the server
+    answers 404, a message is written to stderr and the script exits with
+    status 1; any other HTTPError is re-raised.
+    """
+    try:
+        return api.detail(resource_type, resource_id)
+    except HTTPError as err:
+        # Prefer the real status code: searching str(err) for '404' also
+        # hits ids or URLs that merely contain those digits (patch 1404).
+        response = getattr(err, 'response', None)
+        status = getattr(response, 'status_code', None)
+        if status is None:
+            # No response attached; fall back to inspecting the message.
+            not_found = '404' in str(err)
+        else:
+            not_found = status == 404
+        if not_found:
+            sys.stderr.write('{} {} was not found.\n'.format(
+                resource_type, resource_id))
+            sys.exit(1)
+        raise
+
+if __name__ == '__main__':
+    main()
--
2.11.0
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-02-04 14:19 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-04 14:19 [dpdk-ci] [PATCH] add script to decide best tree match for patches Ali Alnubani
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).