From: Adam Hassick <ahassick@iol.unh.edu>
To: ci@dpdk.org
Cc: Adam Hassick <ahassick@iol.unh.edu>
Subject: [PATCH v2 1/2] tools: Add script to create artifacts
Date: Fri, 12 Jan 2024 17:40:13 -0500 [thread overview]
Message-ID: <20240112224014.30955-2-ahassick@iol.unh.edu> (raw)
In-Reply-To: <20240112224014.30955-1-ahassick@iol.unh.edu>
This script takes in a URL to a series on Patchwork and emits a
tarball which may be used for running tests.
Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
---
tools/create_series_artifact.py | 472 ++++++++++++++++++++++++++++++++
1 file changed, 472 insertions(+)
create mode 100755 tools/create_series_artifact.py
diff --git a/tools/create_series_artifact.py b/tools/create_series_artifact.py
new file mode 100755
index 0000000..c1fc636
--- /dev/null
+++ b/tools/create_series_artifact.py
@@ -0,0 +1,472 @@
+#!/usr/bin/env python3
+
+import argparse
+from dataclasses import dataclass
+
+import os
+from git_pw import api as pw_api
+import pathlib
+import pygit2
+import requests
+import shutil
+import subprocess
+import tarfile
+from typing import Any, Dict, List, Optional
+import yaml
+
+HELP = """This script will create an artifact given a URL to a Patchwork series.
+Much of the information provided is acquired by this script through the use of a configuration file.
+This configuration file can be found with the other script configs in the config directory of the CI repo.
+
+The configuration file is used to aggregate:
+- Git credentials
+- Patchwork configuration and the user token (user token is optional)
+- The URL of the DPDK Git mirror
+- Locations of dependency scripts and their configuration files
+
+More detail and examples can be found in the default configuration file.
+This default file is located at "config/artifacts.yml" in the dpdk-ci repository.
+
+Example usage:
+
+./create_series_artifact.py ../configs/artifacts.yml https://patches.dpdk.org/api/1.3/series/12345/
+
+"""
+
+# Map the outputs of pw_maintainers_cli to the names of branches on the
+# GitHub mirror. This is temporary, and should be moved elsewhere in
+# the future.
+BRANCH_NAME_MAP = {
+ "next-baseband": "next-baseband-for-main",
+ "next-crypto": "next-crypto-for-main",
+ "next-eventdev": "next-eventdev-for-main",
+ "next-net": "next-net-for-main",
+ "next-net-intel": "next-net-intel-for-next-net",
+ "next-net-brcm": "next-net-brcm-for-next-net",
+ "next-net-mlx": "next-net-mlx-for-next-net",
+ "next-net-mrvl": "next-net-mrvl-for-main",
+ "next-virtio": "next-virtio-for-next-net",
+}
+
+
+@dataclass
+class CreateSeriesParameters(object):
+ pw_server: str
+ pw_project: str
+ pw_token: str
+ git_user: str
+ git_email: str
+ series_url: str
+ patch_ids: List[int]
+ labels: List[str]
+ config: Dict
+ series: Dict
+ pw_mcli_script: pathlib.Path
+ patch_parser_script: pathlib.Path
+ patch_parser_cfg: pathlib.Path
+ lzma: bool
+ output_tarball: pathlib.Path
+ output_properties: pathlib.Path
+ no_depends: bool
+ docs_only: bool
+
+
+class ProjectTree(object):
+ artifact_path: pathlib.Path
+ tree: str
+ commit_id: str
+ path: pathlib.Path
+ log_file_path: pathlib.Path
+ props_file_path: pathlib.Path
+ data: CreateSeriesParameters
+ repo: pygit2.Repository
+ log_buf: List[str]
+ properties: Dict[str, Any]
+
+ def log(self, msg: str):
+ print(msg)
+ self.log_buf.append(msg)
+
+ def write_log(self):
+ with open(self.log_file_path, "w") as log_file:
+ log_file.write("\n".join([msg for msg in self.log_buf]))
+
+ def write_properties(self):
+ with open(self.props_file_path, "w") as prop_file:
+ for key, value in self.properties.items():
+ prop_file.write(f"{key}={value}\n")
+
+ def move_logs(self):
+ shutil.move(self.log_file_path, pathlib.Path(os.getcwd(), "log.txt"))
+ shutil.move(
+ self.props_file_path, pathlib.Path(os.getcwd(), self.data.output_properties)
+ )
+
+ def __init__(self, data: CreateSeriesParameters):
+ self.data = data
+ self.path = pathlib.Path(os.curdir, "dpdk").absolute()
+ self.log_buf = []
+ self.log_file_path = pathlib.Path(self.path, "log.txt")
+ self.props_file_path = pathlib.Path(self.path, data.output_properties)
+ self.tree = "main"
+ self.properties = {}
+ self.artifact_path = data.output_tarball
+
+ # Set the range of patch IDs this series (aka patchset) covers.
+ self.properties["patchset_range"] = f"{data.patch_ids[0]}-{data.patch_ids[-1]}"
+
+ # Set the tags using tags obtained by the params class
+ self.properties["tags"] = " ".join(data.labels)
+
+ # Record whether this patch is only documentation
+ self.properties["is_docs_only"] = str(data.docs_only)
+
+ if not self.path.exists():
+ # Find the URL to clone from based on the tree name.
+ repo_url = self.data.config["repo_url"]
+
+ # Pull down the git repo we found.
+ for i in range(1, 4):
+ self.log(f"Cloning the DPDK mirror at: {repo_url} (Attempt {i} of 3)")
+ try:
+ repo = pygit2.clone_repository(repo_url, self.path)
+ break
+ except pygit2.GitError as e:
+ self.log(f"Failed! Reason: {e}")
+ else:
+ self.log("Failed to clone from the upstream repository.")
+ exit(1)
+ else:
+ # Fetch any new changes.
+ repo = pygit2.Repository(self.path)
+ origin = repo.remotes["origin"]
+ origin.fetch()
+
+ self.log("Cleaning repository state...")
+ repo.state_cleanup()
+
+ # Initially, check out to main.
+ self.repo = repo
+ self.checkout("main")
+
+ self.log(f"Done: {self.tree} commit {self.commit_id}")
+
+ def checkout(self, branch: str) -> Optional[str]:
+ """
+ Check out to some branch.
+ Returns true if successful, false otherwise.
+ """
+
+ git_branch = self.repo.lookup_branch(
+ f"origin/{branch}", pygit2.GIT_BRANCH_REMOTE
+ )
+
+ if not git_branch:
+ self.log(f"Tried to checkout to non-existant branch: {branch}")
+ return None
+
+ self.log(f"Trying to checkout branch: {git_branch.branch_name}")
+ reference = self.repo.resolve_refish(git_branch.branch_name)
+ self.commit_id = str(reference[0].id)
+ self.repo.reset(reference[0].id, pygit2.GIT_RESET_HARD)
+ self.repo.checkout(reference[1])
+ self.tree = branch
+
+ self.log(f"Checked out to {branch} ({self.commit_id})")
+
+ return branch
+
+ def guess_git_tree(self) -> Optional[str]:
+ """
+ Run pw_maintainers_cli to guess the git tree of the patch series we are applying.
+ Returns None if the pw_maintainers_cli failed.
+ """
+
+ if "id" not in self.data.series:
+ raise Exception("ID was not found in the series JSON")
+
+ result = subprocess.run(
+ [
+ self.data.pw_mcli_script,
+ "--type",
+ "series",
+ "--pw-server",
+ self.data.pw_server,
+ "--pw-project",
+ self.data.pw_project,
+ "list-trees",
+ str(self.data.series["id"]),
+ ],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ cwd=self.path,
+ env={
+ "MAINTAINERS_FILE_PATH": "MAINTAINERS",
+ "PW_TOKEN": self.data.pw_token,
+ },
+ )
+
+ if result.returncode == 0:
+ branch = result.stdout.decode().strip()
+
+ if branch in ["main", "dpdk"]:
+ branch = "main"
+ else:
+ if branch.startswith("dpdk-"):
+ branch = branch[5:]
+
+ branch = BRANCH_NAME_MAP.get(branch)
+
+ return self.checkout(branch)
+ else:
+ self.log("Failed to guess git tree. Output from pw_maintainers_cli:")
+ self.log(result.stdout.decode())
+ self.log(result.stderr.decode())
+ return None
+
+ def set_properties(self):
+ self.properties["tree"] = self.tree
+ self.properties["applied_commit_id"] = self.commit_id
+
+ def apply_patch_series(self) -> bool:
+ # Run git-pw to apply the series.
+
+ # Configure the tree to point at the given patchwork server and project
+ self.repo.config["pw.server"] = self.data.pw_server
+ self.repo.config["pw.project"] = self.data.pw_project
+ self.repo.config["user.email"] = self.data.git_email
+ self.repo.config["user.name"] = self.data.git_user
+
+ result = subprocess.run(
+ ["git", "pw", "series", "apply", str(self.data.series["id"])],
+ cwd=self.path,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
+ # Write the log from the apply process to disk.
+ self.log("Applying patch...")
+ self.log(result.stdout.decode())
+ self.log(result.stderr.decode())
+
+ # Store whether there was an error, and return the flag.
+ error = result.returncode != 0
+ self.properties["apply_error"] = error
+ return not error
+
+ def test_build(self) -> bool:
+ ninja_result: Optional[subprocess.CompletedProcess] = None
+ meson_result: subprocess.CompletedProcess = subprocess.run(
+ ["meson", "setup", "build"],
+ cwd=self.path,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
+ build_error = meson_result.returncode != 0
+
+ self.log("Running test build...")
+ self.log(meson_result.stdout.decode())
+
+ if not build_error:
+ ninja_result = subprocess.run(
+ ["ninja", "-C", "build"],
+ cwd=self.path,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
+ build_error = build_error or ninja_result.returncode != 0
+ shutil.rmtree(pathlib.Path(self.path, "build"))
+
+ self.log(ninja_result.stdout.decode())
+ self.log(ninja_result.stderr.decode())
+
+ self.log(meson_result.stderr.decode())
+
+ if build_error:
+ self.log("Test build failed.")
+
+ self.properties["build_error"] = build_error
+ return not build_error
+
+ def create_tarball(self):
+ # Copy the logs into the artifact tarball.
+ self.write_log()
+ self.write_properties()
+
+ # Create a tar archive containing the DPDK sources.
+ with tarfile.open(
+ self.artifact_path, mode="w:xz" if self.data.lzma else "w:gz"
+ ) as tar_file:
+ tar_file.add(self.path, "dpdk", recursive=True)
+
+ # Move the log file out of the working directory.
+ self.move_logs()
+
+ return True
+
+
+def get_tags(
+ patch_parser_script: pathlib.Path,
+ patch_parser_cfg: pathlib.Path,
+ series: Dict,
+) -> List[str]:
+ series_filename = f"{series['id']}.patch"
+
+ # Pull down the patch series as a single file.
+ pw_api.download(series["mbox"], None, series_filename)
+
+ # Call the patch parser script to obtain the tags
+ parse_result = subprocess.run(
+ [patch_parser_script, patch_parser_cfg, series_filename],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
+ # Assert that patch parser succeeded.
+ parse_result.check_returncode()
+
+ # Return the output
+ return parse_result.stdout.decode().splitlines()
+
+
+def parse_args() -> CreateSeriesParameters:
+ """
+ Parses the arguments and returns an instance of a dataclass containing parameters
+ and some derived information.
+ """
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=HELP,
+ )
+ parser.add_argument(
+ "config",
+ type=argparse.FileType(),
+ help="The config file to load. Must be a path to a YAML file.",
+ )
+ parser.add_argument("series_url", type=str, help="The URL to a Patchwork series.")
+ parser.add_argument(
+ "-t",
+ "--pw-token",
+ dest="pw_token",
+ type=str,
+ help="The Patchwork token to use",
+ )
+ parser.add_argument(
+ "-l",
+ "--lzma",
+ action="store_true",
+ help="When set, use LZMA compression rather than GNU zip compression.",
+ )
+ parser.add_argument(
+ "-nd",
+ "--no-depends",
+ action="store_true",
+ help="When set, does not acknowledge the Depends-on label.",
+ )
+
+ args = parser.parse_args()
+
+ # Read the configuration file.
+ with args.config as config_file:
+ config = yaml.safe_load(config_file)
+
+ pw_server = config["patchwork"]["server"]
+ pw_project = config["patchwork"]["project"]
+ pw_token = args.pw_token or config["patchwork"].get("token")
+
+ if not pw_token:
+ print("Failed to obtain the Patchworks token.")
+ exit(1)
+
+ pw_mcli_script = pathlib.Path(config["pw_maintainers_cli"]["path"]).absolute()
+
+ git_user = config["git"]["user"]
+ git_email = config["git"]["email"]
+
+ patch_parser_script = pathlib.Path(config["patch_parser"]["path"]).absolute()
+ patch_parser_cfg = pathlib.Path(config["patch_parser"]["config"]).absolute()
+
+ if args.lzma:
+ tarball_name = "dpdk.tar.xz"
+ else:
+ tarball_name = "dpdk.tar.gz"
+
+ output_tarball = pathlib.Path(tarball_name)
+ output_properties = pathlib.Path(f"{tarball_name}.properties")
+
+ # Pull the series JSON down.
+ resp = requests.get(args.series_url)
+ resp.raise_for_status()
+ series = resp.json()
+
+ # Get the labels using the patch parser.
+ labels = get_tags(patch_parser_script, patch_parser_cfg, series)
+
+ # See if this is a documentation-only patch.
+ docs_only = len(labels) == 1 and labels[0] == "documentation"
+
+ # Get the patch ids in this patch series.
+ patch_ids = list(map(lambda x: int(x["id"]), series["patches"]))
+ patch_ids.sort()
+
+ return CreateSeriesParameters(
+ pw_server=pw_server,
+ pw_project=pw_project,
+ pw_token=pw_token,
+ git_user=git_user,
+ git_email=git_email,
+ series_url=args.series_url,
+ patch_ids=patch_ids,
+ labels=labels,
+ config=config,
+ series=series,
+ pw_mcli_script=pw_mcli_script,
+ patch_parser_script=patch_parser_script,
+ patch_parser_cfg=patch_parser_cfg,
+ lzma=args.lzma,
+ output_tarball=output_tarball,
+ output_properties=output_properties,
+ no_depends=args.no_depends,
+ docs_only=docs_only,
+ )
+
+
+def try_to_apply(tree: ProjectTree) -> bool:
+ tree.set_properties()
+ return tree.apply_patch_series() and tree.test_build() and tree.create_tarball()
+
+
+def main() -> int:
+ data = parse_args()
+
+ # Pull down the DPDK mirror.
+ tree = ProjectTree(data)
+
+ # Try to guess the Git tree for this patchset.
+ guessed_tree = tree.guess_git_tree()
+
+ if not guessed_tree:
+ print("Failed to guess git tree.")
+ return 1
+
+ # Try to apply this patch.
+ if not (
+ try_to_apply(tree) # First, try to apply on the guessed tree.
+ or guessed_tree != "main" # If that fails, and the guessed tree was not main
+ and tree.checkout("main") # Checkout to main, then
+ and try_to_apply(tree) # Try to apply on main
+ ):
+ tree.write_log()
+ tree.write_properties()
+ tree.move_logs()
+
+ print("FAILURE")
+
+ return 1
+ return 0
+
+
+if __name__ == "__main__":
+ exit(main())
--
2.43.0
next prev parent reply other threads:[~2024-01-12 22:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-12 22:40 [PATCH v2 0/2] Add a script to create series artifacts Adam Hassick
2024-01-12 22:40 ` Adam Hassick [this message]
2024-01-15 13:07 ` [PATCH v2 1/2] tools: Add script to create artifacts Juraj Linkeš
2024-01-12 22:40 ` [PATCH v2 2/2] config: Add example config file Adam Hassick
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240112224014.30955-2-ahassick@iol.unh.edu \
--to=ahassick@iol.unh.edu \
--cc=ci@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).