From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 3ACCD43892; Thu, 11 Jan 2024 12:55:43 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 1556040266; Thu, 11 Jan 2024 12:55:43 +0100 (CET) Received: from mail-ed1-f42.google.com (mail-ed1-f42.google.com [209.85.208.42]) by mails.dpdk.org (Postfix) with ESMTP id EE0874025E for ; Thu, 11 Jan 2024 12:55:40 +0100 (CET) Received: by mail-ed1-f42.google.com with SMTP id 4fb4d7f45d1cf-553ba2f0c8fso5938198a12.1 for ; Thu, 11 Jan 2024 03:55:40 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=pantheon.tech; s=google; t=1704974140; x=1705578940; darn=dpdk.org; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=BHm3d3dFug4YKIw9vksLowUHy+FHl9nzpNiXMDCzjXA=; b=KwFMbps5OlBfu8nNPb++zsc43Mqdn+n1WBsSoQcC6zNwODRV3wF8eSXIDTT62co84w Y8CJqjIITFAdvVSAMZf3DFvoioxBdCZyN37OZNPLHiihqNNVLaUjD7J4mJwspN99Obr7 PDYOx86Ivi54ktcfmbAIoC9f3woFsuTXM54XiO7411DGZr6k5msP0TfLiLjOw9O8tySQ ImZWF/Lt33P3vIza4kNKvpHqqlyxY4z2U6SnjYBEXqjERy8ndZqNDxOS2VnFOO+adU6y e9TmsWgl3wdUTi68YqjGBOvWvCNvEFA3bdvbkemYAkfwS8BxVyyzCytiE+QyjJORrtIX DvvQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1704974140; x=1705578940; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=BHm3d3dFug4YKIw9vksLowUHy+FHl9nzpNiXMDCzjXA=; b=rSXtxutH+y3mEpLs0j8fFZSxgUhtiETOz5NNLktWk8psn7wNk8AalQe+nxeTvtRP0I R/Uiprmg4uz9XMWKr7kSesrCDJjJVd2pzUzWUrIkgCnlZaLOu0uSMh2Ksbk4j5TJUQGa oxNjBRQHQj25CjdcEdXqWc+WxXHpk0I94oAzDoQPTqh53XUfyf2wTrSwM27PwVDf8jpp K/th7IRjM0BYNgbxNiJRdLwVqv/JKD5RA9sIxE39tJHbxXBgSmp3OQJ9ZdqHO4itaZwf 1tVeqt3zmkWlFF5LNMpe7eIa45DLHr+u1nKvoFr7psph5OlWnd2v/Wr3PLrA0z0gRJcn uRuA== X-Gm-Message-State: AOJu0YyrXXIMp93CVFg5woUNC5TE4zqGHEn9HroGK8psbj8tFiPaiszW DiUeJ9HTfP+BgjckUD0mkokzEUnKP1b5A7r9e6FDgIUfhaGUdg== X-Google-Smtp-Source: AGHT+IH9Cp9Ig749EYzXNNXxqAFUoeORXDg0E6LwLPagkt5cjTjini3B7Fny20Tr4NJ2PWUm2RvjclWoKxkIc6FXt4k= X-Received: by 2002:aa7:d60f:0:b0:557:3ce0:5a69 with SMTP id c15-20020aa7d60f000000b005573ce05a69mr294271edr.73.1704974140309; Thu, 11 Jan 2024 03:55:40 -0800 (PST) MIME-Version: 1.0 References: <20240110145715.28157-1-ahassick@iol.unh.edu> <20240110145715.28157-2-ahassick@iol.unh.edu> In-Reply-To: <20240110145715.28157-2-ahassick@iol.unh.edu> From: =?UTF-8?Q?Juraj_Linke=C5=A1?= Date: Thu, 11 Jan 2024 12:55:29 +0100 Message-ID: Subject: Re: [PATCH 1/2] tools: Add script to create artifacts To: Adam Hassick Cc: ci@dpdk.org, aconole@redhat.com, alialnu@nvidia.com Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-BeenThere: ci@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK CI discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: ci-bounces@dpdk.org On Wed, Jan 10, 2024 at 3:58=E2=80=AFPM Adam Hassick = wrote: > > This script takes in a URL to a series on Patchwork and emits a > tarball which may be used for running tests. > > Signed-off-by: Adam Hassick > --- > tools/create_series_artifact.py | 453 ++++++++++++++++++++++++++++++++ > 1 file changed, 453 insertions(+) > create mode 100755 tools/create_series_artifact.py > > diff --git a/tools/create_series_artifact.py b/tools/create_series_artifa= ct.py > new file mode 100755 > index 0000000..3049aaa > --- /dev/null > +++ b/tools/create_series_artifact.py > @@ -0,0 +1,453 @@ > +#!/usr/bin/env python3 > + > +import argparse > +import os > +import subprocess > +import requests > +import pathlib > +import yaml > +import pygit2 > +import requests > +import shutil > + > +from dataclasses import dataclass > +from typing import Optional, Dict, Tuple, Any, List > + > + > +@dataclass What's the reason for this being a dataclass when you define the __init__ method? The dataclass does more than just defining __init__, but not using a dataclass for that seems suboptimal. > +class CreateSeriesParameters(object): > + pw_server: str > + pw_project: str > + pw_token: str > + git_user: str > + git_email: str > + series_url: str > + patch_ids: List[int] > + labels: List[str] > + apply_error: Optional[Tuple[str, str]] > + config: Dict > + series: Dict > + pw_mcli_script: pathlib.Path > + patch_parser_script: pathlib.Path > + patch_parser_cfg: pathlib.Path > + lzma: bool > + output_tarball: pathlib.Path > + output_properties: pathlib.Path > + no_depends: bool > + docs_only: bool > + > + def __get_tags(self) -> List[str]: > + series_filename =3D f"{self.series['id']}.patch" > + > + # Pull down the patch series as a single file. > + pulldown_result =3D subprocess.run( > + [ > + "git", > + "pw", > + "--server", # Pass in the pw server we wish to download= from. > + self.pw_server, > + "series", > + "download", > + "--combined", # Specifies that we want the series in on= e patch file. > + str(self.series["id"]), > + series_filename, > + ], > + stdout=3Dsubprocess.DEVNULL, > + stderr=3Dsubprocess.DEVNULL, > + ) > + > + # Assert that this succeeds. > + pulldown_result.check_returncode() > + > + # Call the patch parser script to obtain the tags > + parse_result =3D subprocess.run( > + [self.patch_parser_script, self.patch_parser_cfg, series_fil= ename], > + stdout=3Dsubprocess.PIPE, > + stderr=3Dsubprocess.PIPE, > + ) > + > + # Assert that patch parser succeeded. > + parse_result.check_returncode() > + > + # Return the output > + return parse_result.stdout.decode().splitlines() > + > + def __init__(self): This init method basically mixes two things together which really should be separate. I doubt the purpose of this class is to parse the arguments, I understand it's here mainly to hold the data. As such, I'd move the argument parsing outside the class and then you could utilize the dataclass fully, i.e. you won't need to define the init method and just pass the parameters. > + parser =3D argparse.ArgumentParser( > + formatter_class=3Dargparse.RawDescriptionHelpFormatter, > + description=3D"""This script will create an artifact given a= URL to a Patchwork series. > +Much of the information provided is acquired by this script through the = use of a configuration file. > +This configuration file can be found with the other script configs in th= e config directory of the CI repo. > + > +The configuration file is used to aggregate: > + - Git credentials > + - Patchwork configuration and the user token (user token is optional) > + - The URL of the DPDK Git mirror > + - Locations of dependency scripts and their configuration files > + > +More detail and examples can be found in the default configuration file. > +This default file is located at "config/artifacts.yml" in the dpdk-ci re= pository. > +""", > + ) > + parser.add_argument( > + "config", > + type=3Dargparse.FileType(), > + help=3D"The config file to load. Must be a path to a YAML fi= le.", > + ) > + parser.add_argument( > + "series_url", type=3Dstr, help=3D"The URL to a Patchwork ser= ies." > + ) > + parser.add_argument( > + "-t", > + "--pw-token", > + dest=3D"pw_token", > + type=3Dstr, > + help=3D"The Patchwork token", > + ) > + parser.add_argument( > + "-l", > + "--lzma", > + action=3D"store_true", > + help=3D"Use LZMA compression rather than GNU zip compression= .", > + ) > + parser.add_argument( > + "-nd", > + "--no-depends", > + action=3D"store_true", > + help=3D"Do not use the Depends-on label.", > + ) > + > + args =3D parser.parse_args() > + > + # Collect basic arguments. > + self.series_url =3D args.series_url > + self.no_depends =3D args.no_depends > + self.lzma =3D args.lzma > + > + # Read the configuration file. > + with args.config as config_file: > + self.config =3D yaml.safe_load(config_file) > + > + self.pw_server =3D self.config["patchwork"]["server"] > + self.pw_project =3D self.config["patchwork"]["project"] There is actually some processing of what's passed to an instance. In this case, there could be a separate method (from __init__) that would process some of the passed parameters and create the instance, such as: @dataclass class CreateSeriesParameters(object): parameters def create_instance(config, *args): return CreateSeriesParameters(config, config["patchwork"]["server"], config["patchwork"]["project"], args) But you could do without this extra method as this is not a lot of processing and could easily be done before creating the instance/when passing the arguments. > + > + if args.pw_token: > + self.pw_token =3D args.pw_token > + else: > + self.pw_token =3D self.config["patchwork"].get("token") > + This could be rewritten as: self.pw_token =3D args.pw_token if args.pw_token else self.config["patchwork"].get("token") A bit more concise, but mainly a matter of preference. > + if not self.pw_token: > + print("Failed to obtain the Patchworks token.") > + exit(1) > + > + self.pw_mcli_script =3D pathlib.Path( > + self.config["pw_maintainers_cli"]["path"] > + ).absolute() > + self.git_user =3D self.config["git"]["user"] > + self.git_email =3D self.config["git"]["email"] > + > + self.patch_parser_script =3D pathlib.Path( > + self.config["patch_parser"]["path"] > + ).absolute() > + self.patch_parser_cfg =3D pathlib.Path( > + self.config["patch_parser"]["config"] > + ).absolute() > + > + if self.lzma: > + tarball_name =3D "dpdk.tar.xz" > + else: > + tarball_name =3D "dpdk.tar.gz" > + > + self.output_tarball =3D pathlib.Path(tarball_name) > + self.output_properties =3D pathlib.Path(f"{tarball_name}.propert= ies") > + > + # Pull the series JSON down. > + resp =3D requests.get(self.series_url) > + resp.raise_for_status() > + self.series =3D resp.json() > + > + # Get the labels using the patch parser. > + self.labels =3D self.__get_tags() > + > + # See if this is a documentation-only patch. > + self.docs_only =3D len(self.labels) =3D=3D 1 and self.labels[0] = =3D=3D "documentation" > + > + # Get the patch ids in this patch series. > + self.patch_ids =3D list(map(lambda x: int(x["id"]), self.series[= "patches"])) > + self.patch_ids.sort() > + > + > +class ProjectTree(object): > + artifact_path: pathlib.Path > + tree: str > + commit_id: str > + path: pathlib.Path > + log_file_path: pathlib.Path > + props_file_path: pathlib.Path > + data: CreateSeriesParameters > + repo: pygit2.Repository > + log_buf: List[str] > + properties: Dict[str, Any] > + > + def log(self, msg: str): > + print(msg) > + self.log_buf.append(msg) > + > + def write_log(self): > + with open(self.log_file_path, "w") as log_file: > + log_file.write("\n".join([msg for msg in self.log_buf])) > + > + def write_properties(self): > + with open(self.props_file_path, "w") as prop_file: > + for key, value in self.properties.items(): > + prop_file.write(f"{key}=3D{value}\n") > + > + def move_logs(self): > + shutil.move(self.log_file_path, pathlib.Path(os.getcwd(), "log.t= xt")) > + shutil.move( > + self.props_file_path, pathlib.Path(os.getcwd(), self.data.ou= tput_properties) > + ) > + > + def __init__(self, data: CreateSeriesParameters): > + self.data =3D data > + self.path =3D pathlib.Path(os.curdir, "dpdk").absolute() > + self.log_buf =3D [] > + self.log_file_path =3D pathlib.Path(self.path, "log.txt") > + self.props_file_path =3D pathlib.Path(self.path, data.output_pro= perties) > + self.tree =3D "main" > + self.properties =3D {} You have your own class for holding data, but not for properties. These are two different approaches for basically the same thing so it's a bit weird. > + self.artifact_path =3D data.output_tarball > + > + # Set the range of patch IDs this series (aka patchset) covers. > + self.properties["patchset_range"] =3D f"{data.patch_ids[0]}-{dat= a.patch_ids[-1]}" > + > + # Set the tags using tags obtained by the params class > + self.properties["tags"] =3D " ".join(data.labels) > + > + # Record whether this patch is only documentation > + self.properties["is_docs_only"] =3D str(data.docs_only) > + > + if not self.path.exists(): > + # Find the URL to clone from based on the tree name. > + repo =3D self.data.config["repo_url"] > + > + self.log(f"Cloning the DPDK mirror at: {repo}") > + > + # Pull down the git repo we found. > + repo =3D pygit2.clone_repository(repo, self.path) > + else: > + # Fetch any changes. > + repo =3D pygit2.Repository(self.path) > + > + self.log(f"Fetching the remote for tree: {self.tree}") > + > + origin: pygit2.Remote =3D repo.remotes["origin"] > + > + progress =3D origin.fetch() > + > + self.log( > + f"Received objects: {progress.received_objects} of {prog= ress.total_objects}" > + ) > + > + self.log("Cleaning repository state...") > + > + repo.state_cleanup() > + > + # Initially, check out to main. > + self.repo =3D repo > + self.checkout("main") > + > + self.log(f"Done: {self.tree} commit {self.commit_id}") > + > + def checkout(self, branch: str) -> Optional[str]: > + """ > + Check out to some branch. > + Returns true if successful, false otherwise. > + """ > + if branch not in self.repo.branches: > + return None > + > + git_branch =3D self.repo.branches[branch] > + self.log(f"Trying to checkout branch: {git_branch.branch_name}") > + reference =3D self.repo.resolve_refish(git_branch.branch_name) > + self.commit_id =3D str(reference[0].id) > + self.repo.checkout(reference[1]) > + self.tree =3D branch > + > + return branch > + > + def guess_git_tree(self) -> Optional[str]: > + """ > + Run pw_maintainers_cli to guess the git tree of the patch series= we are applying. > + Returns None if the pw_maintainers_cli failed. > + """ > + > + if "id" not in self.data.series: > + raise Exception("ID was not found in the series JSON") > + > + result =3D subprocess.run( > + [ > + self.data.pw_mcli_script, > + "--type", > + "series", > + "--pw-server", > + self.data.pw_server, > + "--pw-project", > + self.data.pw_project, > + "list-trees", > + str(self.data.series["id"]), > + ], > + stdout=3Dsubprocess.PIPE, > + cwd=3Dself.path, > + env=3D{ > + "MAINTAINERS_FILE_PATH": "MAINTAINERS", > + "PW_TOKEN": self.data.pw_token, > + }, > + ) > + > + if result.returncode =3D=3D 0: > + branch =3D result.stdout.decode().strip() > + > + if branch in ["main", "dpdk"]: > + branch =3D "main" > + else: > + return None > + > + if branch[0:5] =3D=3D "dpdk-": > + branch =3D branch[5 : len(branch)] > + > + return self.checkout(branch) > + > + def set_properties(self): > + self.properties["tree"] =3D self.tree > + self.properties["applied_commit_id"] =3D self.commit_id > + > + def apply_patch_series(self) -> bool: > + # Run git-pw to apply the series. > + > + # Configure the tree to point at the given patchwork server and = project > + self.repo.config["pw.server"] =3D self.data.pw_server > + self.repo.config["pw.project"] =3D self.data.pw_project > + self.repo.config["user.email"] =3D self.data.git_email > + self.repo.config["user.name"] =3D self.data.git_user > + > + result =3D subprocess.run( > + ["git", "pw", "series", "apply", str(self.data.series["id"])= ], > + cwd=3Dself.path, > + stdout=3Dsubprocess.PIPE, > + stderr=3Dsubprocess.PIPE, > + ) > + > + # Write the log from the apply process to disk. > + self.log(f"Applying patch...") > + self.log(result.stdout.decode()) > + self.log(result.stderr.decode()) > + > + # Store whether there was an error, and return the flag. > + error =3D result.returncode !=3D 0 > + self.properties["apply_error"] =3D error > + return not error > + > + def test_build(self) -> bool: > + ninja_result: Optional[subprocess.CompletedProcess] =3D None > + meson_result: subprocess.CompletedProcess =3D subprocess.run( > + ["meson", "setup", "build"], > + cwd=3Dself.path, > + stdout=3Dsubprocess.PIPE, > + stderr=3Dsubprocess.PIPE, > + ) > + > + build_error =3D meson_result.returncode !=3D 0 > + > + self.log("Running test build...") > + self.log(meson_result.stdout.decode()) > + > + if not build_error: > + ninja_result =3D subprocess.run( > + ["ninja", "-C", "build"], > + cwd=3Dself.path, > + stdout=3Dsubprocess.PIPE, > + stderr=3Dsubprocess.PIPE, > + ) > + > + build_error =3D build_error or ninja_result.returncode !=3D = 0 > + shutil.rmtree(pathlib.Path(self.path, "build")) > + > + self.log(ninja_result.stdout.decode()) > + self.log(ninja_result.stderr.decode()) > + > + self.log(meson_result.stderr.decode()) > + > + if build_error: > + self.log("Test build failed.") > + > + self.properties["build_error"] =3D build_error > + return not build_error > + > + def create_tarball(self): > + # Copy the logs into the artifact tarball. > + self.write_log() > + self.write_properties() > + > + tar_args =3D ["tar"] > + > + if self.data.lzma: > + tar_args.append("--lzma") > + else: > + tar_args.append("-z") > + > + tar_args.extend(["-cf", self.artifact_path, "-C", self.path, "."= ]) > + > + result =3D subprocess.run( > + tar_args, > + stdout=3Dsubprocess.DEVNULL, > + stderr=3Dsubprocess.DEVNULL, > + ) > + As Aaron mentioned, this could be done with the tarfile module: https://docs.python.org/3/library/tarfile.html. It should be able to write gzip and lzma compressed archives. > + if result.returncode !=3D 0: > + return False > + > + print("Successfully created artifact:", self.artifact_path) > + > + # Move the log file out of the working directory. > + self.move_logs() > + > + return True > + > + > +def try_to_apply(tree: ProjectTree) -> bool: > + tree.set_properties() > + return tree.apply_patch_series() and tree.test_build() and tree.crea= te_tarball() > + > + > +def main() -> int: > + data =3D CreateSeriesParameters() > + > + # Get the main branch. > + # We solve the chicken and egg problem of pw_maintainers_cli needing > + # information in the repository by always pulling down the main tree= . > + tree =3D ProjectTree(data) > + > + # Try to guess the Git tree for this patchset. > + guessed_tree =3D tree.guess_git_tree() > + > + # Try to apply this patch. > + if not ( > + try_to_apply(tree) # First, try to apply on the guessed tree. > + or guessed_tree !=3D "main" # If that fails, and the guessed tr= ee was not main > + and tree.checkout("main") # Checkout to main, then > + and try_to_apply(tree) # Try to apply on main > + ): > + tree.write_log() > + tree.write_properties() > + tree.move_logs() > + > + print("FAILURE") > + > + return 1 > + > + return 0 > + > + > +if __name__ =3D=3D "__main__": > + exit(main()) > -- > 2.43.0 >