* Re: [PATCH 1/2] tools: Add script to create artifacts
2024-01-10 14:57 ` [PATCH 1/2] tools: Add script to create artifacts Adam Hassick
@ 2024-01-10 16:54 ` Aaron Conole
2024-01-10 18:56 ` Patrick Robb
[not found] ` <CAC-YWqhfQAPQ2=yaqpgG+axPSG9n=kScJULkYv9ZybTnSiZUog@mail.gmail.com>
2024-01-11 11:55 ` Juraj Linkeš
1 sibling, 2 replies; 8+ messages in thread
From: Aaron Conole @ 2024-01-10 16:54 UTC (permalink / raw)
To: Adam Hassick; +Cc: ci, alialnu
Adam Hassick <ahassick@iol.unh.edu> writes:
> This script takes in a URL to a series on Patchwork and emits a
> tarball which may be used for running tests.
>
> Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
> ---
Just a quick glance.
Is it possible to include documentation on how this script is expected
to be run? That would also help with evaluating it.
> tools/create_series_artifact.py | 453 ++++++++++++++++++++++++++++++++
> 1 file changed, 453 insertions(+)
> create mode 100755 tools/create_series_artifact.py
>
> diff --git a/tools/create_series_artifact.py b/tools/create_series_artifact.py
> new file mode 100755
> index 0000000..3049aaa
> --- /dev/null
> +++ b/tools/create_series_artifact.py
> @@ -0,0 +1,453 @@
As a general note, you're using lots of calls to subprocess, but the
utilities you're calling are python utilities. Is it possible to import
these and make direct calls instead? I think that will speed up
execution. I also think it prevents weird PATH issues.
> +#!/usr/bin/env python3
> +
> +import argparse
> +import os
> +import subprocess
> +import requests
> +import pathlib
> +import yaml
> +import pygit2
> +import requests
> +import shutil
Looks like you imported 'requests' twice. I suggest rewriting this so
that it is in alphabetical order, which would prevent this kind of
mistake as well.
> +
> +from dataclasses import dataclass
> +from typing import Optional, Dict, Tuple, Any, List
> +
> +
> +@dataclass
> +class CreateSeriesParameters(object):
> + pw_server: str
> + pw_project: str
> + pw_token: str
> + git_user: str
> + git_email: str
> + series_url: str
> + patch_ids: List[int]
> + labels: List[str]
> + apply_error: Optional[Tuple[str, str]]
> + config: Dict
> + series: Dict
> + pw_mcli_script: pathlib.Path
> + patch_parser_script: pathlib.Path
> + patch_parser_cfg: pathlib.Path
> + lzma: bool
> + output_tarball: pathlib.Path
> + output_properties: pathlib.Path
> + no_depends: bool
> + docs_only: bool
> +
> + def __get_tags(self) -> List[str]:
> + series_filename = f"{self.series['id']}.patch"
> +
> + # Pull down the patch series as a single file.
> + pulldown_result = subprocess.run(
> + [
> + "git",
> + "pw",
> + "--server", # Pass in the pw server we wish to download from.
> + self.pw_server,
> + "series",
> + "download",
> + "--combined", # Specifies that we want the series in one patch file.
> + str(self.series["id"]),
> + series_filename,
> + ],
> + stdout=subprocess.DEVNULL,
> + stderr=subprocess.DEVNULL,
> + )
> +
> + # Assert that this succeeds.
> + pulldown_result.check_returncode()
> +
> + # Call the patch parser script to obtain the tags
> + parse_result = subprocess.run(
> + [self.patch_parser_script, self.patch_parser_cfg, series_filename],
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + # Assert that patch parser succeeded.
> + parse_result.check_returncode()
> +
> + # Return the output
> + return parse_result.stdout.decode().splitlines()
> +
> + def __init__(self):
> + parser = argparse.ArgumentParser(
> + formatter_class=argparse.RawDescriptionHelpFormatter,
> + description="""This script will create an artifact given a URL to a Patchwork series.
> +Much of the information provided is acquired by this script through the use of a configuration file.
> +This configuration file can be found with the other script configs in the config directory of the CI repo.
> +
> +The configuration file is used to aggregate:
> + - Git credentials
> + - Patchwork configuration and the user token (user token is optional)
> + - The URL of the DPDK Git mirror
> + - Locations of dependency scripts and their configuration files
> +
> +More detail and examples can be found in the default configuration file.
> +This default file is located at "config/artifacts.yml" in the dpdk-ci repository.
> +""",
> + )
> + parser.add_argument(
> + "config",
> + type=argparse.FileType(),
> + help="The config file to load. Must be a path to a YAML file.",
> + )
> + parser.add_argument(
> + "series_url", type=str, help="The URL to a Patchwork series."
> + )
> + parser.add_argument(
> + "-t",
> + "--pw-token",
> + dest="pw_token",
> + type=str,
> + help="The Patchwork token",
> + )
> + parser.add_argument(
> + "-l",
> + "--lzma",
> + action="store_true",
> + help="Use LZMA compression rather than GNU zip compression.",
> + )
> + parser.add_argument(
> + "-nd",
> + "--no-depends",
> + action="store_true",
> + help="Do not use the Depends-on label.",
> + )
> +
> + args = parser.parse_args()
> +
> + # Collect basic arguments.
> + self.series_url = args.series_url
> + self.no_depends = args.no_depends
> + self.lzma = args.lzma
> +
> + # Read the configuration file.
> + with args.config as config_file:
> + self.config = yaml.safe_load(config_file)
> +
> + self.pw_server = self.config["patchwork"]["server"]
> + self.pw_project = self.config["patchwork"]["project"]
> +
> + if args.pw_token:
> + self.pw_token = args.pw_token
> + else:
> + self.pw_token = self.config["patchwork"].get("token")
> +
> + if not self.pw_token:
> + print("Failed to obtain the Patchworks token.")
> + exit(1)
> +
> + self.pw_mcli_script = pathlib.Path(
> + self.config["pw_maintainers_cli"]["path"]
> + ).absolute()
> + self.git_user = self.config["git"]["user"]
> + self.git_email = self.config["git"]["email"]
> +
> + self.patch_parser_script = pathlib.Path(
> + self.config["patch_parser"]["path"]
> + ).absolute()
> + self.patch_parser_cfg = pathlib.Path(
> + self.config["patch_parser"]["config"]
> + ).absolute()
> +
> + if self.lzma:
> + tarball_name = "dpdk.tar.xz"
> + else:
> + tarball_name = "dpdk.tar.gz"
> +
> + self.output_tarball = pathlib.Path(tarball_name)
> + self.output_properties = pathlib.Path(f"{tarball_name}.properties")
> +
> + # Pull the series JSON down.
> + resp = requests.get(self.series_url)
> + resp.raise_for_status()
> + self.series = resp.json()
> +
> + # Get the labels using the patch parser.
> + self.labels = self.__get_tags()
> +
> + # See if this is a documentation-only patch.
> + self.docs_only = len(self.labels) == 1 and self.labels[0] == "documentation"
> +
> + # Get the patch ids in this patch series.
> + self.patch_ids = list(map(lambda x: int(x["id"]), self.series["patches"]))
> + self.patch_ids.sort()
> +
> +
> +class ProjectTree(object):
> + artifact_path: pathlib.Path
> + tree: str
> + commit_id: str
> + path: pathlib.Path
> + log_file_path: pathlib.Path
> + props_file_path: pathlib.Path
> + data: CreateSeriesParameters
> + repo: pygit2.Repository
> + log_buf: List[str]
> + properties: Dict[str, Any]
> +
> + def log(self, msg: str):
> + print(msg)
> + self.log_buf.append(msg)
> +
> + def write_log(self):
> + with open(self.log_file_path, "w") as log_file:
> + log_file.write("\n".join([msg for msg in self.log_buf]))
> +
> + def write_properties(self):
> + with open(self.props_file_path, "w") as prop_file:
> + for key, value in self.properties.items():
> + prop_file.write(f"{key}={value}\n")
> +
> + def move_logs(self):
> + shutil.move(self.log_file_path, pathlib.Path(os.getcwd(), "log.txt"))
> + shutil.move(
> + self.props_file_path, pathlib.Path(os.getcwd(), self.data.output_properties)
> + )
> +
> + def __init__(self, data: CreateSeriesParameters):
> + self.data = data
> + self.path = pathlib.Path(os.curdir, "dpdk").absolute()
> + self.log_buf = []
> + self.log_file_path = pathlib.Path(self.path, "log.txt")
> + self.props_file_path = pathlib.Path(self.path, data.output_properties)
> + self.tree = "main"
> + self.properties = {}
> + self.artifact_path = data.output_tarball
> +
> + # Set the range of patch IDs this series (aka patchset) covers.
> + self.properties["patchset_range"] = f"{data.patch_ids[0]}-{data.patch_ids[-1]}"
> +
> + # Set the tags using tags obtained by the params class
> + self.properties["tags"] = " ".join(data.labels)
> +
> + # Record whether this patch is only documentation
> + self.properties["is_docs_only"] = str(data.docs_only)
> +
> + if not self.path.exists():
> + # Find the URL to clone from based on the tree name.
> + repo = self.data.config["repo_url"]
> +
> + self.log(f"Cloning the DPDK mirror at: {repo}")
> +
> + # Pull down the git repo we found.
> + repo = pygit2.clone_repository(repo, self.path)
> + else:
> + # Fetch any changes.
> + repo = pygit2.Repository(self.path)
> +
> + self.log(f"Fetching the remote for tree: {self.tree}")
> +
> + origin: pygit2.Remote = repo.remotes["origin"]
> +
> + progress = origin.fetch()
> +
> + self.log(
> + f"Received objects: {progress.received_objects} of {progress.total_objects}"
> + )
> +
> + self.log("Cleaning repository state...")
> +
> + repo.state_cleanup()
> +
> + # Initially, check out to main.
> + self.repo = repo
> + self.checkout("main")
> +
> + self.log(f"Done: {self.tree} commit {self.commit_id}")
> +
> + def checkout(self, branch: str) -> Optional[str]:
> + """
> + Check out to some branch.
> + Returns true if successful, false otherwise.
> + """
> + if branch not in self.repo.branches:
> + return None
> +
> + git_branch = self.repo.branches[branch]
> + self.log(f"Trying to checkout branch: {git_branch.branch_name}")
> + reference = self.repo.resolve_refish(git_branch.branch_name)
> + self.commit_id = str(reference[0].id)
> + self.repo.checkout(reference[1])
> + self.tree = branch
> +
> + return branch
> +
> + def guess_git_tree(self) -> Optional[str]:
> + """
> + Run pw_maintainers_cli to guess the git tree of the patch series we are applying.
> + Returns None if the pw_maintainers_cli failed.
> + """
> +
> + if "id" not in self.data.series:
> + raise Exception("ID was not found in the series JSON")
> +
> + result = subprocess.run(
> + [
> + self.data.pw_mcli_script,
> + "--type",
> + "series",
> + "--pw-server",
> + self.data.pw_server,
> + "--pw-project",
> + self.data.pw_project,
> + "list-trees",
> + str(self.data.series["id"]),
> + ],
> + stdout=subprocess.PIPE,
> + cwd=self.path,
> + env={
> + "MAINTAINERS_FILE_PATH": "MAINTAINERS",
> + "PW_TOKEN": self.data.pw_token,
> + },
> + )
> +
> + if result.returncode == 0:
> + branch = result.stdout.decode().strip()
> +
> + if branch in ["main", "dpdk"]:
> + branch = "main"
> + else:
> + return None
> +
> + if branch[0:5] == "dpdk-":
> + branch = branch[5 : len(branch)]
I think this can be rewritten like:
if branch.startswith("dpdk-"):
branch = branch[5:]
I think there is a conditional strip for that as well, but I can't
recall right now.
> +
> + return self.checkout(branch)
> +
> + def set_properties(self):
> + self.properties["tree"] = self.tree
> + self.properties["applied_commit_id"] = self.commit_id
> +
> + def apply_patch_series(self) -> bool:
> + # Run git-pw to apply the series.
> +
> + # Configure the tree to point at the given patchwork server and project
> + self.repo.config["pw.server"] = self.data.pw_server
> + self.repo.config["pw.project"] = self.data.pw_project
> + self.repo.config["user.email"] = self.data.git_email
> + self.repo.config["user.name"] = self.data.git_user
> +
> + result = subprocess.run(
> + ["git", "pw", "series", "apply", str(self.data.series["id"])],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + # Write the log from the apply process to disk.
> + self.log(f"Applying patch...")
As far as I understand, f-strings are supposed to have parameters but it
looks like this isn't a proper f-string (according to my
understanding). At least, it should look like:
f'{"Applying patch..."}'
Something like that. But also, why do we use an f-string here at all?
> + self.log(result.stdout.decode())
> + self.log(result.stderr.decode())
> +
> + # Store whether there was an error, and return the flag.
> + error = result.returncode != 0
> + self.properties["apply_error"] = error
> + return not error
> +
> + def test_build(self) -> bool:
> + ninja_result: Optional[subprocess.CompletedProcess] = None
> + meson_result: subprocess.CompletedProcess = subprocess.run(
> + ["meson", "setup", "build"],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + build_error = meson_result.returncode != 0
> +
> + self.log("Running test build...")
> + self.log(meson_result.stdout.decode())
> +
> + if not build_error:
> + ninja_result = subprocess.run(
> + ["ninja", "-C", "build"],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + build_error = build_error or ninja_result.returncode != 0
> + shutil.rmtree(pathlib.Path(self.path, "build"))
> +
> + self.log(ninja_result.stdout.decode())
> + self.log(ninja_result.stderr.decode())
> +
> + self.log(meson_result.stderr.decode())
> +
> + if build_error:
> + self.log("Test build failed.")
> +
> + self.properties["build_error"] = build_error
> + return not build_error
> +
> + def create_tarball(self):
> + # Copy the logs into the artifact tarball.
> + self.write_log()
> + self.write_properties()
> +
> + tar_args = ["tar"]
> +
> + if self.data.lzma:
> + tar_args.append("--lzma")
> + else:
> + tar_args.append("-z")
> +
> + tar_args.extend(["-cf", self.artifact_path, "-C", self.path, "."])
> +
> + result = subprocess.run(
> + tar_args,
> + stdout=subprocess.DEVNULL,
> + stderr=subprocess.DEVNULL,
> + )
> +
> + if result.returncode != 0:
> + return False
> +
> + print("Successfully created artifact:", self.artifact_path)
> +
> + # Move the log file out of the working directory.
> + self.move_logs()
> +
> + return True
> +
> +
> +def try_to_apply(tree: ProjectTree) -> bool:
> + tree.set_properties()
> + return tree.apply_patch_series() and tree.test_build() and tree.create_tarball()
> +
> +
> +def main() -> int:
> + data = CreateSeriesParameters()
> +
> + # Get the main branch.
> + # We solve the chicken and egg problem of pw_maintainers_cli needing
> + # information in the repository by always pulling down the main tree.
> + tree = ProjectTree(data)
> +
> + # Try to guess the Git tree for this patchset.
> + guessed_tree = tree.guess_git_tree()
> +
> + # Try to apply this patch.
> + if not (
> + try_to_apply(tree) # First, try to apply on the guessed tree.
> + or guessed_tree != "main" # If that fails, and the guessed tree was not main
> + and tree.checkout("main") # Checkout to main, then
> + and try_to_apply(tree) # Try to apply on main
> + ):
> + tree.write_log()
> + tree.write_properties()
> + tree.move_logs()
> +
> + print("FAILURE")
> +
> + return 1
> +
> + return 0
> +
> +
> +if __name__ == "__main__":
> + exit(main())
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] tools: Add script to create artifacts
2024-01-10 14:57 ` [PATCH 1/2] tools: Add script to create artifacts Adam Hassick
2024-01-10 16:54 ` Aaron Conole
@ 2024-01-11 11:55 ` Juraj Linkeš
1 sibling, 0 replies; 8+ messages in thread
From: Juraj Linkeš @ 2024-01-11 11:55 UTC (permalink / raw)
To: Adam Hassick; +Cc: ci, aconole, alialnu
On Wed, Jan 10, 2024 at 3:58 PM Adam Hassick <ahassick@iol.unh.edu> wrote:
>
> This script takes in a URL to a series on Patchwork and emits a
> tarball which may be used for running tests.
>
> Signed-off-by: Adam Hassick <ahassick@iol.unh.edu>
> ---
> tools/create_series_artifact.py | 453 ++++++++++++++++++++++++++++++++
> 1 file changed, 453 insertions(+)
> create mode 100755 tools/create_series_artifact.py
>
> diff --git a/tools/create_series_artifact.py b/tools/create_series_artifact.py
> new file mode 100755
> index 0000000..3049aaa
> --- /dev/null
> +++ b/tools/create_series_artifact.py
> @@ -0,0 +1,453 @@
> +#!/usr/bin/env python3
> +
> +import argparse
> +import os
> +import subprocess
> +import requests
> +import pathlib
> +import yaml
> +import pygit2
> +import requests
> +import shutil
> +
> +from dataclasses import dataclass
> +from typing import Optional, Dict, Tuple, Any, List
> +
> +
> +@dataclass
What's the reason for this being a dataclass when you define the
__init__ method? A dataclass does more than just define __init__,
but not using a dataclass for that seems suboptimal.
> +class CreateSeriesParameters(object):
> + pw_server: str
> + pw_project: str
> + pw_token: str
> + git_user: str
> + git_email: str
> + series_url: str
> + patch_ids: List[int]
> + labels: List[str]
> + apply_error: Optional[Tuple[str, str]]
> + config: Dict
> + series: Dict
> + pw_mcli_script: pathlib.Path
> + patch_parser_script: pathlib.Path
> + patch_parser_cfg: pathlib.Path
> + lzma: bool
> + output_tarball: pathlib.Path
> + output_properties: pathlib.Path
> + no_depends: bool
> + docs_only: bool
> +
> + def __get_tags(self) -> List[str]:
> + series_filename = f"{self.series['id']}.patch"
> +
> + # Pull down the patch series as a single file.
> + pulldown_result = subprocess.run(
> + [
> + "git",
> + "pw",
> + "--server", # Pass in the pw server we wish to download from.
> + self.pw_server,
> + "series",
> + "download",
> + "--combined", # Specifies that we want the series in one patch file.
> + str(self.series["id"]),
> + series_filename,
> + ],
> + stdout=subprocess.DEVNULL,
> + stderr=subprocess.DEVNULL,
> + )
> +
> + # Assert that this succeeds.
> + pulldown_result.check_returncode()
> +
> + # Call the patch parser script to obtain the tags
> + parse_result = subprocess.run(
> + [self.patch_parser_script, self.patch_parser_cfg, series_filename],
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + # Assert that patch parser succeeded.
> + parse_result.check_returncode()
> +
> + # Return the output
> + return parse_result.stdout.decode().splitlines()
> +
> + def __init__(self):
This init method basically mixes two things together which really
should be separate. I doubt the purpose of this class is to parse the
arguments, I understand it's here mainly to hold the data. As such,
I'd move the argument parsing outside the class and then you could
utilize the dataclass fully, i.e. you won't need to define the init
method and just pass the parameters.
> + parser = argparse.ArgumentParser(
> + formatter_class=argparse.RawDescriptionHelpFormatter,
> + description="""This script will create an artifact given a URL to a Patchwork series.
> +Much of the information provided is acquired by this script through the use of a configuration file.
> +This configuration file can be found with the other script configs in the config directory of the CI repo.
> +
> +The configuration file is used to aggregate:
> + - Git credentials
> + - Patchwork configuration and the user token (user token is optional)
> + - The URL of the DPDK Git mirror
> + - Locations of dependency scripts and their configuration files
> +
> +More detail and examples can be found in the default configuration file.
> +This default file is located at "config/artifacts.yml" in the dpdk-ci repository.
> +""",
> + )
> + parser.add_argument(
> + "config",
> + type=argparse.FileType(),
> + help="The config file to load. Must be a path to a YAML file.",
> + )
> + parser.add_argument(
> + "series_url", type=str, help="The URL to a Patchwork series."
> + )
> + parser.add_argument(
> + "-t",
> + "--pw-token",
> + dest="pw_token",
> + type=str,
> + help="The Patchwork token",
> + )
> + parser.add_argument(
> + "-l",
> + "--lzma",
> + action="store_true",
> + help="Use LZMA compression rather than GNU zip compression.",
> + )
> + parser.add_argument(
> + "-nd",
> + "--no-depends",
> + action="store_true",
> + help="Do not use the Depends-on label.",
> + )
> +
> + args = parser.parse_args()
> +
> + # Collect basic arguments.
> + self.series_url = args.series_url
> + self.no_depends = args.no_depends
> + self.lzma = args.lzma
> +
> + # Read the configuration file.
> + with args.config as config_file:
> + self.config = yaml.safe_load(config_file)
> +
> + self.pw_server = self.config["patchwork"]["server"]
> + self.pw_project = self.config["patchwork"]["project"]
There is actually some processing of what's passed to an instance. In
this case, there could be a separate method (from __init__) that would
process some of the passed parameters and create the instance, such
as:
@dataclass
class CreateSeriesParameters(object):
parameters
def create_instance(config, *args):
return CreateSeriesParameters(config,
config["patchwork"]["server"], config["patchwork"]["project"], args)
But you could do without this extra method as this is not a lot of
processing and could easily be done before creating the instance/when
passing the arguments.
> +
> + if args.pw_token:
> + self.pw_token = args.pw_token
> + else:
> + self.pw_token = self.config["patchwork"].get("token")
> +
This could be rewritten as:
self.pw_token = args.pw_token if args.pw_token else
self.config["patchwork"].get("token")
A bit more concise, but mainly a matter of preference.
> + if not self.pw_token:
> + print("Failed to obtain the Patchworks token.")
> + exit(1)
> +
> + self.pw_mcli_script = pathlib.Path(
> + self.config["pw_maintainers_cli"]["path"]
> + ).absolute()
> + self.git_user = self.config["git"]["user"]
> + self.git_email = self.config["git"]["email"]
> +
> + self.patch_parser_script = pathlib.Path(
> + self.config["patch_parser"]["path"]
> + ).absolute()
> + self.patch_parser_cfg = pathlib.Path(
> + self.config["patch_parser"]["config"]
> + ).absolute()
> +
> + if self.lzma:
> + tarball_name = "dpdk.tar.xz"
> + else:
> + tarball_name = "dpdk.tar.gz"
> +
> + self.output_tarball = pathlib.Path(tarball_name)
> + self.output_properties = pathlib.Path(f"{tarball_name}.properties")
> +
> + # Pull the series JSON down.
> + resp = requests.get(self.series_url)
> + resp.raise_for_status()
> + self.series = resp.json()
> +
> + # Get the labels using the patch parser.
> + self.labels = self.__get_tags()
> +
> + # See if this is a documentation-only patch.
> + self.docs_only = len(self.labels) == 1 and self.labels[0] == "documentation"
> +
> + # Get the patch ids in this patch series.
> + self.patch_ids = list(map(lambda x: int(x["id"]), self.series["patches"]))
> + self.patch_ids.sort()
> +
> +
> +class ProjectTree(object):
> + artifact_path: pathlib.Path
> + tree: str
> + commit_id: str
> + path: pathlib.Path
> + log_file_path: pathlib.Path
> + props_file_path: pathlib.Path
> + data: CreateSeriesParameters
> + repo: pygit2.Repository
> + log_buf: List[str]
> + properties: Dict[str, Any]
> +
> + def log(self, msg: str):
> + print(msg)
> + self.log_buf.append(msg)
> +
> + def write_log(self):
> + with open(self.log_file_path, "w") as log_file:
> + log_file.write("\n".join([msg for msg in self.log_buf]))
> +
> + def write_properties(self):
> + with open(self.props_file_path, "w") as prop_file:
> + for key, value in self.properties.items():
> + prop_file.write(f"{key}={value}\n")
> +
> + def move_logs(self):
> + shutil.move(self.log_file_path, pathlib.Path(os.getcwd(), "log.txt"))
> + shutil.move(
> + self.props_file_path, pathlib.Path(os.getcwd(), self.data.output_properties)
> + )
> +
> + def __init__(self, data: CreateSeriesParameters):
> + self.data = data
> + self.path = pathlib.Path(os.curdir, "dpdk").absolute()
> + self.log_buf = []
> + self.log_file_path = pathlib.Path(self.path, "log.txt")
> + self.props_file_path = pathlib.Path(self.path, data.output_properties)
> + self.tree = "main"
> + self.properties = {}
You have your own class for holding data, but not for properties.
These are two different approaches for basically the same thing so
it's a bit weird.
> + self.artifact_path = data.output_tarball
> +
> + # Set the range of patch IDs this series (aka patchset) covers.
> + self.properties["patchset_range"] = f"{data.patch_ids[0]}-{data.patch_ids[-1]}"
> +
> + # Set the tags using tags obtained by the params class
> + self.properties["tags"] = " ".join(data.labels)
> +
> + # Record whether this patch is only documentation
> + self.properties["is_docs_only"] = str(data.docs_only)
> +
> + if not self.path.exists():
> + # Find the URL to clone from based on the tree name.
> + repo = self.data.config["repo_url"]
> +
> + self.log(f"Cloning the DPDK mirror at: {repo}")
> +
> + # Pull down the git repo we found.
> + repo = pygit2.clone_repository(repo, self.path)
> + else:
> + # Fetch any changes.
> + repo = pygit2.Repository(self.path)
> +
> + self.log(f"Fetching the remote for tree: {self.tree}")
> +
> + origin: pygit2.Remote = repo.remotes["origin"]
> +
> + progress = origin.fetch()
> +
> + self.log(
> + f"Received objects: {progress.received_objects} of {progress.total_objects}"
> + )
> +
> + self.log("Cleaning repository state...")
> +
> + repo.state_cleanup()
> +
> + # Initially, check out to main.
> + self.repo = repo
> + self.checkout("main")
> +
> + self.log(f"Done: {self.tree} commit {self.commit_id}")
> +
> + def checkout(self, branch: str) -> Optional[str]:
> + """
> + Check out to some branch.
> + Returns true if successful, false otherwise.
> + """
> + if branch not in self.repo.branches:
> + return None
> +
> + git_branch = self.repo.branches[branch]
> + self.log(f"Trying to checkout branch: {git_branch.branch_name}")
> + reference = self.repo.resolve_refish(git_branch.branch_name)
> + self.commit_id = str(reference[0].id)
> + self.repo.checkout(reference[1])
> + self.tree = branch
> +
> + return branch
> +
> + def guess_git_tree(self) -> Optional[str]:
> + """
> + Run pw_maintainers_cli to guess the git tree of the patch series we are applying.
> + Returns None if the pw_maintainers_cli failed.
> + """
> +
> + if "id" not in self.data.series:
> + raise Exception("ID was not found in the series JSON")
> +
> + result = subprocess.run(
> + [
> + self.data.pw_mcli_script,
> + "--type",
> + "series",
> + "--pw-server",
> + self.data.pw_server,
> + "--pw-project",
> + self.data.pw_project,
> + "list-trees",
> + str(self.data.series["id"]),
> + ],
> + stdout=subprocess.PIPE,
> + cwd=self.path,
> + env={
> + "MAINTAINERS_FILE_PATH": "MAINTAINERS",
> + "PW_TOKEN": self.data.pw_token,
> + },
> + )
> +
> + if result.returncode == 0:
> + branch = result.stdout.decode().strip()
> +
> + if branch in ["main", "dpdk"]:
> + branch = "main"
> + else:
> + return None
> +
> + if branch[0:5] == "dpdk-":
> + branch = branch[5 : len(branch)]
> +
> + return self.checkout(branch)
> +
> + def set_properties(self):
> + self.properties["tree"] = self.tree
> + self.properties["applied_commit_id"] = self.commit_id
> +
> + def apply_patch_series(self) -> bool:
> + # Run git-pw to apply the series.
> +
> + # Configure the tree to point at the given patchwork server and project
> + self.repo.config["pw.server"] = self.data.pw_server
> + self.repo.config["pw.project"] = self.data.pw_project
> + self.repo.config["user.email"] = self.data.git_email
> + self.repo.config["user.name"] = self.data.git_user
> +
> + result = subprocess.run(
> + ["git", "pw", "series", "apply", str(self.data.series["id"])],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + # Write the log from the apply process to disk.
> + self.log(f"Applying patch...")
> + self.log(result.stdout.decode())
> + self.log(result.stderr.decode())
> +
> + # Store whether there was an error, and return the flag.
> + error = result.returncode != 0
> + self.properties["apply_error"] = error
> + return not error
> +
> + def test_build(self) -> bool:
> + ninja_result: Optional[subprocess.CompletedProcess] = None
> + meson_result: subprocess.CompletedProcess = subprocess.run(
> + ["meson", "setup", "build"],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + build_error = meson_result.returncode != 0
> +
> + self.log("Running test build...")
> + self.log(meson_result.stdout.decode())
> +
> + if not build_error:
> + ninja_result = subprocess.run(
> + ["ninja", "-C", "build"],
> + cwd=self.path,
> + stdout=subprocess.PIPE,
> + stderr=subprocess.PIPE,
> + )
> +
> + build_error = build_error or ninja_result.returncode != 0
> + shutil.rmtree(pathlib.Path(self.path, "build"))
> +
> + self.log(ninja_result.stdout.decode())
> + self.log(ninja_result.stderr.decode())
> +
> + self.log(meson_result.stderr.decode())
> +
> + if build_error:
> + self.log("Test build failed.")
> +
> + self.properties["build_error"] = build_error
> + return not build_error
> +
> + def create_tarball(self):
> + # Copy the logs into the artifact tarball.
> + self.write_log()
> + self.write_properties()
> +
> + tar_args = ["tar"]
> +
> + if self.data.lzma:
> + tar_args.append("--lzma")
> + else:
> + tar_args.append("-z")
> +
> + tar_args.extend(["-cf", self.artifact_path, "-C", self.path, "."])
> +
> + result = subprocess.run(
> + tar_args,
> + stdout=subprocess.DEVNULL,
> + stderr=subprocess.DEVNULL,
> + )
> +
As Aaron mentioned, this could be done with the tarfile module:
https://docs.python.org/3/library/tarfile.html. It should be able to
write gzip and lzma compressed archives.
> + if result.returncode != 0:
> + return False
> +
> + print("Successfully created artifact:", self.artifact_path)
> +
> + # Move the log file out of the working directory.
> + self.move_logs()
> +
> + return True
> +
> +
> +def try_to_apply(tree: ProjectTree) -> bool:
> + tree.set_properties()
> + return tree.apply_patch_series() and tree.test_build() and tree.create_tarball()
> +
> +
> +def main() -> int:
> + data = CreateSeriesParameters()
> +
> + # Get the main branch.
> + # We solve the chicken and egg problem of pw_maintainers_cli needing
> + # information in the repository by always pulling down the main tree.
> + tree = ProjectTree(data)
> +
> + # Try to guess the Git tree for this patchset.
> + guessed_tree = tree.guess_git_tree()
> +
> + # Try to apply this patch.
> + if not (
> + try_to_apply(tree) # First, try to apply on the guessed tree.
> + or guessed_tree != "main" # If that fails, and the guessed tree was not main
> + and tree.checkout("main") # Checkout to main, then
> + and try_to_apply(tree) # Try to apply on main
> + ):
> + tree.write_log()
> + tree.write_properties()
> + tree.move_logs()
> +
> + print("FAILURE")
> +
> + return 1
> +
> + return 0
> +
> +
> +if __name__ == "__main__":
> + exit(main())
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread