* [PATCH v3 1/2] treewide: Add a User Agent for CURL requests
2024-01-22 23:40 [PATCH v3 0/2] Reduced checks API usage Aaron Conole
@ 2024-01-22 23:40 ` Aaron Conole
2024-01-23 10:47 ` Ilya Maximets
2024-01-22 23:40 ` [PATCH v3 2/2] post_pw: Store submitted checks locally as well Aaron Conole
2024-01-23 13:46 ` [PATCH v3 0/2] Reduced checks API usage Aaron Conole
2 siblings, 1 reply; 7+ messages in thread
From: Aaron Conole @ 2024-01-22 23:40 UTC (permalink / raw)
To: ci; +Cc: Michael Santana, Ilya Maximets, Jeremy Kerr
This will help identifying robot usage across PW instances.
Acked-by: Michael Santana <msantana@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
---
v2: Added the pw-ci prefix to all curl commands
v3: no change
ci_mon | 2 +-
github_get_logs.sh | 8 ++++----
github_mon | 2 +-
github_restart | 6 ++++--
jenkins_lib.sh | 5 +++--
post_pw.sh | 8 ++++----
pw_mon | 14 +++++++-------
7 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/ci_mon b/ci_mon
index 2e30da4..94ec1a3 100755
--- a/ci_mon
+++ b/ci_mon
@@ -159,7 +159,7 @@ for I in travis github obs dummy; do
author=$(echo $SERIES_LINE | cut -d\| -f4)
email=$(echo $SERIES_LINE | cut -d\| -f5)
- PATCHDATA=$(curl -s $url)
+ PATCHDATA=$(curl -A "(pw-ci) ci-mon-${PROJECT}" -s $url)
patch_id="$(get_patch_id_by_series_id_and_sha "$series_id" "$shasum" "$pw_chk_instance")"
if [ "X$patch_id" == "X" ]; then
diff --git a/github_get_logs.sh b/github_get_logs.sh
index 9f44840..9ebbdc2 100755
--- a/github_get_logs.sh
+++ b/github_get_logs.sh
@@ -74,14 +74,14 @@ print_errored_logs_for_commit () {
run_id="$(echo "$run_meta" | jq -r ".id")"
# Get the real logs url, download logs, and unzip logs
- logs_url="$(curl -s -S -H "${AUTH}" -H "${APP}" "${redirect_url}" -I | \
+ logs_url="$(curl -A "(pw-ci) github-logs-crawler" -s -S -H "${AUTH}" -H "${APP}" "${redirect_url}" -I | \
grep -i 'Location: '| sed 's/Location: //i' | tr -d '\r')"
- curl -s -S "$logs_url" -o "build_logs_series_${series_id}.zip"
+ curl -A "(pw-ci) github-logs-crawler" -s -S "$logs_url" -o "build_logs_series_${series_id}.zip"
unzip -o -q "build_logs_series_${series_id}.zip" -d "build_logs_series_${series_id}"
# Get the names of the failed jobs and the steps that failed
tmp_url="$GITHUB_API/repos/$repo_name/actions/runs/${run_id}/jobs"
- jobs_results="$(curl -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
+ jobs_results="$(curl -A "(pw-ci) github-logs-crawler" -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
jobs_results="$(echo "$jobs_results" | jq "[.jobs[] | \
select(.conclusion==\"failure\") | {name, failed_step: .steps[] | \
select(.conclusion==\"failure\") | {name, conclusion, number}}]")"
@@ -135,7 +135,7 @@ repo_name=$(echo "$repo_name" | sed -e 's@%2F@/@g' -e 's,%40,@,g')
# Get GHA runs
tmp_url="$GITHUB_API/repos/$repo_name/actions/runs?per_page=9000"
-all_runs="$(curl -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
+all_runs="$(curl -A "(pw-ci) github-logs-crawler" -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
runs=$(echo $all_runs | jq -rc ".workflow_runs[] | select(.head_branch == \"series_$series_id\")")
not_found="$(echo "$runs" | jq -rc ".message")"
if [ "$not_found" == "Not Found" ]; then
diff --git a/github_mon b/github_mon
index 8670ce8..d1e54a8 100755
--- a/github_mon
+++ b/github_mon
@@ -117,7 +117,7 @@ while IFS="|" read -r series_id patch_id patch_url patch_name sha patchwork_inst
tmp_url="$GITHUB_API/repos/$repo_name/actions/runs?per_page=9000"
if [ "$tmp_url" != "$prev_url" ]; then
prev_url="$tmp_url"
- all_runs="$(curl -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
+ all_runs="$(curl -A "(pw-ci) github-mon-${pw_project}" -s -S -H "${AUTH}" -H "${APP}" "${tmp_url}")"
fi
runs=$(echo "$all_runs" | jq -rc ".workflow_runs[] | select(.head_branch == \"series_$series_id\")")
diff --git a/github_restart b/github_restart
index d5d63b9..f4e9ebd 100755
--- a/github_restart
+++ b/github_restart
@@ -114,7 +114,8 @@ if [ "X$runid" == "X" ]; then
fi
comma=""
- for job in $(curl -s -S -H "${AUTH}" -H "${APP}" \
+ for job in $(curl -A "(pw-ci) github-restart-${pw_project}" -s -S -H "${AUTH}" \
+ -H "${APP}" \
"https://api.github.com/repos/${reponame}/actions/runs?head_sha=${sha}" | \
jq -rc ".workflow_runs[] ${workflow_select} .id")
do
@@ -126,7 +127,8 @@ fi
echo -n "{\"results\":["
comma=""
for job in $(echo "$runid" | sed 's/,/ /'); do
- result=$(curl -s -X POST -L -S -H "${AUTH}" -H "${APP}" \
+ result=$(curl -A "(pw-ci) github-restart-${pw_project}" -s -X POST -L -S \
+ -H "${AUTH}" -H "${APP}" \
"https://api.github.com/repos/${reponame}/actions/runs/$job/rerun")
msg=$(echo "$result" | jq -rc '.message')
diff --git a/jenkins_lib.sh b/jenkins_lib.sh
index fc7cb1d..e3826c8 100644
--- a/jenkins_lib.sh
+++ b/jenkins_lib.sh
@@ -41,12 +41,12 @@ if [ "X$jenkins_token" != "X" ]; then
jenkins_credentials="$jenkins_user:$jenkins_token"
fi
-jenkins_crumb_value=$(curl -s "http://${jenkins_credentials}@${jenkins_url}/crumbIssuer/api/xml?xpath=concat(//crumbRequestField,\":\",//crumb)")
+jenkins_crumb_value=$(curl -A "(pw-ci) jenkins-sub-${PROJECT}" -s "http://${jenkins_credentials}@${jenkins_url}/crumbIssuer/api/xml?xpath=concat(//crumbRequestField,\":\",//crumb)")
function jenkins_check_for_job() {
local jenkins_job=jenkins_${pw_project}_job
- curl -s -f -X GET \
+ curl -A "(pw-ci) jenkins-sub-${PROJECT}" -s -f -X GET \
"http://${jenkins_credentials}@${jenkins_url}/job/${!jenkins_job}/config.xml" >/dev/null
}
@@ -86,6 +86,7 @@ function jenkins_submit_series() {
}'
curl -s -f -X POST \
+ -A "(pw-ci) jenkins-sub-${PROJECT}" \
-H "${jenkins_crumb_value}" \
--data token="${!jenkins_job_token}" \
--data-urlencode json="$json_data" \
diff --git a/post_pw.sh b/post_pw.sh
index e98e8ab..9163ea1 100755
--- a/post_pw.sh
+++ b/post_pw.sh
@@ -53,7 +53,7 @@ send_post() {
return 0
fi
- report="$(curl -sSf "${link}")"
+ report="$(curl -A "(pw-ci) pw-post" -sSf "${link}")"
if [ $? -ne 0 ]; then
echo "Failed to get proper server response on link ${link}" 1>&2
return 0
@@ -79,7 +79,7 @@ send_post() {
fi
# Get reports from patch
- checks="$(curl -sSf -X GET \
+ checks="$(curl -A "(pw-ci) pw-post" -sSf -X GET \
--header "Content-Type: application/json" \
"$api_url")"
if [ $? -ne 0 ]; then
@@ -105,7 +105,7 @@ send_post() {
\"description\": \"$description\"\
}"
- curl -sSf -X POST \
+ curl -A "(pw-ci) pw-post" -sSf -X POST \
-H "Authorization: Token ${token}" \
--header "Content-Type: application/json" \
--data "$data" \
@@ -120,7 +120,7 @@ send_post() {
}
year_month="$(date +"%Y-%B")"
-reports="$(curl -sSf "${mail_archive}${year_month}/thread.html" | \
+reports="$(curl -A "(pw-ci) pw-post" -sSf "${mail_archive}${year_month}/thread.html" | \
grep -i 'HREF=' | sed -e 's@[0-9]*<LI><A HREF="@\|@' -e 's@">@\|@')"
if [ $? -ne 0 ]; then
echo "Failed to get proper server response on link ${reports}" 1>&2
diff --git a/pw_mon b/pw_mon
index ee77256..068fe1a 100755
--- a/pw_mon
+++ b/pw_mon
@@ -146,7 +146,7 @@ function check_new_series() {
GET_URL="http://${INSTANCE}/api/series/?project=${PROJECT}&since=${SINCE}"
- response=$(curl -s "$userpw" "$GET_URL")
+ response=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$GET_URL")
series_info=$(echo "$response" | jq -rc '.[] | (.id|tostring) + ";" + .url + ";" + .submitter.name + ";" + .submitter.email + ";" + (.received_all|tostring)')
@@ -162,7 +162,7 @@ function check_new_series() {
function check_completed_series() {
get_uncompleted_jobs_as_line "$pw_instance" "$pw_project" | while IFS=\| read -r id url submitter_name submitter_email; do
echo "Checking on series: $id"
- local RESPONSE=$(curl -s "$userpw" "$url" | jq -rc '.received_all')
+ local RESPONSE=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$url" | jq -rc '.received_all')
if [ "$RESPONSE" = "true" ]; then
echo "Setting series $id to completed"
series_id_set_complete "$pw_instance" "$id"
@@ -183,8 +183,8 @@ function check_superseded_series() {
local pw_instance="$1"
series_get_active_branches "$pw_instance" | while IFS=\| read -r series_id project url repo branchname; do
# first query the patch states
- local last_patch_url=$(curl -s "$userpw" "$url" | jq -rc '.patches[-1].url')
- local patch_state=$(curl -s "$userpw" "$last_patch_url" | jq -rc '.state')
+ local last_patch_url=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$url" | jq -rc '.patches[-1].url')
+ local patch_state=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$last_patch_url" | jq -rc '.state')
# now check to see if the patch should even be reported...
if [ "$patch_state" = "superseded" -o "$patch_state" = "rejected" -o "$patch_state" = "accepted" \
@@ -214,7 +214,7 @@ function check_patch_for_retest_request() {
local pw_project="$2"
local patch_url="$3"
- local patch_json=$(curl -s "$userpw" "$patch_url")
+ local patch_json=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$patch_url")
local patch_comments_url=$(echo "$patch_json" | jq -rc '.comments')
local patch_id=$(echo "$patch_json" | jq -rc '.id')
local series_id=$(echo "$patch_json" | jq -rc '.series[].id')
@@ -231,7 +231,7 @@ function check_patch_for_retest_request() {
fi
if [ "Xnull" != "X$patch_comments_url" ]; then
- local comments_json=$(curl -s "$userpw" "$patch_comments_url")
+ local comments_json=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "$patch_comments_url")
local seq_end=$(echo "$comments_json" | jq -rc 'length')
if [ "$seq_end" -a $seq_end -gt 0 ]; then
seq_end=$((seq_end-1))
@@ -250,7 +250,7 @@ function check_series_needs_retest() {
local pw_instance="$1"
local pw_project="$2"
- local series_list=$(curl -s "$userpw" "http://${pw_instance}/api/series/?project=${pw_project}&state=new&state=rfc&state=under-review&archived=false&order=-id")
+ local series_list=$(curl -A "(pw-ci) pw-mon-${PROJECT}" -s "$userpw" "http://${pw_instance}/api/series/?project=${pw_project}&state=new&state=rfc&state=under-review&archived=false&order=-id")
local n=$(echo "$series_list" | jq -rc 'length')
if [ "Xnull" == "X$n" -o "X" == "X$n" ]; then
--
2.41.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v3 2/2] post_pw: Store submitted checks locally as well
2024-01-22 23:40 [PATCH v3 0/2] Reduced checks API usage Aaron Conole
2024-01-22 23:40 ` [PATCH v3 1/2] treewide: Add a User Agent for CURL requests Aaron Conole
@ 2024-01-22 23:40 ` Aaron Conole
2024-01-23 5:49 ` Michael Santana
2024-01-23 10:49 ` Ilya Maximets
2024-01-23 13:46 ` [PATCH v3 0/2] Reduced checks API usage Aaron Conole
2 siblings, 2 replies; 7+ messages in thread
From: Aaron Conole @ 2024-01-22 23:40 UTC (permalink / raw)
To: ci; +Cc: Michael Santana, Ilya Maximets, Jeremy Kerr
Jeremy Kerr reports that our PW checks reporting submitted 43000 API calls
in just a single day. That is alarmingly unacceptable. We can store the
URLs we've already submitted and then just skip over any additional
processing at least on the PW side.
This patch does two things to try and mitigate this issue:
1. Store each patch ID and URL in the series DB to show that we reported
the check. This means we don't need to poll patchwork for check status
2. Store the last modified time of the reports mailing list. This means
we only poll the mailing list when a new email has surely landed.
Signed-off-by: Aaron Conole <aconole@redhat.com>
---
v2: fixed up the Last-Modified grep and storage
v3: Simplified the logic of creating the last-access file
post_pw.sh | 35 ++++++++++++++++++++++++++++++++++-
series_db_lib.sh | 25 +++++++++++++++++++++++++
2 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/post_pw.sh b/post_pw.sh
index 9163ea1..a8111ff 100755
--- a/post_pw.sh
+++ b/post_pw.sh
@@ -20,6 +20,7 @@
# License for the specific language governing permissions and limitations
# under the License.
+[ -f "$(dirname $0)/series_db_lib.sh" ] && source "$(dirname $0)/series_db_lib.sh" || exit 1
[ -f "${HOME}/.mail_patchwork_sync.rc" ] && source "${HOME}/.mail_patchwork_sync.rc"
# Patchwork instance to update with new reports from mailing list
@@ -75,6 +76,13 @@ send_post() {
if [ -z "$context" -o -z "$state" -o -z "$description" -o -z "$patch_id" ]; then
echo "Skpping \"$link\" due to missing context, state, description," \
"or patch_id" 1>&2
+ # Just don't want to even bother seeing these "bad" patches as well.
+ add_check_scanned_url "$patch_id" "$target_url"
+ return 0
+ fi
+
+ if check_id_exists "$patch_id" "$target_url" ; then
+ echo "Skipping \"$link\" - already reported." 1>&2
return 0
fi
@@ -84,6 +92,7 @@ send_post() {
"$api_url")"
if [ $? -ne 0 ]; then
echo "Failed to get proper server response on link ${api_url}" 1>&2
+ # Don't store these as processed in case the server has a temporary issue.
return 0
fi
@@ -95,6 +104,9 @@ send_post() {
jq -e "[.[].target_url] | contains([\"$mail_url\"])" >/dev/null
then
echo "Report ${target_url} already pushed to patchwork. Skipping." 1>&2
+ # Somehow this was not stored (for example, first time we apply the tracking
+ # feature). Store it now.
+ add_check_scanned_url "$patch_id" "$target_url"
return 0
fi
@@ -114,12 +126,31 @@ send_post() {
if [ $? -ne 0 ]; then
echo -e "Failed to push retults based on report ${link} to the"\
"patchwork instance ${pw_instance} using the following REST"\
- "API Endpoint ${api_url} with the following data:\n$data\n"
+ "API Endpoint ${api_url} with the following data:\n$data\n" 1>&2
return 0
fi
+
+ add_check_scanned_url "$patch_id" "$target_url"
}
+# Collect the date. NOTE: this needs some accomodate to catch the month change-overs
year_month="$(date +"%Y-%B")"
+
+# Get the last modified time
+report_last_mod=$(curl --head -A "(pw-ci) pw-post" -sSf "${mail_archive}${year_month}/thread.html" | grep -i Last-Modified)
+
+mailing_list_save_file=$(echo ".post_pw_${mail_archive}${year_month}" | sed -e "s@/@_@g" -e "s@:@_@g" -e "s,@,_,g")
+
+if [ -e "${HOME}/${mailing_list_save_file}" ]; then
+ last_read_date=$(cat "${HOME}/${mailing_list_save_file}")
+ if [ "$last_read_date" -a "$last_read_date" == "$report_last_mod" ]; then
+ echo "Last modified times match. Skipping list parsing."
+ exit 0
+ fi
+fi
+
+last_read_date="$report_last_mod"
+
reports="$(curl -A "(pw-ci) pw-post" -sSf "${mail_archive}${year_month}/thread.html" | \
grep -i 'HREF=' | sed -e 's@[0-9]*<LI><A HREF="@\|@' -e 's@">@\|@')"
if [ $? -ne 0 ]; then
@@ -132,3 +163,5 @@ echo "$reports" | while IFS='|' read -r blank link title; do
send_post "${mail_archive}${year_month}/$link"
fi
done
+
+echo "$last_read_date" > "${HOME}/${mailing_list_save_file}"
diff --git a/series_db_lib.sh b/series_db_lib.sh
index c5f42e0..0635469 100644
--- a/series_db_lib.sh
+++ b/series_db_lib.sh
@@ -130,6 +130,17 @@ recheck_sync INTEGER
EOF
run_db_command "INSERT INTO series_schema_version(id) values (8);"
fi
+
+ run_db_command "select * from series_schema_version;" | egrep '^9$' > /dev/null 2>&1
+ if [ $? -eq 1 ]; then
+ sqlite3 ${HOME}/.series-db <<EOF
+CREATE TABLE check_id_scanned (
+check_patch_id INTEGER,
+check_url STRING
+)
+EOF
+ run_db_command "INSERT INTO series_schema_version(id) values (9);"
+ fi
}
function series_db_exists() {
@@ -468,3 +479,17 @@ function set_recheck_request_state() {
echo "UPDATE recheck_requests set recheck_sync=$recheck_state where patchwork_instance=\"$recheck_instance\" and patchwork_project=\"$recheck_project\" and recheck_requested_by=\"$recheck_requested_by\" and recheck_series=\"$recheck_series\";" | series_db_execute
}
+
+function add_check_scanned_url() {
+ local patch_id="$1"
+ local url="$2"
+
+ echo "INSERT into check_id_scanned (check_patch_id, check_url) values (${patch_id}, \"$url\");" | series_db_execute
+}
+
+function check_id_exists() {
+ local patch_id="$1"
+ local url="$2"
+
+ echo "select * from check_id_scanned where check_patch_id=$patch_id and check_url=\"$url\";" | series_db_execute | grep "$url" >/dev/null 2>&1
+}
--
2.41.0
^ permalink raw reply [flat|nested] 7+ messages in thread