From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org
Subject: [PATCH v6 1/3] cmdline: use C standard library as number parser
Date: Thu, 8 May 2025 10:53:36 +0100 [thread overview]
Message-ID: <19ac627a40341b3095e9148a5254683b73fcc20e.1746698010.git.anatoly.burakov@intel.com> (raw)
In-Reply-To: <7ac1444b7d2d64dc467a22e7ac65cf3cc16246dc.1746188833.git.anatoly.burakov@intel.com>
Remove custom number parser and use C standard library instead. In order to
keep compatibility with earlier versions of the parser, we have to take
into account a couple of quirks:
- We did not consider "negative" numbers to be valid for anything other
than base-10 numbers, whereas C standard library does. Adjust the tests
to match the new behavior.
- We did not consider numbers such as "+4" to be valid, whereas C
standard library does. Adjust the tests to match the new behavior.
- C standard library's strtoull does not do range checks on negative
numbers, so we have to parse knowingly-negative numbers as signed.
- C standard library does not support binary numbers, so we keep around the
relevant parts of the custom parser in place to support binary numbers.
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
Notes:
v5 -> v6:
- Allowed more negative numbers (such as negative octals or hex)
- Updated unit tests to check new cases
- Small refactoring of code to reduce amount of noise
- More verbose debug output
v4 -> v5:
- Added this commit
app/test/test_cmdline_num.c | 19 +-
lib/cmdline/cmdline_parse_num.c | 364 ++++++++++++++++----------------
2 files changed, 195 insertions(+), 188 deletions(-)
diff --git a/app/test/test_cmdline_num.c b/app/test/test_cmdline_num.c
index 9276de59bd..dd70f6f824 100644
--- a/app/test/test_cmdline_num.c
+++ b/app/test/test_cmdline_num.c
@@ -139,6 +139,8 @@ const struct num_signed_str num_valid_negative_strs[] = {
{"-2147483648", INT32_MIN },
{"-2147483649", INT32_MIN - 1LL },
{"-9223372036854775808", INT64_MIN },
+ {"-0x8000000000000000", INT64_MIN },
+ {"-01000000000000000000000", INT64_MIN },
};
const struct num_unsigned_str num_garbage_positive_strs[] = {
@@ -175,12 +177,27 @@ const struct num_unsigned_str num_garbage_positive_strs[] = {
const struct num_signed_str num_garbage_negative_strs[] = {
/* valid strings with garbage on the end, should still be valid */
+ /* negative numbers */
{"-9223372036854775808\0garbage", INT64_MIN },
{"-9223372036854775808\rgarbage", INT64_MIN },
{"-9223372036854775808\tgarbage", INT64_MIN },
{"-9223372036854775808\ngarbage", INT64_MIN },
{"-9223372036854775808#garbage", INT64_MIN },
{"-9223372036854775808 garbage", INT64_MIN },
+ /* negative hex */
+ {"-0x8000000000000000\0garbage", INT64_MIN },
+ {"-0x8000000000000000\rgarbage", INT64_MIN },
+ {"-0x8000000000000000\tgarbage", INT64_MIN },
+ {"-0x8000000000000000\ngarbage", INT64_MIN },
+ {"-0x8000000000000000#garbage", INT64_MIN },
+ {"-0x8000000000000000 garbage", INT64_MIN },
+ /* negative octal */
+ {"-01000000000000000000000\0garbage", INT64_MIN },
+ {"-01000000000000000000000\rgarbage", INT64_MIN },
+ {"-01000000000000000000000\tgarbage", INT64_MIN },
+ {"-01000000000000000000000\ngarbage", INT64_MIN },
+ {"-01000000000000000000000#garbage", INT64_MIN },
+ {"-01000000000000000000000 garbage", INT64_MIN },
};
const char * num_invalid_strs[] = {
@@ -197,7 +214,6 @@ const char * num_invalid_strs[] = {
"0b01110101017001",
/* false negative numbers */
"-12345F623",
- "-0x1234580A",
"-0b0111010101",
/* too long (128+ chars) */
("0b1111000011110000111100001111000011110000111100001111000011110000"
@@ -205,7 +221,6 @@ const char * num_invalid_strs[] = {
"1E3",
"0A",
"-B",
- "+4",
"1.23G",
"",
" ",
diff --git a/lib/cmdline/cmdline_parse_num.c b/lib/cmdline/cmdline_parse_num.c
index f21796bedb..ff2bdc28c8 100644
--- a/lib/cmdline/cmdline_parse_num.c
+++ b/lib/cmdline/cmdline_parse_num.c
@@ -4,6 +4,7 @@
* All rights reserved.
*/
+#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
@@ -29,23 +30,6 @@ struct cmdline_token_ops cmdline_token_num_ops = {
.get_help = cmdline_get_help_num,
};
-enum num_parse_state_t {
- START,
- DEC_NEG,
- BIN,
- HEX,
-
- ERROR,
-
- FIRST_OK, /* not used */
- ZERO_OK,
- HEX_OK,
- OCTAL_OK,
- BIN_OK,
- DEC_NEG_OK,
- DEC_POS_OK,
-};
-
/* Keep it sync with enum in .h */
static const char * num_help[] = {
"UINT8", "UINT16", "UINT32", "UINT64",
@@ -53,13 +37,13 @@ static const char * num_help[] = {
};
static inline int
-add_to_res(unsigned int c, uint64_t *res, unsigned int base)
+add_to_bin(unsigned int c, uint64_t *res)
{
/* overflow */
- if ((UINT64_MAX - c) / base < *res)
+ if ((UINT64_MAX - c) / 2 < *res)
return -1;
- *res = (uint64_t) (*res * base + c);
+ *res = (uint64_t) (*res * 2 + c);
return 0;
}
@@ -88,138 +72,119 @@ check_res_size(struct cmdline_token_num_data *nd, unsigned ressize)
return -1;
break;
default:
+ debug_printf("Check size: unsupported number format: %s\n",
+ num_help[nd->type]);
return -1;
}
return 0;
}
-/* parse an int */
-RTE_EXPORT_SYMBOL(cmdline_parse_num)
-int
-cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
- unsigned ressize)
+static int
+check_parsed_num(enum cmdline_numtype type, int neg, uint64_t uintres)
{
- struct cmdline_token_num_data nd;
- enum num_parse_state_t st = START;
+ int lo_ok, hi_ok;
+
+ switch (type) {
+ case RTE_UINT8:
+ lo_ok = !neg;
+ hi_ok = uintres <= UINT8_MAX;
+ break;
+ case RTE_UINT16:
+ lo_ok = !neg;
+ hi_ok = uintres <= UINT16_MAX;
+ break;
+ case RTE_UINT32:
+ lo_ok = !neg;
+ hi_ok = uintres <= UINT32_MAX;
+ break;
+ case RTE_UINT64:
+ lo_ok = !neg;
+ hi_ok = 1; /* can't be out of range if parsed successfully */
+ break;
+ case RTE_INT8:
+ lo_ok = !neg || (int64_t)uintres >= INT8_MIN;
+ hi_ok = neg || uintres <= INT8_MAX;
+ break;
+ case RTE_INT16:
+ lo_ok = !neg || (int64_t)uintres >= INT16_MIN;
+ hi_ok = neg || uintres <= INT16_MAX;
+ break;
+ case RTE_INT32:
+ lo_ok = !neg || (int64_t)uintres >= INT32_MIN;
+ hi_ok = neg || uintres <= INT32_MAX;
+ break;
+ case RTE_INT64:
+ lo_ok = 1; /* can't be out of range if parsed successfully */
+ hi_ok = neg || uintres <= INT64_MAX;
+ break;
+ default:
+ debug_printf("Check range failed: unsupported number format: %s\n",
+ num_help[type]);
+ return -1;
+ }
+ /* check ranges */
+ if (!lo_ok || !hi_ok)
+ return -1;
+ return 0;
+}
+
+static int
+parse_num(const char *srcbuf, uint64_t *resptr)
+{
+ uint64_t uintres;
+ char *end;
+ int neg = *srcbuf == '-';
+
+ /*
+ * strtoull does not do range checks on negative numbers, so we need to
+ * use strtoll if we know the value we're parsing looks like a negative
+ * one. we use base 0 for both, 0 means autodetect base.
+ */
+ errno = 0;
+ if (neg)
+ uintres = (uint64_t)strtoll(srcbuf, &end, 0);
+ else
+ uintres = strtoull(srcbuf, &end, 0);
+
+ if (end == srcbuf || !cmdline_isendoftoken(*end) || errno == ERANGE)
+ return -1;
+
+ *resptr = uintres;
+ return end - srcbuf;
+}
+
+static int
+parse_bin(const char *srcbuf, uint64_t *res)
+{
+ uint64_t uintres = 0;
+ enum {
+ ERROR,
+ START,
+ BIN,
+ ZERO_OK,
+ BIN_OK,
+ } st = START;
const char * buf;
char c;
- uint64_t res1 = 0;
-
- if (!tk)
- return -1;
-
- if (!srcbuf || !*srcbuf)
- return -1;
buf = srcbuf;
c = *buf;
-
- memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
-
- /* check that we have enough room in res */
- if (res) {
- if (check_res_size(&nd, ressize) < 0)
- return -1;
- }
-
while (st != ERROR && c && !cmdline_isendoftoken(c)) {
debug_printf("%c %x -> ", c, c);
switch (st) {
case START:
- if (c == '-') {
- st = DEC_NEG;
- }
- else if (c == '0') {
+ if (c == '0') {
st = ZERO_OK;
}
- else if (c >= '1' && c <= '9') {
- if (add_to_res(c - '0', &res1, 10) < 0)
- st = ERROR;
- else
- st = DEC_POS_OK;
- }
- else {
+ else {
st = ERROR;
}
break;
case ZERO_OK:
- if (c == 'x') {
- st = HEX;
- }
- else if (c == 'b') {
+ if (c == 'b') {
st = BIN;
}
- else if (c >= '0' && c <= '7') {
- if (add_to_res(c - '0', &res1, 10) < 0)
- st = ERROR;
- else
- st = OCTAL_OK;
- }
- else {
- st = ERROR;
- }
- break;
-
- case DEC_NEG:
- if (c >= '0' && c <= '9') {
- if (add_to_res(c - '0', &res1, 10) < 0)
- st = ERROR;
- else
- st = DEC_NEG_OK;
- }
- else {
- st = ERROR;
- }
- break;
-
- case DEC_NEG_OK:
- if (c >= '0' && c <= '9') {
- if (add_to_res(c - '0', &res1, 10) < 0)
- st = ERROR;
- }
- else {
- st = ERROR;
- }
- break;
-
- case DEC_POS_OK:
- if (c >= '0' && c <= '9') {
- if (add_to_res(c - '0', &res1, 10) < 0)
- st = ERROR;
- }
- else {
- st = ERROR;
- }
- break;
-
- case HEX:
- st = HEX_OK;
- /* fall-through */
- case HEX_OK:
- if (c >= '0' && c <= '9') {
- if (add_to_res(c - '0', &res1, 16) < 0)
- st = ERROR;
- }
- else if (c >= 'a' && c <= 'f') {
- if (add_to_res(c - 'a' + 10, &res1, 16) < 0)
- st = ERROR;
- }
- else if (c >= 'A' && c <= 'F') {
- if (add_to_res(c - 'A' + 10, &res1, 16) < 0)
- st = ERROR;
- }
- else {
- st = ERROR;
- }
- break;
-
-
- case OCTAL_OK:
- if (c >= '0' && c <= '7') {
- if (add_to_res(c - '0', &res1, 8) < 0)
- st = ERROR;
- }
else {
st = ERROR;
}
@@ -230,7 +195,7 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
/* fall-through */
case BIN_OK:
if (c >= '0' && c <= '1') {
- if (add_to_res(c - '0', &res1, 2) < 0)
+ if (add_to_bin(c - '0', &uintres) < 0)
st = ERROR;
}
else {
@@ -239,10 +204,10 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
break;
default:
debug_printf("not impl ");
-
+ st = ERROR;
}
- debug_printf("(%"PRIu64")\n", res1);
+ debug_printf("(%"PRIu64")\n", uintres);
buf ++;
c = *buf;
@@ -252,66 +217,93 @@ cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
return -1;
}
- switch (st) {
- case ZERO_OK:
- case DEC_POS_OK:
- case HEX_OK:
- case OCTAL_OK:
- case BIN_OK:
- if (nd.type == RTE_INT8 && res1 <= INT8_MAX) {
- if (res) *(int8_t *)res = (int8_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_INT16 && res1 <= INT16_MAX) {
- if (res) *(int16_t *)res = (int16_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_INT32 && res1 <= INT32_MAX) {
- if (res) *(int32_t *)res = (int32_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_INT64 && res1 <= INT64_MAX) {
- if (res) *(int64_t *)res = (int64_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_UINT8 && res1 <= UINT8_MAX) {
- if (res) *(uint8_t *)res = (uint8_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_UINT16 && res1 <= UINT16_MAX) {
- if (res) *(uint16_t *)res = (uint16_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_UINT32 && res1 <= UINT32_MAX) {
- if (res) *(uint32_t *)res = (uint32_t) res1;
- return buf-srcbuf;
- } else if (nd.type == RTE_UINT64) {
- if (res) *(uint64_t *)res = res1;
- return buf-srcbuf;
- } else {
- return -1;
- }
- break;
+ if (st != BIN_OK)
+ return -1;
+
+ *res = uintres;
+ return buf - srcbuf;
+}
- case DEC_NEG_OK:
- if (nd.type == RTE_INT8 &&
- res1 <= INT8_MAX + 1) {
- if (res) *(int8_t *)res = (int8_t) (-res1);
- return buf-srcbuf;
- } else if (nd.type == RTE_INT16 &&
- res1 <= (uint16_t)INT16_MAX + 1) {
- if (res) *(int16_t *)res = (int16_t) (-res1);
- return buf-srcbuf;
- } else if (nd.type == RTE_INT32 &&
- res1 <= (uint32_t)INT32_MAX + 1) {
- if (res) *(int32_t *)res = (int32_t) (-res1);
- return buf-srcbuf;
- } else if (nd.type == RTE_INT64 &&
- res1 <= (uint64_t)INT64_MAX + 1) {
- if (res) *(int64_t *)res = (int64_t) (-res1);
- return buf-srcbuf;
- } else {
- return -1;
- }
+static int
+write_num(enum cmdline_numtype type, void *res, uint64_t uintres)
+{
+ switch (type) {
+ case RTE_UINT8:
+ *(uint8_t *)res = (uint8_t)uintres;
+ break;
+ case RTE_UINT16:
+ *(uint16_t *)res = (uint16_t)uintres;
+ break;
+ case RTE_UINT32:
+ *(uint32_t *)res = (uint32_t)uintres;
+ break;
+ case RTE_UINT64:
+ *(uint64_t *)res = uintres;
+ break;
+ case RTE_INT8:
+ *(int8_t *)res = (int8_t)uintres;
+ break;
+ case RTE_INT16:
+ *(int16_t *)res = (int16_t)uintres;
+ break;
+ case RTE_INT32:
+ *(int32_t *)res = (int32_t)uintres;
+ break;
+ case RTE_INT64:
+ *(int64_t *)res = (int64_t)uintres;
break;
default:
- debug_printf("error\n");
+ debug_printf("Write failed: unsupported number format: %s\n",
+ num_help[type]);
return -1;
}
+ return 0;
+}
+
+/* parse an int */
+RTE_EXPORT_SYMBOL(cmdline_parse_num)
+int
+cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
+ unsigned ressize)
+{
+ struct cmdline_token_num_data nd;
+
+ if (!tk)
+ return -1;
+
+ if (!srcbuf || !*srcbuf)
+ return -1;
+
+ memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd));
+
+ /* check that we have enough room in res */
+ if (res && check_res_size(&nd, ressize) < 0)
+ return -1;
+
+ if (nd.type >= RTE_UINT8 && nd.type <= RTE_INT64) {
+ int ret, neg = *srcbuf == '-';
+ uint64_t uintres;
+
+ /* try parsing as number */
+ ret = parse_num(srcbuf, &uintres);
+
+ if (ret < 0) {
+ /* parse failed, try parsing as binary */
+ ret = parse_bin(srcbuf, &uintres);
+ if (ret < 0)
+ return -1;
+ }
+ /* check if we're within valid range */
+ if (check_parsed_num(nd.type, neg, uintres) < 0)
+ return -1;
+
+ /* parsing succeeded, write the value if necessary */
+ if (res && write_num(nd.type, res, uintres) < 0)
+ return -1;
+
+ return ret;
+ }
+ return -1;
}
--
2.47.1
next prev parent reply other threads:[~2025-05-08 9:53 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-02 12:27 [PATCH v1 1/1] app/testpmd: add sleep command Anatoly Burakov
2025-05-02 12:37 ` Bruce Richardson
2025-05-02 14:35 ` Burakov, Anatoly
2025-05-02 14:43 ` Bruce Richardson
2025-05-02 15:33 ` Morten Brørup
2025-05-02 15:42 ` Stephen Hemminger
2025-05-06 12:36 ` Burakov, Anatoly
2025-05-06 13:08 ` [PATCH v2 1/2] cmdline: add floating point support Anatoly Burakov
2025-05-06 13:08 ` [PATCH v2 2/2] app/testpmd: add sleep command Anatoly Burakov
2025-05-06 13:38 ` [PATCH v2 1/2] cmdline: add floating point support Bruce Richardson
2025-05-07 9:02 ` Burakov, Anatoly
2025-05-07 9:50 ` [PATCH v3 " Anatoly Burakov
2025-05-07 9:50 ` [PATCH v3 2/2] app/testpmd: add sleep command Anatoly Burakov
2025-05-07 9:53 ` [PATCH v3 1/2] cmdline: add floating point support Burakov, Anatoly
2025-05-07 10:01 ` [PATCH v4 " Anatoly Burakov
2025-05-07 10:01 ` [PATCH v4 2/2] app/testpmd: add sleep command Anatoly Burakov
2025-05-07 10:35 ` [PATCH v4 1/2] cmdline: add floating point support Konstantin Ananyev
2025-05-07 11:06 ` Burakov, Anatoly
2025-05-07 12:24 ` Konstantin Ananyev
2025-05-07 14:06 ` Burakov, Anatoly
2025-05-07 15:22 ` [PATCH v5 1/3] cmdline: use C standard library as number parser Anatoly Burakov
2025-05-07 15:22 ` [PATCH v5 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-07 15:22 ` [PATCH v5 3/3] app/testpmd: add sleep command Anatoly Burakov
2025-05-08 7:27 ` [PATCH v5 1/3] cmdline: use C standard library as number parser Bruce Richardson
2025-05-08 8:35 ` Burakov, Anatoly
2025-05-08 9:53 ` Anatoly Burakov [this message]
2025-05-08 9:53 ` [PATCH v6 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-08 9:53 ` [PATCH v6 3/3] app/testpmd: add sleep command Anatoly Burakov
2025-05-08 10:01 ` [PATCH v7 1/3] cmdline: use C standard library as number parser Anatoly Burakov
2025-05-08 10:01 ` [PATCH v7 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-08 10:09 ` Burakov, Anatoly
2025-05-08 10:01 ` [PATCH v7 3/3] app/testpmd: add sleep command Anatoly Burakov
2025-05-08 13:16 ` [PATCH v8 1/3] cmdline: use C standard library as number parser Anatoly Burakov
2025-05-08 13:16 ` [PATCH v8 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-08 13:16 ` [PATCH v8 3/3] app/testpmd: add sleep command Anatoly Burakov
2025-05-09 13:02 ` [PATCH v8 1/3] cmdline: use C standard library as number parser Burakov, Anatoly
2025-05-09 13:08 ` Burakov, Anatoly
2025-05-09 13:27 ` Burakov, Anatoly
2025-05-09 13:39 ` [PATCH v9 " Anatoly Burakov
2025-05-09 13:39 ` [PATCH v9 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-09 13:39 ` [PATCH v9 3/3] app/testpmd: add sleep command Anatoly Burakov
2025-05-09 13:42 ` [PATCH v9 1/3] cmdline: use C standard library as number parser Burakov, Anatoly
2025-05-09 14:41 ` [PATCH v10 " Anatoly Burakov
2025-05-09 14:41 ` [PATCH v10 2/3] cmdline: add floating point support Anatoly Burakov
2025-05-09 14:41 ` [PATCH v10 3/3] app/testpmd: add sleep command Anatoly Burakov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=19ac627a40341b3095e9148a5254683b73fcc20e.1746698010.git.anatoly.burakov@intel.com \
--to=anatoly.burakov@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).