c7cb07f092
GitOrigin-RevId: 1536926ef5621b09bba54035ae2bb6d806d72ac8
252 lines
11 KiB
Diff
252 lines
11 KiB
Diff
diff --git a/lib/internal.h b/lib/internal.h
|
|
index cce71e4c..a217b3f9 100644
|
|
--- a/lib/internal.h
|
|
+++ b/lib/internal.h
|
|
@@ -31,7 +31,7 @@
|
|
Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
|
|
Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
|
|
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
|
|
- Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
|
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
|
|
Licensed under the MIT license:
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
@@ -162,7 +162,7 @@ const char *unsignedCharToPrintable(unsigned char c);
|
|
#endif
|
|
|
|
extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
|
|
-extern unsigned int g_parseAttempts; // used for testing only
|
|
+extern unsigned int g_bytesScanned; // used for testing only
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
|
|
index aaf0fa9c..6de99d99 100644
|
|
--- a/lib/xmlparse.c
|
|
+++ b/lib/xmlparse.c
|
|
@@ -38,7 +38,7 @@
|
|
Copyright (c) 2022 Jann Horn <jannh@google.com>
|
|
Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
|
|
Copyright (c) 2023 Owain Davies <owaind@bath.edu>
|
|
- Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
|
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
|
|
Licensed under the MIT license:
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
@@ -630,7 +630,7 @@ static unsigned long getDebugLevel(const char *variableName,
|
|
: ((*((pool)->ptr)++ = c), 1))
|
|
|
|
XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
|
|
-unsigned int g_parseAttempts = 0; // used for testing only
|
|
+unsigned int g_bytesScanned = 0; // used for testing only
|
|
|
|
struct XML_ParserStruct {
|
|
/* The first member must be m_userData so that the XML_GetUserData
|
|
@@ -1017,7 +1017,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
|
|
return XML_ERROR_NONE;
|
|
}
|
|
}
|
|
- g_parseAttempts += 1;
|
|
+ g_bytesScanned += (unsigned)have_now;
|
|
const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
|
|
if (ret == XML_ERROR_NONE) {
|
|
// if we consumed nothing, remember what we had on this parse attempt.
|
|
diff --git a/tests/basic_tests.c b/tests/basic_tests.c
|
|
index 7112a440..a9cc3861 100644
|
|
--- a/tests/basic_tests.c
|
|
+++ b/tests/basic_tests.c
|
|
@@ -5202,13 +5202,7 @@ START_TEST(test_nested_entity_suspend) {
|
|
END_TEST
|
|
|
|
/* Regression test for quadratic parsing on large tokens */
|
|
-START_TEST(test_big_tokens_take_linear_time) {
|
|
- const char *const too_slow_failure_message
|
|
- = "Compared to the baseline runtime of the first test, this test has a "
|
|
- "slowdown of more than <max_slowdown>. "
|
|
- "Please keep increasing the value by 1 until it reliably passes the "
|
|
- "test on your hardware and open a bug sharing that number with us. "
|
|
- "Thanks in advance!";
|
|
+START_TEST(test_big_tokens_scale_linearly) {
|
|
const struct {
|
|
const char *pre;
|
|
const char *post;
|
|
@@ -5220,65 +5214,57 @@ START_TEST(test_big_tokens_take_linear_time) {
|
|
{"<e><", "/></e>"}, // big elem name, used to be O(N²)
|
|
};
|
|
const int num_cases = sizeof(text) / sizeof(text[0]);
|
|
- // For the test we need a <max_slowdown> value that is:
|
|
- // (1) big enough that the test passes reliably (avoiding flaky tests), and
|
|
- // (2) small enough that the test actually catches regressions.
|
|
- const int max_slowdown = 15;
|
|
char aaaaaa[4096];
|
|
const int fillsize = (int)sizeof(aaaaaa);
|
|
const int fillcount = 100;
|
|
+ const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
|
|
+ const unsigned max_factor = 4;
|
|
+ const unsigned max_scanned = max_factor * approx_bytes;
|
|
|
|
memset(aaaaaa, 'a', fillsize);
|
|
|
|
if (! g_reparseDeferralEnabledDefault) {
|
|
return; // heuristic is disabled; we would get O(n^2) and fail.
|
|
}
|
|
-#if ! defined(__linux__)
|
|
- if (CLOCKS_PER_SEC < 100000) {
|
|
- // Skip this test if clock() doesn't have reasonably good resolution.
|
|
- // This workaround is primarily targeting Windows and FreeBSD, since
|
|
- // XSI requires the value to be 1.000.000 (10x the condition here), and
|
|
- // we want to be very sure that at least one platform in CI can catch
|
|
- // regressions (through a failing test).
|
|
- return;
|
|
- }
|
|
-#endif
|
|
|
|
- clock_t baseline = 0;
|
|
for (int i = 0; i < num_cases; ++i) {
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
assert_true(parser != NULL);
|
|
enum XML_Status status;
|
|
- set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
|
|
- text[i].pre, text[i].post);
|
|
- const clock_t start = clock();
|
|
+ set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
|
|
|
|
// parse the start text
|
|
+ g_bytesScanned = 0;
|
|
status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
|
|
(int)strlen(text[i].pre), XML_FALSE);
|
|
if (status != XML_STATUS_OK) {
|
|
xml_failure(parser);
|
|
}
|
|
+
|
|
// parse lots of 'a', failing the test early if it takes too long
|
|
+ unsigned past_max_count = 0;
|
|
for (int f = 0; f < fillcount; ++f) {
|
|
status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
|
|
if (status != XML_STATUS_OK) {
|
|
xml_failure(parser);
|
|
}
|
|
- // i == 0 means we're still calculating the baseline value
|
|
- if (i > 0) {
|
|
- const clock_t now = clock();
|
|
- const clock_t clocks_so_far = now - start;
|
|
- const int slowdown = clocks_so_far / baseline;
|
|
- if (slowdown >= max_slowdown) {
|
|
- fprintf(
|
|
- stderr,
|
|
- "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
|
|
- f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
|
|
- fail(too_slow_failure_message);
|
|
- }
|
|
+ if (g_bytesScanned > max_scanned) {
|
|
+ // We're not done, and have already passed the limit -- the test will
|
|
+ // definitely fail. This block allows us to save time by failing early.
|
|
+ const unsigned pushed
|
|
+ = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
|
|
+ fprintf(
|
|
+ stderr,
|
|
+ "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
|
|
+ f + 1, fillcount, pushed, g_bytesScanned,
|
|
+ g_bytesScanned / (double)pushed, max_scanned, max_factor);
|
|
+ past_max_count++;
|
|
+ // We are failing, but allow a few log prints first. If we don't reach
|
|
+ // a count of five, the test will fail after the loop instead.
|
|
+ assert_true(past_max_count < 5);
|
|
}
|
|
}
|
|
+
|
|
// parse the end text
|
|
status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
|
|
(int)strlen(text[i].post), XML_TRUE);
|
|
@@ -5286,18 +5272,14 @@ START_TEST(test_big_tokens_take_linear_time) {
|
|
xml_failure(parser);
|
|
}
|
|
|
|
- // how long did it take in total?
|
|
- const clock_t end = clock();
|
|
- const clock_t taken = end - start;
|
|
- if (i == 0) {
|
|
- assert_true(taken > 0); // just to make sure we don't div-by-0 later
|
|
- baseline = taken;
|
|
- }
|
|
- const int slowdown = taken / baseline;
|
|
- if (slowdown >= max_slowdown) {
|
|
- fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
|
|
- (int)taken, (int)baseline, slowdown, max_slowdown);
|
|
- fail(too_slow_failure_message);
|
|
+ assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
|
|
+ if (g_bytesScanned > max_scanned) {
|
|
+ fprintf(
|
|
+ stderr,
|
|
+ "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
|
|
+ g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
|
|
+ max_factor);
|
|
+ fail("scanned too many bytes");
|
|
}
|
|
|
|
XML_ParserFree(parser);
|
|
@@ -5774,19 +5756,17 @@ START_TEST(test_varying_buffer_fills) {
|
|
fillsize[2], fillsize[3]);
|
|
XML_Parser parser = XML_ParserCreate(NULL);
|
|
assert_true(parser != NULL);
|
|
- g_parseAttempts = 0;
|
|
|
|
CharData storage;
|
|
CharData_Init(&storage);
|
|
XML_SetUserData(parser, &storage);
|
|
XML_SetStartElementHandler(parser, start_element_event_handler);
|
|
|
|
+ g_bytesScanned = 0;
|
|
int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
|
|
- int scanned_bytes = 0; // sum of (buffered bytes at each actual parse)
|
|
int offset = 0;
|
|
while (*fillsize >= 0) {
|
|
assert_true(offset + *fillsize <= document_length); // or test is invalid
|
|
- const unsigned attempts_before = g_parseAttempts;
|
|
const enum XML_Status status
|
|
= XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
|
|
if (status != XML_STATUS_OK) {
|
|
@@ -5796,28 +5776,20 @@ START_TEST(test_varying_buffer_fills) {
|
|
fillsize++;
|
|
assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
|
|
worstcase_bytes += offset; // we might've tried to parse all pending bytes
|
|
- if (g_parseAttempts != attempts_before) {
|
|
- assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
|
|
- assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow
|
|
- scanned_bytes += offset; // we *did* try to parse all pending bytes
|
|
- }
|
|
}
|
|
assert_true(storage.count == 1); // the big token should've been parsed
|
|
- assert_true(scanned_bytes > 0); // test-the-test: does our counter work?
|
|
+ assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
|
|
if (g_reparseDeferralEnabledDefault) {
|
|
// heuristic is enabled; some XML_Parse calls may have deferred reparsing
|
|
- const int max_bytes_scanned = -*fillsize;
|
|
- if (scanned_bytes > max_bytes_scanned) {
|
|
+ const unsigned max_bytes_scanned = -*fillsize;
|
|
+ if (g_bytesScanned > max_bytes_scanned) {
|
|
fprintf(stderr,
|
|
- "bytes scanned in parse attempts: actual=%d limit=%d \n",
|
|
- scanned_bytes, max_bytes_scanned);
|
|
+ "bytes scanned in parse attempts: actual=%u limit=%u \n",
|
|
+ g_bytesScanned, max_bytes_scanned);
|
|
fail("too many bytes scanned in parse attempts");
|
|
}
|
|
- assert_true(scanned_bytes <= worstcase_bytes);
|
|
- } else {
|
|
- // heuristic is disabled; every XML_Parse() will have reparsed
|
|
- assert_true(scanned_bytes == worstcase_bytes);
|
|
}
|
|
+ assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
|
|
|
|
XML_ParserFree(parser);
|
|
}
|
|
@@ -6065,7 +6037,7 @@ make_basic_test_case(Suite *s) {
|
|
tcase_add_test__ifdef_xml_dtd(tc_basic,
|
|
test_pool_integrity_with_unfinished_attr);
|
|
tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
|
|
- tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
|
|
+ tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
|
|
tcase_add_test(tc_basic, test_set_reparse_deferral);
|
|
tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
|
|
tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
|