From b2e3367ef0cb161e9a822ab433c40b7baefe211b Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Tue, 26 Nov 2024 16:36:42 +0100 Subject: [PATCH 01/10] Add assertion to test XPath filters against an allow-list for axes and functions --- src/Assert/Assert.php | 1 + src/Assert/XPathFilterTrait.php | 180 ++++++++++++++++++++++++++++++++ src/Constants.php | 61 ++++++++--- 3 files changed, 230 insertions(+), 12 deletions(-) create mode 100644 src/Assert/XPathFilterTrait.php diff --git a/src/Assert/Assert.php b/src/Assert/Assert.php index fcaf71fc..d2e7ec70 100644 --- a/src/Assert/Assert.php +++ b/src/Assert/Assert.php @@ -38,4 +38,5 @@ class Assert extends BaseAssert use HexBinTrait; use NamesTrait; use TokensTrait; + use XPathFilterTrait; } diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php new file mode 100644 index 00000000..851241ca --- /dev/null +++ b/src/Assert/XPathFilterTrait.php @@ -0,0 +1,180 @@ +-[a-z]++)*+)\s*+\(/' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/'; + + /** + * We use the same rules for matching Axis names as we do for function names. + * The only difference is that we match the '::' instead of the '(' + * so everything that was said about the regular expression for function names + * applies here as well. 
+ * + * '/([a-z]++(?>-[a-z]++)*+)\s*+::' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/'; + + + /*********************************************************************************** + * NOTE: Custom assertions may be added below this line. * + * They SHOULD be marked as `private` to ensure the call is forced * + * through __callStatic(). * + * Assertions marked `public` are called directly and will * + * not handle any custom exception passed to it. * + ***********************************************************************************/ + + /** + * Check an XPath expression for allowed axes and functions + * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing + * a select subset of functions and axes. + * The check uses a list of allowed functions and axes, and throws an exception when an unknown function + * or axis is found in the $xpath_expression. + * + * Limitations: + * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not + * evaluate all possible valid XPath expressions correctly and cause either false positives for valid + * expressions or false negatives for invalid expressions. + * - The check may still allow expressions that are not safe, i.e. expressions that consist of only + * functions and axes that are deemed "safe", but that are still slow to evaluate. The time it takes to + * evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document. 
+ * This check, however, does not take the XML document into account, nor is it aware of the internals of the + * XPath processor that will evaluate the expression. + * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0. + * + * @param string $value + * @param array $allowed_axes + * @param array $allowed_functions + * @param string $message + */ + private static function allowedXPathFilter( + string $value, + array $allowed_axes = C::DEFAULT_ALLOWED_AXES, + array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, + string $message = '', + ): void { + BaseAssert::allString($allowed_axes); + BaseAssert::allString($allowed_functions); + BaseAssert::maxLength( + $value, + C::XPATH_FILTER_MAX_LENGTH, + sprintf('XPath Filter exceeds the limit of 100 characters.'), + ); + + $strippedValue = preg_replace( + self::$regex_xpfilter_remove_strings, + // Replace the content with two of the quotes that were matched + "\\1\\1", + $value, + ); + + if ($strippedValue === null) { + throw new Exception("Error in preg_replace."); + } + + /** + * Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions + * + * Look for the function specifier '(' and look for a function name before it. + * Ignoring whitespace before the '(' and the function name. 
+ * All functions must match a string on a list of allowed function names + */ + $matches = []; + $res = preg_match_all(self::$regex_xpfilter_functions, $strippedValue, $matches); + if ($res === false) { + throw new Exception("Error in preg_match_all."); + } + + // Check that all the function names we found are in the list of allowed function names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowed_functions)) { + throw new InvalidArgumentException(sprintf( + $message ?: '\'%s\' is not an allowed XPath function.', + $match, + )); + } + } + + /** + * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes + * + * Look for the axis specifier '::' and look for a function name before it. + * Ignoring whitespace before the '::' and the axis name. + * All axes must match a string on a list of allowed axis names + */ + $matches = []; + $res = preg_match_all(self::$regex_xpfilter_axes, $strippedValue, $matches); + if ($res === false) { + throw new Exception("Error in preg_match_all."); + } + + // Check that all the axes names we found are in the list of allowed axes names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowed_axes)) { + throw new InvalidArgumentException(sprintf( + $message ?: '\'%s\' is not an allowed XPath axis.', + $match, + )); + } + } + } +} diff --git a/src/Constants.php b/src/Constants.php index 60712ecc..7cf52dbe 100644 --- a/src/Constants.php +++ b/src/Constants.php @@ -37,18 +37,55 @@ class Constants */ public const XPATH10_URI = 'http://www.w3.org/TR/1999/REC-xpath-19991116'; - /** - * The namespace for the XML Path Language 2.0 - */ - public const XPATH20_URI = 'http://www.w3.org/TR/2010/REC-xpath20-20101214/'; + /** @var array */ + public const DEFAULT_ALLOWED_AXES = [ + 'ancestor', + 'ancestor-or-self', + 'attribute', + 'child', + 'descendant', + 'descendant-or-self', + 'following', + 'following-sibling', + // 'namespace', // By default, we do not allow using the namespace axis 
+ 'parent', + 'preceding', + 'preceding-sibling', + 'self', + ]; - /** - * The namespace for the XML Path Language 3.0 - */ - public const XPATH30_URI = 'https://www.w3.org/TR/2014/REC-xpath-30-20140408/'; + /** @var array */ + public const DEFAULT_ALLOWED_FUNCTIONS = [ + // 'boolean', + // 'ceiling', + // 'concat', + // 'contains', + // 'count', + // 'false', + // 'floor', + // 'id', + // 'lang', + // 'last', + // 'local-name', + // 'name', + // 'namespace-uri', + // 'normalize-space', + 'not', + // 'number', + // 'position', + // 'round', + // 'starts-with', + // 'string', + // 'string-length', + // 'substring', + // 'substring-after', + // 'substring-before', + // 'sum', + // 'text', + // 'translate', + // 'true', + ]; - /** - * The namespace for the XML Path Language 3.1 - */ - public const XPATH31_URI = 'https://www.w3.org/TR/2017/REC-xpath-31-20170321/'; + /** @var int */ + public const XPATH_FILTER_MAX_LENGTH = 100; } From e9c912be513eec6dbcc2579f29445a3a97bae1f0 Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Tue, 26 Nov 2024 17:26:50 +0100 Subject: [PATCH 02/10] Add unit tests --- tests/Assert/XPathFilterTest.php | 105 +++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 tests/Assert/XPathFilterTest.php diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php new file mode 100644 index 00000000..1d0f3ed7 --- /dev/null +++ b/tests/Assert/XPathFilterTest.php @@ -0,0 +1,105 @@ + $axes + * @param array $functions + */ + #[DataProvider('provideXPathFilter')] + public function testDefaultAllowedXPathFilter( + bool $shouldPass, + string $filter, + array $axes = C::DEFAULT_ALLOWED_AXES, + array $functions = C::DEFAULT_ALLOWED_FUNCTIONS, + ): void { + try { + XMLAssert::allowedXPathFilter($filter, $axes, $functions); + $this->assertTrue($shouldPass); + } catch (AssertionFailedException $e) { + $this->assertFalse($shouldPass); + } + } + + + /** + * @return array + */ + public static function 
provideXPathFilter(): array + { + return [ + // Axes + 'ancestor' => [true, 'ancestor::book'], + 'ancestor-or-self' => [true, 'ancestor-or-self::book'], + 'attribute' => [true, 'attribute::book'], + 'child' => [true, 'child::book'], + 'descendant' => [true, 'descendant::book'], + 'descendant-or-self' => [true, 'descendant-or-self::book'], + 'following' => [true, 'following::book'], + 'following-sibling' => [true, 'following-sibling::book'], + 'namespace' => [false, 'namespace::book'], + 'namespace whitelist' => [true, 'namespace::book', ['namespace']], + 'parent' => [true, 'parent::book'], + 'preceding' => [true, 'preceding::book'], + 'preceding-sibling' => [true, 'preceding-sibling::book'], + 'self' => [true, 'self::book'], + + // Functions + 'boolean' => [false, 'boolean(Data/username/text())'], + 'ceiling' => [false, 'ceiling(//items/item[1]/price)'], + 'concat' => [false, "concat('A', '_', 'B')"], + 'contains' => [false, "contains(//username, 'o')"], + 'count' => [false, "count(//Sales.Order[Sales.Customer_Order/Sales.Customer/Name = 'Jansen'])"], + 'false' => [false, '//Sales.Customer[IsGoldCustomer = false()]'], + 'floor' => [false, 'floor(//items/item[1]/price)'], + 'id' => [false, 'SalesInvoiceLines[id(1)]'], + 'lang' => [false, 'lang("en-US")'], + 'last' => [false, 'last()'], + 'local-name' => [false, 'local-name(SalesInvoiceLines) '], + 'name' => [false, 'name(SalesInvoiceLines)'], + 'namespace-uri' => [false, 'namespace-uri(ReportData)'], + 'normalize-space' => [false, 'normalize-space(" Hello World ")'], + 'not' => [true, "//Sales.Customer[not(Name = 'Jansen')]"], + 'number' => [false, 'number("123")'], + 'position' => [false, 'position()'], + 'round' => [false, 'round(//items/item[1]/price)'], + 'starts-with' => [false, "//Sales.Customer[starts-with(Name, 'Jans')]"], + 'string' => [false, 'string(123)'], + 'string-length' => [false, 'string-length(//email)string-length(//email)'], + 'substring' => [false, 
"/bookstore/book[substring(title,1,5)='Harry']"], + 'substring-after' => [false, "/bookstore/book[substring-after(title,1,5)='Harry']"], + 'substring-before' => [false, "/bookstore/book[substring-before(title,1,5)='Harry']"], + 'sum' => [false, 'sum(//Sales.Order/TotalPrice)'], + 'text' => [false, '//lastname/text()'], + 'translate' => [false, "translate(//email, '@', '_')"], + 'true' => [false, '//Sales.Customer[IsGoldCustomer = true()]'], + + // Edge-cases + 'unknown axis' => [false, 'unknown::book'], + 'unknown function' => [false, 'unknown()'], + 'too long' => [false, str_pad('a', 120, 'a')], + ]; + } +} From 165d1fd758fd1786b0b74762d63c7f2d93402579 Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Tue, 26 Nov 2024 17:29:13 +0100 Subject: [PATCH 03/10] Add missing use-statement --- src/Assert/XPathFilterTrait.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index 851241ca..73760a3b 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -4,6 +4,7 @@ namespace SimpleSAML\XML\Assert; +use Exception; use InvalidArgumentException; use SimpleSAML\Assert\Assert as BaseAssert; use SimpleSAML\XML\Constants as C; From 03455400497f4295a9a05fde301b7f30e6a2dbd7 Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Tue, 26 Nov 2024 17:33:31 +0100 Subject: [PATCH 04/10] Remove unused use-statements --- tests/Assert/XPathFilterTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php index 1d0f3ed7..e12ac7c3 100644 --- a/tests/Assert/XPathFilterTest.php +++ b/tests/Assert/XPathFilterTest.php @@ -4,7 +4,6 @@ namespace SimpleSAML\XML\Test\Assert; -use InvalidArgumentException; use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; From 4efe2abcd3ece394f6c2ce3003e457c441a1942d Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Sat, 31 May 2025 
23:22:30 +0200 Subject: [PATCH 05/10] Set public method --- src/Assert/XPathFilterTrait.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index 73760a3b..a2d0ff4d 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -107,7 +107,7 @@ trait XPathFilterTrait * @param array $allowed_functions * @param string $message */ - private static function allowedXPathFilter( + public static function allowedXPathFilter( string $value, array $allowed_axes = C::DEFAULT_ALLOWED_AXES, array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, From bf0e5639aeb161a26cfc16c54e29c93f48b3e818 Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Sat, 31 May 2025 23:28:33 +0200 Subject: [PATCH 06/10] Fix raised exception --- src/Assert/XPathFilterTrait.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index a2d0ff4d..1ceaf4e8 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -5,8 +5,8 @@ namespace SimpleSAML\XML\Assert; use Exception; -use InvalidArgumentException; use SimpleSAML\Assert\Assert as BaseAssert; +use SimpleSAML\Assert\AssertionFailedException; use SimpleSAML\XML\Constants as C; use function in_array; @@ -148,7 +148,7 @@ public static function allowedXPathFilter( // Check that all the function names we found are in the list of allowed function names foreach ($matches[1] as $match) { if (!in_array($match, $allowed_functions)) { - throw new InvalidArgumentException(sprintf( + throw new AssertionFailedException(sprintf( $message ?: '\'%s\' is not an allowed XPath function.', $match, )); @@ -171,7 +171,7 @@ public static function allowedXPathFilter( // Check that all the axes names we found are in the list of allowed axes names foreach ($matches[1] as $match) { if (!in_array($match, $allowed_axes)) { - throw new 
InvalidArgumentException(sprintf( + throw new AssertionFailedException(sprintf( $message ?: '\'%s\' is not an allowed XPath axis.', $match, )); From f3c2f165ea0cbd3fa21b9929615e65bbd6051afb Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Sun, 1 Jun 2025 00:36:21 +0200 Subject: [PATCH 07/10] Split into multiple --- src/Assert/Assert.php | 9 +++++++++ src/Assert/XPathFilterTrait.php | 33 +++++++++++++++++++++++++++++--- tests/Assert/XPathFilterTest.php | 2 +- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/Assert/Assert.php b/src/Assert/Assert.php index d2e7ec70..2b33d976 100644 --- a/src/Assert/Assert.php +++ b/src/Assert/Assert.php @@ -9,6 +9,9 @@ /** * @package simplesamlphp/xml-common * + * @method static void validAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') + * @method static void validAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') + * @method static void validAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void validHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void validNMToken(mixed $value, string $message = '', string $exception = '') * @method static void validNMTokens(mixed $value, string $message = '', string $exception = '') @@ -16,6 +19,9 @@ * @method static void validDateTime(mixed $value, string $message = '', string $exception = '') * @method static void validNCName(mixed $value, string $message = '', string $exception = '') * @method static void validQName(mixed $value, string $message = '', string $exception = '') + * @method static void nullOrValidAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') + * @method static void nullOrValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', 
string $exception = '') + * @method static void nullOrValidAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void nullOrValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMTokens(mixed $value, string $message = '', string $exception = '') @@ -23,6 +29,9 @@ * @method static void nullOrValidDateTime(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNCName(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidQName(mixed $value, string $message = '', string $exception = '') + * @method static void allValidAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') + * @method static void allValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') + * @method static void allValidAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void allValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMTokens(mixed $value, string $message = '', string $exception = '') diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index 1ceaf4e8..6abc5ea4 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -107,7 +107,7 @@ trait XPathFilterTrait * @param array $allowed_functions * @param string $message */ - public static function allowedXPathFilter( + public static function validAllowedXPathFilter( string $value, array $allowed_axes = C::DEFAULT_ALLOWED_AXES, array $allowed_functions = 
C::DEFAULT_ALLOWED_FUNCTIONS, @@ -132,6 +132,21 @@ public static function allowedXPathFilter( throw new Exception("Error in preg_replace."); } + self::validAllowedXpathFunctions($strippedValue, $allowed_functions); + self::validAllowedXpathAxes($strippedValue, $allowed_axes); + } + + + /** + * @param string $value + * @param array $allowed_functions + * @param string $message + */ + public static function validAllowedXPathFunctions( + string $value, + array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, + string $message = '', + ): void { /** * Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions * @@ -140,7 +155,7 @@ public static function allowedXPathFilter( * All functions must match a string on a list of allowed function names */ $matches = []; - $res = preg_match_all(self::$regex_xpfilter_functions, $strippedValue, $matches); + $res = preg_match_all(self::$regex_xpfilter_functions, $value, $matches); if ($res === false) { throw new Exception("Error in preg_match_all."); } @@ -154,7 +169,19 @@ public static function allowedXPathFilter( )); } } + } + + /** + * @param string $value + * @param array $allowed_axes + * @param string $message + */ + public static function validAllowedXPathAxes( + string $value, + array $allowed_axes = C::DEFAULT_ALLOWED_AXES, + string $message = '', + ): void { /** * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes * @@ -163,7 +190,7 @@ public static function allowedXPathFilter( * All axes must match a string on a list of allowed axis names */ $matches = []; - $res = preg_match_all(self::$regex_xpfilter_axes, $strippedValue, $matches); + $res = preg_match_all(self::$regex_xpfilter_axes, $value, $matches); if ($res === false) { throw new Exception("Error in preg_match_all."); } diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php index e12ac7c3..6ddc989f 100644 --- a/tests/Assert/XPathFilterTest.php +++ 
b/tests/Assert/XPathFilterTest.php @@ -35,7 +35,7 @@ public function testDefaultAllowedXPathFilter( array $functions = C::DEFAULT_ALLOWED_FUNCTIONS, ): void { try { - XMLAssert::allowedXPathFilter($filter, $axes, $functions); + XMLAssert::validAllowedXPathFilter($filter, $axes, $functions); $this->assertTrue($shouldPass); } catch (AssertionFailedException $e) { $this->assertFalse($shouldPass); From bed821b0d8e1657a6d8f9b69358820a1db6724cb Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Mon, 2 Jun 2025 21:37:38 +0200 Subject: [PATCH 08/10] Throw library-exception instead of PHP-native --- src/Assert/XPathFilterTrait.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index 6abc5ea4..fff8feb9 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -4,10 +4,10 @@ namespace SimpleSAML\XML\Assert; -use Exception; use SimpleSAML\Assert\Assert as BaseAssert; use SimpleSAML\Assert\AssertionFailedException; use SimpleSAML\XML\Constants as C; +use SimpleSAML\XML\Exception\RuntimeException; use function in_array; use function preg_match_all; @@ -129,7 +129,7 @@ public static function validAllowedXPathFilter( ); if ($strippedValue === null) { - throw new Exception("Error in preg_replace."); + throw new RuntimeException("Error in preg_replace."); } self::validAllowedXpathFunctions($strippedValue, $allowed_functions); @@ -157,7 +157,7 @@ public static function validAllowedXPathFunctions( $matches = []; $res = preg_match_all(self::$regex_xpfilter_functions, $value, $matches); if ($res === false) { - throw new Exception("Error in preg_match_all."); + throw new RuntimeException("Error in preg_match_all."); } // Check that all the function names we found are in the list of allowed function names @@ -192,7 +192,7 @@ public static function validAllowedXPathAxes( $matches = []; $res = preg_match_all(self::$regex_xpfilter_axes, $value, $matches); if ($res === 
false) { - throw new Exception("Error in preg_match_all."); + throw new RuntimeException("Error in preg_match_all."); } // Check that all the axes names we found are in the list of allowed axes names From 4f66ed15139f08df49cf92c4041c9d086a8615fb Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Mon, 2 Jun 2025 21:41:33 +0200 Subject: [PATCH 09/10] Fix phpdocs --- src/Assert/XPathFilterTrait.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index fff8feb9..f4d6bc28 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -89,7 +89,7 @@ trait XPathFilterTrait * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing * a select subset of functions and axes. * The check uses a list of allowed functions and axes, and throws an exception when an unknown function - * or axis is found in the $xpath_expression. + * or axis is found in the $value. * * Limitations: * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not @@ -183,7 +183,7 @@ public static function validAllowedXPathAxes( string $message = '', ): void { /** - * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes + * Check if the $value uses an XPath axis that is not in the list of allowed axes * * Look for the axis specifier '::' and look for a function name before it. * Ignoring whitespace before the '::' and the axis name. 
From eae1f258731136390d2e0192f5596abb726fe927 Mon Sep 17 00:00:00 2001 From: Tim van Dijen Date: Mon, 2 Jun 2025 23:06:15 +0200 Subject: [PATCH 10/10] Refactor --- src/Assert/Assert.php | 6 - src/Assert/XPathFilterTrait.php | 177 +++------------- src/Utils/XPathFilter.php | 163 +++++++++++++++ tests/Assert/XPathFilterTest.php | 87 +++----- tests/XML/Utils/XPathFilterTest.php | 304 ++++++++++++++++++++++++++++ 5 files changed, 523 insertions(+), 214 deletions(-) create mode 100644 src/Utils/XPathFilter.php create mode 100644 tests/XML/Utils/XPathFilterTest.php diff --git a/src/Assert/Assert.php b/src/Assert/Assert.php index 2b33d976..d61cbd55 100644 --- a/src/Assert/Assert.php +++ b/src/Assert/Assert.php @@ -9,9 +9,7 @@ /** * @package simplesamlphp/xml-common * - * @method static void validAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') * @method static void validAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') - * @method static void validAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void validHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void validNMToken(mixed $value, string $message = '', string $exception = '') * @method static void validNMTokens(mixed $value, string $message = '', string $exception = '') @@ -19,9 +17,7 @@ * @method static void validDateTime(mixed $value, string $message = '', string $exception = '') * @method static void validNCName(mixed $value, string $message = '', string $exception = '') * @method static void validQName(mixed $value, string $message = '', string $exception = '') - * @method static void nullOrValidAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') * @method static void nullOrValidAllowedXPathFilter(mixed $value, array $allowed_axes, 
array $allowed_functions, string $message = '', string $exception = '') - * @method static void nullOrValidAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void nullOrValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMTokens(mixed $value, string $message = '', string $exception = '') @@ -29,9 +25,7 @@ * @method static void nullOrValidDateTime(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNCName(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidQName(mixed $value, string $message = '', string $exception = '') - * @method static void allValidAllowedXPathAxes(mixed $value, array $allowed_axes, string $message = '', string $exception = '') * @method static void allValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') - * @method static void allValidAllowedXPathFunctions(mixed $value, array $allowed_functions, string $message = '', string $exception = '') * @method static void allValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMTokens(mixed $value, string $message = '', string $exception = '') diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php index f4d6bc28..59dc2305 100644 --- a/src/Assert/XPathFilterTrait.php +++ b/src/Assert/XPathFilterTrait.php @@ -4,14 +4,12 @@ namespace SimpleSAML\XML\Assert; +use InvalidArgumentException; use SimpleSAML\Assert\Assert as BaseAssert; -use SimpleSAML\Assert\AssertionFailedException; use SimpleSAML\XML\Constants as C; use 
SimpleSAML\XML\Exception\RuntimeException; +use SimpleSAML\XML\Utils\XPathFilter; -use function in_array; -use function preg_match_all; -use function preg_replace; use function sprintf; /** @@ -19,63 +17,6 @@ */ trait XPathFilterTrait { - /** - * Remove the content from all single or double-quoted strings in $input, leaving only quotes. - * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking. - * - * '/(["\'])(?:(?!\1).)*+\1/' - * (["\']) # Match a single or double quote and capture it in group 1 - * (?: # Start a non-capturing group - * (?! # Negative lookahead - * \1 # Match the same quote as in group 1 - * ) # End of negative lookahead - * . # Match any character (that is not a quote, because of the negative lookahead) - * )*+ # Repeat the non-capturing group zero or more times, possessively - * \1 # Match the same quote as in group 1 - */ - private static string $regex_xpfilter_remove_strings = '/(["\'])(?:(?!\1).)*+\1/'; - - /** - * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens, - * but cannot start or end with a hyphen. To match this, we start with matching one or more - * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen - * and then match one or more lower-case alpha characters. This ensures that the function name - * cannot start or end with a hyphen, but can contain one or more hyphens. - * More than one consecutive hyphen does not match. 
- * - * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/' - * ( # Start a capturing group - * [a-z]++ # Match one or more lower-case alpha characters - * (?> # Start an atomic group (no capturing) - * - # Match a hyphen - * [a-z]++ # Match one or more lower-case alpha characters, possessively - * )*+ # Repeat the atomic group zero or more times, - * ) # End of the capturing group - * \s*+ # Match zero or more whitespace characters, possessively - * \( # Match an opening parenthesis - */ - private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/'; - - /** - * We use the same rules for matching Axis names as we do for function names. - * The only difference is that we match the '::' instead of the '(' - * so everything that was said about the regular expression for function names - * applies here as well. - * - * '/([a-z]++(?>-[a-z]++)*+)\s*+::' - * ( # Start a capturing group - * [a-z]++ # Match one or more lower-case alpha characters - * (?> # Start an atomic group (no capturing) - * - # Match a hyphen - * [a-z]++ # Match one or more lower-case alpha characters, possessively - * )*+ # Repeat the atomic group zero or more times, - * ) # End of the capturing group - * \s*+ # Match zero or more whitespace characters, possessively - * \( # Match an opening parenthesis - */ - private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/'; - - /*********************************************************************************** * NOTE: Custom assertions may be added below this line. * * They SHOULD be marked as `private` to ensure the call is forced * @@ -89,7 +30,7 @@ trait XPathFilterTrait * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing * a select subset of functions and axes. * The check uses a list of allowed functions and axes, and throws an exception when an unknown function - * or axis is found in the $value. + * or axis is found in the $xpathExpression. 
* * Limitations: * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not @@ -102,107 +43,37 @@ trait XPathFilterTrait * XPath processor that will evaluate the expression. * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0. * - * @param string $value - * @param array $allowed_axes - * @param array $allowed_functions + * @param string $xpathExpression + * @param array $allowedAxes + * @param array $allowedFunctions * @param string $message */ public static function validAllowedXPathFilter( - string $value, - array $allowed_axes = C::DEFAULT_ALLOWED_AXES, - array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, + string $xpathExpression, + array $allowedAxes = C::DEFAULT_ALLOWED_AXES, + array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS, string $message = '', ): void { - BaseAssert::allString($allowed_axes); - BaseAssert::allString($allowed_functions); + BaseAssert::allString($allowedAxes); + BaseAssert::allString($allowedFunctions); BaseAssert::maxLength( - $value, + $xpathExpression, C::XPATH_FILTER_MAX_LENGTH, sprintf('XPath Filter exceeds the limit of 100 characters.'), ); - $strippedValue = preg_replace( - self::$regex_xpfilter_remove_strings, - // Replace the content with two of the quotes that were matched - "\\1\\1", - $value, - ); - - if ($strippedValue === null) { - throw new RuntimeException("Error in preg_replace."); - } - - self::validAllowedXpathFunctions($strippedValue, $allowed_functions); - self::validAllowedXpathAxes($strippedValue, $allowed_axes); - } - - - /** - * @param string $value - * @param array $allowed_functions - * @param string $message - */ - public static function validAllowedXPathFunctions( - string $value, - array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, - string $message = '', - ): void { - /** - * Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions - * - * 
Look for the function specifier '(' and look for a function name before it. - * Ignoring whitespace before the '(' and the function name. - * All functions must match a string on a list of allowed function names - */ - $matches = []; - $res = preg_match_all(self::$regex_xpfilter_functions, $value, $matches); - if ($res === false) { - throw new RuntimeException("Error in preg_match_all."); - } - - // Check that all the function names we found are in the list of allowed function names - foreach ($matches[1] as $match) { - if (!in_array($match, $allowed_functions)) { - throw new AssertionFailedException(sprintf( - $message ?: '\'%s\' is not an allowed XPath function.', - $match, - )); - } - } - } - - - /** - * @param string $value - * @param array $allowed_axes - * @param string $message - */ - public static function validAllowedXPathAxes( - string $value, - array $allowed_axes = C::DEFAULT_ALLOWED_AXES, - string $message = '', - ): void { - /** - * Check if the $value uses an XPath axis that is not in the list of allowed axes - * - * Look for the axis specifier '::' and look for a function name before it. - * Ignoring whitespace before the '::' and the axis name. 
- * All axes must match a string on a list of allowed axis names - */ - $matches = []; - $res = preg_match_all(self::$regex_xpfilter_axes, $value, $matches); - if ($res === false) { - throw new RuntimeException("Error in preg_match_all."); - } - - // Check that all the axes names we found are in the list of allowed axes names - foreach ($matches[1] as $match) { - if (!in_array($match, $allowed_axes)) { - throw new AssertionFailedException(sprintf( - $message ?: '\'%s\' is not an allowed XPath axis.', - $match, - )); - } + try { + // First remove the contents of any string literals in the $xpath to prevent false positives + $xpathWithoutStringLiterals = XPathFilter::removeStringContents($xpathExpression); + + // Then check that the xpath expression only contains allowed functions and axes, throws when it doesn't + XPathFilter::filterXPathFunction($xpathWithoutStringLiterals, $allowedFunctions); + XPathFilter::filterXPathAxis($xpathWithoutStringLiterals, $allowedAxes); + } catch (RuntimeException $e) { + throw new InvalidArgumentException(sprintf( + $message ?: $e->getMessage(), + $xpathExpression, + )); } } } diff --git a/src/Utils/XPathFilter.php b/src/Utils/XPathFilter.php new file mode 100644 index 00000000..c8a3ab5b --- /dev/null +++ b/src/Utils/XPathFilter.php @@ -0,0 +1,163 @@ +-[a-z]++)*+)\s*+\(/' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + + '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/', + $xpathExpression, + $matches, + ); + + // Check that all the function names we found are in the list of allowed function names + foreach ($matches[1] as $match) { + if (!in_array($match, 
$allowedFunctions)) { + throw new RuntimeException("Invalid function: '" . $match . "'"); + } + } + } + + + /** + * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes + * + * @param string $xpathExpression the expression to check. Should be a valid xpath expression + * @param string[] $allowedAxes array of string with a list of allowed axes names + * @throws \SimpleSAML\XML\Exception\RuntimeException + */ + public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void + { + /** + * Look for the axis specifier '::' and look for a function name before it. + * Ignoring whitespace before the '::' and the axis name. + * All axes must match a string on a list of allowed axis names + */ + $matches = []; + $res = preg_match_all( + /** + * We use the same rules for matching Axis names as we do for function names. + * The only difference is that we match the '::' instead of the '(' + * so everything that was said about the regular expression for function names + * applies here as well. + * + * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking + * and thus prevent a ReDOS. + * + * '/([a-z]++(?>-[a-z]++)*+)\s*+::' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + + '/([a-z]++(?>-[a-z]++)*+)\\s*+::/', + $xpathExpression, + $matches, + ); + + // Check that all the axes names we found are in the list of allowed axes names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowedAxes)) { + throw new RuntimeException("Invalid axis: '" . $match . 
"'"); + } + } + } +} diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php index 6ddc989f..8e2bb73f 100644 --- a/tests/Assert/XPathFilterTest.php +++ b/tests/Assert/XPathFilterTest.php @@ -4,15 +4,13 @@ namespace SimpleSAML\XML\Test\Assert; +use InvalidArgumentException; use PHPUnit\Framework\Attributes\CoversClass; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; -use SimpleSAML\Assert\AssertionFailedException; use SimpleSAML\XML\Assert\Assert as XMLAssert; use SimpleSAML\XML\Constants as C; -use function str_pad; - /** * Class \SimpleSAML\XML\Assert\XPathFilterTest * @@ -22,83 +20,62 @@ final class XPathFilterTest extends TestCase { /** - * @param boolean $shouldPass * @param string $filter + * @param boolean $shouldPass * @param array $axes * @param array $functions */ #[DataProvider('provideXPathFilter')] public function testDefaultAllowedXPathFilter( - bool $shouldPass, string $filter, + bool $shouldPass, array $axes = C::DEFAULT_ALLOWED_AXES, array $functions = C::DEFAULT_ALLOWED_FUNCTIONS, ): void { try { XMLAssert::validAllowedXPathFilter($filter, $axes, $functions); $this->assertTrue($shouldPass); - } catch (AssertionFailedException $e) { + } catch (InvalidArgumentException $e) { $this->assertFalse($shouldPass); } } /** - * @return array + * @return array */ public static function provideXPathFilter(): array { return [ - // Axes - 'ancestor' => [true, 'ancestor::book'], - 'ancestor-or-self' => [true, 'ancestor-or-self::book'], - 'attribute' => [true, 'attribute::book'], - 'child' => [true, 'child::book'], - 'descendant' => [true, 'descendant::book'], - 'descendant-or-self' => [true, 'descendant-or-self::book'], - 'following' => [true, 'following::book'], - 'following-sibling' => [true, 'following-sibling::book'], - 'namespace' => [false, 'namespace::book'], - 'namespace whitelist' => [true, 'namespace::book', ['namespace']], - 'parent' => [true, 'parent::book'], - 'preceding' => [true, 
'preceding::book'], - 'preceding-sibling' => [true, 'preceding-sibling::book'], - 'self' => [true, 'self::book'], + // [ 'xpath_expression', allowed ] + + // Evil + ['count(//. | //@* | //namespace::*)', false], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", true], + ["/RootElement/ChildElement[@id='123']", true], + ["not(self::UnwantedNode)", true ], + ["//ElementToEncrypt[not(@attribute='value')]", true], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', false ], + ['self::xenc:EncryptedData[@Id="example1"]', true], + + // Nonsense, but allowed by the filter as it doesn't understand XPath. + ['self::not()[parent::enc:CipherValue[@Id="example1"]]', true ], + + // namespace in element name + ["not(self::namespace)", true], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", true], - // Functions - 'boolean' => [false, 'boolean(Data/username/text())'], - 'ceiling' => [false, 'ceiling(//items/item[1]/price)'], - 'concat' => [false, "concat('A', '_', 'B')"], - 'contains' => [false, "contains(//username, 'o')"], - 'count' => [false, "count(//Sales.Order[Sales.Customer_Order/Sales.Customer/Name = 'Jansen'])"], - 'false' => [false, '//Sales.Customer[IsGoldCustomer = false()]'], - 'floor' => [false, 'floor(//items/item[1]/price)'], - 'id' => [false, 'SalesInvoiceLines[id(1)]'], - 'lang' => [false, 'lang("en-US")'], - 'last' => [false, 'last()'], - 'local-name' => [false, 'local-name(SalesInvoiceLines) '], - 'name' => [false, 'name(SalesInvoiceLines)'], - 'namespace-uri' => [false, 'namespace-uri(ReportData)'], - 'normalize-space' => [false, 'normalize-space(" Hello World ")'], - 'not' => [true, "//Sales.Customer[not(Name = 'Jansen')]"], - 'number' => [false, 'number("123")'], - 'position' => [false, 'position()'], - 'round' => [false, 'round(//items/item[1]/price)'], - 'starts-with' => [false, "//Sales.Customer[starts-with(Name, 'Jans')]"], - 'string' => [false, 
'string(123)'], - 'string-length' => [false, 'string-length(//email)string-length(//email)'], - 'substring' => [false, "/bookstore/book[substring(title,1,5)='Harry']"], - 'substring-after' => [false, "/bookstore/book[substring-after(title,1,5)='Harry']"], - 'substring-before' => [false, "/bookstore/book[substring-before(title,1,5)='Harry']"], - 'sum' => [false, 'sum(//Sales.Order/TotalPrice)'], - 'text' => [false, '//lastname/text()'], - 'translate' => [false, "translate(//email, '@', '_')"], - 'true' => [false, '//Sales.Customer[IsGoldCustomer = true()]'], + // namespace in attribute value + ["//ElementToEncrypt[@attribute='namespace::x']", true], - // Edge-cases - 'unknown axis' => [false, 'unknown::book'], - 'unknown function' => [false, 'unknown()'], - 'too long' => [false, str_pad('a', 120, 'a')], + // function in attribute value + ["//ElementToEncrypt[@attribute='ns1::count()']", true], ]; } } diff --git a/tests/XML/Utils/XPathFilterTest.php b/tests/XML/Utils/XPathFilterTest.php new file mode 100644 index 00000000..047c8c74 --- /dev/null +++ b/tests/XML/Utils/XPathFilterTest.php @@ -0,0 +1,304 @@ +assertEquals($input, XPathFilter::removeStringContents($input)); + $end = microtime(true); + $this->assertLessThan(1, $end - $start, "Processing time was too long"); + } + + + /** + */ + #[DataProvider('provideStringContents')] + public function testRemoveStringContents(string $input, string $expected): void + { + $this->assertEquals($expected, XPathFilter::removeStringContents($input)); + } + + + /** + * @return array + */ + public static function provideStringContents(): array + { + return [ + // Newline + ["\n", "\n"], // 0 + + // Empty string + ['', ''], // 1 + + // No quotes + ['foo', 'foo'], // 2 + ['foo bar', 'foo bar'], //3 + + // Empty quotes + ['""', '""'], //4 + ["''", "''"], //5 + ['"" ""', '"" ""'], //6 + ["'' ''", "'' ''"], //7 + ['"" "" ""', '"" "" ""'], //8 + ["'' '' ''", "'' '' ''"], //9 + + // Quoted string + ['"foo"', '""'], //10 + ["'foo'", 
"''"], //11 + + // Multiple quoted strings + ['"foo" "bar"', '"" ""'], //12 + ["'foo' 'bar'", "'' ''"], //13 + + // Multiple quoted strings with newlines + ['"foo" "bar"' . "\n" . '"abc"', '"" ""' . "\n" . '""'], //14 + ["'foo' 'bar'\n'abc'", "'' ''\n''"], //15 + + // Multiple quoted strings with text + ['"foo"abc"bar"', '""abc""'], //16 + ["'foo'abc'bar'", "''abc''"], //17 + ["'foo'def'bar'", "''def''"], //18 + + // Mixed quotes + ['"foo" \'bar\'', '"" \'\''], //19 + ["'foo' \"bar\"", "'' \"\""], //20 + + // No WS between quotes + ['"foo""bar"', '""""'], //21 + ["'foo''bar'", "''''"], //22 + ['"foo" "bar" "baz"', '"" "" ""'], //23 + ["'foo' 'bar' 'baz'", "'' '' ''"], //24 + ['"foo" \'"bar" "baz"\' "qux"', '"" \'\' ""'], //25 + ["'foo' \"'bar' 'baz'\" 'qux'", "'' \"\" ''"], //26 + ["'foo' 'bar' 'baz'", "'' '' ''"], //27 + ['"foo" \'"bar" "baz"\' "qux"', '"" \'\' ""'], //28 + ["'foo' \"'bar' 'baz'\" 'qux'", "'' \"\" ''"], //29 + ]; + } + + + /** + */ + public function testFilterXPathFunctionSpeed(): void + { + // Measure the time it takes to process a large input, should be less than 1 second + $start = microtime(true); + // a + -a * 10000 + space * 10000 + ( + $input = 'a' . str_repeat('-a', 10000) . str_repeat(' ', 10000) . "("; + $this->expectException(RuntimeException::class); + XPathFilter::filterXPathFunction($input, ['a']); + $end = microtime(true); + $this->assertLessThan(1, $end - $start, "Processing time was too long"); + + // Because filterXPathAxis() uses the same regex structure, we don't test it separately + } + + + /** + * @param string[] $allowedFunctions + */ + #[DataProvider('provideXPathFunction')] + public function testFilterXPathFunction(string $input, array $allowedFunctions, ?string $expected = null): void + { + if ($expected) { + // Function must throw an exception + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage("Invalid function: '" . $expected . 
"'"); + } else { + // Function must not throw an exception + $this->expectNotToPerformAssertions(); + } + XPathFilter::filterXPathFunction($input, $allowedFunctions); + } + + + /** + * @return array + */ + public static function provideXPathFunction(): array + { + return [ + // [xpath, allowed functions, expected result (null = OK; string = name of the denied function)] + ['', ['not'], null], + ['not()', ['not'], null], + ['count()', ['bar'], 'count'], + ['not()', [], 'not'], + ['count ()', ['foo', 'bar'], 'count'], + [' count ()', [], 'count'], + ['-count ()', [], 'count'], + ['- count ()', [], 'count'], + ['- (count ())', [], 'count'], + ['(-count())', [], 'count'], + ['not(not(),not())', ['not'], null], + ['not((not()),(not()))', ['not'], null], // 11; + ['not(not(.),not(""))', ['not'], null], // 12; + ['not( not(.), not(""))', ['not'], null], // 13; + + ['', [], null], + ['not(count(),not())', ['not'], 'count'], + ['not(not(),count())', ['not'], 'count'], + ['count(not(),not())', ['not'], 'count'], + ['(count(not(),not()))', ['not'], 'count'], + ['( count(not(),not()))', ['not'], 'count'], + ['(count (not(),not()))', ['not'], 'count'], + ['not((not()),(not()))', [], 'not'], + ['not(not(.),not(""))', [], 'not'], // 22; + ['not( not(.), not(""))', [], 'not'], + + ['abc-def', [], ''], + ['(abc-def)', [], ''], + ['(abc-def ( ) )', [], 'abc-def'], + + ['abc-def', ['abc', 'def'], null], + ['(abc-def)', ['abc', 'def'], null], + ['(abc-def ( ) )', ['abc', 'def'], 'abc-def'], + ['', ['not'], null], + ['not()', ['not'], null], + ['count()', ['bar'], 'count'], + ['not()', [], 'not'], + ['count ()', ['foo', 'bar'], 'count'], + [' count ()', [], 'count'], + ['-count ()', [], 'count'], + ['- count ()', [], 'count'], + ['- (count ())', [], 'count'], + ['(-count())', [], 'count'], + ['not(not(),not())', ['not'], null], + ['not((not()),(not()))', ['not'], null], // 11; + ['not(not(.),not(""))', ['not'], null], // 12; + ['not( not(.), not(""))', ['not'], null], // 13; + + ['', 
[], null], + ['not(count(),not())', ['not'], 'count'], + ['not(not(),count())', ['not'], 'count'], + ['count(not(),not())', ['not'], 'count'], + ['(count(not(),not()))', ['not'], 'count'], + ['( count(not(),not()))', ['not'], 'count'], + ['(count (not(),not()))', ['not'], 'count'], + ['not((not()),(not()))', [], 'not'], + ['not(not(.),not(""))', [], 'not'], // 22; + ['not( not(.), not(""))', [], 'not'], + + ['abc-def', [], ''], + ['(abc-def)', [], ''], + ['(abc-def ( ) )', [], 'abc-def'], + + ['abc-def', ['abc', 'def'], null], + ['(abc-def)', ['abc', 'def'], null], + ['(abc-def ( ) )', ['abc', 'def'], 'abc-def'], + + // Evil + ['count(//. | //@* | //namespace::*)', ['not', 'foo', 'bar'], 'count'], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", ['not', 'foo', 'bar'], null], + ["/RootElement/ChildElement[@id='123']", ['not', 'foo', 'bar'], null], + ["not(self::UnwantedNode)", ['not', 'foo', 'bar'], null], + ["//ElementToEncrypt[not(@attribute='value')]", ['not', 'foo', 'bar'], null], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['not', 'text'], null], + ['self::xenc:EncryptedData[@Id="example1"]', ['not', 'foo', 'bar'], null], + + // count in element name + ["not(self::count)", ['not', 'foo', 'bar'], null], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", ['not', 'foo', 'bar'], null], + + // count in attribute value + //["//ElementToEncrypt[@attribute='count()']", ['not', 'foo', 'bar'], null], + ]; + } + + + /** + * @param string[] $allowedAxes + */ + #[DataProvider('provideXPathAxis')] + public function testFilterXPathAxis(string $input, array $allowedAxes, ?string $expected = null): void + { + if ($expected) { + // Function must throw an exception + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage("Invalid axis: '" . $expected . 
"'"); + } else { + // Function must not throw an exception + $this->expectNotToPerformAssertions(); + } + XPathFilter::filterXPathAxis($input, $allowedAxes); + } + + + /** + * @return array + */ + public static function provideXPathAxis(): array + { + return [ + // [xpath, allowed axes, exception (null = OK; string = is name of the denied axis)] + ['', ['self'], null], + ['self::', [], 'self'], + [' self::', [], 'self'], + [' self ::', [], 'self'], + ['//self::X', [], 'self'], + ['./self::', [], 'self'], + ['namespace:element', [], null], + ['ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + [' ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + ['/ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + + ['self::*/child::price', ['self'], 'child'], + + // Evil + ['count(//. | //@* | //namespace::*)', ['self', 'foo', 'bar'], 'namespace'], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", ['self'], null], + ["/RootElement/ChildElement[@id='123']", ['self'], null], + ["not(self::UnwantedNode)", ['self'], null], + ["not(self::UnwantedNode)", [], 'self'], + ["//ElementToEncrypt[not(@attribute='value')]", ['self'], null], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['self', 'parent'], null], + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['self'], 'parent'], + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['parent'], 'self'], + ['self::xenc:EncryptedData[@Id="example1"]', ['self'], null], + ['self::xenc:EncryptedData[@Id="example1"]', [], 'self'], + + // namespace in element name + ["not(self::namespace)", ['self'], null], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", ['self'], null], + + // namespace in attribute value + // ["//ElementToEncrypt[@attribute='namespace::x']", ['self'], null], + ]; + } +}