diff --git a/src/Assert/Assert.php b/src/Assert/Assert.php index fcaf71fc..d61cbd55 100644 --- a/src/Assert/Assert.php +++ b/src/Assert/Assert.php @@ -9,6 +9,7 @@ /** * @package simplesamlphp/xml-common * + * @method static void validAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') * @method static void validHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void validNMToken(mixed $value, string $message = '', string $exception = '') * @method static void validNMTokens(mixed $value, string $message = '', string $exception = '') @@ -16,6 +17,7 @@ * @method static void validDateTime(mixed $value, string $message = '', string $exception = '') * @method static void validNCName(mixed $value, string $message = '', string $exception = '') * @method static void validQName(mixed $value, string $message = '', string $exception = '') + * @method static void nullOrValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') * @method static void nullOrValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNMTokens(mixed $value, string $message = '', string $exception = '') @@ -23,6 +25,7 @@ * @method static void nullOrValidDateTime(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidNCName(mixed $value, string $message = '', string $exception = '') * @method static void nullOrValidQName(mixed $value, string $message = '', string $exception = '') + * @method static void allValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '') * @method static void allValidHexBinary(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMToken(mixed $value, string $message = '', string $exception = '') * @method static void allValidNMTokens(mixed $value, string $message = '', string $exception = '') @@ -38,4 +41,5 @@ class Assert extends BaseAssert use HexBinTrait; use NamesTrait; use TokensTrait; + use XPathFilterTrait; } diff --git a/src/Assert/XPathFilterTrait.php b/src/Assert/XPathFilterTrait.php new file mode 100644 index 00000000..59dc2305 --- /dev/null +++ b/src/Assert/XPathFilterTrait.php @@ -0,0 +1,79 @@ + $allowedAxes + * @param array $allowedFunctions + * @param string $message + */ + public static function validAllowedXPathFilter( + string $xpathExpression, + array $allowedAxes = C::DEFAULT_ALLOWED_AXES, + array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS, + string $message = '', + ): void { + BaseAssert::allString($allowedAxes); + BaseAssert::allString($allowedFunctions); + BaseAssert::maxLength( + $xpathExpression, + C::XPATH_FILTER_MAX_LENGTH, + sprintf('XPath Filter exceeds the limit of 100 characters.'), + ); + + try { + // First remove the contents of any string literals in the $xpath to prevent false positives + $xpathWithoutStringLiterals = XPathFilter::removeStringContents($xpathExpression); + + // Then check that the xpath expression only contains allowed functions and axes, throws when it doesn't + XPathFilter::filterXPathFunction($xpathWithoutStringLiterals, $allowedFunctions); + XPathFilter::filterXPathAxis($xpathWithoutStringLiterals, $allowedAxes); + } catch (RuntimeException $e) { + throw new InvalidArgumentException(sprintf( + $message ?: $e->getMessage(), + $xpathExpression, + )); + } + } +} diff --git a/src/Constants.php b/src/Constants.php index 60712ecc..7cf52dbe 100644 --- a/src/Constants.php +++ b/src/Constants.php @@ -37,18 +37,55 @@ class Constants */ public const XPATH10_URI = 'http://www.w3.org/TR/1999/REC-xpath-19991116'; - /** - * The namespace for the XML Path Language 2.0 - */ - public const XPATH20_URI = 'http://www.w3.org/TR/2010/REC-xpath20-20101214/'; + /** @var array */ + public const DEFAULT_ALLOWED_AXES = [ + 'ancestor', + 'ancestor-or-self', + 'attribute', + 'child', + 'descendant', + 'descendant-or-self', + 'following', + 'following-sibling', + // 'namespace', // By default, we do not allow using the namespace axis + 'parent', + 'preceding', + 'preceding-sibling', + 'self', + ]; - /** - * The namespace for the XML Path Language 3.0 - */ - public const XPATH30_URI = 'https://www.w3.org/TR/2014/REC-xpath-30-20140408/'; + /** @var array */ + public const DEFAULT_ALLOWED_FUNCTIONS = [ + // 'boolean', + // 'ceiling', + // 'concat', + // 'contains', + // 'count', + // 'false', + // 'floor', + // 'id', + // 'lang', + // 'last', + // 'local-name', + // 'name', + // 'namespace-uri', + // 'normalize-space', + 'not', + // 'number', + // 'position', + // 'round', + // 'starts-with', + // 'string', + // 'string-length', + // 'substring', + // 'substring-after', + // 'substring-before', + // 'sum', + // 'text', + // 'translate', + // 'true', + ]; - /** - * The namespace for the XML Path Language 3.1 - */ - public const XPATH31_URI = 'https://www.w3.org/TR/2017/REC-xpath-31-20170321/'; + /** @var int */ + public const XPATH_FILTER_MAX_LENGTH = 100; } diff --git a/src/Utils/XPathFilter.php b/src/Utils/XPathFilter.php new file mode 100644 index 00000000..c8a3ab5b --- /dev/null +++ b/src/Utils/XPathFilter.php @@ -0,0 +1,163 @@ +-[a-z]++)*+)\s*+\(/' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + + '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/', + $xpathExpression, + $matches, + ); + + // Check that all the function names we found are in the list of allowed function names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowedFunctions)) { + throw new RuntimeException("Invalid function: '" . $match . "'"); + } + } + } + + + /** + * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes + * + * @param string $xpathExpression the expression to check. Should be a valid xpath expression + * @param string[] $allowedAxes array of string with a list of allowed axes names + * @throws \SimpleSAML\XML\Exception\RuntimeException + */ + public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void + { + /** + * Look for the axis specifier '::' and look for a function name before it. + * Ignoring whitespace before the '::' and the axis name. + * All axes must match a string on a list of allowed axis names + */ + $matches = []; + $res = preg_match_all( + /** + * We use the same rules for matching Axis names as we do for function names. + * The only difference is that we match the '::' instead of the '(' + * so everything that was said about the regular expression for function names + * applies here as well. + * + * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking + * and thus prevent a ReDOS. + * + * '/([a-z]++(?>-[a-z]++)*+)\s*+::' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + + '/([a-z]++(?>-[a-z]++)*+)\\s*+::/', + $xpathExpression, + $matches, + ); + + // Check that all the axes names we found are in the list of allowed axes names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowedAxes)) { + throw new RuntimeException("Invalid axis: '" . $match . "'"); + } + } + } +} diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php new file mode 100644 index 00000000..8e2bb73f --- /dev/null +++ b/tests/Assert/XPathFilterTest.php @@ -0,0 +1,81 @@ + $axes + * @param array $functions + */ + #[DataProvider('provideXPathFilter')] + public function testDefaultAllowedXPathFilter( + string $filter, + bool $shouldPass, + array $axes = C::DEFAULT_ALLOWED_AXES, + array $functions = C::DEFAULT_ALLOWED_FUNCTIONS, + ): void { + try { + XMLAssert::validAllowedXPathFilter($filter, $axes, $functions); + $this->assertTrue($shouldPass); + } catch (InvalidArgumentException $e) { + $this->assertFalse($shouldPass); + } + } + + + /** + * @return array + */ + public static function provideXPathFilter(): array + { + return [ + // [ 'xpath_expression', allowed ] + + // Evil + ['count(//. | //@* | //namespace::*)', false], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", true], + ["/RootElement/ChildElement[@id='123']", true], + ["not(self::UnwantedNode)", true ], + ["//ElementToEncrypt[not(@attribute='value')]", true], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', false ], + ['self::xenc:EncryptedData[@Id="example1"]', true], + + // Nonsense, but allowed by the filter as it doesn't understand XPath. + ['self::not()[parent::enc:CipherValue[@Id="example1"]]', true ], + + // namespace in element name + ["not(self::namespace)", true], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", true], + + // namespace in attribute value + ["//ElementToEncrypt[@attribute='namespace::x']", true], + + // function in attribute value + ["//ElementToEncrypt[@attribute='ns1::count()']", true], + ]; + } +} diff --git a/tests/XML/Utils/XPathFilterTest.php b/tests/XML/Utils/XPathFilterTest.php new file mode 100644 index 00000000..047c8c74 --- /dev/null +++ b/tests/XML/Utils/XPathFilterTest.php @@ -0,0 +1,304 @@ +assertEquals($input, XPathFilter::removeStringContents($input)); + $end = microtime(true); + $this->assertLessThan(1, $end - $start, "Processing time was too long"); + } + + + /** + */ + #[DataProvider('provideStringContents')] + public function testRemoveStringContents(string $input, string $expected): void + { + $this->assertEquals($expected, XPathFilter::removeStringContents($input)); + } + + + /** + * @return array + */ + public static function provideStringContents(): array + { + return [ + // Newline + ["\n", "\n"], // 0 + + // Empty string + ['', ''], // 1 + + // No quotes + ['foo', 'foo'], // 2 + ['foo bar', 'foo bar'], //3 + + // Empty quotes + ['""', '""'], //4 + ["''", "''"], //5 + ['"" ""', '"" ""'], //6 + ["'' ''", "'' ''"], //7 + ['"" "" ""', '"" "" ""'], //8 + ["'' '' ''", "'' '' ''"], //9 + + // Quoted string + ['"foo"', '""'], //10 + ["'foo'", "''"], //11 + + // Multiple quoted strings + ['"foo" "bar"', '"" ""'], //12 + ["'foo' 'bar'", "'' ''"], //13 + + // Multiple quoted strings with newlines + ['"foo" "bar"' . "\n" . '"abc"', '"" ""' . "\n" . '""'], //14 + ["'foo' 'bar'\n'abc'", "'' ''\n''"], //15 + + // Multiple quoted strings with text + ['"foo"abc"bar"', '""abc""'], //16 + ["'foo'abc'bar'", "''abc''"], //17 + ["'foo'def'bar'", "''def''"], //18 + + // Mixed quotes + ['"foo" \'bar\'', '"" \'\''], //19 + ["'foo' \"bar\"", "'' \"\""], //20 + + // No WS between quotes + ['"foo""bar"', '""""'], //21 + ["'foo''bar'", "''''"], //22 + ['"foo" "bar" "baz"', '"" "" ""'], //23 + ["'foo' 'bar' 'baz'", "'' '' ''"], //24 + ['"foo" \'"bar" "baz"\' "qux"', '"" \'\' ""'], //25 + ["'foo' \"'bar' 'baz'\" 'qux'", "'' \"\" ''"], //26 + ["'foo' 'bar' 'baz'", "'' '' ''"], //27 + ['"foo" \'"bar" "baz"\' "qux"', '"" \'\' ""'], //28 + ["'foo' \"'bar' 'baz'\" 'qux'", "'' \"\" ''"], //29 + ]; + } + + + /** + */ + public function testFilterXPathFunctionSpeed(): void + { + // Measure the time it takes to process a large input, should be less than 1 second + $start = microtime(true); + // a + -a * 10000 + space * 10000 + ( + $input = 'a' . str_repeat('-a', 10000) . str_repeat(' ', 10000) . "("; + $this->expectException(RuntimeException::class); + XPathFilter::filterXPathFunction($input, ['a']); + $end = microtime(true); + $this->assertLessThan(1, $end - $start, "Processing time was too long"); + + // Because filterXPathAxis() uses the same regex structure, we don't test it separately + } + + + /** + * @param string[] $allowedFunctions + */ + #[DataProvider('provideXPathFunction')] + public function testFilterXPathFunction(string $input, array $allowedFunctions, ?string $expected = null): void + { + if ($expected) { + // Function must throw an exception + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage("Invalid function: '" . $expected . "'"); + } else { + // Function must not throw an exception + $this->expectNotToPerformAssertions(); + } + XPathFilter::filterXPathFunction($input, $allowedFunctions); + } + + + /** + * @return array + */ + public static function provideXPathFunction(): array + { + return [ + // [xpath, allowed functions, expected result (null = OK; string = name of the denied function)] + ['', ['not'], null], + ['not()', ['not'], null], + ['count()', ['bar'], 'count'], + ['not()', [], 'not'], + ['count ()', ['foo', 'bar'], 'count'], + [' count ()', [], 'count'], + ['-count ()', [], 'count'], + ['- count ()', [], 'count'], + ['- (count ())', [], 'count'], + ['(-count())', [], 'count'], + ['not(not(),not())', ['not'], null], + ['not((not()),(not()))', ['not'], null], // 11; + ['not(not(.),not(""))', ['not'], null], // 12; + ['not( not(.), not(""))', ['not'], null], // 13; + + ['', [], null], + ['not(count(),not())', ['not'], 'count'], + ['not(not(),count())', ['not'], 'count'], + ['count(not(),not())', ['not'], 'count'], + ['(count(not(),not()))', ['not'], 'count'], + ['( count(not(),not()))', ['not'], 'count'], + ['(count (not(),not()))', ['not'], 'count'], + ['not((not()),(not()))', [], 'not'], + ['not(not(.),not(""))', [], 'not'], // 22; + ['not( not(.), not(""))', [], 'not'], + + ['abc-def', [], ''], + ['(abc-def)', [], ''], + ['(abc-def ( ) )', [], 'abc-def'], + + ['abc-def', ['abc', 'def'], null], + ['(abc-def)', ['abc', 'def'], null], + ['(abc-def ( ) )', ['abc', 'def'], 'abc-def'], + ['', ['not'], null], + ['not()', ['not'], null], + ['count()', ['bar'], 'count'], + ['not()', [], 'not'], + ['count ()', ['foo', 'bar'], 'count'], + [' count ()', [], 'count'], + ['-count ()', [], 'count'], + ['- count ()', [], 'count'], + ['- (count ())', [], 'count'], + ['(-count())', [], 'count'], + ['not(not(),not())', ['not'], null], + ['not((not()),(not()))', ['not'], null], // 11; + ['not(not(.),not(""))', ['not'], null], // 12; + ['not( not(.), not(""))', ['not'], null], // 13; + + ['', [], null], + ['not(count(),not())', ['not'], 'count'], + ['not(not(),count())', ['not'], 'count'], + ['count(not(),not())', ['not'], 'count'], + ['(count(not(),not()))', ['not'], 'count'], + ['( count(not(),not()))', ['not'], 'count'], + ['(count (not(),not()))', ['not'], 'count'], + ['not((not()),(not()))', [], 'not'], + ['not(not(.),not(""))', [], 'not'], // 22; + ['not( not(.), not(""))', [], 'not'], + + ['abc-def', [], ''], + ['(abc-def)', [], ''], + ['(abc-def ( ) )', [], 'abc-def'], + + ['abc-def', ['abc', 'def'], null], + ['(abc-def)', ['abc', 'def'], null], + ['(abc-def ( ) )', ['abc', 'def'], 'abc-def'], + + // Evil + ['count(//. | //@* | //namespace::*)', ['not', 'foo', 'bar'], 'count'], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", ['not', 'foo', 'bar'], null], + ["/RootElement/ChildElement[@id='123']", ['not', 'foo', 'bar'], null], + ["not(self::UnwantedNode)", ['not', 'foo', 'bar'], null], + ["//ElementToEncrypt[not(@attribute='value')]", ['not', 'foo', 'bar'], null], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['not', 'text'], null], + ['self::xenc:EncryptedData[@Id="example1"]', ['not', 'foo', 'bar'], null], + + // count in element name + ["not(self::count)", ['not', 'foo', 'bar'], null], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", ['not', 'foo', 'bar'], null], + + // count in attribute value + //["//ElementToEncrypt[@attribute='count()']", ['not', 'foo', 'bar'], null], + ]; + } + + + /** + * @param string[] $allowedAxes + */ + #[DataProvider('provideXPathAxis')] + public function testFilterXPathAxis(string $input, array $allowedAxes, ?string $expected = null): void + { + if ($expected) { + // Function must throw an exception + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage("Invalid axis: '" . $expected . "'"); + } else { + // Function must not throw an exception + $this->expectNotToPerformAssertions(); + } + XPathFilter::filterXPathAxis($input, $allowedAxes); + } + + + /** + * @return array + */ + public static function provideXPathAxis(): array + { + return [ + // [xpath, allowed axes, exception (null = OK; string = is name of the denied axis)] + ['', ['self'], null], + ['self::', [], 'self'], + [' self::', [], 'self'], + [' self ::', [], 'self'], + ['//self::X', [], 'self'], + ['./self::', [], 'self'], + ['namespace:element', [], null], + ['ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + [' ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + ['/ancestor-or-self::some-node', ['self'], 'ancestor-or-self'], + + ['self::*/child::price', ['self'], 'child'], + + // Evil + ['count(//. | //@* | //namespace::*)', ['self', 'foo', 'bar'], 'namespace'], + + // Perfectly normal + ["//ElementToEncrypt[@attribute='value']", ['self'], null], + ["/RootElement/ChildElement[@id='123']", ['self'], null], + ["not(self::UnwantedNode)", ['self'], null], + ["not(self::UnwantedNode)", [], 'self'], + ["//ElementToEncrypt[not(@attribute='value')]", ['self'], null], + + // From https://www.w3.org/TR/xmlenc-core1/ + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['self', 'parent'], null], + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['self'], 'parent'], + ['self::text()[parent::enc:CipherValue[@Id="example1"]]', ['parent'], 'self'], + ['self::xenc:EncryptedData[@Id="example1"]', ['self'], null], + ['self::xenc:EncryptedData[@Id="example1"]', [], 'self'], + + // namespace in element name + ["not(self::namespace)", ['self'], null], + + // using "namespace" as a Namespace prefix + ["//namespace:ElementName", ['self'], null], + + // namespace in attribute value + // ["//ElementToEncrypt[@attribute='namespace::x']", ['self'], null], + ]; + } +}