false;
    }

    /**
     * Removes non-printable ASCII characters from a string and trims it.
     *
     * Every byte outside the printable ASCII range (space through tilde) is
     * dropped, then surrounding whitespace is trimmed.
     *
     * @param string $value
     * @return string
     */
    protected function removeNonPrintableCharacters($value)
    {
        // Match byte-wise (no `u` modifier): every byte of a multi-byte UTF-8
        // sequence is >= 0x80 and thus outside ` -~`, so the result for valid
        // UTF-8 is unchanged, while invalid UTF-8 no longer makes
        // preg_replace() return null (which would turn the value into '').
        return trim((string) preg_replace('/[^ -~]/', '', $value));
    }

    /**
     * Does this attribute value have a remote reference?
     *
     * The value is first reduced to printable ASCII. It counts as remote when
     * it is a quoted `url("...")` wrapper whose target starts with `//` or
     * with an `http:`, `https:`, `ftp:` or `file:` scheme followed by `//`.
     *
     * @param string $value
     * @return bool
     */
    protected function hasRemoteReference($value)
    {
        $value = $this->removeNonPrintableCharacters($value);

        // NOTE(review): an unquoted url(...) value does not match this pattern
        // and is therefore reported as non-remote - confirm that unquoted
        // references are handled elsewhere.
        $wrapped_in_url = preg_match('~^url\(\s*[\'"]\s*(.*)\s*[\'"]\s*\)$~xi', $value, $match);
        if (!$wrapped_in_url) {
            return false;
        }

        $value = trim($match[1], '\'"');

        // preg_match() yields 1, 0 or false; normalise to the documented bool.
        return preg_match('~^((https?|ftp|file):)?//~xi', $value) === 1;
    }

    /**
     * Should we minify the output?
     *
     * @param bool $shouldMinify
     */
    public function minify($shouldMinify = false)
    {
        $this->minifyXML = (bool) $shouldMinify;
    }

    /**
     * Should we remove the XML tag in the header?
     *
     * @param bool $removeXMLTag
     */
    public function removeXMLTag($removeXMLTag = false)
    {
        $this->removeXMLTag = (bool) $removeXMLTag;
    }

    /**
     * Sets the threshold above which `<use>` elements shall be removed
     * because their expansion would exceed it.
     *
     * A value of zero or less disables the check performed by
     * isUseTagExceedingThreshold().
     *
     * @param int $useThreshold
     */
    public function useThreshold($useThreshold = 1000)
    {
        $this->useThreshold = (int) $useThreshold;
    }

    /**
     * Check to see if an attribute is an aria attribute or not
     *
     * @param string $attributeName
     *
     * @return bool True when the name starts with `aria-`.
     */
    protected function isAriaAttribute($attributeName)
    {
        return strpos($attributeName, 'aria-') === 0;
    }

    /**
     * Check to see if an attribute is a data attribute or not
     *
     * @param string $attributeName
     *
     * @return bool True when the name starts with `data-`.
     */
    protected function isDataAttribute($attributeName)
    {
        return strpos($attributeName, 'data-') === 0;
    }

    /**
     * Make sure our use tag is only referencing internal resources
     *
     * @param \DOMElement $element
     * @return bool True when the element has a truthy href that does not
     *              start with `#`.
     */
    protected function isUseTagDirty(\DOMElement $element)
    {
        $href = Helper::getElementHref($element);

        return $href && strpos($href, '#') !== 0;
    }

    /**
     * Determines whether `<use>` is expanded recursively in order to create
     * DoS scenarios. The number of times the referenced element is actually
     * used needs to be below `$this->useThreshold`.
     *
     * @param \DOMElement $element
     * @return bool True when any subject resolved for the referenced id
     *              reports a use count at or above the threshold.
     */
    protected function isUseTagExceedingThreshold(\DOMElement $element)
    {
        // A non-positive threshold switches the check off.
        if ($this->useThreshold <= 0) {
            return false;
        }

        $useId = Helper::extractIdReferenceFromHref(
            Helper::getElementHref($element)
        );
        if ($useId === null) {
            return false;
        }

        foreach ($this->elementReferenceResolver->findByElementId($useId) as $subject) {
            if ($subject->countUse() >= $this->useThreshold) {
                return true;
            }
        }

        return false;
    }

    /**
     * Set the nesting limit for `<use>` tags.
     *
     * @param int $limit Cast to int before being stored.
     */
    public function setUseNestingLimit($limit)
    {
        $this->useNestingLimit = (int) $limit;
    }

    /**
     * Remove nodes that are either invalid or malformed.
     *
     * CDATA sections are replaced by plain text nodes; any other node that is
     * neither an element nor a text node is removed and recorded in
     * `$this->xmlIssues`. Children are then processed recursively.
     *
     * @param \DOMNode $currentElement The current element.
     */
    protected function cleanUnsafeNodes(\DOMNode $currentElement)
    {
        if ($currentElement instanceof \DOMCdataSection) {
            // Replace CDATA node with encoded text node
            $textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue);
            $currentElement->parentNode->replaceChild($textNode, $currentElement);
        } elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) {
            // Neither an element nor text: remove the node, record the issue
            // and stop descending.
            $currentElement->parentNode->removeChild($currentElement);
            $this->xmlIssues[] = [
                'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
                'line' => $currentElement->getLineNo(),
            ];

            return;
        }

        if ($currentElement->childNodes && $currentElement->childNodes->length > 0) {
            // Walk backwards so removing or replacing a child does not shift
            // the indexes still to be visited.
            for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) {
                /** @var \DOMElement $childElement */
                $childElement = $currentElement->childNodes->item($j);
                $this->cleanUnsafeNodes($childElement);
            }
        }
    }

    /**
     * Retrieve array of errors
     *
     * Maps each entry of libxml_get_errors() to its trimmed message and line.
     *
     * @return array List of arrays with the keys `message` and `line`.
     */
    private static function getXmlErrors()
    {
        $errors = [];
        foreach (libxml_get_errors() as $error) {
            $errors[] = [
                'message' => trim($error->message),
                'line' => $error->line,
            ];
        }

        return $errors;
    }
}