/', '', $data);
$data = preg_replace('/<\/div>$/', '', $data);
} else {
$data = preg_replace('/^
/', '
', $data);
}
$data = str_replace('', '', $data);
}
if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
$absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
if ($absolute !== false) {
$data = $absolute;
}
}
if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
$data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
}
if ($this->output_encoding !== 'UTF-8') {
$data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
}
}
return $data;
}
/**
 * Wrap a markup fragment in a minimal document that declares UTF-8.
 *
 * Any html/body start or end tags already present in the fragment are
 * removed first, so the wrapper added here is the only one.
 *
 * @param string $html Markup fragment to wrap
 * @param int $type Bitmask of \SimplePie\SimplePie::CONSTRUCT_* flags
 * @return string Document with the fragment placed inside the body element
 */
protected function preprocess($html, $type)
{
    // Strip html/body start and end tags carried by the fragment itself.
    // The pattern literal is deliberately split in two so that the source
    // never contains a question mark directly followed by a greater-than sign.
    $html = preg_replace('%</?(?:html|body)[^>]*?' . '>%is', '', $html);

    if ($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML) {
        // Atom XHTML constructs are wrapped with a div by default, so every
        // other construct gets the same wrapper here.
        // Note: No protection if $html contains a stray closing div tag!
        $html = '<div>' . $html . '</div>';
        $ret = '<!DOCTYPE html>';
        $content_type = 'text/html';
    } else {
        // NOTE(review): the exact XHTML DOCTYPE was reconstructed; confirm
        // against the upstream library before relying on it.
        $ret = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">';
        $content_type = 'application/xhtml+xml';
    }

    // The meta element carries the content type chosen above together with
    // an explicit UTF-8 charset declaration.
    $ret .= '<html><head>';
    $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
    $ret .= '</head><body>' . $html . '</body></html>';

    return $ret;
}
/**
 * Rewrite URL-bearing attributes of every element with the given tag name.
 *
 * Each listed attribute that is present is resolved against $this->base via
 * Misc::absolutize_url; when that succeeds the result is passed through
 * https_url() and written back. Tags listed in $this->strip_htmltags are
 * left alone.
 *
 * @param \DOMDocument $document Document whose elements are updated in place
 * @param string $tag Element name to look up
 * @param string|string[] $attributes Attribute name, or list of names
 * @return void
 */
public function replace_urls($document, $tag, $attributes)
{
    $attribute_names = is_array($attributes) ? $attributes : [$attributes];

    // Nothing to do for tags that are going to be stripped anyway.
    $tag_is_stripped = is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags);
    if ($tag_is_stripped) {
        return;
    }

    foreach ($document->getElementsByTagName($tag) as $node) {
        foreach ($attribute_names as $name) {
            if (!$node->hasAttribute($name)) {
                continue;
            }

            $resolved = $this->registry->call(Misc::class, 'absolutize_url', [$node->getAttribute($name), $this->base]);
            if ($resolved === false) {
                // Could not be made absolute: keep the attribute untouched.
                continue;
            }

            $resolved = $this->https_url($resolved);
            $node->setAttribute($name, $resolved);
        }
    }
}
/**
 * Produce the replacement text for one matched tag that must not survive.
 *
 * NOTE(review): the capture-group layout comes from the caller's pattern,
 * which is not visible here. From the way the groups are used: [0] is the
 * whole match, [1] the tag name, [2] the attribute text, [3]/[4] the inner
 * content, and [4] is only set for paired tags - confirm against the caller.
 *
 * With $this->encode_instead_of_strip enabled the tag is turned into visible
 * text: its angle brackets are emitted as entities so the browser can never
 * interpret it as markup again. Otherwise the tag is dropped and only the
 * inner content of paired, non-script/style tags is kept.
 *
 * @param array $match Regex match for a single tag
 * @return string Replacement for the matched text
 */
public function do_strip_htmltags($match)
{
    // Paired tag whose inner content may be kept (never for script/style).
    $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'], true);

    if ($this->encode_instead_of_strip) {
        if ($keeps_content) {
            $name = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
            $attributes = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');

            // The delimiters must be entities too; literal angle brackets
            // would re-emit the very tag that is supposed to be neutralised.
            return "&lt;{$name}{$attributes}&gt;{$match[3]}&lt;/{$name}&gt;";
        }

        return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
    }

    return $keeps_content ? $match[4] : '';
}
/**
 * Remove or neutralise every occurrence of one tag below the body element.
 *
 * Behaviour per mode:
 * - encode_instead_of_strip on: the element is replaced by its children,
 *   surrounded by text nodes spelling out the start and end tag (script and
 *   style elements get no such text, only their children are kept).
 * - encode_instead_of_strip off, script/style: the element is removed
 *   together with its content.
 * - encode_instead_of_strip off, any other tag: the element is replaced by
 *   its children.
 *
 * @param string $tag Element name to strip
 * @param \DOMDocument $document Document that owns the elements
 * @param \DOMXPath $xpath XPath evaluator bound to $document
 * @param int $type Bitmask of \SimplePie\SimplePie::CONSTRUCT_* flags
 * @return void
 */
protected function strip_tag($tag, $document, $xpath, $type)
{
    $elements = $xpath->query('body//' . $tag);
    $is_script_or_style = in_array($tag, ['script', 'style'], true);

    if (!$this->encode_instead_of_strip && $is_script_or_style) {
        foreach ($elements as $element) {
            $element->parentNode->removeChild($element);
        }
        return;
    }

    // Only in encode mode, and never for script/style, is the tag itself
    // rendered as text around the children.
    $show_tag_as_text = $this->encode_instead_of_strip && !$is_script_or_style;

    foreach ($elements as $element) {
        $fragment = $document->createDocumentFragment();

        if ($show_tag_as_text) {
            $text = '<' . $tag;
            if ($element->hasAttributes()) {
                $attrs = [];
                foreach ($element->attributes as $name => $attr) {
                    $value = $attr->value;
                    // Compare against '' explicitly: empty() would also
                    // treat a legitimate value of "0" as missing.
                    if ($value === '') {
                        if ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML) {
                            // In XHTML, empty values should never exist, so we repeat the name as value
                            $value = $name;
                        } elseif ($type & \SimplePie\SimplePie::CONSTRUCT_HTML) {
                            // For HTML, a bare attribute name is fine
                            $attrs[] = $name;
                            continue;
                        }
                    }
                    // Standard attribute text
                    $attrs[] = $name . '="' . $value . '"';
                }
                $text .= ' ' . implode(' ', $attrs);
            }
            $text .= '>';
            $fragment->appendChild(new \DOMText($text));
        }

        // Appending a node moves it, so firstChild advances on every pass.
        while ($element->firstChild !== null) {
            $fragment->appendChild($element->firstChild);
        }

        if ($show_tag_as_text) {
            // Text form of the end tag, mirroring the start tag built above.
            $fragment->appendChild(new \DOMText('</' . $tag . '>'));
        }

        $element->parentNode->replaceChild($fragment, $element);
    }
}
/**
 * Delete one attribute from every element that carries it.
 *
 * @param string $attrib Attribute name to remove
 * @param \DOMXPath $xpath XPath evaluator used to find the elements
 * @return void
 */
protected function strip_attr($attrib, $xpath)
{
    // Select any element, anywhere in the document, that has the attribute.
    $carriers = $xpath->query('//*[@' . $attrib . ']');

    foreach ($carriers as $node) {
        $node->removeAttribute($attrib);
    }
}
/**
 * Move an attribute's value to a "data-sanitized-" prefixed attribute.
 *
 * Every element carrying $attrib gets its value copied to
 * data-sanitized-$attrib, after which the original attribute is removed.
 *
 * @param string $attrib Attribute name to rename
 * @param \DOMXPath $xpath XPath evaluator used to find the elements
 * @return void
 */
protected function rename_attr($attrib, $xpath)
{
    $carriers = $xpath->query('//*[@' . $attrib . ']');

    foreach ($carriers as $node) {
        $current = $node->getAttribute($attrib);
        $node->setAttribute('data-sanitized-' . $attrib, $current);
        $node->removeAttribute($attrib);
    }
}
/**
 * Set a group of attributes on every element with the given tag name.
 *
 * Existing attributes of the same name are overwritten.
 *
 * @param string $tag Element name to look up
 * @param array $valuePairs Map of attribute name => attribute value
 * @param \DOMDocument $document Document whose elements are updated in place
 * @return void
 */
protected function add_attr($tag, $valuePairs, $document)
{
    $targets = $document->getElementsByTagName($tag);

    foreach ($targets as $node) {
        foreach ($valuePairs as $name => $content) {
            $node->setAttribute($name, $content);
        }
    }
}
/**
 * Resolve the DataCache to use.
 *
 * Returns the cache held in $this->cache when one is set; otherwise a
 * BaseDataCache is built around the handler that the registry provides for
 * $this->cache_location.
 *
 * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
 *
 * @return DataCache
 */
private function get_cache($image_url = '')
{
    if ($this->cache !== null) {
        return $this->cache;
    }

    // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
    $handler_arguments = [
        $this->cache_location,
        $image_url,
        Base::TYPE_IMAGE
    ];
    $handler = $this->registry->call(Cache::class, 'get_handler', $handler_arguments);

    return new BaseDataCache($handler);
}
}
// Make the class also reachable under the un-namespaced name SimplePie_Sanitize.
class_alias('SimplePie\Sanitize', 'SimplePie_Sanitize');
src/Content/Type/Sniffer.php 0000644 00000022112 15120770020 0011763 0 ustar 00 file = $file;
}
/**
 * Determine the Content-Type of the file, sniffing where the header is
 * missing or not trustworthy.
 *
 * - No Content-Type header: the result of unknown().
 * - One of four exact text/plain header values with no Content-Encoding
 *   header: the result of text_or_binary().
 * - Otherwise the media type (header up to the first ";", lower-cased and
 *   trimmed) decides: "unknown" types go to unknown(), XML types are
 *   returned as declared, image types go to image() with the declared type
 *   as fallback, text/html goes to feed_or_html(), and anything else is
 *   returned as declared.
 *
 * @return string Actual Content-Type
 */
public function get_type()
{
    if (!isset($this->file->headers['content-type'])) {
        return $this->unknown();
    }

    $header = $this->file->headers['content-type'];

    // Only these exact header values trigger the text-or-binary sniffing.
    $plain_text_headers = [
        'text/plain',
        'text/plain; charset=ISO-8859-1',
        'text/plain; charset=iso-8859-1',
        'text/plain; charset=UTF-8',
    ];
    if (!isset($this->file->headers['content-encoding']) && in_array($header, $plain_text_headers, true)) {
        return $this->text_or_binary();
    }

    // Cut off any parameters that follow the media type.
    $semicolon = strpos($header, ';');
    $official = ($semicolon === false) ? $header : substr($header, 0, $semicolon);
    $official = trim(strtolower($official));

    if ($official === 'unknown/unknown' || $official === 'application/unknown') {
        return $this->unknown();
    }

    // XML types are checked before images, so e.g. a "+xml" image type is
    // returned as declared without sniffing.
    if (substr($official, -4) === '+xml' || $official === 'text/xml' || $official === 'application/xml') {
        return $official;
    }

    if (substr($official, 0, 6) === 'image/') {
        $sniffed = $this->image();
        return $sniffed ? $sniffed : $official;
    }

    if ($official === 'text/html') {
        return $this->feed_or_html();
    }

    return $official;
}
/**
 * Decide whether the body is plain text or binary data.
 *
 * A body starting with one of four known byte-order-mark sequences is
 * text. Otherwise the body is binary as soon as it contains any byte in
 * the ranges 0x00-0x08, 0x0E-0x1A or 0x1C-0x1F, and text if it does not.
 *
 * @return string Actual Content-Type
 */
public function text_or_binary()
{
    $body = $this->file->body;

    // Leading byte sequences that mark the body as text right away.
    $text_markers = [
        "\xFE\xFF",
        "\xFF\xFE",
        "\x00\x00\xFE\xFF",
        "\xEF\xBB\xBF",
    ];
    foreach ($text_markers as $marker) {
        if (substr($body, 0, strlen($marker)) === $marker) {
            return 'text/plain';
        }
    }

    $has_binary_bytes = preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $body);

    return $has_binary_bytes ? 'application/octet-stream' : 'text/plain';
}
/**
* Sniff unknown
*
* @return string Actual Content-Type
*/
public function unknown()
{
$ws = strspn($this->file->body, "\x09\x0A\x0B\x0C\x0D\x20");
if (strtolower(substr($this->file->body, $ws, 14)) === 'file->body, $ws, 5)) === 'file->body, $ws, 7)) === '