mirror of
https://github.com/godotengine/godot-question2answer.git
synced 2026-01-01 01:48:37 +03:00
This project lived only on the server without version control. This is now the starting point for the repository.
168 lines
6.7 KiB
PHP
168 lines
6.7 KiB
PHP
<?php
/*
    Question2Answer by Gideon Greenspan and contributors
    http://www.question2answer.org/

    Description: Basic viewer module for displaying HTML or plain text


    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    More about this license: http://www.question2answer.org/license.php
*/

/**
 * Basic viewer module: renders stored post content as display HTML or as plain text.
 *
 * Two formats are handled: '' (plain text) and 'html'. Any other format produces a
 * "[no viewer found for format: ...]" placeholder.
 */
class qa_viewer_basic
{
    /** @var string Regex alternation of tag names that are treated as a single line break. */
    private $htmllineseparators;

    /** @var string Regex alternation of tag names that are treated as a paragraph break. */
    private $htmlparagraphseparators;

    /**
     * Initialise the tag lists used to turn block-level HTML tags into new lines.
     *
     * @param string $localdir Not used by this module.
     * @param string $htmldir Not used by this module.
     */
    public function load_module($localdir, $htmldir)
    {
        $this->htmllineseparators = 'br|option';
        $this->htmlparagraphseparators = 'address|applet|blockquote|center|cite|col|div|dd|dl|dt|embed|form|frame|frameset|h1|h2|h3|h4|h5|h6' .
            '|hr|iframe|input|li|marquee|ol|p|pre|samp|select|spacer|table|tbody|td|textarea|tfoot|th|thead|tr|ul';
    }

    /**
     * Report how well this viewer can display content in the given format.
     *
     * @param string $content Not inspected.
     * @param string $format Content format identifier.
     * @return float 1.0 for '' and 'html'; 0.0001 for anything else.
     */
    public function calc_quality($content, $format)
    {
        if ($format == '' || $format == 'html') {
            return 1.0;
        }

        return 0.0001; // if there's nothing better this will give an error message for unknown formats
    }

    /**
     * Convert stored content to HTML suitable for display.
     *
     * Options read: 'linksnewwindow' (passed to the sanitizer), 'blockwordspreg'
     * (blocked words are masked with asterisks) and 'showurllinks' (bare URLs are
     * converted to links).
     *
     * @param string $content Stored content.
     * @param string $format Content format: '' (plain text) or 'html'.
     * @param array $options Display options, see above.
     * @return string HTML for output.
     */
    public function get_html($content, $format, $options)
    {
        $showurllinks = !empty($options['showurllinks']);

        if ($format == 'html') {
            $linksnewwindow = isset($options['linksnewwindow']) ? $options['linksnewwindow'] : null;

            // sanitize again for display, for extra safety, and due to new window setting
            $html = qa_sanitize_html($content, $linksnewwindow, false);

            if (isset($options['blockwordspreg'])) {
                require_once QA_INCLUDE_DIR . 'util/string.php';

                $html = $this->mask_blocked_words_in_html($html, $options['blockwordspreg']);
            }

            if ($showurllinks) {
                require_once QA_INCLUDE_DIR . 'util/string.php';
                // The plain-text branch below loads app/format.php before calling
                // qa_html_convert_urls(); do the same here rather than rely on the caller.
                require_once QA_INCLUDE_DIR . 'app/format.php';

                $html = $this->link_bare_urls_in_html($html);
            }

        } elseif ($format == '') {
            if (isset($options['blockwordspreg'])) {
                require_once QA_INCLUDE_DIR . 'util/string.php';

                $content = qa_block_words_replace($content, $options['blockwordspreg']);
            }

            $html = qa_html($content, true);

            if ($showurllinks) {
                require_once QA_INCLUDE_DIR . 'app/format.php';

                $html = qa_html_convert_urls($html, qa_opt('links_in_new_window'));
            }

        } else {
            $html = '[no viewer found for format: ' . qa_html($format) . ']'; // for unknown formats
        }

        return $html;
    }

    /**
     * Convert stored content to plain text.
     *
     * For 'html' content, tags are removed (block-level tags become new lines),
     * white space is normalised and the base HTML entities are decoded. If
     * 'blockwordspreg' is set in $options, blocked words are then replaced.
     *
     * @param string $content Stored content.
     * @param string $format Content format: '' (plain text) or 'html'.
     * @param array $options Display options; only 'blockwordspreg' is read.
     * @return string Plain text.
     */
    public function get_text($content, $format, $options)
    {
        if ($format == 'html') {
            // Convert all white space in HTML to spaces. The replacement string must be as
            // long as the search string: strtr() ignores the surplus characters otherwise.
            $text = strtr($content, "\n\r\t", '   ');
            $text = $this->break_before_separator_tags($text);
            $text = strip_tags($text); // all tags removed
            $text = preg_replace('/ +/', ' ', $text); // combine multiple spaces into one
            $text = preg_replace('/ *\n */', "\n", $text); // remove spaces either side new lines
            $text = preg_replace('/\n\n\n+/', "\n\n", $text); // more than two new lines combine into two

            // Base HTML entities (others should not be stored in database). The array form
            // of strtr() makes a single pass, so '&amp;lt;' becomes '&lt;' and not '<'.
            $text = strtr($text, array(
                '&#34;' => "\x22",
                '&#38;' => "\x26",
                '&#39;' => "\x27",
                '&#60;' => "\x3C",
                '&#62;' => "\x3E",
                '&nbsp;' => " ",
                '&quot;' => "\x22",
                '&amp;' => "\x26",
                '&lt;' => "\x3C",
                '&gt;' => "\x3E",
            ));

            $text = trim($text);

        } elseif ($format == '') {
            $text = $content;

        } else {
            $text = '[no viewer found for format: ' . $format . ']'; // for unknown formats
        }

        if (isset($options['blockwordspreg'])) {
            require_once QA_INCLUDE_DIR . 'util/string.php';

            $text = qa_block_words_replace($text, $options['blockwordspreg']);
        }

        return $text;
    }

    /**
     * Insert a new line before each line-separator tag and two before each paragraph-separator tag.
     *
     * @param string $markup HTML markup.
     * @return string Markup with the new lines inserted; the tags themselves are kept.
     */
    private function break_before_separator_tags($markup)
    {
        $markup = preg_replace('/<\s*(' . $this->htmllineseparators . ')[^A-Za-z0-9]/i', "\n\\0", $markup); // tags to single new line

        return preg_replace('/<\s*(' . $this->htmlparagraphseparators . ')[^A-Za-z0-9]/i', "\n\n\\0", $markup); // tags to double new line
    }

    /**
     * Replace blocked words inside HTML with asterisks, leaving the tags in place.
     *
     * Filtering out blocked words inline within HTML is pretty complex, e.g. p<b>oo</b>p
     * must be caught. Words are matched against the tag-stripped text, then each match is
     * mapped back to its byte position in the HTML and replaced piece by piece around tags.
     *
     * @param string $html Sanitized HTML.
     * @param mixed $blockwordspreg Blocked-words pattern, passed to qa_block_words_match_all().
     * @return string HTML with blocked words masked.
     */
    private function mask_blocked_words_in_html($html, $blockwordspreg)
    {
        $html = $this->break_before_separator_tags($html);

        preg_match_all('/<[^>]*>/', $html, $pregmatches, PREG_OFFSET_CAPTURE); // find tag positions and lengths
        $tagmatches = $pregmatches[0];
        $text = preg_replace('/<[^>]*>/', '', $html); // effectively strip_tags() but use same regexp as above to ensure consistency

        $blockmatches = qa_block_words_match_all($text, $blockwordspreg); // search for blocked words within text

        $nexttagmatch = array_shift($tagmatches);
        $texttohtml = 0; // total bytes of tags that precede the current text position
        $htmlshift = 0; // change in HTML length caused by replacements made so far

        foreach ($blockmatches as $textoffset => $textlength) {
            while (isset($nexttagmatch) && $nexttagmatch[1] <= $textoffset + $texttohtml) { // keep text and html in sync
                $texttohtml += strlen($nexttagmatch[0]);
                $nexttagmatch = array_shift($tagmatches);
            }

            while (true) {
                $replacepart = $textlength;

                if (isset($nexttagmatch)) {
                    // stop replacing early if we hit an HTML tag
                    $replacepart = min($replacepart, $nexttagmatch[1] - ($textoffset + $texttohtml));
                }

                $replacelength = qa_strlen(substr($text, $textoffset, $replacepart)); // to work with multi-byte characters

                $html = substr_replace($html, str_repeat('*', $replacelength), $textoffset + $texttohtml + $htmlshift, $replacepart);
                $htmlshift += $replacelength - $replacepart; // HTML might have moved around if we replaced multi-byte characters

                if ($replacepart >= $textlength) {
                    break; // we have replaced everything expected, otherwise more left (due to hitting an HTML tag)
                }

                // Only reached when a tag cut the replacement short, so $nexttagmatch is set here.
                $textlength -= $replacepart;
                $textoffset += $replacepart;
                $texttohtml += strlen($nexttagmatch[0]);
                $nexttagmatch = array_shift($tagmatches);
            }
        }

        return $html;
    }

    /**
     * Turn bare URLs in HTML into links, without touching existing links, <code>/<pre>
     * content, or anything inside a tag (e.g. <img src="http://...">).
     *
     * @param string $html Sanitized HTML.
     * @return string HTML with bare URLs converted by qa_html_convert_urls().
     */
    private function link_bare_urls_in_html($html)
    {
        // Split on complete <a>/<code>/<pre> elements. The lazy quantifier and the \1
        // back-reference end each element at its own closing tag, so text between two
        // separate links is still scanned; \b stops <a matching tags such as <address>.
        // Segments are processed from the end so earlier offsets stay valid as we substitute.
        $htmlunlinkeds = array_reverse(preg_split('#<(a|code|pre)\b[^>]*>.*?</\1\s*>#is', $html, -1, PREG_SPLIT_OFFSET_CAPTURE));

        foreach ($htmlunlinkeds as $htmlunlinked) {
            // split again on tags so URLs inside attributes are never seen; again, start from end
            $thishtmluntaggeds = array_reverse(preg_split('/<[^>]*>/', $htmlunlinked[0], -1, PREG_SPLIT_OFFSET_CAPTURE));

            foreach ($thishtmluntaggeds as $thishtmluntagged) {
                $innerhtml = $thishtmluntagged[0];

                if (strpos($innerhtml, '://') !== false) { // quick test first
                    $newhtml = qa_html_convert_urls($innerhtml, qa_opt('links_in_new_window'));

                    $html = substr_replace($html, $newhtml, $htmlunlinked[1] + $thishtmluntagged[1], strlen($innerhtml));
                }
            }
        }

        return $html;
    }
}
|