Add is_html_content() function to detect whether a string is HTML

This commit is contained in:
Kijin Sung 2017-06-29 18:25:36 +09:00
parent 61930d5129
commit 804533222c
2 changed files with 35 additions and 0 deletions

View file

@ -594,6 +594,29 @@ function utf8_trim($str)
return preg_replace('/^[\s\pZ\pC]+|[\s\pZ\pC]+$/u', '', $str);
}
/**
 * Guess whether a string is HTML markup rather than plain text.
 *
 * The input is passed through utf8_clean() and utf8_trim(), and every run of
 * CR/LF characters is collapsed into a single newline. The function then
 * counts, case-insensitively and line by line:
 *   - an opening <p or <div at the start of a line, followed either by ">"
 *     or by optional whitespace and a letter;
 *   - a closing </p>, </div> or a <br> variant at the end of a line.
 *
 * The string is reported as HTML when more than four such tags are found,
 * or when at least one is found and the count is no smaller than the number
 * of lines minus one.
 *
 * @param string $str The input string
 * @return bool
 */
function is_html_content($str)
{
	// Normalize line breaks so that blank lines do not inflate the line total.
	$normalized = preg_replace('![\r\n]+!', "\n", utf8_trim(utf8_clean($str)));
	$total_lines = substr_count($normalized, "\n") + 1;

	// The "m" modifier makes ^ and $ match at every line boundary.
	$edge_tags = preg_match_all('!(?:^<(?:p|div)(?:>|\s*[a-z])|<(?:/p|/div|br\s?/?)>$)!im', $normalized);

	return $edge_tags > 4 || ($edge_tags > 0 && $edge_tags >= $total_lines - 1);
}
/**
* Check if HTML content is empty.
* This function checks whether any printable characters remain

View file

@ -168,6 +168,18 @@ class FunctionsTest extends \Codeception\TestCase\Test
$this->assertEquals("Trimmed", utf8_trim("\x20\xe2\x80\x80Trimmed\x0a\x0c\x07\x09"));
}
/**
 * is_html_content() must accept strings whose line edges carry <p>, <div>
 * or <br> tags, and reject text that only mentions a tag in passing.
 */
public function testIsHTMLContent()
{
	// Inputs that should be recognized as HTML.
	$html_samples = array(
		"<p>Hello World</p>",
		"Hello World<br>",
		"Hello World<br/>",
		"Hello<br />\nWorld",
		"<p class='foo'>Hello</p>\n<p class='bar'>World</p>",
		"<div>Hello<br>\r\n\n\n\n\nWorld</div>",
	);
	foreach ($html_samples as $sample)
	{
		$this->assertTrue(is_html_content($sample));
	}

	// Inputs that should be treated as plain text.
	$text_samples = array(
		"You have to use a <p> tag.",
		"This is multiline content.\n<p> tag is here.\nOther lines are here, too.<br>\nMost lines don't have any tags.",
	);
	foreach ($text_samples as $sample)
	{
		$this->assertFalse(is_html_content($sample));
	}
}
public function testIsEmptyHTMLContent()
{
$this->assertTrue(is_empty_html_content('<p>&nbsp;<br><br></p>'));