Add formatter class and associated unit tests

This commit is contained in:
Kijin Sung 2016-03-18 14:10:10 +09:00
parent 2b008f7be6
commit ed50a57f9c
31 changed files with 692 additions and 0 deletions

View file

@ -0,0 +1,311 @@
<?php
namespace Rhymix\Framework;
/**
 * The formatter class.
 *
 * A collection of static helpers that convert content between plain text,
 * HTML, Markdown and BBCode, apply typographic enhancements, and compile or
 * minify stylesheets and scripts into a target file.
 */
class Formatter
{
    /**
     * Options for text to HTML conversion.
     */
    const TEXT_NEWLINE_AS_P = 1;
    const TEXT_DOUBLE_NEWLINE_AS_P = 2;

    /**
     * Options for Markdown to HTML conversion.
     */
    const MD_NEWLINE_AS_BR = 16;
    const MD_ENABLE_EXTRA = 128;

    /**
     * Convert plain text to HTML.
     *
     * The text is trimmed and passed through the project's escape() helper
     * before any markup is added. When both TEXT_* options are given,
     * TEXT_NEWLINE_AS_P takes precedence because it is checked first.
     *
     * @param string $text
     * @param int $options (optional) Bitmask of the TEXT_* constants
     * @return string
     */
    public static function text2html($text, $options = 0)
    {
        // This option uses <p> instead of <br> to separate lines.
        if ($options & self::TEXT_NEWLINE_AS_P)
        {
            $lines = array_map('trim', explode("\n", escape(trim($text))));
            $result = '';
            foreach ($lines as $line)
            {
                $result .= "<p>$line</p>\n";
            }
            return $result;
        }

        // This option uses <br> to separate lines and <p> to separate paragraphs.
        if ($options & self::TEXT_DOUBLE_NEWLINE_AS_P)
        {
            // Drop trailing <br> tags, then split wherever two or more <br> tags follow each other.
            $lines = preg_replace('!(<br />)+\s*$!', '', nl2br(escape(trim($text))));
            $lines = preg_split('!(<br />\s*)+<br />!', $lines);

            // The accumulator must exist before the first append; otherwise PHP
            // reports an undefined variable and empty input yields null.
            $result = '';
            foreach ($lines as $line)
            {
                $result .= "<p>\n" . trim($line) . "\n</p>\n";
            }
            return $result;
        }

        // The default is to use <br> always.
        return nl2br(escape(trim($text))) . "<br />\n";
    }

    /**
     * Convert HTML to plain text.
     *
     * Line breaks and paragraphs become newlines, links are rendered as
     * "text <url>", images as "[title or alt] <src>" (or "[IMAGE] <src>" when
     * neither attribute is present), and every other tag is stripped.
     * The result always ends with exactly one newline.
     *
     * @param string $html
     * @return string
     */
    public static function html2text($html)
    {
        // Add line breaks after <br> and <p> tags.
        $html = preg_replace('!<br[^>]*>\s*!i', "\n", $html);
        $html = preg_replace('!<p\b[^>]*>\s*!i', '', $html);
        $html = preg_replace('!</p[^>]*>\s*!i', "\n\n", $html);

        // Encode links and images to preserve essential information.
        // The angle brackets are written as entities so that strip_tags() leaves them alone;
        // they are decoded again below.
        $html = preg_replace_callback('!<a\b[^>]*href="([^>"]+)"[^>]*>([^<]*)</a>!i', function($matches) {
            return trim($matches[2] . ' &lt;' . $matches[1] . '&gt;');
        }, $html);
        $html = preg_replace_callback('!<img\b[^>]*src="([^>"]+)"[^>]*>!i', function($matches) {
            $title = preg_match('!title="([^>"]+)"!i', $matches[0], $m) ? $m[1] : null;
            $title = $title ?: (preg_match('!alt="([^>"]+)"!i', $matches[0], $m) ? $m[1] : 'IMAGE');
            return trim('[' . $title . '] &lt;' . $matches[1] . '&gt;');
        }, $html);

        // Strip all other HTML.
        // Flags and charset are explicit so that single-quote entities such as &#039;
        // are decoded on every PHP version, not only where ENT_QUOTES is the default.
        $text = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
        unset($html);

        // Normalize whitespace and return.
        $text = str_replace("\r\n", "\n", $text);
        $text = preg_replace('/\n(?:\s*\n)+/', "\n\n", $text);
        return trim($text) . "\n";
    }

    /**
     * Convert Markdown to HTML.
     *
     * The converted HTML is passed through Filters\HTMLFilter::clean() before
     * it is returned.
     *
     * @param string $markdown
     * @param int $options (optional) Bitmask of the MD_* constants
     * @return string
     */
    public static function markdown2html($markdown, $options = 0)
    {
        if ($options & self::MD_NEWLINE_AS_BR)
        {
            // Markdown only emits <br /> for a newline preceded by at least two spaces,
            // so two spaces are inserted before every single newline that is not
            // followed by another newline or by a '*', '#' or '-' character.
            // The spaces are written as \x20 so that they cannot be collapsed by accident.
            $markdown = preg_replace('/(?<!\n)\n(?![\n\*\#\-])/', "\x20\x20\n", $markdown);
        }

        if ($options & self::MD_ENABLE_EXTRA)
        {
            $class_name = '\\Michelf\\MarkdownExtra';
        }
        else
        {
            $class_name = '\\Michelf\\Markdown';
        }

        $html = $class_name::defaultTransform($markdown);
        return Filters\HTMLFilter::clean($html);
    }

    /**
     * Convert HTML to Markdown.
     *
     * Uses '**' for bold, '_' for italic, and strips tags that have no
     * Markdown equivalent. The result always ends with exactly one newline.
     *
     * @param string $html
     * @return string
     */
    public static function html2markdown($html)
    {
        $converter = new \League\HTMLToMarkdown\HtmlConverter();
        $converter->getConfig()->setOption('bold_style', '**');
        $converter->getConfig()->setOption('italic_style', '_');
        $converter->getConfig()->setOption('strip_tags', true);
        return trim($converter->convert($html)) . "\n";
    }

    /**
     * Convert BBCode to HTML.
     *
     * In addition to the parser's default code definitions, [quote] is mapped
     * to <blockquote> and [code] to <pre><code>. The content of [code] is not
     * parsed for nested BBCode. The resulting HTML is passed through
     * Filters\HTMLFilter::clean() before it is returned.
     *
     * @param string $bbcode
     * @return string
     */
    public static function bbcode($bbcode)
    {
        $parser = new \JBBCode\Parser;
        $parser->addCodeDefinitionSet(new \JBBCode\DefaultCodeDefinitionSet());

        $builder = new \JBBCode\CodeDefinitionBuilder('quote', '<blockquote>{param}</blockquote>');
        $parser->addCodeDefinition($builder->build());

        $builder = new \JBBCode\CodeDefinitionBuilder('code', '<pre><code>{param}</code></pre>');
        $builder->setParseContent(false);
        $parser->addCodeDefinition($builder->build());

        $parser->parse($bbcode);
        $html = $parser->getAsHtml();
        return Filters\HTMLFilter::clean($html);
    }

    /**
     * Apply smart quotes and other stylistic enhancements to HTML.
     *
     * @param string $html
     * @return string
     */
    public static function applySmartQuotes($html)
    {
        return \Michelf\SmartyPants::defaultTransform($html, 'qbBdDiew');
    }

    /**
     * Compile LESS into CSS.
     *
     * The target file is always written: it contains the compiled CSS on
     * success, or a CSS comment holding the error message on failure.
     * Imports are resolved relative to the directory of the (first) source file.
     *
     * @param string|array $source_filename
     * @param string $target_filename
     * @param array $variables (optional)
     * @param bool $minify (optional)
     * @return bool True if compilation succeeded, false if the compiler threw an exception
     */
    public static function compileLESS($source_filename, $target_filename, $variables = array(), $minify = false)
    {
        // Get the cleaned and concatenated content.
        $content = self::_concatenate($source_filename, $target_filename);

        // Compile!
        try
        {
            $less_compiler = new \lessc;
            $less_compiler->setFormatter($minify ? 'compressed' : 'lessjs');
            $less_compiler->setImportDir(array(dirname(is_array($source_filename) ? array_first($source_filename) : $source_filename)));
            if ($variables)
            {
                $less_compiler->setVariables($variables);
            }
            $content = '@charset "UTF-8";' . "\n" . $less_compiler->compile($content) . "\n";
            $result = true;
        }
        catch (\Exception $e)
        {
            $content = '/*' . "\n" . 'Error while compiling LESS:' . "\n" . $e->getMessage() . "\n" . '*/' . "\n";
            $result = false;
        }

        // Save the result to the target file.
        \FileHandler::writeFile($target_filename, $content);
        return $result;
    }

    /**
     * Compile SCSS into CSS.
     *
     * The target file is always written: it contains the compiled CSS on
     * success, or a CSS comment holding the error message on failure.
     * Imports are resolved relative to the directory of the (first) source file.
     *
     * @param string|array $source_filename
     * @param string $target_filename
     * @param array $variables (optional)
     * @param bool $minify (optional)
     * @return bool True if compilation succeeded, false if the compiler threw an exception
     */
    public static function compileSCSS($source_filename, $target_filename, $variables = array(), $minify = false)
    {
        // Get the cleaned and concatenated content.
        $content = self::_concatenate($source_filename, $target_filename);

        // Compile!
        try
        {
            $scss_compiler = new \scssc;
            $scss_compiler->setFormatter($minify ? 'scss_formatter_compressed' : 'scss_formatter');
            $scss_compiler->setImportPaths(array(dirname(is_array($source_filename) ? array_first($source_filename) : $source_filename)));
            if ($variables)
            {
                $scss_compiler->setVariables($variables);
            }
            $content = '@charset "UTF-8";' . "\n" . $scss_compiler->compile($content) . "\n";
            $result = true;
        }
        catch (\Exception $e)
        {
            $content = '/*' . "\n" . 'Error while compiling SCSS:' . "\n" . $e->getMessage() . "\n" . '*/' . "\n";
            $result = false;
        }

        // Save the result to the target file.
        \FileHandler::writeFile($target_filename, $content);
        return $result;
    }

    /**
     * Minify CSS.
     *
     * @param string|array $source_filename
     * @param string $target_filename
     * @return bool True if the minified output is non-empty
     */
    public static function minifyCSS($source_filename, $target_filename)
    {
        $minifier = new \MatthiasMullie\Minify\CSS($source_filename);
        $content = $minifier->execute($target_filename);
        \FileHandler::writeFile($target_filename, $content);
        return strlen($content) ? true : false;
    }

    /**
     * Minify JS.
     *
     * @param string|array $source_filename
     * @param string $target_filename
     * @return bool True if the minified output is non-empty
     */
    public static function minifyJS($source_filename, $target_filename)
    {
        $minifier = new \MatthiasMullie\Minify\JS($source_filename);
        $content = $minifier->execute($target_filename);
        \FileHandler::writeFile($target_filename, $content);
        return strlen($content) ? true : false;
    }

    /**
     * CSS concatenation subroutine for compileLESS() and compileSCSS().
     *
     * Each source file is read, cleaned with utf8_clean(), and has its url()
     * references rewritten through a path converter built from the source and
     * target filenames. Empty URLs and URLs starting with '/' are left as-is.
     * The cleaned files are joined with blank lines in between.
     *
     * @param string|array $source_filename
     * @param string $target_filename
     * @return string
     */
    protected static function _concatenate($source_filename, $target_filename)
    {
        $result = '';

        if (!is_array($source_filename))
        {
            $source_filename = array($source_filename);
        }

        foreach ($source_filename as $filename)
        {
            // A file that cannot be read contributes empty content
            // instead of passing a boolean false down the pipeline.
            $content = file_get_contents($filename);
            if ($content === false)
            {
                $content = '';
            }
            $content = utf8_clean($content);

            $path_converter = new \MatthiasMullie\PathConverter\Converter($filename, $target_filename);
            $content = preg_replace_callback('/\burl\\(([^)]+)\\)/iU', function($matches) use ($path_converter) {
                $url = trim($matches[1], '\'"');
                if (!strlen($url) || $url[0] === '/')
                {
                    return $matches[0];
                }
                else
                {
                    return 'url("' . escape_dqstr($path_converter->convert($url)) . '")';
                }
            }, $content);
            unset($path_converter);

            $result .= trim($content) . "\n\n";
        }

        return $result;
    }
}

View file

@ -519,6 +519,27 @@ function utf8_check($str)
}
}
/**
 * Remove BOM and invalid UTF-8 sequences from file content.
 *
 * A leading UTF-8 byte order mark is stripped first. If the remainder does
 * not pass utf8_check(), invalid byte sequences are discarded. This function
 * always returns a string, even when iconv() reports a failure.
 *
 * @param string $str
 * @return string
 */
function utf8_clean($str)
{
    if (strlen($str) >= 3 && substr($str, 0, 3) === "\xEF\xBB\xBF")
    {
        // The cast keeps the value a string on PHP versions where substr()
        // returns false when the input consists of nothing but the BOM.
        $str = (string)substr($str, 3);
    }

    if (!utf8_check($str))
    {
        // Some iconv implementations return false for invalid input even with //IGNORE.
        $cleaned = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
        if ($cleaned === false)
        {
            // Fallback: ENT_IGNORE makes htmlspecialchars() drop invalid sequences,
            // and decoding with the same quote flag restores &, < and > exactly.
            $cleaned = htmlspecialchars_decode(htmlspecialchars($str, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8'), ENT_NOQUOTES);
        }
        $str = $cleaned;
    }

    return $str;
}
/**
* Encode UTF-8 characters outside of the Basic Multilingual Plane in the &#xxxxxx format.
* This allows emoticons and other characters to be stored in MySQL without utf8mb4 support.

View file

@ -0,0 +1,6 @@
This is the first paragraph.
It contains [b]bold[/b] [i]italic[/i] text.
[quote]This is quoted text.[/quote]
This example belongs to the test suite for [url="https://www.rhymix.org"]Rhymix[/url].

View file

@ -0,0 +1,6 @@
This is the first paragraph.
It contains <strong>bold</strong> <em>italic</em> text.
<blockquote>This is quoted text.</blockquote>
This example belongs to the test suite for <a href="https://www.rhymix.org">Rhymix</a>.

View file

@ -0,0 +1,13 @@
<p>This is a Markdown document.
These lines belong in the same paragraph.
Markdown usually ignores single line breaks.
</p>
<p>This is a <a href="foobar.html" title="title">link</a>.<br />
This is an <img src="foobar.jpg" title="image" alt="alt" />.
</p>
<ul>
<li>This is a list.</li>
<li>It has two items.</li>
</ul>

View file

@ -0,0 +1,7 @@
This is a Markdown document. These lines belong in the same paragraph. Markdown usually ignores single line breaks.
This is a [link](foobar.html "title").
This is an ![alt](foobar.jpg "image").
- This is a list.
- It has two items.

View file

@ -0,0 +1,13 @@
<p>
This is a sample text file.<br />
This is a paragraph with multiple lines.<br />
This is the third line.
</p>
<p>
This is another paragraph.
</p>
<p>
This is a <span>SPAN</span> element that will be stripped away.<br />
This is a <a href="foobar.html">link</a> that will be preserved.<br />
This is an <img src="test.jpg" alt="Image Title" /> that will be preserved.<br />
</p>

View file

@ -0,0 +1,9 @@
This is a sample text file.
This is a paragraph with multiple lines.
This is the third line.
This is another paragraph.
This is a SPAN element that will be stripped away.
This is a link <foobar.html> that will be preserved.
This is an [Image Title] <test.jpg> that will be preserved.

View file

@ -0,0 +1,3 @@
.myfunction(@size) {
margin: @size;
}

View file

@ -0,0 +1,7 @@
.rhymix {
color: @foo;
background: url('foo/bar.jpg');
span {
.myfunction(@bar);
}
}

View file

@ -0,0 +1,9 @@
@charset "UTF-8";
.rhymix {
color: #123456;
background: url("../_data/formatter/foo/bar.jpg");
}
.rhymix span {
margin: 320px;
}

View file

@ -0,0 +1,2 @@
@charset "UTF-8";
.rhymix{color:#123456;background:url("../_data/formatter/foo/bar.jpg");}.rhymix span{margin:320px;}

View file

@ -0,0 +1,17 @@
This is a Markdown document.
These lines belong in the same paragraph.
Markdown usually ignores single line breaks.
This is an indented code segment.
All Markdown variants will recognize it.
```
This is a fenced code segment.
Only Markdown Extra will recognize it.
```
This is a [link](foobar.html).
This is an ![image](foobar.jpg).
- This is a list.
- It has two items.

View file

@ -0,0 +1,17 @@
<p>This is a Markdown document.
These lines belong in the same paragraph.
Markdown usually ignores single line breaks.</p>
<pre><code>This is an indented code segment.
All Markdown variants will recognize it.
</code></pre>
<p><code>This is a fenced code segment.
Only Markdown Extra will recognize it.</code></p>
<p>This is a <a href="foobar.html">link</a>.
This is an <img src="foobar.jpg" alt="image" />.</p>
<ul><li>This is a list.</li>
<li>It has two items.</li>
</ul>

View file

@ -0,0 +1,17 @@
<p>This is a Markdown document.<br />
These lines belong in the same paragraph.<br />
Markdown usually ignores single line breaks.</p>
<pre><code>This is an indented code segment.
All Markdown variants will recognize it.
</code></pre>
<p><code>This is a fenced code segment.
Only Markdown Extra will recognize it.</code></p>
<p>This is a <a href="foobar.html">link</a>.<br />
This is an <img src="foobar.jpg" alt="image" />.</p>
<ul><li>This is a list. </li>
<li>It has two items. </li>
</ul>

View file

@ -0,0 +1,18 @@
<p>This is a Markdown document.<br />
These lines belong in the same paragraph.<br />
Markdown usually ignores single line breaks.</p>
<pre><code>This is an indented code segment.
All Markdown variants will recognize it.
</code></pre>
<pre><code>This is a fenced code segment.
Only Markdown Extra will recognize it.
</code></pre>
<p>This is a <a href="foobar.html">link</a>.<br />
This is an <img src="foobar.jpg" alt="image" />.</p>
<ul><li>This is a list. </li>
<li>It has two items. </li>
</ul>

View file

@ -0,0 +1,10 @@
@charset "UTF-8";
.rhymix {
background: url("foo/bar.jpg");
}
.wordpress {
border-radius: 4px;
}
.xpressengine {
margin: 320px;
}

View file

@ -0,0 +1,6 @@
(function($) {
$(".foo").click(function(event) {
event.preventDefault();
$(this).attr("bar", "baz");
});
})(jQuery);

View file

@ -0,0 +1 @@
@charset "UTF-8";.rhymix{background:url(../_data/formatter/foo/bar.jpg)}.wordpress{border-radius:4px}.xpressengine{margin:320px}

View file

@ -0,0 +1 @@
(function($){$(".foo").click(function(event){event.preventDefault();$(this).attr("bar","baz")})})(jQuery)

View file

@ -0,0 +1,3 @@
@mixin mymixin($size) {
margin: $size;
}

View file

@ -0,0 +1,7 @@
.rhymix {
color: $foo;
background: url('foo/bar.jpg');
span {
@include mymixin($bar);
}
}

View file

@ -0,0 +1,9 @@
@charset "UTF-8";
.rhymix {
color: #123456;
background: url("../_data/formatter/foo/bar.jpg");
}
.rhymix span {
margin: 320px;
}

View file

@ -0,0 +1,2 @@
@charset "UTF-8";
.rhymix{color:#123456;background:url("../_data/formatter/foo/bar.jpg")}.rhymix span{margin:320px}

View file

@ -0,0 +1,2 @@
<p>This paragraph contains &quot;dumb quotes&quot; and short -- dashes.</p>
<p>This paragraph contains ``backtick quotes'' and 'long' --- dashes...</p>

View file

@ -0,0 +1,2 @@
<p>This paragraph contains &#8220;dumb quotes&#8221; and short &#8212; dashes.</p>
<p>This paragraph contains &#8220;backtick quotes&#8221; and &#8217;long&#8217; &#8211; dashes&#8230;</p>

View file

@ -0,0 +1,9 @@
This is a sample text file.
Some of these lines are close together.
Other lines are separated by two newlines.
Or three newlines.
This is a <p>tag</p> that will be escaped if encoded.
Hello world!

View file

@ -0,0 +1,9 @@
This is a sample text file.<br />
Some of these lines are close together.<br />
<br />
Other lines are separated by two newlines.<br />
<br />
<br />
Or three newlines.<br />
This is a &lt;p&gt;tag&lt;/p&gt; that will be escaped if encoded.<br />
Hello world!<br />

View file

@ -0,0 +1,9 @@
<p>This is a sample text file.</p>
<p>Some of these lines are close together.</p>
<p></p>
<p>Other lines are separated by two newlines.</p>
<p></p>
<p></p>
<p>Or three newlines.</p>
<p>This is a &lt;p&gt;tag&lt;/p&gt; that will be escaped if encoded.</p>
<p>Hello world!</p>

View file

@ -0,0 +1,12 @@
<p>
This is a sample text file.<br />
Some of these lines are close together.
</p>
<p>
Other lines are separated by two newlines.
</p>
<p>
Or three newlines.<br />
This is a &lt;p&gt;tag&lt;/p&gt; that will be escaped if encoded.<br />
Hello world!
</p>

View file

@ -0,0 +1,134 @@
<?php
class FormatterTest extends \Codeception\TestCase\Test
{
    /**
     * Build the absolute path of a formatter fixture file.
     *
     * @param string $name
     * @return string
     */
    private function fixturePath($name)
    {
        return \RX_BASEDIR . 'tests/_data/formatter/' . $name;
    }

    /**
     * Read the content of a formatter fixture file.
     *
     * @param string $name
     * @return string
     */
    private function readFixture($name)
    {
        return file_get_contents($this->fixturePath($name));
    }

    /**
     * Build the absolute path of a temporary file in the test output directory.
     *
     * @param string $name
     * @return string
     */
    private function outputPath($name)
    {
        return \RX_BASEDIR . 'tests/_output/' . $name;
    }

    /**
     * text2html() with the default behavior and with each TEXT_* option.
     */
    public function testText2HTML()
    {
        $source = $this->readFixture('text2html.source.txt');
        $expected_default = $this->readFixture('text2html.target1.html');
        $expected_newline_p = $this->readFixture('text2html.target2.html');
        $expected_double_newline_p = $this->readFixture('text2html.target3.html');

        $this->assertEquals(
            $expected_default,
            \Rhymix\Framework\Formatter::text2html($source)
        );
        $this->assertEquals(
            $expected_newline_p,
            \Rhymix\Framework\Formatter::text2html($source, \Rhymix\Framework\Formatter::TEXT_NEWLINE_AS_P)
        );
        $this->assertEquals(
            $expected_double_newline_p,
            \Rhymix\Framework\Formatter::text2html($source, \Rhymix\Framework\Formatter::TEXT_DOUBLE_NEWLINE_AS_P)
        );
    }

    /**
     * html2text() against the reference output.
     */
    public function testHTML2Text()
    {
        $source = $this->readFixture('html2text.source.html');
        $expected = $this->readFixture('html2text.target.txt');

        $this->assertEquals($expected, \Rhymix\Framework\Formatter::html2text($source));
    }

    /**
     * markdown2html() with no options, with MD_NEWLINE_AS_BR, and with
     * MD_NEWLINE_AS_BR combined with MD_ENABLE_EXTRA.
     */
    public function testMarkdown2HTML()
    {
        $source = $this->readFixture('markdown2html.source.md');
        $expected_plain = $this->readFixture('markdown2html.target1.html');
        $expected_br = $this->readFixture('markdown2html.target2.html');
        $expected_br_extra = $this->readFixture('markdown2html.target3.html');

        $br_only = \Rhymix\Framework\Formatter::MD_NEWLINE_AS_BR;
        $br_and_extra = \Rhymix\Framework\Formatter::MD_NEWLINE_AS_BR | \Rhymix\Framework\Formatter::MD_ENABLE_EXTRA;

        $this->assertEquals($expected_plain, \Rhymix\Framework\Formatter::markdown2html($source));
        $this->assertEquals($expected_br, \Rhymix\Framework\Formatter::markdown2html($source, $br_only));
        $this->assertEquals($expected_br_extra, \Rhymix\Framework\Formatter::markdown2html($source, $br_and_extra));
    }

    /**
     * html2markdown() against the reference output.
     */
    public function testHTML2Markdown()
    {
        $source = $this->readFixture('html2markdown.source.html');
        $expected = $this->readFixture('html2markdown.target.md');

        $this->assertEquals($expected, \Rhymix\Framework\Formatter::html2markdown($source));
    }

    /**
     * bbcode() against the reference output.
     */
    public function testBBCode()
    {
        $source = $this->readFixture('bbcode.source.txt');
        $expected = $this->readFixture('bbcode.target.html');

        $this->assertEquals($expected, \Rhymix\Framework\Formatter::bbcode($source));
    }

    /**
     * applySmartQuotes() against the reference output.
     */
    public function testApplySmartQuotes()
    {
        $source = $this->readFixture('smartypants.source.html');
        $expected = $this->readFixture('smartypants.target.html');

        $this->assertEquals($expected, \Rhymix\Framework\Formatter::applySmartQuotes($source));
    }

    /**
     * compileLESS() in normal and minified mode, using two source files and two variables.
     * The generated files are removed once every assertion has passed.
     */
    public function testCompileLESS()
    {
        $source_files = array(
            $this->fixturePath('less.source1.less'),
            $this->fixturePath('less.source2.less'),
        );
        $less_variables = array(
            'foo' => '#123456',
            'bar' => '320px',
        );

        $expected_normal = $this->readFixture('less.target1.css');
        $expected_minified = $this->readFixture('less.target2.css');
        $output_normal = $this->outputPath('less.target1.css');
        $output_minified = $this->outputPath('less.target2.css');

        $this->assertTrue(\Rhymix\Framework\Formatter::compileLESS($source_files, $output_normal, $less_variables));
        $this->assertEquals($expected_normal, file_get_contents($output_normal));

        $this->assertTrue(\Rhymix\Framework\Formatter::compileLESS($source_files, $output_minified, $less_variables, true));
        $this->assertEquals($expected_minified, file_get_contents($output_minified));

        unlink($output_normal);
        unlink($output_minified);
    }

    /**
     * compileSCSS() in normal and minified mode, using two source files and two variables.
     * The generated files are removed once every assertion has passed.
     */
    public function testCompileSCSS()
    {
        $source_files = array(
            $this->fixturePath('scss.source1.scss'),
            $this->fixturePath('scss.source2.scss'),
        );
        $scss_variables = array(
            'foo' => '#123456',
            'bar' => '320px',
        );

        $expected_normal = $this->readFixture('scss.target1.css');
        $expected_minified = $this->readFixture('scss.target2.css');
        $output_normal = $this->outputPath('scss.target1.css');
        $output_minified = $this->outputPath('scss.target2.css');

        $this->assertTrue(\Rhymix\Framework\Formatter::compileSCSS($source_files, $output_normal, $scss_variables));
        $this->assertEquals($expected_normal, file_get_contents($output_normal));

        $this->assertTrue(\Rhymix\Framework\Formatter::compileSCSS($source_files, $output_minified, $scss_variables, true));
        $this->assertEquals($expected_minified, file_get_contents($output_minified));

        unlink($output_normal);
        unlink($output_minified);
    }

    /**
     * minifyCSS() against the reference output.
     */
    public function testMinifyCSS()
    {
        $source_file = $this->fixturePath('minify.source.css');
        $expected_file = $this->fixturePath('minify.target.css');
        $output_file = $this->outputPath('minify.target.css');

        $this->assertTrue(\Rhymix\Framework\Formatter::minifyCSS($source_file, $output_file));
        $this->assertEquals(file_get_contents($expected_file), file_get_contents($output_file));

        unlink($output_file);
    }

    /**
     * minifyJS() against the reference output.
     */
    public function testMinifyJS()
    {
        $source_file = $this->fixturePath('minify.source.js');
        $expected_file = $this->fixturePath('minify.target.js');
        $output_file = $this->outputPath('minify.target.js');

        $this->assertTrue(\Rhymix\Framework\Formatter::minifyJS($source_file, $output_file));
        $this->assertEquals(file_get_contents($expected_file), file_get_contents($output_file));

        unlink($output_file);
    }
}