mirror of
https://github.com/Lastorder-DC/rhymix.git
synced 2026-01-09 03:32:00 +09:00
Update composer dependencies
This commit is contained in:
parent
49cc39e507
commit
cbd324c35b
428 changed files with 17862 additions and 5885 deletions
46
vendor/league/html-to-markdown/CHANGELOG.md
vendored
46
vendor/league/html-to-markdown/CHANGELOG.md
vendored
|
|
@ -4,6 +4,46 @@ Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) princip
|
|||
|
||||
## [Unreleased][unreleased]
|
||||
|
||||
## [4.4.1]
|
||||
|
||||
### Fixed
|
||||
- Fixed autolinking of invalid URLs (#129)
|
||||
|
||||
## [4.4.0]
|
||||
|
||||
### Added
|
||||
- Added `hard_break` configuration option (#112, #115)
|
||||
- The `HtmlConverter` can now be instantiated with an `Environment` (#118)
|
||||
|
||||
### Fixed
|
||||
- Fixed handling of paragraphs in list item elements (#47, #110)
|
||||
- Fixed phantom spaces when newlines follow `br` elements (#116, #117)
|
||||
- Fixed link converter not sanitizing inner spaces properly (#119, #120)
|
||||
|
||||
## [4.3.1]
|
||||
### Changed
|
||||
- Revised the sanitization implementation (#109)
|
||||
|
||||
### Fixed
|
||||
- Fixed tag-like content not being escaped (#67, #109)
|
||||
- Fixed thematic break-like content not being escaped (#65, #109)
|
||||
- Fixed codefence-like content not being escaped (#64, #109)
|
||||
|
||||
## [4.3.0]
|
||||
### Added
|
||||
- Added full support for PHP 7.0 and 7.1
|
||||
|
||||
### Changed
|
||||
- Changed `<pre>` and `<pre><code>` conversions to use backticks instead of indentation (#102)
|
||||
|
||||
### Fixed
|
||||
- Fixed issue where specified code language was not preserved (#70, #102)
|
||||
- Fixed issue where `<code>` tags nested in `<pre>` were not converted properly (#70, #102)
|
||||
- Fixed header-like content not being escaped (#76, #105)
|
||||
- Fixed blockquote-like content not being escaped (#77, #103)
|
||||
- Fixed ordered list-like content not being escaped (#73, #106)
|
||||
- Fixed unordered list-like content not being escaped (#71, #107)
|
||||
|
||||
## [4.2.2]
|
||||
### Fixed
|
||||
- Fixed sanitization bug which sometimes removes desired content (#63, #101)
|
||||
|
|
@ -148,7 +188,11 @@ not ideally set, so this releases fixes that. Moving forwards this should reduce
|
|||
### Added
|
||||
- Initial release
|
||||
|
||||
[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.2.2...master
|
||||
[unreleased]: https://github.com/thephpleague/html-to-markdown/compare/4.4.1...master
|
||||
[4.4.1]: https://github.com/thephpleague/html-to-markdown/compare/4.4.0...4.4.1
|
||||
[4.4.0]: https://github.com/thephpleague/html-to-markdown/compare/4.3.1...4.4.0
|
||||
[4.3.1]: https://github.com/thephpleague/html-to-markdown/compare/4.3.0...4.3.1
|
||||
[4.3.0]: https://github.com/thephpleague/html-to-markdown/compare/4.2.2...4.3.0
|
||||
[4.2.2]: https://github.com/thephpleague/html-to-markdown/compare/4.2.1...4.2.2
|
||||
[4.2.1]: https://github.com/thephpleague/html-to-markdown/compare/4.2.0...4.2.1
|
||||
[4.2.0]: https://github.com/thephpleague/html-to-markdown/compare/4.1.1...4.2.0
|
||||
|
|
|
|||
85
vendor/league/html-to-markdown/README.md
vendored
85
vendor/league/html-to-markdown/README.md
vendored
|
|
@ -44,16 +44,20 @@ Add `require 'vendor/autoload.php';` to the top of your script.
|
|||
|
||||
Next, create a new HtmlConverter instance, passing in your valid HTML code to its `convert()` function:
|
||||
|
||||
use League\HTMLToMarkdown\HtmlConverter;
|
||||
```php
|
||||
use League\HTMLToMarkdown\HtmlConverter;
|
||||
|
||||
$converter = new HtmlConverter();
|
||||
$converter = new HtmlConverter();
|
||||
|
||||
$html = "<h3>Quick, to the Batpoles!</h3>";
|
||||
$markdown = $converter->convert($html);
|
||||
$html = "<h3>Quick, to the Batpoles!</h3>";
|
||||
$markdown = $converter->convert($html);
|
||||
```
|
||||
|
||||
The `$markdown` variable now contains the Markdown version of your HTML as a string:
|
||||
|
||||
echo $markdown; // ==> ### Quick, to the Batpoles!
|
||||
```php
|
||||
echo $markdown; // ==> ### Quick, to the Batpoles!
|
||||
```
|
||||
|
||||
The included `demo` directory contains an HTML->Markdown conversion form to try out.
|
||||
|
||||
|
|
@ -63,38 +67,79 @@ By default, HTML To Markdown preserves HTML tags without Markdown equivalents, l
|
|||
|
||||
To strip HTML tags that don't have a Markdown equivalent while preserving the content inside them, set `strip_tags` to true, like this:
|
||||
|
||||
$converter = new HtmlConverter(array('strip_tags' => true));
|
||||
```php
|
||||
$converter = new HtmlConverter(array('strip_tags' => true));
|
||||
|
||||
$html = '<span>Turnips!</span>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
|
||||
$html = '<span>Turnips!</span>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
|
||||
```
|
||||
|
||||
Or more explicitly, like this:
|
||||
|
||||
$converter = new HtmlConverter();
|
||||
$converter->getConfig()->setOption('strip_tags', true);
|
||||
```php
|
||||
$converter = new HtmlConverter();
|
||||
$converter->getConfig()->setOption('strip_tags', true);
|
||||
|
||||
$html = '<span>Turnips!</span>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
|
||||
$html = '<span>Turnips!</span>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "Turnips!"
|
||||
```
|
||||
|
||||
Note that only the tags themselves are stripped, not the content they hold.
|
||||
|
||||
To strip tags and their content, pass a space-separated list of tags in `remove_nodes`, like this:
|
||||
|
||||
$converter = new HtmlConverter(array('remove_nodes' => 'span div'));
|
||||
```php
|
||||
$converter = new HtmlConverter(array('remove_nodes' => 'span div'));
|
||||
|
||||
$html = '<span>Turnips!</span><div>Monkeys!</div>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains ""
|
||||
$html = '<span>Turnips!</span><div>Monkeys!</div>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains ""
|
||||
```
|
||||
|
||||
### Style options
|
||||
|
||||
Bold and italic tags are converted using the asterisk syntax by default. Change this to the underscore syntax using the `bold_style` and `italic_style` options.
|
||||
|
||||
$converter = new HtmlConverter();
|
||||
$converter->getConfig()->setOption('italic_style', '_');
|
||||
$converter->getConfig()->setOption('bold_style', '__');
|
||||
```php
|
||||
$converter = new HtmlConverter();
|
||||
$converter->getConfig()->setOption('italic_style', '_');
|
||||
$converter->getConfig()->setOption('bold_style', '__');
|
||||
|
||||
$html = '<em>Italic</em> and a <strong>bold</strong>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "_Italic_ and a __bold__"
|
||||
$html = '<em>Italic</em> and a <strong>bold</strong>';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "_Italic_ and a __bold__"
|
||||
```
|
||||
|
||||
### Line break options
|
||||
|
||||
By default, `br` tags are converted to two spaces followed by a newline character as per [traditional Markdown](https://daringfireball.net/projects/markdown/syntax#p). Set `hard_break` to `true` to omit the two spaces, as per GitHub Flavored Markdown (GFM).
|
||||
|
||||
```php
|
||||
$converter = new HtmlConverter();
|
||||
$html = '<p>test<br>line break</p>';
|
||||
|
||||
$converter->getConfig()->setOption('hard_break', true);
|
||||
$markdown = $converter->convert($html); // $markdown now contains "test\nline break"
|
||||
|
||||
$converter->getConfig()->setOption('hard_break', false); // default
|
||||
$markdown = $converter->convert($html); // $markdown now contains "test \nline break"
|
||||
```
|
||||
|
||||
### Passing custom Environment object
|
||||
|
||||
You can pass a custom `Environment` object to control, for example, which converters are used.
|
||||
|
||||
```php
|
||||
$environment = new Environment(array(
|
||||
// your configuration here
|
||||
));
|
||||
$environment->addConverter(new HeaderConverter()); // optionally - add converter manually
|
||||
|
||||
$converter = new HtmlConverter($environment);
|
||||
|
||||
$html = '<h3>Header</h3>
|
||||
<img src="" />
|
||||
';
|
||||
$markdown = $converter->convert($html); // $markdown now contains "### Header" and "<img src="" />"
|
||||
```
|
||||
|
||||
### Limitations
|
||||
|
||||
|
|
|
|||
2
vendor/league/html-to-markdown/composer.json
vendored
2
vendor/league/html-to-markdown/composer.json
vendored
|
|
@ -42,7 +42,7 @@
|
|||
"bin": ["bin/html-to-markdown"],
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "4.3-dev"
|
||||
"dev-master": "4.5-dev"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
62
vendor/league/html-to-markdown/src/Converter/CodeConverter.php
vendored
Normal file
62
vendor/league/html-to-markdown/src/Converter/CodeConverter.php
vendored
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class CodeConverter implements ConverterInterface
{
    /**
     * Converts a `<code>` element into a Markdown inline code span or fenced code block.
     *
     * Content that contains a line break becomes a fenced block (three backticks);
     * anything else becomes an inline span (single backticks). A class that starts
     * with `language-` supplies the info string of a fenced block.
     *
     * @param ElementInterface $element
     *
     * @return string
     */
    public function convert(ElementInterface $element)
    {
        $infoString = '';

        // Checking for a language class on the code element
        $classes = $element->getAttribute('class');

        if ($classes) {
            // Tags can carry several classes; pick the first one that starts with 'language-'
            foreach (explode(' ', $classes) as $class) {
                // Prefix comparison: a class that merely contains 'language-' somewhere
                // in the middle is not a language marker.
                if (strncmp($class, 'language-', 9) === 0) {
                    // The trailing space is kept so fenced output stays unchanged.
                    $infoString = substr($class, 9) . ' ';
                    break;
                }
            }
        }

        $code = html_entity_decode($element->getChildrenAsString());

        // Strip the code tags themselves; the opening tag may carry attributes,
        // hence the regular expression.
        $code = preg_replace('/<code\b[^>]*>/', '', $code);
        $code = str_replace('</code>', '', $code);

        // Any kind of line break means the content spans multiple lines
        if (preg_match('/\r\n|\r|\n/', $code) === 1) {
            // Multiple lines detected, adding three backticks and newlines
            return '```' . $infoString . "\n" . $code . "\n" . '```';
        }

        // One line of code, wrapped in single backticks. Inline spans have no place
        // for a language, so it is not emitted here (it would become part of the code).
        return '`' . $code . '`';
    }

    /**
     * @return string[]
     */
    public function getSupportedTags()
    {
        return array('code');
    }
}
|
||||
|
|
@ -2,10 +2,25 @@
|
|||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class HardBreakConverter implements ConverterInterface
|
||||
class HardBreakConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
|
|
@ -13,7 +28,7 @@ class HardBreakConverter implements ConverterInterface
|
|||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
return " \n";
|
||||
return $this->config->getOption('hard_break') ? "\n" : " \n";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -36,9 +36,9 @@ class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface
|
|||
|
||||
if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) {
|
||||
return $this->createSetextHeader($level, $element->getValue());
|
||||
} else {
|
||||
return $this->createAtxHeader($level, $element->getValue());
|
||||
}
|
||||
|
||||
return $this->createAtxHeader($level, $element->getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -57,7 +57,7 @@ class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface
|
|||
*/
|
||||
private function createSetextHeader($level, $content)
|
||||
{
|
||||
$length = (function_exists('mb_strlen')) ? mb_strlen($content, 'utf-8') : strlen($content);
|
||||
$length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content);
|
||||
$underline = ($level === 1) ? '=' : '-';
|
||||
|
||||
return $content . "\n" . str_repeat($underline, $length) . "\n\n";
|
||||
|
|
|
|||
|
|
@ -19,12 +19,10 @@ class ImageConverter implements ConverterInterface
|
|||
|
||||
if ($title !== '') {
|
||||
// No newlines added. <img> should be in a block-level element.
|
||||
$markdown = '';
|
||||
} else {
|
||||
$markdown = '';
|
||||
return '';
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -15,11 +15,11 @@ class LinkConverter implements ConverterInterface
|
|||
{
|
||||
$href = $element->getAttribute('href');
|
||||
$title = $element->getAttribute('title');
|
||||
$text = $element->getValue();
|
||||
$text = trim($element->getValue());
|
||||
|
||||
if ($title !== '') {
|
||||
$markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
|
||||
} elseif ($href === $text) {
|
||||
} elseif ($href === $text && $this->isValidAutolink($href)) {
|
||||
$markdown = '<' . $href . '>';
|
||||
} else {
|
||||
$markdown = '[' . $text . '](' . $href . ')';
|
||||
|
|
@ -39,4 +39,14 @@ class LinkConverter implements ConverterInterface
|
|||
{
|
||||
return array('a');
|
||||
}
|
||||
|
||||
/**
 * Checks whether a URL can be written as a Markdown autolink (`<url>`).
 *
 * The entire string must be a scheme (a letter followed by 1 to 31 letters, digits,
 * `.`, `+` or `-`), then a colon, then only characters other than `<`, `>` and the
 * bytes 0x00-0x20 (control characters and the space).
 *
 * @param string $href
 *
 * @return bool
 */
private function isValidAutolink($href)
{
    // \z anchors the pattern at the very end of the string; without it a valid
    // prefix was enough, so URLs containing spaces or angle brackets were accepted.
    return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*\z/i', $href) === 1;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,24 +15,26 @@ class ListItemConverter implements ConverterInterface
|
|||
{
|
||||
// If parent is an ol, use numbers, otherwise, use dashes
|
||||
$list_type = $element->getParent()->getTagName();
|
||||
$value = $element->getValue();
|
||||
|
||||
// Add spaces to start for nested list items
|
||||
$level = $element->getListItemLevel($element);
|
||||
$prefix = str_repeat(' ', $level);
|
||||
|
||||
$prefixForParagraph = str_repeat(' ', $level + 1);
|
||||
$value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue()))));
|
||||
|
||||
// If list item is the first in a nested list, add a newline before it
|
||||
$prefix = '';
|
||||
if ($level > 0 && $element->getSiblingPosition() === 1) {
|
||||
$prefix = "\n" . $prefix;
|
||||
$prefix = "\n";
|
||||
}
|
||||
|
||||
if ($list_type === 'ul') {
|
||||
$markdown = $prefix . '- ' . trim($value) . "\n";
|
||||
} else {
|
||||
$number = $element->getSiblingPosition();
|
||||
$markdown = $prefix . $number . '. ' . trim($value) . "\n";
|
||||
return $prefix . '- ' . $value . "\n";
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
$number = $element->getSiblingPosition();
|
||||
|
||||
return $prefix . $number . '. ' . $value . "\n";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -15,7 +15,19 @@ class ParagraphConverter implements ConverterInterface
|
|||
{
|
||||
$value = $element->getValue();
|
||||
|
||||
return trim($value) !== '' ? rtrim($value) . "\n\n" : '';
|
||||
$markdown = '';
|
||||
|
||||
$lines = preg_split('/\r\n|\r|\n/', $value);
|
||||
foreach ($lines as $line) {
|
||||
/*
|
||||
* Some special characters need to be escaped based on the position that they appear
|
||||
* The following function will deal with those special cases.
|
||||
*/
|
||||
$markdown .= $this->escapeSpecialCharacters($line);
|
||||
$markdown .= "\n";
|
||||
}
|
||||
|
||||
return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : '';
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -25,4 +37,88 @@ class ParagraphConverter implements ConverterInterface
|
|||
{
|
||||
return array('p');
|
||||
}
|
||||
|
||||
/**
 * Runs a single line through every escaping step, in order: line-start markers,
 * literal sequences, then regex-detected patterns.
 *
 * @param string $line
 *
 * @return string
 */
private function escapeSpecialCharacters($line)
{
    return $this->escapeOtherCharactersRegex(
        $this->escapeOtherCharacters(
            $this->escapeFirstCharacters($line)
        )
    );
}
|
||||
|
||||
/**
 * Escapes a line whose first non-whitespace characters would otherwise be read
 * as Markdown block syntax.
 *
 * When the left-trimmed line starts with one of the listed markers, the result is
 * the left-trimmed line prefixed with a backslash; otherwise the line is returned
 * untouched, leading whitespace included.
 *
 * @param string $line
 *
 * @return string
 */
private function escapeFirstCharacters($line)
{
    $trimmed = ltrim($line);

    $markers = array(
        '>',
        '- ',
        '+ ',
        '--',
        '~~~',
        '---',
        '- - -'
    );

    foreach ($markers as $marker) {
        // Prefix comparison against the trimmed line
        if (strncmp($trimmed, $marker, strlen($marker)) === 0) {
            return '\\' . $trimmed;
        }
    }

    return $line;
}
|
||||
|
||||
/**
 * Escapes literal sequences that need a backslash wherever they appear in the line.
 *
 * Every occurrence of each listed sequence is prefixed with a backslash.
 *
 * @param string $line
 *
 * @return string
 */
private function escapeOtherCharacters($line)
{
    $escapable = array(
        '<!--'
    );

    foreach ($escapable as $sequence) {
        // str_replace handles all occurrences; escaping only the first one would
        // leave later sequences on the same line unescaped.
        $line = str_replace($sequence, '\\' . $sequence, $line);
    }

    return $line;
}
|
||||
|
||||
/**
 * Escapes a number at the start of the line that is followed by `)` or `.`,
 * by inserting a backslash between the digits and that character.
 *
 * Lines that do not start with such a number are returned unchanged.
 *
 * @param string $line
 *
 * @return string
 */
private function escapeOtherCharactersRegex($line)
{
    // Digits at the beginning of the line, directly followed by ')' or '.'
    $pattern = '/^[0-9]+(?=\)|\.)/';

    if (!preg_match($pattern, $line, $found)) {
        return $line;
    }

    $digits = $found[0];

    // The lookahead guarantees at least one character after the digits
    return $digits . '\\' . substr($line, strlen($digits));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,51 +13,37 @@ class PreformattedConverter implements ConverterInterface
|
|||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
// Store the content of the code block in an array, one entry for each line
|
||||
|
||||
$markdown = '';
|
||||
|
||||
$code_content = html_entity_decode($element->getChildrenAsString());
|
||||
$code_content = str_replace(array('<code>', '</code>'), '', $code_content);
|
||||
$code_content = str_replace(array('<pre>', '</pre>'), '', $code_content);
|
||||
$pre_content = html_entity_decode($element->getChildrenAsString());
|
||||
$pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content);
|
||||
|
||||
$lines = preg_split('/\r\n|\r|\n/', $code_content);
|
||||
$total = count($lines);
|
||||
/*
|
||||
* Checking for the code tag.
|
||||
* Usually pre tags are used along with code tags. This conditional will check for already converted code tags,
|
||||
* which use backticks, and if those backticks are at the beginning and at the end of the string it means
|
||||
* there's no more information to convert.
|
||||
*/
|
||||
|
||||
// If there's more than one line of code, prepend each line with four spaces and no backticks.
|
||||
if ($total > 1 || $element->getTagName() === 'pre') {
|
||||
// Remove the first and last line if they're empty
|
||||
$first_line = trim($lines[0]);
|
||||
$last_line = trim($lines[$total - 1]);
|
||||
$first_line = trim($first_line, '
'); //trim XML style carriage returns too
|
||||
$last_line = trim($last_line, '
');
|
||||
|
||||
if (empty($first_line)) {
|
||||
array_shift($lines);
|
||||
}
|
||||
|
||||
if (empty($last_line)) {
|
||||
array_pop($lines);
|
||||
}
|
||||
|
||||
$count = 1;
|
||||
foreach ($lines as $line) {
|
||||
$line = str_replace('
', '', $line);
|
||||
$markdown .= ' ' . $line;
|
||||
// Add newlines, except final line of the code
|
||||
if ($count !== $total) {
|
||||
$markdown .= "\n";
|
||||
}
|
||||
$count++;
|
||||
}
|
||||
$markdown .= "\n";
|
||||
} else {
|
||||
// There's only one line of code. It's a code span, not a block. Just wrap it with backticks.
|
||||
$markdown .= '`' . $lines[0] . '`';
|
||||
$firstBacktick = strpos(trim($pre_content), '`');
|
||||
$lastBacktick = strrpos(trim($pre_content), '`');
|
||||
if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) {
|
||||
return $pre_content;
|
||||
}
|
||||
|
||||
if ($element->getTagName() === 'pre') {
|
||||
$markdown = "\n" . $markdown . "\n";
|
||||
// If the execution reaches this point it means it's just a pre tag, with no code tag nested
|
||||
|
||||
// Normalizing new lines
|
||||
$pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content);
|
||||
|
||||
// Checking if the string has multiple lines
|
||||
$lines = preg_split('/\r\n|\r|\n/', $pre_content);
|
||||
if (count($lines) > 1) {
|
||||
// Multiple lines detected, adding three backticks and newlines
|
||||
$markdown .= '```' . "\n" . $pre_content . "\n" . '```';
|
||||
} else {
|
||||
// One line of code, wrapping it on one backtick.
|
||||
$markdown .= '`' . $pre_content . '`';
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
|
|
@ -68,6 +54,6 @@ class PreformattedConverter implements ConverterInterface
|
|||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('pre', 'code');
|
||||
return array('pre');
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,12 +13,16 @@ class TextConverter implements ConverterInterface
|
|||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$value = $element->getValue();
|
||||
$markdown = $element->getValue();
|
||||
|
||||
$markdown = preg_replace('~\s+~u', ' ', $value);
|
||||
// Remove leftover \n at the beginning of the line
|
||||
$markdown = ltrim($markdown, "\n");
|
||||
|
||||
//escape the following characters: '*', '_' and '\'
|
||||
$markdown = preg_replace('~([*_\\\\])~u', '\\\\$1', $markdown);
|
||||
// Replace sequences of invisible characters with spaces
|
||||
$markdown = preg_replace('~\s+~u', ' ', $markdown);
|
||||
|
||||
// Escape the following characters: '*', '_', '[', ']' and '\'
|
||||
$markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown);
|
||||
|
||||
$markdown = preg_replace('~^#~u', '\\\\#', $markdown);
|
||||
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ class Element implements ElementInterface
|
|||
|
||||
/**
|
||||
* @param \DomNode $node
|
||||
* @param bool $checkChildren
|
||||
*
|
||||
* @return \DomNode|null
|
||||
*/
|
||||
|
|
@ -133,9 +134,13 @@ class Element implements ElementInterface
|
|||
{
|
||||
if ($checkChildren && $node->firstChild) {
|
||||
return $node->firstChild;
|
||||
} elseif ($node->nextSibling) {
|
||||
}
|
||||
|
||||
if ($node->nextSibling) {
|
||||
return $node->nextSibling;
|
||||
} elseif ($node->parentNode) {
|
||||
}
|
||||
|
||||
if ($node->parentNode) {
|
||||
return $this->getNextNode($node->parentNode, false);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
namespace League\HTMLToMarkdown;
|
||||
|
||||
use League\HTMLToMarkdown\Converter\BlockquoteConverter;
|
||||
use League\HTMLToMarkdown\Converter\CodeConverter;
|
||||
use League\HTMLToMarkdown\Converter\CommentConverter;
|
||||
use League\HTMLToMarkdown\Converter\ConverterInterface;
|
||||
use League\HTMLToMarkdown\Converter\DefaultConverter;
|
||||
|
|
@ -83,6 +84,7 @@ final class Environment
|
|||
$environment = new static($config);
|
||||
|
||||
$environment->addConverter(new BlockquoteConverter());
|
||||
$environment->addConverter(new CodeConverter());
|
||||
$environment->addConverter(new CommentConverter());
|
||||
$environment->addConverter(new DivConverter());
|
||||
$environment->addConverter(new EmphasisConverter());
|
||||
|
|
|
|||
|
|
@ -24,22 +24,27 @@ class HtmlConverter
|
|||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param array $options Configuration options
|
||||
* @param Environment|array $options Environment object or configuration options
|
||||
*/
|
||||
public function __construct(array $options = array())
|
||||
public function __construct($options = array())
|
||||
{
|
||||
$defaults = array(
|
||||
'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
|
||||
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
|
||||
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
|
||||
'bold_style' => '**', // Set to '__' if you prefer the underlined style
|
||||
'italic_style' => '_', // Set to '*' if you prefer the asterisk style
|
||||
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
|
||||
);
|
||||
if ($options instanceof Environment) {
|
||||
$this->environment = $options;
|
||||
} elseif (is_array($options)) {
|
||||
$defaults = array(
|
||||
'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
|
||||
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
|
||||
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
|
||||
'bold_style' => '**', // Set to '__' if you prefer the underlined style
|
||||
'italic_style' => '_', // Set to '*' if you prefer the asterisk style
|
||||
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
|
||||
'hard_break' => false,// Set to true to turn <br> into `\n` instead of ` \n`
|
||||
);
|
||||
|
||||
$this->environment = Environment::createDefaultEnvironment($defaults);
|
||||
$this->environment = Environment::createDefaultEnvironment($defaults);
|
||||
|
||||
$this->environment->getConfig()->merge($options);
|
||||
$this->environment->getConfig()->merge($options);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -77,7 +82,9 @@ class HtmlConverter
|
|||
*
|
||||
* Loads HTML and passes to getMarkdown()
|
||||
*
|
||||
* @param $html
|
||||
* @param string $html
|
||||
*
|
||||
* @throws \InvalidArgumentException
|
||||
*
|
||||
* @return string The Markdown version of the html
|
||||
*/
|
||||
|
|
@ -100,9 +107,7 @@ class HtmlConverter
|
|||
// Store the now-modified DOMDocument as a string
|
||||
$markdown = $document->saveHTML();
|
||||
|
||||
$markdown = $this->sanitize($markdown);
|
||||
|
||||
return $markdown;
|
||||
return $this->sanitize($markdown);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -143,7 +148,8 @@ class HtmlConverter
|
|||
private function convertChildren(ElementInterface $element)
|
||||
{
|
||||
// Don't convert HTML code inside <code> and <pre> blocks to Markdown - that should stay as HTML
|
||||
if ($element->isDescendantOf(array('pre', 'code'))) {
|
||||
// except if the current node is a code tag, which needs to be converted by the CodeConverter.
|
||||
if ($element->isDescendantOf(array('pre', 'code')) && $element->getTagName() !== 'code') {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -220,8 +226,6 @@ class HtmlConverter
|
|||
}
|
||||
}
|
||||
|
||||
$markdown = trim($markdown, "\n\r\0\x0B");
|
||||
|
||||
return $markdown;
|
||||
return trim($markdown, "\n\r\0\x0B");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue