Encode non-BMP UTF-8 characters as HTML entities

This commit is contained in:
Kijin Sung 2016-01-17 14:09:45 +09:00
parent 7d44db1dcb
commit d474c20a36
4 changed files with 31 additions and 2 deletions

View file

@ -5,3 +5,19 @@
*
* Copyright (c) Rhymix Developers and Contributors
*/
/**
 * Replace every 4-byte UTF-8 sequence in a string with a hexadecimal
 * numeric character reference such as &#x1f600;.
 *
 * Four-byte sequences are how UTF-8 represents characters outside of the
 * Basic Multilingual Plane (emoticons and the like). Rewriting them as
 * plain ASCII entities allows such characters to be stored in MySQL
 * without utf8mb4 support.
 *
 * Matching is byte-wise (the pattern has no "u" modifier): any lead byte
 * in the range F0-F7 followed by three continuation bytes (80-BF) is
 * converted. All other bytes are passed through untouched.
 *
 * @param string $str The string to encode
 * @return string The string with 4-byte sequences replaced by entities
 */
function utf8_mbencode($str)
{
	$to_entity = function($match) {
		$sequence = $match[0];

		// The lead byte contributes its low 3 bits; each of the three
		// continuation bytes contributes its low 6 bits (3 + 6 + 6 + 6 = 21 bits).
		$lead = (ord($sequence[0]) & 0x07) << 18;
		$second = (ord($sequence[1]) & 0x3F) << 12;
		$third = (ord($sequence[2]) & 0x3F) << 6;
		$fourth = ord($sequence[3]) & 0x3F;
		$codepoint = $lead + $second + $third + $fourth;

		// Lowercase hexadecimal, without zero padding.
		return '&#x' . dechex($codepoint) . ';';
	};

	return preg_replace_callback('/[\xF0-\xF7][\x80-\xBF]{3}/', $to_entity, $str);
}

View file

@ -357,6 +357,7 @@ class commentController extends comment
{
$obj->content = removeHackTag($obj->content);
}
$obj->content = utf8_mbencode($obj->content);
if(!$obj->notify_message)
{
@ -777,6 +778,7 @@ class commentController extends comment
{
$obj->content = removeHackTag($obj->content);
}
$obj->content = utf8_mbencode($obj->content);
// begin transaction
$oDB = DB::getInstance();

View file

@ -132,7 +132,7 @@ class communicationController extends communication
$content = sprintf("%s<br /><br />From : <a href=\"%s\" target=\"_blank\">%s</a>", $content, $view_url, $view_url);
$oMail = new Mail();
$oMail->setTitle($title);
$oMail->setContent($content);
$oMail->setContent(utf8_mbencode(removeHackTag($content)));
$oMail->setSender($logged_info->nick_name, $logged_info->email_address);
$oMail->setReceiptor($receiver_member_info->nick_name, $receiver_member_info->email_address);
$oMail->send();
@ -172,8 +172,11 @@ class communicationController extends communication
*/
function sendMessage($sender_srl, $receiver_srl, $title, $content, $sender_log = TRUE)
{
$content = removeHackTag($content);
// Encode the title and content.
$title = htmlspecialchars($title, ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
$content = removeHackTag($content);
$title = utf8_mbencode($title);
$content = utf8_mbencode($content);
$message_srl = getNextSequence();
$related_srl = getNextSequence();

View file

@ -310,6 +310,10 @@ class documentController extends document
// An error appears if both log-in info and user name don't exist.
if(!$logged_info->member_srl && !$obj->nick_name) return new Object(-1,'msg_invalid_request');
// Fix encoding of non-BMP UTF-8 characters.
$obj->title = utf8_mbencode($obj->title);
$obj->content = utf8_mbencode($obj->content);
$obj->lang_code = Context::getLangType();
// Insert data into the DB
if(!$obj->status) $this->_checkDocumentStatusForOldVersion($obj);
@ -552,6 +556,10 @@ class documentController extends document
// If this is a temporary document, set regdate to the current time.
if($source_obj->get('status') == $this->getConfigStatus('temp')) $obj->regdate = date('YmdHis');
// Fix encoding of non-BMP UTF-8 characters.
$obj->title = utf8_mbencode($obj->title);
$obj->content = utf8_mbencode($obj->content);
// Insert data into the DB
$output = executeQuery('document.updateDocument', $obj);
if(!$output->toBool())