HTML 内容中的字符可以用 &#x…; 这样的字符实体编码来表示。
有三种
&nbsp; 空格等特殊符号(命名实体)
&#十进制数字;
&#x十六进制;
前面一篇文章讲过 JS 中的互相转换,这里主要讲 PHP 的做法,具体看代码。
土办法
// Quick-and-dirty approach: lean on the JSON functions for \uXXXX escapes.
$str = "测试";
// json_encode() escapes non-ASCII characters by default, so this prints
// the JSON string literal "\u6d4b\u8bd5" (quotes included).
print json_encode($str);
// Decoding a JSON string literal turns the \uXXXX escapes back into text: prints 测试.
print json_decode('"\u6D4B\u8BD5"');
代码简洁效率版:
// Demo: encode a string to hexadecimal character references, then decode
// such references back to text.
$str = "测试";
// Prints &#x6d4b;&#x8bd5;
echo htmlEncode($str) . PHP_EOL;
// The input must contain the references themselves for the demo to show
// decoding; prints 测试
echo htmlDecode('&#x6d4b;&#x8bd5;') . PHP_EOL;
/**
 * Encode every character of a string as a hexadecimal code-point escape.
 *
 * With the default arguments each character becomes an HTML numeric
 * character reference: "&#x" + lowercase hex code point + ";",
 * e.g. "测试" => "&#x6d4b;&#x8bd5;".
 *
 * The string is split and measured with the mbstring internal encoding
 * (no explicit encoding is passed to mb_str_split() / mb_ord()).
 *
 * @param string $name Text to encode.
 * @param string $pre  Prefix written before each hex code point. Passing '\u'
 *                     (with $back = '') yields "\u6d4b"-style output; the hex
 *                     digits are not zero-padded.
 * @param string $back Suffix written after each hex code point.
 *
 * @return string The concatenated escapes; '' for an empty input.
 */
function htmlEncode($name, $pre = '&#x', $back = ';')
{
    $str = '';
    foreach (mb_str_split($name) as $mbc) {
        // For the decimal form one would emit '&#' . mb_ord($mbc) . ';' instead.
        $str .= $pre . dechex(mb_ord($mbc)) . $back;
    }
    return $str;
}
/**
 * Decode HTML character references (named, decimal and hexadecimal) to UTF-8 text.
 *
 * The flags and charset are passed explicitly instead of relying on the
 * defaults of html_entity_decode(): before PHP 8.1 the default was ENT_COMPAT,
 * which leaves single-quote references such as "&#x27;" / "&#39;" undecoded,
 * so text produced by htmlEncode() would not round-trip on those versions.
 *
 * @param string $name Text containing character references.
 *
 * @return string The decoded UTF-8 string.
 */
function htmlDecode($name)
{
    return html_entity_decode($name, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401, 'UTF-8');
}
旧式复杂版,限中文,用来理解原理。
/**
 * Legacy encoder kept to illustrate the principle: convert the text to
 * 16-bit UCS-2 units and write each unit as four lowercase hex digits
 * wrapped in $pre / $back, e.g. "测试" => "&#x6d4b;&#x8bd5;".
 *
 * Every character is escaped, ASCII included ("A" => "&#x0041;").
 * Only characters representable in UCS-2 (the Basic Multilingual Plane)
 * are supported.
 *
 * @param string $name UTF-8 text to encode.
 * @param string $pre  Prefix written before each 4-digit hex unit.
 * @param string $back Suffix written after each 4-digit hex unit.
 *
 * @return string The escaped text; '' when the input is empty or when
 *                iconv() cannot convert it to UCS-2.
 */
function unicode_encode($name, $pre = '&#x', $back = ';')
{
    // Pin the byte order explicitly: a bare 'UCS-2' does not have the same
    // byte order on every iconv implementation, and reading the bytes in the
    // wrong order either swaps the hex digits or mistakes a character whose
    // low byte is 0x00 (e.g. U+4E00) for plain ASCII.
    $ucs2 = iconv('UTF-8', 'UCS-2BE', $name);
    if ($ucs2 === false || $ucs2 === '') {
        return '';
    }

    $str = '';
    // Big-endian units are already in "high byte, low byte" order, so the
    // hex dump of each 2-byte unit is exactly the 4-digit code point.
    foreach (str_split($ucs2, 2) as $unit) {
        $str .= $pre . bin2hex($unit) . $back;
    }
    return $str;
}
/**
 * Legacy decoder kept to illustrate the principle: turn 4-hex-digit escapes
 * back into UTF-8 text.
 *
 * Recognised escapes are "&#xHHHH" and "\uHHHH", each optionally followed by
 * a ";" which is consumed with the escape. All other text, including spaces,
 * punctuation, unrelated semicolons and raw UTF-8 characters, is passed
 * through unchanged.
 *
 * Escapes in the UTF-16 surrogate range (D800-DFFF) cannot be expressed in
 * UCS-2 and are left as they were written.
 *
 * @param string $name Text containing "&#xHHHH;" and/or "\uHHHH" escapes.
 *
 * @return string The text with the recognised escapes replaced by UTF-8 characters.
 */
function unicode_decode($name)
{
    $decoded = preg_replace_callback(
        '/(?:&#x|\\\\u)([0-9a-fA-F]{4});?/',
        static function ($match) {
            $codePoint = hexdec($match[1]);
            if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
                // A lone surrogate is not a character; keep the escape as-is.
                return $match[0];
            }
            // The hex digits are "high byte, low byte", i.e. big-endian, so
            // name the byte order explicitly instead of relying on what a
            // bare 'UCS-2' means on the current platform.
            $char = iconv('UCS-2BE', 'UTF-8', hex2bin($match[1]));
            return $char === false ? $match[0] : $char;
        },
        $name
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the input.
    return $decoded === null ? $name : $decoded;
}