项目中经常会遇到各种各样的需求,这次遇到的是获取汉字拼音首字母的需求。
在网上找了几份相关的方法,细数下来以下两个方法会稍微好一点。
虽然同样有很多识别不出来,但是比起其他已经好很多
第一个方法是我觉得用的比较好一些的
它是封装的一个类库
直接上代码
<?php
namespace app\index\controller;

/**
 * Pinyin-initial helper for Chinese text.
 *
 * Modified by fuyong @ 2015-09-13 (fixed the lookup of the initial letter).
 *
 * Behaviour:
 *  - pure ASCII input (letters, digits, ...) is returned unchanged:  abc123 => abc123
 *  - Chinese input yields the upper-case pinyin initials:            测试字符串 => CSZFC
 *  - mixed input keeps the ASCII part, upper-cased:                  我i我j => WIWJ
 *
 * Only GB2312 level-1 hanzi (0xB0A1-0xD7F9) are stored in pinyin order, so only
 * those can be resolved; every other non-ASCII character is skipped.
 *
 * Usage:
 *   $py = new Aafirstchar();
 *   $result = $py->getInitials('我想和你在一起');
 *   $result = $py->getFirstchar('小时候我就想和你在一起');
 */
class Aafirstchar
{
    /**
     * First code of every pinyin initial, in ascending order.
     * A code is "lead byte * 1000 + trail byte" of the GB2312 encoding.
     *
     * @var array
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /**
     * Last code of the pinyin-ordered area (0xD7F9). Codes above it belong to
     * hanzi that are ordered by radical, so no initial can be derived for them.
     *
     * @var int
     */
    private $_lastCode = 215249;

    /**
     * Encoding of the strings handed to this object.
     *
     * @var string|null
     */
    private $_charset = null;

    /**
     * Constructor; selects the encoding of the input strings.
     *
     * @param string $charset 'utf-8' (default, compared case-insensitively);
     *                        any other value means the input is already GBK/GB2312
     */
    public function __construct($charset = 'utf-8')
    {
        $this->_charset = $charset;
    }

    /**
     * Tell whether the configured input encoding is UTF-8.
     *
     * @return bool
     */
    private function _isUtf8Charset()
    {
        $name = strtolower((string) $this->_charset);

        return $name === 'utf-8' || $name === 'utf8';
    }

    /**
     * Resolve one double-byte character to its pinyin initial.
     *
     * @param int $lead  first byte of the GBK sequence
     * @param int $trail second byte of the GBK sequence
     * @return string the initial (A-Z), or '' when the character is not a
     *                GB2312 level-1 hanzi
     */
    private function _initialOf($lead, $trail)
    {
        // GB2312 trail bytes are 0xA1-0xFE; anything else is a GBK extension
        // character that is not part of the pinyin-ordered table.
        if ($trail < 0xA1 || $trail > 0xFE) {
            return '';
        }

        $code = $lead * 1000 + $trail;
        if ($code > $this->_lastCode) {
            return '';
        }

        // The table is ascending: the last entry whose start is <= $code wins.
        $letter = '';
        foreach ($this->_pinyins as $start => $initial) {
            if ($code < $start) {
                break;
            }
            $letter = $initial;
        }

        return $letter;
    }

    /**
     * Get the pinyin initials of a string.
     *
     * @param string $str input in the encoding given to the constructor
     * @return string pure ASCII input unchanged; otherwise the upper-cased
     *                ASCII characters and pinyin initials in input order.
     *                '' for empty input or when the input cannot be converted.
     */
    public function getInitials($str)
    {
        if (!is_string($str)) {
            // Numbers are treated like their string form; nothing else has initials.
            if (!is_int($str) && !is_float($str)) {
                return '';
            }
            $str = (string) $str;
        }
        if ($str === '') {
            return '';
        }

        // Pure ASCII strings are handed back untouched.
        if (!preg_match('/[\x80-\xff]/', $str)) {
            return $str;
        }

        if ($this->_isUtf8Charset()) {
            $str = iconv('UTF-8', 'GBK//IGNORE', $str);
            if ($str === false) {
                return '';
            }
        }

        $result = '';
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++) {
            $lead = ord($str[$i]);
            if ($lead < 0x80) {
                $result .= $str[$i];
                continue;
            }
            // 0x80 and 0xFF are not GBK lead bytes; a lead byte at the very end
            // has no trail byte. Both are stray bytes and are dropped.
            if ($lead < 0x81 || $lead > 0xFE || $i + 1 >= $length) {
                continue;
            }
            $i++;
            $result .= $this->_initialOf($lead, ord($str[$i]));
        }

        return strtoupper($result);
    }

    /**
     * Get the first character of the initials of a string.
     *
     * @param string $str
     * @return string a single character, or '' when there is none
     */
    public function getFirstchar($str)
    {
        return (string) substr($this->getInitials($str), 0, 1);
    }
}
?>
这个是我个人认为比其他的好一点的方法
借鉴于 liiuweii 大大的博客,地址:https://blog.csdn.net/liiuweii/article/details/52239508
其他方法基本是使用ASCII码表来确定的,这种的获取不如上面的方法好
我选择了两个比较有对比性的存放了一下下
方法一
/**
 * Return the initial of the first character of a UTF-8 string.
 *
 * - hanzi:      upper-case pinyin initial (GB2312 level-1 characters only)
 * - digit:      pinyin initial of the digit's Chinese name (1 => Y, 2 => E, ...)
 * - letter:     the letter, upper-cased
 * - other byte: the byte itself
 *
 * @param string $s0 UTF-8 text
 * @return string|null the initial; '' for empty input; null when the first
 *                     character is non-ASCII and no initial is known for it
 */
function getfirstchar($s0)
{
    $s0 = (string) $s0;
    if ($s0 === '') {
        return '';
    }

    $byte = ord($s0[0]);

    if ($byte >= 0x80) {
        // Convert the first character only, so that an unconvertible character
        // later in the string cannot spoil the result.
        if (!preg_match('/^./us', $s0, $match)) {
            return null; // not valid UTF-8
        }
        $gb = iconv('UTF-8', 'gb2312', $match[0]);
        if ($gb === false || strlen($gb) < 2) {
            return null;
        }

        // GB2312 code of the character, expressed as a negative 16-bit number
        $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

        // First code of every initial, ascending; the pinyin-ordered area ends at -10247.
        $starts = array(
            -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
            -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
            -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
            -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
            -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
            -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
        );
        if ($asc > -10247) {
            return null;
        }
        $letter = null;
        foreach ($starts as $start => $initial) {
            if ($asc < $start) {
                break;
            }
            $letter = $initial;
        }

        return $letter;
    }

    if ($byte >= 48 && $byte <= 57) {
        // Digits map to the pinyin initial of their Chinese name.
        $digits = array(
            '0' => 'L', '1' => 'Y', '2' => 'E', '3' => 'S', '4' => 'S',
            '5' => 'W', '6' => 'L', '7' => 'Q', '8' => 'B', '9' => 'J',
        );

        return $digits[$s0[0]];
    }

    if ($byte >= 65 && $byte <= 90) {
        return $s0[0];
    }

    if ($byte >= 97 && $byte <= 122) {
        return strtoupper($s0[0]);
    }

    // Anything else (punctuation, whitespace, ...): hand back the character itself.
    return $s0[0];
}
本篇借鉴于 php攻城师 大大的博客,地址:https://blog.csdn.net/PHP1923880282/article/details/8833192
方法二
/**
 * Get the initial of the first character of a UTF-8 string.
 *
 * @param string $str UTF-8 text
 * @return string|null '' for empty input; the upper-cased letter when the text
 *                     starts with A-Z/a-z; the pinyin initial when it starts
 *                     with a known hanzi; null otherwise
 */
function getFirstCharters($str)
{
    if (empty($str)) {
        return '';
    }
    $str = (string) $str;

    $first = $str[0];
    if (ord($first) <= 127) {
        // ASCII: only real letters have an initial.
        if (($first >= 'A' && $first <= 'Z') || ($first >= 'a' && $first <= 'z')) {
            return strtoupper($first);
        }

        return null;
    }

    // Take exactly one UTF-8 character, whatever its byte length.
    if (!preg_match('/^./us', $str, $match)) {
        return null; // not valid UTF-8
    }
    $gb = iconv('UTF-8', 'gb2312//IGNORE', $match[0]);
    if ($gb === false || strlen($gb) < 2) {
        return null;
    }

    // GB2312 code of the character, expressed as a negative 16-bit number
    $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

    // First code of every initial, ascending; the pinyin-ordered area ends at -10247.
    $starts = array(
        -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
        -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
        -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
        -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
        -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
        -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
    );
    if ($asc <= -10247) {
        $letter = null;
        foreach ($starts as $start => $initial) {
            if ($asc < $start) {
                break;
            }
            $letter = $initial;
        }
        if ($letter !== null) {
            return $letter;
        }
    }

    // Outside the pinyin-ordered area: try the table of rare surname characters.
    return rare_words($asc);
}

/**
 * Look up the initial of a rare character from the "Hundred Family Surnames".
 *
 * @param int|string $asc GB2312 code of the character minus 65536
 * @return string|null the initial, or null when the code is not in the table
 */
function rare_words($asc = '')
{
    if (!is_int($asc) && !is_string($asc)) {
        return null;
    }

    // code => initial; the trailing comment shows the character itself
    $rare = array(
        -3652 => 'D',  // 窦
        -8503 => 'X',  // 奚
        -9286 => 'F',  // 酆
        -7761 => 'C',  // 岑
        -5128 => 'T',  // 滕
        -9479 => 'W',  // 邬
        -5456 => 'Z',  // 臧
        -7223 => 'M',  // 闵
        -2877 => 'Q',  // 裘
        -6191 => 'M',  // 缪
        -5414 => 'B',  // 贲
        -4102 => 'J',  // 嵇
        -8969 => 'X',  // 荀
        -4938 => 'Y',  // 於
        -9017 => 'R',  // 芮
        -2848 => 'Y',  // 羿
        -9477 => 'B',  // 邴
        -9485 => 'K',  // 隗
        -6731 => 'M',  // 宓
        -9299 => 'X',  // 郗
        -5905 => 'L',  // 栾
        -4393 => 'T',  // 钭
        -9300 => 'G',  // 郜
        -8706 => 'L',  // 蔺
        -3613 => 'X',  // 胥
        -8777 => 'S',  // 莘
        -6708 => 'P',  // 逄
        -9302 => 'L',  // 郦
        -5965 => 'Q',  // 璩
        -6745 => 'P',  // 濮
        -4888 => 'H',  // 扈
        -9309 => 'J',  // 郏
        -5428 => 'Y',  // 晏
        -2849 => 'J',  // 暨
        -7206 => 'Q',  // 阙
        -4945 => 'S',  // 殳
        -9753 => 'K',  // 夔
        -10041 => 'S', // 厍
        -5429 => 'C',  // 晁
        -2396 => 'Z',  // 訾
        -7205 => 'K',  // 阚
        -10049 => 'N', // 乜
        -10015 => 'K', // 蒯
        -3133 => 'Z',  // 竺
        -6698 => 'L',  // 逯
        -9799 => 'Q',  // 俟
        -6749 => 'T',  // 澹
        -7220 => 'L',  // 闾
        -10047 => 'Q', // 亓
        -10005 => 'Z', // 仉
        -3417 => 'Z',  // 颛
        -6431 => 'S',  // 驷
        -7226 => 'Y',  // 闫
        -9293 => 'Y',  // 鄢
        -6205 => 'G',  // 缑
        -9764 => 'S',  // 佘
        -9818 => 'N',  // 佴
        -9509 => 'Q',  // 谯
        -3122 => 'D',  // 笪
        -9823 => 'T',  // 佟
    );

    return isset($rare[$asc]) ? $rare[$asc] : null;
}

// Test:
echo getFirstCharters('窦');
本篇借鉴于 学知无涯 大大的博客,地址:https://www.cnblogs.com/gyfluck/p/8521259.html
然后就是最基础版本的方法
/**
 * Get the initial of the first character of a string.
 *
 * The text is expected to be UTF-8. A string that is not valid UTF-8 is
 * assumed to be GB2312/GBK already and its first two bytes are used as is.
 *
 * @param string $str
 * @return string '' for empty input; the upper-cased letter when the text
 *                starts with A-Z/a-z; the pinyin initial for a GB2312 level-1
 *                hanzi; '#' for everything else
 */
function Getzimu($str)
{
    if (empty($str)) {
        return '';
    }
    $str = (string) $str;

    $first = $str[0];
    if (($first >= 'A' && $first <= 'Z') || ($first >= 'a' && $first <= 'z')) {
        return strtoupper($first);
    }
    if (ord($first) < 0x80) {
        return '#'; // digits, punctuation, ...
    }

    if (preg_match('/^./us', $str, $match)) {
        // Convert the first character only, so that an unconvertible character
        // later in the string cannot spoil the result.
        $gb = iconv('UTF-8', 'gb2312', $match[0]);
        if ($gb === false) {
            return '#';
        }
    } else {
        // Not UTF-8: treat the bytes as GB2312.
        $gb = $str;
    }
    if (strlen($gb) < 2) {
        return '#';
    }

    // GB2312 code of the character, expressed as a negative 16-bit number
    $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

    // First code of every initial, ascending; the pinyin-ordered area ends at -10247.
    $starts = array(
        -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
        -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
        -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
        -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
        -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
        -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
    );
    if ($asc > -10247) {
        return '#';
    }
    $letter = '#';
    foreach ($starts as $start => $initial) {
        if ($asc < $start) {
            break;
        }
        $letter = $initial;
    }

    return $letter;
}
本篇借鉴于 PEIZIJUN 大大的博客,地址:https://www.jianshu.com/p/2ac2b9c369b3
最后是我个人找到的最近的一个版本,和上一种方法基本一致
/**
 * Get the initial of the first character of a string.
 *
 * @param string $str     the text
 * @param string $charset encoding of $str: 'UTF-8' (default) or 'gbk'/'gb2312'
 *                        when the application already works with GBK strings
 * @return string|false the upper-cased letter when the text starts with
 *                      A-Z/a-z; the pinyin initial for a GB2312 level-1 hanzi;
 *                      false for everything else (empty input, digits,
 *                      punctuation, unknown or unconvertible characters)
 */
function Getzimu($str, $charset = 'UTF-8')
{
    $str = (string) $str;
    if ($str === '') {
        return false;
    }

    $first = $str[0];
    if (($first >= 'A' && $first <= 'Z') || ($first >= 'a' && $first <= 'z')) {
        return strtoupper($first);
    }
    if (ord($first) < 0x80) {
        return false;
    }

    if (in_array(strtolower((string) $charset), array('gbk', 'gb2312'), true)) {
        // Already GBK: the first character is the first two bytes.
        $gb = substr($str, 0, 2);
    } else {
        // Convert the first character only, so that an unconvertible character
        // later in the string cannot spoil the result.
        if (!preg_match('/^./us', $str, $match)) {
            return false; // not valid UTF-8
        }
        $gb = iconv('UTF-8', 'gb2312', $match[0]);
    }
    if ($gb === false || strlen($gb) < 2) {
        return false;
    }

    // GB2312 code of the character, expressed as a negative 16-bit number
    $val = ord($gb[0]) * 256 + ord($gb[1]) - 65536;

    // First code of every initial, ascending; the pinyin-ordered area ends at -10247.
    $starts = array(
        -20319 => 'A', -20283 => 'B', -19775 => 'C', -19218 => 'D',
        -18710 => 'E', -18526 => 'F', -18239 => 'G', -17922 => 'H',
        -17417 => 'J', -16474 => 'K', -16212 => 'L', -15640 => 'M',
        -15165 => 'N', -14922 => 'O', -14914 => 'P', -14630 => 'Q',
        -14149 => 'R', -14090 => 'S', -13318 => 'T', -12838 => 'W',
        -12556 => 'X', -11847 => 'Y', -11055 => 'Z',
    );
    if ($val > -10247) {
        return false;
    }
    $letter = false;
    foreach ($starts as $start => $initial) {
        if ($val < $start) {
            break;
        }
        $letter = $initial;
    }

    return $letter;
}
以上就是本人找到的获取首字母的相关方法,希望对您有帮助
2020年07月01日