--- Returns the byte length of the UTF-8 sequence introduced by lead byte `c`.
-- The length is derived from the high bits of the lead byte:
--   0xF0 and above -> 4 bytes, 0xE0..0xEF -> 3 bytes, 0xC0..0xDF -> 2 bytes.
-- ASCII bytes and stray continuation bytes (0x80..0xBF) report 1, so a scan
-- over malformed input still advances one byte at a time.
-- @param c  numeric byte value (as returned by string.byte), or nil
-- @return   sequence length in bytes; 0 when `c` is nil/false
function string.utf8CharSize(c)
    if not c then return 0 end
    -- The comparisons are inclusive: 0xF0 (240) and 0xE0/0xE1 (224/225) are
    -- themselves valid lead bytes (U+10000..U+3FFFF and U+0800..U+1FFF).
    if c >= 0xF0 then return 4 end
    if c >= 0xE0 then return 3 end
    if c >= 0xC0 then return 2 end
    return 1
end

-- Advances from byte offset `pos` over at most `n` UTF-8 characters of `str`.
-- Returns the byte offset of the character that follows the skipped ones
-- (it may lie past the end of `str` when the end is reached).
local function utf8Advance(str, pos, n)
    local byteCount = #str
    while n > 0 and pos <= byteCount do
        pos = pos + string.utf8CharSize(string.byte(str, pos))
        n = n - 1
    end
    return pos
end

--- Counts the UTF-8 characters in `str`.
-- @param str  string to measure; nil and "" both count as empty
-- @return     number of characters (not bytes)
-- Raises an assertion error ("not string") for any other non-string value.
function string.utf8StringLen(str)
    if nil == str or "" == str then return 0 end
    assert("string" == type(str), "not string")

    local len = 0
    local i = 1
    local byteCount = #str
    while i <= byteCount do
        i = i + string.utf8CharSize(string.byte(str, i))
        len = len + 1
    end
    return len
end

--- Extracts a substring addressed in characters rather than bytes.
-- @param str    source string; nil is treated as the empty string
-- @param index  1-based index of the first character to keep (default 1)
-- @param count  maximum number of characters to keep (default: up to the end)
-- @return       the selected substring; "" when the range is empty
-- Raises an assertion error ("not string") when `str` is neither nil nor a string.
function string.utf8StringSub(str, index, count)
    if nil == str then return "" end
    assert("string" == type(str), "not string")
    if nil == index then index = 1 end

    -- Byte offset of the first requested character.
    local first = utf8Advance(str, 1, index - 1)

    -- No count means "everything that is left": no need to measure the string.
    if nil == count then return str:sub(first) end

    -- Byte offset just past the last requested character.
    local last = utf8Advance(str, first, count)
    return str:sub(first, last - 1)
end

--- Splits part of `str` into individual UTF-8 characters stored in a table.
-- @param str       source string; nil is treated as the empty string
-- @param index     1-based index of the first character to copy (default 1)
-- @param count     maximum number of characters to copy (default: up to the end)
-- @param dstTb     destination table (default: a new table)
-- @param dstIndex  position in `dstTb` of the first character; when nil the
--                  characters are appended with table.insert
-- @return dstTb    the destination table
-- @return written  number of characters actually stored in `dstTb`
-- Raises an assertion error ("not string") when `str` is neither nil nor a string.
function string.utf8CharArray(str, index, count, dstTb, dstIndex)
    if nil == str then str = "" end
    assert("string" == type(str), "not string")
    if nil == index then index = 1 end
    if nil == dstTb then dstTb = {} end

    local byteCount = #str
    -- Byte offset of the first requested character.
    local pos = utf8Advance(str, 1, index - 1)

    local written = 0
    while (nil == count or written < count) and pos <= byteCount do
        local size = string.utf8CharSize(string.byte(str, pos))
        local utf8Char = str:sub(pos, pos + size - 1)
        if nil == dstIndex then
            table.insert(dstTb, utf8Char)
        else
            dstTb[dstIndex + written] = utf8Char
        end
        pos = pos + size
        written = written + 1
    end

    -- Report what was really stored, which can be fewer than requested when
    -- the string ends first.
    return dstTb, written
end
-- Test code:
-- Smoke test for the UTF-8 string helpers: character counting, character-based
-- substring extraction and splitting into a character table.
local function Test1()
    -- { input, expected character count }
    local lengthCases = {
        { "123abc", 6 },
        { "中文", 2 },
        { "中文1a", 4 },
        { ".,;<>", 5 },
        { "。,;《》", 5 },
        { ".,;<>。,;《》", 10 },
    }
    for _, case in ipairs(lengthCases) do
        assert(case[2] == string.utf8StringLen(case[1]))
    end

    -- { input, expected substring for characters 2..3 }
    local subCases = {
        { "中文1a", "文1" },
        { "中文啊呀", "文啊" },
    }
    for _, case in ipairs(subCases) do
        assert(case[2] == string.utf8StringSub(case[1], 2, 2))
    end

    -- Characters 2..3 of the input, one table entry per character.
    local chars = string.utf8CharArray("中文啊呀", 2, 2)
    local expectedChars = { "文", "啊" }
    for pos, expected in ipairs(expectedChars) do
        assert(expected == chars[pos])
    end
end

Test1()