-- Lua string handling

--[[
-- String extension utilities: adds operations that the standard string
-- library does not provide.
--]]

local unpack = unpack or table.unpack

--- Split a string into the pieces found between separator characters.
-- The separator is spliced into a negated character class (`[^sep]+`), so
-- every character of `sSep` acts as a delimiter, pattern classes such as
-- `%s` are honoured, and empty fields are dropped.
-- @param s    string to split
-- @param sSep separator characters; defaults to a tab when nil
-- @return array of the non-empty fields, in their original order
local function split(s, sSep)
    local delimiters = sSep or "\t"
    local fields = {}
    -- gmatch yields the single capture of every match, left to right.
    for piece in string.gmatch(s, string.format("([^%s]+)", delimiters)) do
        fields[#fields + 1] = piece
    end
    return fields
end

--- Concatenate the elements of an array, placing `joiner` between them.
-- Elements are converted with `tostring`, so numbers and other values can be
-- joined as well. The joiner is inserted verbatim: a `%` in it is not treated
-- as a format directive.
-- Kept as a global function because the export assignment at file scope
-- (`string.join = join`) refers to it by that name.
-- @param join_table array (sequence) of values to join
-- @param joiner     separator placed between elements; nil means no separator
-- @return the joined string, or "" when the array is empty
function join(join_table, joiner)
    local count = #join_table
    if count == 0 then
        return ""
    end

    -- Stringify up front: table.concat only accepts strings and numbers.
    local pieces = {}
    for i = 1, count do
        pieces[i] = tostring(join_table[i])
    end

    return table.concat(pieces, joiner or "")
end

--- Report whether `pattern` occurs anywhere in `target_string`.
-- By default `pattern` is interpreted as a Lua pattern. When `plain` is
-- truthy, pattern matching is switched off and a literal substring search is
-- performed instead.
-- Kept as a global function because the export assignment at file scope
-- (`string.contains = contains`) refers to it by that name.
-- @param target_string string to search in
-- @param pattern       Lua pattern, or literal text when `plain` is truthy
-- @param plain         truthy to search for `pattern` literally
-- @return true when at least one match exists, false otherwise
function contains(target_string, pattern, plain)
    local needle = pattern
    if plain then
        -- Prefix every punctuation character with '%' so that magic
        -- characters (^$()%.[]*+-?) are matched literally.
        needle = string.gsub(pattern, "%p", "%%%0")
    end

    -- string.gsub is used on purpose: the original author reports that the
    -- string.find based version gave wrong results on the LDPlayer 3 emulator
    -- and on some phones. Limiting to one replacement stops at the first hit.
    local _, count = string.gsub(target_string, needle, "%0", 1)
    return count > 0
end

--- Report whether `target_string` begins with `start_pattern`.
-- `plain` defaults to true (literal comparison). Passing `plain = false`
-- makes `start_pattern` a Lua pattern that must match at position 1.
-- Kept as a global function because the export assignment at file scope
-- (`string.startswith = startswith`) refers to it by that name.
-- @param target_string string to inspect
-- @param start_pattern prefix text, or a Lua pattern when `plain` is false
-- @param plain         nil/true for a literal prefix, false for a pattern
-- @return true when the string starts with the prefix, false otherwise
function startswith(target_string, start_pattern, plain)
    -- `plain or true` would always yield true and silently ignore an
    -- explicit false, so only a missing argument is defaulted.
    if plain == nil then
        plain = true
    end

    -- The leftmost match starts at 1 exactly when a match at 1 exists.
    local find_pos_begin = string.find(target_string, start_pattern, 1, plain)
    return find_pos_begin == 1
end

--- Report whether `target_string` ends with `start_pattern`.
-- Despite its name, `start_pattern` is the suffix to look for. `plain`
-- defaults to true (literal comparison). Passing `plain = false` makes it a
-- Lua pattern that must match up to the end of the string. An empty suffix
-- always matches, mirroring `startswith`.
-- Kept as a global function because the export assignment at file scope
-- (`string.endswith = endswith`) refers to it by that name.
-- @param target_string string to inspect
-- @param start_pattern suffix text, or a Lua pattern when `plain` is false
-- @param plain         nil/true for a literal suffix, false for a pattern
-- @return true when the string ends with the suffix, false otherwise
function endswith(target_string, start_pattern, plain)
    -- `plain or true` would always yield true and silently ignore an
    -- explicit false, so only a missing argument is defaulted.
    if plain == nil then
        plain = true
    end

    if plain then
        local suffix_len = #start_pattern
        if suffix_len == 0 then
            return true
        end
        if suffix_len > #target_string then
            return false
        end
        return string.sub(target_string, -suffix_len) == start_pattern
    end

    -- Pattern mode: anchor the pattern at the end of the subject. A trailing
    -- '$' is already an anchor only when it is preceded by an even number of
    -- '%' characters; otherwise it is an escaped literal dollar sign.
    local anchored = start_pattern
    local trailing_escapes = string.match(start_pattern, "(%%*)%$$")
    if trailing_escapes == nil or #trailing_escapes % 2 == 1 then
        anchored = start_pattern .. "$"
    end
    return string.find(target_string, anchored) ~= nil
end

-- Attach the helpers to the standard `string` table so they can be called
-- as `string.split(s, sep)`.
string.split = split
string.join = join
string.contains = contains
string.startswith = startswith
string.endswith = endswith

--- Split a string on separator characters and convert every field to a number.
-- The separator is spliced into a negated character class (`[^sep]+`), so
-- each character of `sSep` is a delimiter and empty fields are dropped.
-- Fields that `tonumber` cannot convert become 0.
-- @param s    string to split
-- @param sSep separator characters; defaults to a tab when nil
-- @return array of numbers, one per non-empty field
string.isplit = function(s, sSep)
    local delimiters = sSep or "\t"
    local numbers = {}
    for piece in string.gmatch(s, string.format("([^%s]+)", delimiters)) do
        numbers[#numbers + 1] = tonumber(piece) or 0
    end
    return numbers
end

--- Report whether a value is nil or the empty string.
-- @param s value to test
-- @return true for nil and "", false for everything else
string.IsNilOrEmpty = function (s)
    if s == nil then
        return true
    end
    return s == ""
end

--- Count the characters of a multi-byte encoded string.
-- The width of each character is taken from its first byte:
-- above 239 means 4 bytes, above 223 means 3 bytes, above 128 means 2 bytes,
-- anything else 1 byte. NOTE(review): these thresholds look like UTF-8 lead
-- bytes; input is presumably well-formed UTF-8 - confirm against callers.
-- @param str value to measure
-- @return number of characters, 0 for "", or nil when `str` is not a string
string.GetLength = function (str)
    if type(str) ~= "string" then
        return nil
    end

    local total = #str
    local count = 0
    local pos = 1
    while pos <= total do
        local lead = string.byte(str, pos)
        local width
        if lead <= 128 then
            width = 1  -- single-byte character
        elseif lead <= 223 then
            width = 2  -- two-byte character
        elseif lead <= 239 then
            width = 3  -- three-byte character (e.g. CJK)
        else
            width = 4  -- four-byte character
        end
        pos = pos + width
        count = count + 1
    end
    return count
end

--- Return the leading single-byte (ASCII) portion of a string.
-- Scanning stops at the first byte with value 128 or above, i.e. at the
-- first byte that is not 7-bit ASCII; everything before it is returned.
-- @param str value to inspect
-- @return the ASCII prefix ("" when the string starts with a non-ASCII
--         byte, the whole string when it is pure ASCII), or nil when `str`
--         is not a string or is empty
string.GetNumOrEnglish = function (str)
    if type(str) ~= "string" or #str == 0 then
        return nil
    end

    for i = 1, #str do
        -- ASCII ends at 127, so 128 itself already counts as non-ASCII.
        if string.byte(str, i) >= 128 then
            return string.sub(str, 1, i - 1)
        end
    end
    return str
end

--- Split a multi-byte encoded string into an array of single characters.
-- The width of each character is taken from its first byte:
-- above 239 means 4 bytes, above 223 means 3 bytes, above 128 means 2 bytes,
-- anything else 1 byte. NOTE(review): these thresholds look like UTF-8 lead
-- bytes; input is presumably well-formed UTF-8 - confirm against callers.
-- @param str value to split
-- @return array of character strings; empty when `str` is not a string or
--         is the empty string
string.splitChar = function(str)
    if type(str) ~= "string" or #str <= 0 then
        return {}
    end

    local pieces = {}
    local total = #str
    local pos = 1
    repeat
        local lead = string.byte(str, pos)
        local next_pos
        if lead > 239 then
            next_pos = pos + 4  -- four-byte character
        elseif lead > 223 then
            next_pos = pos + 3  -- three-byte character (e.g. CJK)
        elseif lead > 128 then
            next_pos = pos + 2  -- two-byte character
        else
            next_pos = pos + 1  -- single-byte character
        end
        pieces[#pieces + 1] = string.sub(str, pos, next_pos - 1)
        pos = next_pos
    until pos > total
    return pieces
end

--- Extract a substring addressed by character positions instead of bytes.
-- Negative positions count back from the last character, using the
-- character total reported by `string.SubStringGetTotalIndex`. Character
-- positions are translated to byte offsets with
-- `string.SubStringGetTrueIndex`.
-- @param str        source string
-- @param startIndex position of the first character to keep
-- @param endIndex   position of the last character to keep; nil keeps
--                   everything up to the end of the string
-- @return the selected substring
string.SubStringUTF8 = function(str, startIndex, endIndex)
    local firstChar = startIndex
    if firstChar < 0 then
        firstChar = string.SubStringGetTotalIndex(str) + firstChar + 1
    end

    if endIndex == nil then
        return string.sub(str, string.SubStringGetTrueIndex(str, firstChar))
    end

    local lastChar = endIndex
    if lastChar < 0 then
        lastChar = string.SubStringGetTotalIndex(str) + lastChar + 1
    end

    local firstByte = string.SubStringGetTrueIndex(str, firstChar)
    -- The last byte to keep is the one just before the following character.
    local lastByte = string.SubStringGetTrueIndex(str, lastChar + 1) - 1
    return string.sub(str, firstByte, lastByte)
end

--- Count the characters in a string of mixed single- and multi-byte text.
-- Walks the string one character at a time, using
-- `string.SubStringGetByteCount` for the width of each character, and stops
-- when that helper reports a width of 0.
-- @param str string to measure
-- @return number of characters
string.SubStringGetTotalIndex = function(str)
    local pos = 1
    local chars = 0
    while true do
        local width = string.SubStringGetByteCount(str, pos)
        if width == 0 then
            return chars
        end
        pos = pos + width
        chars = chars + 1
    end
end

--- Translate a character position into the byte offset where it starts.
-- Steps through the string character by character, using
-- `string.SubStringGetByteCount` for each width, until `index` characters
-- have been visited. At least one character is always visited, so an
-- `index` below 1 yields offset 1. Once the walk runs past the last byte the
-- offset stops advancing, so positions past the end yield the offset just
-- after the last visited character.
-- @param str   string to walk
-- @param index 1-based character position
-- @return byte offset of the first byte of that character
string.SubStringGetTrueIndex = function(str, index)
    local charStart = 1
    local nextStart = 1
    local visited = 0
    repeat
        charStart = nextStart
        nextStart = charStart + string.SubStringGetByteCount(str, charStart)
        visited = visited + 1
    until (visited >= index)
    return charStart
end

--- Return how many bytes the character starting at `index` occupies.
-- The width is derived from the byte found at `index`:
-- 240-247 means 4 bytes, 224-239 means 3 bytes, 192-223 means 2 bytes.
-- Every other byte value counts as 1, and a position outside the string
-- yields 0.
-- @param str   string to inspect
-- @param index byte position of the character's first byte
-- @return width in bytes (0 when there is no byte at `index`)
string.SubStringGetByteCount = function(str, index)
    local lead = string.byte(str, index)
    if lead == nil then
        return 0
    end
    if lead >= 240 and lead <= 247 then
        return 4
    end
    if lead >= 224 and lead <= 239 then
        return 3
    end
    if lead >= 192 and lead <= 223 then
        return 2
    end
    return 1
end

--- Remove leading and trailing whitespace from a string.
-- @param s string to trim
-- @return the string without surrounding whitespace
string.trim = function(s)
    -- The lazy capture keeps the middle as short as possible, which leaves
    -- the trailing whitespace to the final `%s*`.
    local core = s:match("^%s*(.-)%s*$")
    return core
end
-- Lua string handling

-- Recommended reading: more great content