golang截取字符串 按字节截取 utf-8不乱码

2012-06-24 10:52  4812人阅读  评论 (0)
Tags: golang, go

Go 的字符串采用 UTF-8 编码，一个字符可能占用多个字节。如果把字符串转换为 []byte 后直接按字节数截取，截断位置可能落在某个多字节字符的中间，结果末尾就会出现乱码。因此需要自定义一个函数，把截断位置回退到完整字符的边界上。

// SubstrByByte returns the longest prefix of str that is at most length
// bytes long and does not end in the middle of a UTF-8 encoded character.
//
// A length that is zero or negative yields "", and a length greater than or
// equal to len(str) yields str unchanged. When the requested cut would fall
// inside a multi-byte character, that whole character is dropped, so the
// result may be shorter than length bytes.
//
// str is expected to hold valid UTF-8. For other input the only guarantee is
// that the byte following the returned prefix is not a continuation byte.
func SubstrByByte(str string, length int) string {
	if length <= 0 {
		return ""
	}
	if length >= len(str) {
		// Nothing to cut; this also avoids slicing past the end of str.
		return str
	}
	// str[end] is the first byte that would be left out. If it is a UTF-8
	// continuation byte (bit pattern 10xxxxxx), the cut splits a character,
	// so move the cut back until it sits on the first byte of a character.
	end := length
	for end > 0 && str[end]&0xC0 == 0x80 {
		end--
	}
	return str[:end]
}
豫ICP备09035262号-1