Check Unicode Kanji
Check the byte length of full-width and half-width Japanese characters (kana and kanji)
Java
import java.io.UnsupportedEncodingException;

/**
 * Prints how many bytes a full-width katakana, a half-width katakana and a
 * kanji character occupy when encoded as Shift_JIS and as UTF-8.
 *
 * <p>Expected output: Shift_JIS uses 2 bytes for the full-width katakana and
 * the kanji but only 1 byte for the half-width katakana, while UTF-8 uses
 * 3 bytes for each of the three characters.
 *
 * @author lonelyCat
 */
public class demo {

    /** Charset names passed to {@code String.getBytes}, in reporting order. */
    private static final String[] CHARSET_NAMES = {"Shift_JIS", "UTF8"};

    /**
     * Runs the demonstration and writes one line per (sample, charset) pair
     * to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The samples are written as ASCII Unicode escapes so that they survive
        // any source-file encoding or copy/paste normalisation. In particular
        // the half-width sample must stay U+FF85; if it is turned into the
        // full-width U+30CA the Shift_JIS length becomes 2 instead of 1.
        String fullKana = "\u30CA"; // KATAKANA LETTER NA (full-width)
        String halfKana = "\uFF85"; // HALFWIDTH KATAKANA LETTER NA
        String kanji = "\u4F7F";    // CJK ideograph U+4F7F

        try {
            for (String charsetName : CHARSET_NAMES) {
                report("fullKana", fullKana, charsetName);
                report("halfKana", halfKana, charsetName);
                report("kanji", kanji, charsetName);
            }
        } catch (UnsupportedEncodingException e) {
            // Shift_JIS is an extended charset and may be missing from a
            // stripped-down runtime; report the problem and end normally.
            e.printStackTrace();
        }
    }

    /**
     * Prints the encoded byte length of {@code text} in the given charset.
     *
     * @param label       name of the sample shown in the output line
     * @param text        the string to encode
     * @param charsetName charset name understood by {@code String.getBytes}
     * @throws UnsupportedEncodingException if the runtime lacks the charset
     */
    private static void report(String label, String text, String charsetName)
            throws UnsupportedEncodingException {
        int byteCount = text.getBytes(charsetName).length;
        System.out.println("=====" + label + "==" + charsetName + "===" + byteCount);
    }
}
run: =====fullKana==Shift_JIS===2 =====halfKana==Shift_JIS===1 =====kanji==Shift_JIS===2 =====fullKana==UTF8===3 =====halfKana==UTF8===3 =====kanji==UTF8===3 BUILD SUCCESSFUL (total time: 0 seconds)
// REFERENCE UNICODE TABLES:
// kanji_codes.unicode.shtml
// unicode.html
//
// TEST EDITOR:
// tools/regex
//
// UNICODE RANGE : DESCRIPTION
//
// [\u3000-\u303F] : punctuation
// [\u3040-\u309F] : hiragana
// [\u30A0-\u30FF] : katakana
// [\uFF00-\uFFEF] : Full-width roman characters and half-width katakana
// [\u4E00-\u9FAF] : CJK unified ideographs - Common and uncommon kanji
//
// Non-Japanese punctuation/formatting characters commonly used in Japanese text
// 2605-2606 : Stars
// 2190-2195 : Arrows
// 203B : Reference mark (※, "komejirushi"), used like an asterisk
- Japanese style punctuation:
[\u3000-\u303f]
- Hiragana:
[\u3040-\u309f]
- Katakana:
[\u30a0-\u30ff]
- Full-width Roman characters + half-width katakana:
[\uff00-\uffef]
- Kanji:
[\u4e00-\u9faf]|[\u3400-\u4dbf]
0 nhận xét:
Post a Comment