#!/usr/bin/env ruby
# coding: utf-8
# frozen_string_literal: true

# Generator for an HTML document that contains variant characters (itaiji).
#
# For every target character the script prints the character itself, its
# Unicode code point, its UTF-8 byte sequence, and the character followed by
# each of eight consecutive variation selectors, rendered once per font.
#
# Usage:
#   $ ./painful_char | tee painful_doc.html
#   $ emacs painful_doc.html
#   $ firefox painful_doc.html
#
# References:
#   http://www.unicode.org/charts/
#   https://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=50C5
#   https://mojikiban.ipa.go.jp/1292.html
#
# NOTE(review): the HTML tags inside the string literals and heredocs were
# missing from the reviewed listing. The markup below (<h1>, <p>, <hr>,
# <span class=...>, and the page skeleton) is a minimal reconstruction --
# confirm it against the intended page layout.

# Characters to display: either a literal character (only its first character
# is used) or a code point written as 'U+<hex>'.
targets = [
  '葛', '辻',
  'U+50C5', 'U+5132', 'U+514E', 'U+5642', 'U+564C', 'U+5EFB', 'U+717D',
  '高', '崎', '吉',
  'U+9AD9', 'U+FA11', 'U+20BB7',
]

# CSS class name => CSS declaration used to render each sample.
fonts = {
  'font0' => "font-family: 'VL PGothic', sans-serif",
  'font1' => "font-family: 'IPAMincho', serif",
  'font2' => "font-family: 'Osaka-Mono', monospace",
}

# First variation selector to try. The trailing value is the alternative base
# that the original author left as a comment.
sel_base = 0x0E0100 # 0x0FE00

# Renders +bin+ once per font, each wrapped in a span carrying the font's class.
#
# @param fonts [Hash{String => String}] CSS class name => CSS declaration
# @param bin [String] text to display (a character, optionally with a selector)
# @return [String] concatenated HTML fragments, one per font, in hash order
def samples(fonts, bin)
  fonts.map { |key, _font| '<span class="%s">「%s」</span>' % [key, bin] }.join
end

# Converts a target specification into the character to display.
#
# @param target [String] a literal character or 'U+<hex code point>'
# @return [String] the character as a UTF-8 string
# @raise [ArgumentError] if the text after 'U+' is not a hexadecimal number
def decode_target(target)
  return target[0] unless target.start_with?('U+')

  # Integer(str, 16) replaces the former eval('0x' + ...): it parses the same
  # hex digits but cannot execute arbitrary code.
  [Integer(target[2..-1], 16)].pack('U*')
end

# Returns the bytes of +str+ as an upper-case hexadecimal string.
#
# @param str [String]
# @return [String]
def utf8_hex(str)
  str.unpack('H*').first.upcase
end

# One CSS rule per font so that samples() can select fonts by class name.
css = fonts.map { |key, font| ".#{key} { #{font} }\n" }.join

puts(<<HTML % css)
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>異体字含有文書ジェネレータ</title>
<style>
%s
</style>
</head>
<body>
HTML

puts('<h1>異体字セレクタ</h1>')
puts('<p>Unicode: U+%X</p>' % sel_base)
puts('<p>UTF-8: %s</p>' % utf8_hex([sel_base].pack('U*')))

targets.each do |target|
  bin = decode_target(target)
  unicode = bin.ord

  puts('<hr>')
  puts('<p>対象文字:「%s」</p>' % bin)
  puts('<p>Unicode: U+%X</p>' % unicode)
  puts('<p>UTF-8: %s</p>' % utf8_hex(bin))
  puts('<p>%s: U+%X</p>' % [samples(fonts, bin), unicode])

  # The base character followed by each of the eight selectors in turn.
  8.times do |n|
    selector = sel_base + n
    with_selector = bin + [selector].pack('U*')
    puts('<p>%s: U+%X + U+%X</p>' % [samples(fonts, with_selector), unicode, selector])
  end
end

puts(<<HTML)
</body>
</html>
HTML
__END__