| Paste number 42434: | ruby-zoom => ruby-marc character encoding test |
| Pasted by: | jaron |
| When: | 2 years, 3 weeks ago |
| Share: | Tweet this! | http://paste.lisp.org/+WQQ |
| Channel: | #code4lib |
| Paste contents: |
#!/usr/bin/ruby -w
# Test of character encoding when moving from ruby-zoom to ruby-marc via XML.
# First, we print the record as ruby-zoom presents it.
# Then we print the record parsed from the output of the xml method called with no arguments.
# Finally, we print the record after asking yaz to convert it from MARC-8 to UTF-8.
require 'rubygems'
require 'zoom'
require 'UniversalDetector' #gem install chardet
require 'marc'
require 'stringio'
# Parses +xml+ with MARC::XMLReader, prints the first record it yields, and
# then prints the character-set guess UniversalDetector makes for that
# record's string form.
def report_marc_record(xml)
  record = MARC::XMLReader.new(StringIO.new(xml)).to_a.first
  puts record
  puts UniversalDetector.chardet(record.to_s).inspect
end

# Prints the rule (padded by one blank line on each side) that separates the
# sections of the report.
def print_separator
  puts
  puts '-----------------------------------'
  puts
end

# Z39.50 targets to probe, each given as [host, port, database name].
zservers = [['142.51.8.7', 2200, 'unicorn']] # dbs tells me they have no utf8 records

zservers.each do |zserver|
  host, port, database = zserver
  puts zserver

  conn = ZOOM::Connection.new
  conn.preferred_record_syntax = 'USMARC'
  conn.set_option('charset', 'UTF-8')
  conn.connect(host, port)
  conn.database_name = database
  rset = conn.search('@attr 1=4 "Revue philosophique de la France et de"')

  # NOTE(review): assumes rset[0] is nil when the search matched nothing --
  # confirm against ruby-zoom. Without this guard the calls below would fail
  # with NoMethodError on nil.
  record = rset[0]
  if record.nil?
    warn "no records returned by #{host}:#{port}/#{database}"
    next
  end

  # how ruby-zoom sees it
  puts 'the record how ruby-zoom sees it'
  puts record
  raw_xml = record.xml
  puts UniversalDetector.chardet(raw_xml).inspect
  print_separator

  # if we give the xml method no values
  report_marc_record(raw_xml)
  print_separator

  # if we ask yaz to convert from marc8 to utf8
  report_marc_record(record.xml('MARC8', 'UTF8'))
end
This paste has no annotations.