| Paste number 44755: | pymarc: test/marc8.py with NFD normalization test |
| Pasted by: | anarchivist |
| When: | 1 year, 11 months ago |
| Share: | Tweet this! | http://paste.lisp.org/+YJ7 |
| Channel: | #code4lib |
| Paste contents: |
from pymarc import marc8_to_unicode
from unittest import TestCase
import unicodedata
class MARC8Test(TestCase):
    """Check marc8_to_unicode against paired MARC-8 and UTF-8 fixture files."""

    def test_marc8_to_unicode(self):
        """Convert each MARC-8 fixture line and compare it to its UTF-8 twin.

        Lines are read in lockstep from test/test_marc8.txt and
        test/test_utf8.txt. For every pair, the converted MARC-8 line
        (re-encoded as UTF-8) must equal the UTF-8 line, and -- when
        unicodedata.normalize is available -- the NFD-normalized forms of
        both must be equal as well. Finally the number of compared pairs
        must be exactly 1514.
        """
        # Presumably guards interpreters that lack unicodedata.normalize;
        # the answer cannot change mid-run, so look it up once.
        can_normalize = hasattr(unicodedata, 'normalize')
        count = 0

        # try/finally (rather than leaving the handles to the garbage
        # collector) guarantees both files are closed even when an
        # assertion fails part-way through the fixtures.
        marc8_file = open('test/test_marc8.txt')
        try:
            utf8_file = open('test/test_utf8.txt')
            try:
                while True:
                    marc8 = marc8_file.readline().strip("\n")
                    utf8 = utf8_file.readline().strip("\n")
                    # An empty string means end-of-file on either side (or
                    # a blank fixture line); the count check below catches
                    # a premature stop.
                    if marc8 == '' or utf8 == '':
                        break
                    count += 1

                    converted = marc8_to_unicode(marc8)
                    self.assertEqual(converted.encode('utf8'), utf8)

                    if can_normalize:
                        marc8nfd = unicodedata.normalize('NFD', converted)
                        utf8nfd = unicodedata.normalize(
                            'NFD', utf8.decode('utf8'))
                        self.assertEqual(marc8nfd, utf8nfd)
            finally:
                utf8_file.close()
        finally:
            marc8_file.close()

        # Ensures the loop walked the expected number of fixture lines
        # instead of silently ending early.
        self.assertEqual(count, 1514)
This paste has no annotations.