#
# String/unicode conversion utils.
#

# The text type is ``unicode`` on Python 2 and ``str`` on Python 3; resolve it
# once so safestr() works on both interpreters.
try:
    _TEXT_TYPE = unicode  # noqa: F821 -- Python 2 only
except NameError:
    _TEXT_TYPE = str


def safestr(s):
    """
    Safely coerce *anything* to a native ASCII-safe string.

    ``None`` becomes an empty string.  Text (``unicode`` on Python 2,
    ``str`` on Python 3) is encoded to ASCII with non-ASCII characters
    replaced by XML character references (e.g. ``&#233;``).  Any other
    object is passed through ``str()``; if that raises, an empty string
    is returned.

    You can use this for really crude unicode handling, but it's a bit
    like killing a mosquito with a bazooka.
    """
    if s is None:
        return ""
    if isinstance(s, _TEXT_TYPE):
        encoded = s.encode('ascii', 'xmlcharrefreplace')
        # Python 2: encode() already yields a native str.
        # Python 3: it yields bytes, so decode back to str.
        if isinstance(encoded, str):
            return encoded
        return encoded.decode('ascii')
    try:
        return str(s)
    except Exception:
        # Deliberately best-effort: a broken __str__ yields "".  Unlike a
        # bare ``except:``, KeyboardInterrupt/SystemExit still propagate.
        return ""


def safeint(s):
    """Like safestr(), but always returns an int.  Returns 0 on failure.

    The value is first coerced with safestr() and the resulting string is
    parsed with ``int()``; anything ``int()`` rejects yields 0.
    """
    try:
        return int(safestr(s))
    except ValueError:
        return 0


def convertentity(m):
    """Convert a matched HTML entity into a normal string.

    *m* is a regex match whose group 1 is ``'#'`` for a numeric character
    reference (empty otherwise) and whose group 2 is the entity body: a
    decimal number, ``x``/``X`` followed by a hex number, or an entity name.

    Entities that cannot be converted (unknown name, malformed or
    out-of-range number) are returned unchanged.  On Python 2 the result
    is an ISO-8859-1 byte string, so numeric references above 255 are
    left as they are.
    """
    try:
        from htmlentitydefs import entitydefs  # Python 2
    except ImportError:
        from html.entities import entitydefs  # Python 3

    body = m.group(2)
    if m.group(1) == '#':
        try:
            if body[:1] in ('x', 'X'):
                code = int(body[1:], 16)
            else:
                code = int(body)
            return chr(code)
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside chr()'s range.
            # OverflowError: absurdly large number that chr() cannot take.
            return '&#%s;' % body
    try:
        return entitydefs[body]
    except KeyError:
        return '&%s;' % body


def unquotehtml(s):
    """Convert a HTML quoted string into a normal string.

    Works with numeric references (``&#NN;``, ``&#xHH;``) and with named
    entities such as ``&nbsp;`` and ``&gt;``.  Unknown entities and stray
    ampersands are left untouched.
    """
    import re

    # Restrict the entity body to alphanumerics so that a stray '&' cannot
    # swallow the text up to the next ';' and hide a real entity.
    return re.sub(r'&(#?)([A-Za-z0-9]+);', convertentity, s)