# This script was hacked to fix some databases that were declared as Latin-1 # [I mean MySQL is a Swedish company, but Latin-1-Swedish as a default?!?!] # and contained UTF-8. MySQL "helpfully" converted some -- and not all, go figure # of the strings to Latin-1. Very big mess indeed. # This situation may happen to other people, so I decided to make it available. # GPL, free of charge, use at your own risk -- I have one [happy] witness who says # Thank You dda! # So if it doesn't fix your dbs, or breaks them, sorry! -- This script worked as intended! # 2005/2/12 - 03:10:00 # dda http://sungnyemun.org/weblog2/ import MySQLdb, codecs host='localhost' user='user' pwd='pwd' errorDB = {} def connect2db(host, user, passwd, db): zeDB='' try: zeDB = MySQLdb.connect(host, user, passwd,db) return zeDB except: return "N/A" if __name__ == '__main__': db=connect2db(host,user,pwd,'mysql') if db=="N/A": print("What?!?!") else: cursor=db.cursor() cursor.execute("SHOW DATABASES") result = cursor.fetchall() db.close() for myDBs in result: myDB=myDBs[0] if myDB != "dspam" and myDB != "mysql" and myDB != "test" and myDB != "spip": print("Database: %s"%myDB) db=connect2db(host,user,pwd,myDB) cursor=db.cursor() cursor.execute("ALTER DATABASE `%s` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci"%myDB) cursor.execute("SHOW TABLES") zeTables = cursor.fetchall() for myTables in zeTables: print(" * %s.%s"%(myDB,myTables[0])) cursor=db.cursor() cursor.execute("ALTER TABLE `%s` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci"%(myTables[0])) cursor.execute("DESCRIBE %s.%s"%(myDB,myTables[0])) zeDesc = cursor.fetchall() zeDesc2 = {} k=0 fieldNames={} name2Field={} for i in zeDesc: zeDesc2[k]=i[1] fieldNames[k]=i[0] name2Field[i[0]]=k if i[1].find('varchar')>-1 or i[1].find('text')>-1 or i[1].find('enum')>-1: cursor.execute("ALTER TABLE `%s` CHANGE `%s` `%s` %s CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL"%(myTables[0],i[0],i[0],i[1])) k+=1 descLen=len(zeDesc2) cursor.execute("SELECT * FROM %s.%s"%(myDB,myTables[0])) zeLines = cursor.fetchall() for li in zeLines: for zeFieldNum in range(0,descLen): zeField=li[zeFieldNum] if zeField!=None and (zeDesc2[zeFieldNum].find('varchar')>-1 or zeDesc2[zeFieldNum].find('text')>-1): try: test=codecs.encode(codecs.decode(zeField,'utf-8'),'utf-8') except: # don't display twice the same error [db.table.field] t=myDB+"."+myTables[0]+"."+fieldNames[zeFieldNum] s=errorDB.get(t,'') if s=='': print(" . Fail: Table `%s.%s`, field %s, id: %d"%(myDB,myTables[0],fieldNames[zeFieldNum],li[0])) errorDB[t]='ok' txt=codecs.encode(codecs.decode(zeField,'iso-8859-1'),'utf-8') txt=MySQLdb.escape_string(txt) cursor.execute("UPDATE %s.%s SET `%s` = \"%s\" WHERE %s = %d;"%(myDB,myTables[0], fieldNames[zeFieldNum], txt, fieldNames[0], li[0])) pass