Redaktor:Atomobot/atomobot language.py
Vzhled
#!/usr/bin/python2.4
# -*- coding: utf-8 -*-
"""Language support for Atomobot.

Provides collation-aware text comparison (every text is decomposed into a
list of collation numbers, one per registered "letter", where a letter may
span several characters such as CH or DZ) and the Slovak-specific tables and
number formatting.

The module is written so that it runs unchanged on Python 2 and Python 3.
"""

try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3: basestring no longer exists


def _cmp( a, b ):
    """Three-way comparison returning -1, 0 or 1.

    Stand-in for the cmp() builtin, which was removed in Python 3.
    """
    return ( a > b ) - ( a < b )


class AtomobotLanguageError( Exception ):
    """Error type reserved for language-related failures."""
    pass


class LanguageText( object ):
    """A text bound to a language, compared and hashed by its collation key.

    lang -- language object providing decompose() and a `code` attribute
    text -- the original text; str() of the instance returns it unchanged
    """

    def __init__( self, lang, text ):
        self.lang = lang
        self.text = text
        # Immutable collation key; a tuple so that it can be hashed.
        self.decomposed = tuple( self.lang.decompose( text ) )

    def __str__( self ):
        return self.text

    def __repr__( self ):
        return "LanguageText('%s', %s)" % ( self.lang.code, repr( self.text ) )

    def __hash__( self ):
        return hash( self.decomposed )

    def __cmp__( self, other ):
        """Three-way compare against a LanguageText or a plain string.

        A plain string is first wrapped in a LanguageText of this instance's
        language. Any other type raises TypeError.
        """
        if isinstance( other, LanguageText ):
            return _cmp( self.decomposed, other.decomposed )
        elif isinstance( other, _string_types ):
            return self.__cmp__( LanguageText( self.lang, other ) )
        else:
            raise TypeError(
                'cannot compare LanguageText with %s' % type( other ).__name__ )

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out and
    # delegate to it. On Python 2 they yield exactly what __cmp__ yielded.
    def __eq__( self, other ):
        return self.__cmp__( other ) == 0

    def __ne__( self, other ):
        return self.__cmp__( other ) != 0

    def __lt__( self, other ):
        return self.__cmp__( other ) < 0

    def __le__( self, other ):
        return self.__cmp__( other ) <= 0

    def __gt__( self, other ):
        return self.__cmp__( other ) > 0

    def __ge__( self, other ):
        return self.__cmp__( other ) >= 0


class AtomobotLanguage( object ):
    """Base class holding the collation alphabet of one language.

    Letters are registered with add_letter() in collation order; after the
    last registration tidy_letters() must be called, because decompose()
    reads the lookup table that tidy_letters() builds.
    """

    # name -> sequence of six word forms, indexed by the count 0..5;
    # index 5 is used for every count of five or more.
    plurals = {}

    def __init__( self ):
        self.number = 0                # last collation number handed out
        self.letters = [ u'?' ]        # collation number -> letter; 0 is "unknown"
        self.number_by_letter = {}     # letter -> collation number
        self.cgroups_by_first = {}     # first char -> { letter length -> [ letters ] }
        self.letters_by_first = {}     # first char -> [ letters ], longest first
        self.ch_types = {}             # character type -> set of letters

    def new_number( self ):
        """Return the next unused collation number (the first one is 1)."""
        self.number += 1
        return self.number

    def plural( self, number, name ):
        """Return the form of `name` matching the count `number`.

        The sign of `number` is ignored. Names without an entry in `plurals`
        are returned unchanged.
        """
        forms = self.plurals.get( name, None )
        if not forms:
            return name
        number = abs( number )
        if number >= 5:
            return forms[5]
        return forms[ number ]

    def add_letter( self, letter, ch_type='letter' ):
        """Register `letter` as the next letter in collation order.

        letter  -- one or more characters forming a single collation unit
        ch_type -- category under which the letter is recorded in ch_types
        """
        number = self.new_number()
        self.letters.append( letter )
        self.number_by_letter[ letter ] = number
        cgroups = self.cgroups_by_first.setdefault( letter[0], {} )
        samelets = cgroups.setdefault( len( letter ), [] )
        samelets.append( letter )
        self.ch_types.setdefault( ch_type, set() ).add( letter )

    def tidy_letters( self ):
        """Build letters_by_first from the registered letters.

        For every first character the candidates are listed longest first,
        so that decompose() prefers e.g. CH over C. Each list is rebuilt from
        scratch, which makes repeated calls safe (no duplicated candidates).
        """
        for first, cgroups in self.cgroups_by_first.items():
            ordered = []
            for size in sorted( cgroups.keys(), reverse=True ):
                ordered.extend( cgroups[ size ] )
            self.letters_by_first[ first ] = ordered

    def decompose( self, text ):
        """Return the list of collation numbers for `text`.

        The text is upper-cased first, so the result is case-insensitive.
        At every position the longest registered letter that matches is
        consumed; a character that starts no matching letter yields 0.
        """
        text = text.upper()
        length = len( text )
        decomposed = []
        pos = 0
        while pos < length:
            matched = None
            for candidate in self.letters_by_first.get( text[ pos ], () ):
                # A slice reaching past the end is simply shorter than the
                # candidate, so no separate bounds check is needed.
                if text[ pos:pos + len( candidate ) ] == candidate:
                    matched = candidate
                    break
            if matched is None:
                # Also reached when candidates exist but none of them fits
                # (e.g. only 'DZ' is registered and the text has 'DX').
                # Without advancing here the loop would never terminate.
                decomposed.append( 0 )
                pos += 1
            else:
                decomposed.append( self.number_by_letter[ matched ] )
                pos += len( matched )
        return decomposed

    def compose( self, decotext ):
        """Turn a sequence of collation numbers back into (upper-case) text.

        The number 0 is rendered as u'?'.
        """
        return u''.join( [ self.letters[ number ] for number in decotext ] )

    def compare_ci( self, text1, text2 ):
        """Case-insensitively compare two texts; return -1, 0 or 1."""
        return _cmp( self.decompose( text1 ), self.decompose( text2 ) )


class AtomobotLanguageSlovak( AtomobotLanguage ):
    """Slovak alphabet, plural forms, month names and number formatting."""

    code = 'sk'

    plurals = {
        'článok': ( u'článkov', u'článok', u'články', u'články', u'články', u'článkov' ),
        'kategória': ( u'kategórií', u'kategória', u'kategórie', u'kategórie', u'kategórie', u'kategórií' ),
    }

    TEXT_UPDATE = u'Atomobot :: aktualizácia'

    # Month names in the nominative case.
    MONTH_NAME = {
        1: u'január',
        2: u'február',
        3: u'marec',
        4: u'apríl',
        5: u'máj',
        6: u'jún',
        7: u'júl',
        8: u'august',
        9: u'september',
        10: u'október',
        11: u'november',
        12: u'december'
    }

    # Month names in the genitive case.
    MONTH_NAME_GEN = {
        1: u'januára',
        2: u'februára',
        3: u'marca',
        4: u'apríla',
        5: u'mája',
        6: u'júna',
        7: u'júla',
        8: u'augusta',
        9: u'septembra',
        10: u'októbra',
        11: u'novembra',
        12: u'decembra'
    }

    def __init__( self ):
        super( AtomobotLanguageSlovak, self ).__init__()
        self.init_collated_letters()
        self.tidy_letters()

    def format_number( self, number, places=0 ):
        """Format `number` with a space between thousands and a decimal comma.

        number -- anything accepted by float()
        places -- number of digits after the decimal comma (0 = none)

        A negative value that rounds to zero is printed without the minus
        sign ('0', not '-0').
        """
        numberstr = ( '%%.%sf' % places ) % float( number )
        minus = numberstr.startswith( '-' )
        if minus:
            numberstr = numberstr[1:]
        parts = numberstr.split( '.', 1 )
        pre = parts[0]
        post = ''
        if len( parts ) > 1:
            post = parts[1]
        if minus and not ( pre + post ).strip( '0' ):
            # Every printed digit is zero, so a sign would be misleading.
            minus = False
        # Cut the integer part into groups of three, starting from the right.
        groups = []
        while pre:
            groups.insert( 0, pre[ -3: ] )
            pre = pre[ :-3 ]
        final_str = ' '.join( groups )
        if minus:
            final_str = '-' + final_str
        if post:
            final_str += ',' + post
        return final_str

    def init_collated_letters( self ):
        """Register symbols, digits and letters in collation order.

        The order of registration defines the sort order, so the sequences
        below must not be re-sorted.
        """
        symbols = ( u' ', u'!', u'"', u'#', u'$', u'.', u',', u'-', u'(', u')' )
        digits = u'0123456789'
        letters = (
            u'A', u'Á', u'Â', u'Ä', u'Å', u'Æ',
            u'B',
            u'C', u'Ć', u'Č', u'Ç',
            u'D', u'Ď', u'DZ', u'DŽ', u'Ð',
            u'E', u'É', u'Ě', u'Ę',
            u'F',
            u'G',
            u'H', u'CH',
            u'I', u'Ì', u'Í', u'Î', u'Ï', u'Į',
            u'J',
            u'K',
            u'L', u'Ĺ', u'Ľ', u'Ł',
            u'M',
            u'N', u'Ń', u'Ň', u'Ñ',
            u'O', u'Ó', u'Ô', u'Ö', u'Ő', u'Œ', u'Ø',
            u'P',
            u'Q',
            u'R', u'Ŕ', u'Ř',
            u'S', u'Ś', u'Š', u'Ş',
            u'T', u'Ť',
            u'U', u'Ú', u'Ů', u'Ü', u'Ű',
            u'V',
            u'W',
            u'X',
            u'Y', u'Ý',
            u'Z', u'Ź', u'Ž',
        )
        for symbol in symbols:
            self.add_letter( symbol, 'symbol' )
        for digit in digits:
            self.add_letter( digit, 'number' )
        for letter in letters:
            self.add_letter( letter )