자바·파이썬·자바스크립트/파이썬 프로그래밍

확장된 애너그램 사전만들기

바로이순간 2013. 7. 24. 22:07

import sys, string, time


debug=0

def halflen(word):

    l=len(word)

    if l<7: return 3

    else: return l/2

def makekey(word):

    c=word.lower()

    b=list(c)

    b.sort()

    k=''                 

    apa=False

    for y in b:

        if y=="'":

            apa=True

        else:

            k=k+y

    if apa: 

        k=k+"'"

    return k


def insertTree(father,path,key):

    #print '****',father,word,key

    if len(key)<3: return

    val,list=treeDic[father]

    x,next=path[0],path[1:]

    x=father+x

    if not x in list:

        list.append(x)

        treeDic[x]='',[]

    if next:

        insertTree(x,next,key)

    else:

        val,l=treeDic[x]

        treeDic[x]=key,l

 

def findTree(father,path,totlist):

    global debug

    #print '****',father,word,key,totlist

    val,list=treeDic[father]

    if val and len(val)>2: totlist.append(val)

    if not path: return

    p='#'

    next=path

    while next:

        x,next=next[0],next[1:]

        x=father+x

        if p==x: pass

        else:

            if x in treeDic:

                findTree(x,next,totlist)

            p=x


t0 = time.clock()


f=open('words.txt','r')

clist=f.readlines()

f.close


blist=[]

for x in alist:

    w=x[:-1]

    blist.append([makekey(w),w])


blist.sort()


allDic={}

for k,w in blist:

    if k in allDic:

        allDic[k].append(w)

    else: allDic[k]=[w]


allkeys=list(allDic.keys())

allkeys.sort()


treeDic={}

treeDic['$']='',[]


for x in allkeys:

    #print 'insert',x

    insertTree('$',x,x)


for x in alist:

    y=makekey(x)

    totlist=[]

    findTree('$',y,totlist)

    if y in totlist: totlist.remove(y)

    plist=allDic[y][:]

    plist.remove(x)

    for y in totlist:

        plist=plist+allDic[y]

    #plist.sort(lambda x, y: cmp(string.lower(x), string.lower(y)))

    if plist:

        plist.sort()

        print(x)

        for y in plist:

            print(' +',y)


print('================================================')

print(time.clock()-t0,'seconds')

print('================================================')