# Jay Summet
# CS 1301
# Released to the public domain, Oct 20th 2008
#
# Character-level n-gram text generator.  The code below avoids syntax that
# is specific to either Python 2 or Python 3 so it runs under both.
#
from random import randrange


def loadNgrams(fileName, N):
    """Load character n-grams of length N from a text file.

    The whole file is read into memory and scanned with a sliding window of
    N characters.  Each window (the n-gram) is mapped to the list of
    characters that immediately followed it in the text, in order of
    appearance.  Repeats are kept, so picking uniformly from a list
    reproduces the frequencies observed in the text.

    The last N characters of the text have no following character, so they
    only become a key if the same n-gram also occurs earlier in the text.

    Args:
        fileName: Path of the text file to read.
        N: Length, in characters, of each n-gram.

    Returns:
        A dictionary mapping each n-gram string to the list of characters
        that followed it.  It is empty when the text has N characters or
        fewer.
    """
    nGrams = {}  # Start with an empty dictionary!

    # The with-block closes the file even if read() raises.
    with open(fileName, "r") as f:
        text = f.read()

    # Stop N characters before the end so text[i + N] always exists.
    for i in range(len(text) - N):
        nGram = text[i:i + N]
        nextLetter = text[i + N]

        # Append the next letter to the list associated with this n-gram.
        # setdefault() creates the empty list the first time an n-gram is
        # seen, and append() grows it in place instead of copying it.
        nGrams.setdefault(nGram, []).append(nextLetter)

        # Uncomment the following line for debugging output.
        # print("%r -> %r" % (nGram, nGrams[nGram]))

    return nGrams


def genText(Grams, length, nGram=None):
    """Generate random text from a pre-loaded collection of n-grams.

    Starting from nGram, a follower character is picked at random from the
    list stored for the current n-gram, appended to the output, and the
    n-gram window is slid forward by one character.  This repeats until
    `length` characters have been produced.

    If the window slides onto an n-gram that is not a key of Grams (for
    example the very end of the source text), generation continues from a
    randomly chosen key instead of failing.

    Args:
        Grams: Dictionary mapping n-gram strings to lists of follower
            characters, as returned by loadNgrams().
        length: Number of characters to generate.
        nGram: Optional starting n-gram; it must be a key of Grams.  When
            omitted, the first key of the dictionary is used.

    Returns:
        The generated text (the starting n-gram itself is not included),
        or None after printing a message when Grams is empty or length is
        negative.

    Raises:
        KeyError: If text is requested (length > 0) and the starting
            n-gram is not a key of Grams.
    """
    # Do some error checking.
    if len(Grams) < 1:
        print("Error! nGrams is empty!")
        return None

    if length < 0:
        print("Can not generate negative amounts of Text!")
        return None

    # Handle our optional parameter.  If the user does not specify a
    # starting n-gram, we use the first key in the dictionary.
    if nGram is None:
        nGram = next(iter(Grams))

    letters = []    # Collected here and joined once at the end.
    allKeys = None  # Built lazily; only needed if we hit a dead end.

    for produced in range(length):
        # Dead end part-way through: this n-gram never had a follower in
        # the source text, so restart from a random known n-gram.  On the
        # first iteration we do not reseed, so an unknown starting n-gram
        # still raises KeyError in the lookup below.
        if produced > 0 and nGram not in Grams:
            if allKeys is None:
                allKeys = list(Grams)
            nGram = allKeys[randrange(0, len(allKeys))]

        # Get the follower list for the current n-gram.
        followers = Grams[nGram]

        # Pick a random letter from it and add it to the output.
        letter = followers[randrange(0, len(followers))]
        letters.append(letter)

        # Update our current n-gram: drop its first character and add the
        # letter we just generated.
        nGram = nGram[1:] + letter

    # When all finished, return the text!
    return "".join(letters)


# Run the demo only when executed as a script, so importing this module
# does not try to read msnd.txt.
if __name__ == "__main__":
    Grams = loadNgrams("msnd.txt", 5)
    print(genText(Grams, 3000))