""" names_in_a_database.py -- V.1.0, Jan 30 2007 By leonardo maffi, with some code from the solutions document: http://psriram.wordpress.com/files/2007/01/kopc.pdf Names in a Database - Kurukshetra the battle of the brains Problem URL: http://opc.kurukshetra.org.in/opc/problems/2/ The other problems: http://opc.kurukshetra.org.in/opc/problems/ Score: 100 Points Time Limit : 2 seconds Memory Limit : 3.2 MB seconds Submission Limit : 10 submits Source Limit : 5000 bytes PROBLEM: Ram Ellison is contructing a database to manage the names of all the students in his university. Ram has to construct a table that contains just one field (apart from a primary key field) which will hold the name of the students. Unfortunately Ram is not aware of the names of all the people in his university (it's a huge university with some 250 odd colleges affiliated to it) and thus doesn't know what size to make this field. He wants it to be large enough to accomodate all names. So Ram sets out to write an application to collect the names of all students in his institute. He writes a simple device in to which students type their name one by one. The device stores the names typed sequentially in a file. After one month of name collection Ram finally has all the student names. Unfortunately the device has not stored the names in a proper format. It has stored all of the names without any demarcation between names. The file simply contains letters of the English alphabet. So what happened was, if “Ram”, “Sam” and “RingGo” punched their names in to the machine, the machine would have stored “RamSamRingGo”. Ram's problem is still not solved. How is he to determine what size to keep the field in the database? Ram noticed while students were punching their names in to the machine that all the names had one thing in common. No name had a repeated character in it. Plus the names contained only characters from the English alphabet, 'A' to 'Z' or 'a' to 'z'. (NOTE 'g' and 'G are considered different characters by Ram. So “RingGo” is a valid name with NO repeated characters) Given this information (names contain unique characters) and the names in the file, Ram wants you to determine the longest possible name there can be. So in the case of “RamSamRingGo” the longest possible name could be “SamRingGo” and so he can safely set the size of the field as 9. INPUT: First line of input contains T (T < 100) the number of test cases. Then T test cases follow. Each test case contains a string on a line by itself. The string contains characters from [a-z] and [A-Z] only. Length of each string will be at most 300,000. OUTPUT: For each test a number M on a line by itself denoting the maximum length that a name can be. Sample Input: 2 abcabcabc RamSamRingGo Sample Output: 3 9 """ from time import clock def timeit(function, *arg1, **arg2): """timeit(function, *arg1, **arg2): given a function and its parameters, calls it times times and computes its running time, in seconds (its results are discarded).""" t = clock() r = function(*arg1, **arg2) return r, round(clock() - t, 2) # My original *simple* version. # Later I have tried to create a much faster version def longest_simple(text): def lenmax_curr(text, i): charset = set() for j in xrange(i, -1, -1): if text[j] in charset: return i-j else: charset.add(text[j]) return i-j+1 return max(lenmax_curr(text, i) for i in xrange(len(text))) # The basic implementation of the algorithm given in the solutions def longest_basic(text): last_occurance = {} start = max_l = -1 for i, c in enumerate(text): start = max(start, last_occurance.get(c, -1)) max_l = max(max_l, i - start) last_occurance[c] = i return max_l # Faster version when Psyco isn't avaliable, # improved from the given solution def longest(text): last_occurance = dict.fromkeys(letters, -1) start = max_l = -1 for i in xrange(len(text)): c = text[i] locc = last_occurance[c] if locc > start: start = locc if i - start > max_l: max_l = i - start last_occurance[c] = i return max_l from array import array # Faster version when Psyco is avaliable, # improved from the given solution def longest_psyco(text): last_occurance = array("l", [-1] * 256) start = max_l = -1 for i in xrange(len(text)): c = text[i] locc = last_occurance[ord(c)] if locc > start: start = locc if i - start > max_l: max_l = i - start last_occurance[ord(c)] = i return max_l """ # Pyrex version, quite slower than longest_psyco() # (5 times slower with len(intext) = 300000 on my PC) def longest_pyrex(intext): cdef int i, start, max_l, ltext cdef unsigned char c cdef int last_occurance[256] cdef char *text ltext = len(intext) text = intext for i from 0 <= i < 256: last_occurance[i] = -1 start = -1 max_l = -1 for i from 0 <= i < ltext: c = (text[i]) locc = last_occurance[c] if locc > start: start = locc if i - start > max_l: max_l = i - start last_occurance[c] = i return max_l """ if __name__ == "__main__": from string import letters from random import choice, seed print "Basic asserts on the given examples:" for alg in longest_simple, longest_basic, longest, longest_psyco: assert alg("abcabcabc") == 3, alg.__name__ assert alg("RamSamRingGo") == 9, alg.__name__ print "Speed test:" def generatetxt(n): return "".join(choice(letters) for _ in xrange(n)) seed(1) txt = generatetxt(300000) print "String len:", len(txt) for alg in longest_simple, longest_basic, longest, longest_psyco: sol, t = timeit(alg, txt) print "function_name, solution, running_time_s:", alg.__name__, sol, t try: import psyco except ImportError: pass else: print "The same functions with Psyco:" psyco.full() for alg in longest_simple, longest_basic, longest, longest_psyco: sol, t = timeit(alg, txt) print "function_name, solution, running_time_s:", alg.__name__, sol, t