from functools import reduce, wraps
import time


def timeit(func):
    """Wrap *func* so that each call prints its elapsed time.

    The wrapper forwards all positional and keyword arguments to *func*,
    prints ``Elapsed time: <seconds>s`` to stdout and returns whatever
    *func* returned.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        start = time.perf_counter()
        rslt = func(*args, **kwargs)
        print(f'Elapsed time: {time.perf_counter() - start:0.2f}s')
        return rslt
    return wrapper


def read_words(path):
    """Read bags of words from a text file.

    Each line of the file becomes one list of tokens: the line is stripped
    of surrounding whitespace and split on single space characters.

    Returns:
        A list with one list of strings per line. A blank line yields
        ``['']`` because ``''.split(' ')`` returns a single empty token.
    """
    with open(path, 'r') as f:
        return [line.strip().split(' ') for line in f]


def match(documents, topics):
    """For each topic, find the documents containing all of its words.

    Args:
        documents: iterable of word sequences; a document is identified
            by its position in this iterable.
        topics: iterable of word sequences.

    Returns:
        A list with one ``set`` of document indices per topic, in topic
        order. A topic with no words matches every document (the
        "contains all words" condition is vacuously true). Every returned
        set is a fresh object that the caller may mutate freely.
    """
    # Inverted index: word -> set of indices of the documents containing it.
    # Using sets de-duplicates repeated words within a document and lets
    # each intersection run in time proportional to the smaller operand.
    word_docs = {}
    doc_count = 0
    for i, d in enumerate(documents):
        doc_count = i + 1
        for w in d:
            word_docs.setdefault(w, set()).add(i)

    no_docs = frozenset()  # shared, immutable posting for unknown words
    rslt = []
    for t in topics:
        postings = [word_docs.get(w, no_docs) for w in t]
        if not postings:
            # Empty topic: no word can be missing, so all documents match.
            rslt.append(set(range(doc_count)))
            continue
        # Start from the rarest word so intermediate results stay small.
        postings.sort(key=len)
        # Seed with a copy so the index itself is never returned or mutated.
        rslt.append(reduce(set.intersection, postings[1:], set(postings[0])))
    return rslt


# Only touch the input files when executed as a script, so that importing
# this module (e.g. to reuse match()) has no side effects.
if __name__ == '__main__':
    # Load and match
    documents = read_words('documents.txt')
    topics = read_words('topics.txt')
    rslt = timeit(match)(documents, topics)

    # Print a few summary statistics
    print(sum(len(d) > 0 for d in rslt))
    print(max(len(d) for d in rslt))