''' fuzzy_cats.py v0.1 by noidfumbler from alogs/robowaifu

this is a script to get the aiml categories (their patterns) from a list of
aiml files, which can be read from a file with the urls, or from a list handed
over by the runtime

it searches for the best matching pattern and a similarity score, which can
then be fed to the bot
this is meant to make the bot ignore little errors or improve in other ways
'''
import os
import warnings

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# tags that enclose a pattern when it is written on a single line of an aiml file
_PATTERN_OPEN = '<pattern>'
_PATTERN_CLOSE = '</pattern>'


def get_patterns(knowledge_file_list):
    ''' takes a list of files (name+path) and returns all the patterns in a list

    the list can come from get_current_knowledge_files() or be provided by
    the runtime

    only lines that start (after indentation) with the opening pattern tag
    are used; the returned text is what stands between the opening tag and
    the closing tag, with surrounding whitespace removed
    if the closing tag is missing on that line, the rest of the line is used

    raises OSError (e.g. FileNotFoundError) if one of the files can not be read
    '''
    return_patterns = []
    for know_file in knowledge_file_list:
        # 'with' closes the file even if reading fails half way through
        with open(know_file, 'r') as aiml_file:
            for line in aiml_file:
                stripped = line.strip()
                if not stripped.startswith(_PATTERN_OPEN):
                    continue
                remainder = stripped[len(_PATTERN_OPEN):]
                # cut at the closing tag instead of slicing a fixed width,
                # so trailing content on the line can not corrupt the pattern
                pattern_text = remainder.partition(_PATTERN_CLOSE)[0]
                return_patterns.append(pattern_text.strip())
    return return_patterns


def get_current_knowledge_files(filename='knowledge_list'):
    ''' parses urls to aiml files (name+path) from a file, with one url per line

    the file for this is supposed to be written by the runtime for the chatbot
    this optional file might exist so other programs or users can access it
    the runtime can import get_patterns() and hand over the list instead

    filename is looked up relative to the current working directory; an
    absolute path is used as it is
    lines that do not end with '.aiml' are ignored

    raises FileNotFoundError if the file does not exist
    '''
    # os.path.join keeps an absolute filename intact, plain concatenation did not
    list_path = os.path.join(os.getcwd(), filename)
    with open(list_path, 'r') as list_file:
        entries = [line.strip() for line in list_file]
    return [entry for entry in entries if entry.endswith('.aiml')]


def get_current_knowledge(filename='knowledge_list'):
    ''' returns all known patterns as a list

    reads the list of aiml files with get_current_knowledge_files()
    hands this list over to get_patterns() and returns the response
    assumes that a file with the urls for AIML files exists
    '''
    return get_patterns(get_current_knowledge_files(filename))


def fuzzy_response(input_str, pattern_list=None):
    ''' takes an input string and uses fuzzywuzzy process to find the best match

    returns a tuple with the input string, the closest match and a score
    showing the similarity
    if there are no patterns to compare with (None or empty list), the
    closest match is None and the score is 0
    '''
    # without choices there is no (match, score) pair that could be unpacked
    if not pattern_list:
        return input_str, None, 0
    best_match = process.extractOne(input_str, pattern_list,
                                    scorer=fuzz.token_sort_ratio)
    if best_match is None:
        return input_str, None, 0
    alt_input_str, alt_input_ratio = best_match[0], best_match[1]
    return input_str, alt_input_str, alt_input_ratio


def test_loop():
    ''' loops, takes user input and prints the result from fuzzy_response

    (for testing and debugging)
    the loop ends after the message 'stop test' was entered, this message
    still gets matched and printed like any other
    '''
    while True:
        input_str = input('message: ')
        print(fuzzy_response(input_str, RETURNED_PATTERNS))
        if input_str == 'stop test':
            break


# patterns are only loaded if the file knowledge_list with urls to aiml files
# exists in the current working dir; the file is optional, so a missing file
# must not break importing this module (e.g. to use get_patterns() directly)
try:
    RETURNED_PATTERNS = get_current_knowledge()
except FileNotFoundError as missing_file:
    warnings.warn('fuzzy_cats: no patterns loaded (%s)' % missing_file)
    RETURNED_PATTERNS = []
#print(RETURNED_PATTERNS)

## ToDo
# what about double patterns? Set instead of list. But then? Preference?
## optimizations:
# taking context str as additional input to search-pattern
# taking context from knowledge-filename and search-pattern
# files in knowledge_list might have no or wrong path, autocorrect