Problem with function that returns the key with most unique values in python

I am trying to implement greedy algorithm where I need to find the alphabet generator with most unique alphabets, process these alphabets and find the next best generator until I have processed all alphabets. However, when I run the function get_next_best_generator the first time it returns the right values but doesn't return anything in the next call with the following error- 0 [] KeyError: '' output of first call-generator_2 6 ['p', 'q', 'k', 'j', 'l', 'm', 'n', 't']

why its not returning anything in the call after the first call?

Here is my code-

# create lists to store covered alphabets and processed generators
alphabets_covered=[]
generator_covered=[]
# create a dict to store generators and the alphabets that they generate
generator_alphabets_dict={'generator_1':['p','q','r'], 'generator_2':['p', 'q','k','j','l','m','n','t'],
                          'generator_3':['x','m'], 'generator_4':['p','e','m','g']}


def get_next_best_generator():
    '''get the generator that generates the most unique alphabets that have not been covered'''
    most_alphabets_count=0
    best_generator_alphabets=[]
    best_generator=''
    #Loop through the dict to find the generator with most unique alphabets
    for generator ,alphabets in generator_alphabets_dict.items():
        unique_alphabets_list = []
        generator_alphabets = list(alphabets)
        # Loop to store the unique alphabets for a generator
        for selected_alphabet in generator_alphabets:
            if selected_alphabet not in alphabets_covered:
                unique_alphabets_list.append(selected_alphabet)
                alphabets_covered.append(selected_alphabet)
        # update all values if current generator's unique element list is greater than its predecessor's       
        if len(unique_alphabets_list) > most_alphabets_count:
            most_alphabets_count = len(unique_alphabets_list)
            best_generator_alphabets = alphabets
            best_generator = generator

    return  most_alphabets_count, best_generator_alphabets , best_generator

# Loop until all alphabets have been covered
while len(alphabets_covered) < 13:


    most_alphabets_count, best_generator_alphabets, best_generator = get_next_best_generator()
    print(best_generator)
    print(most_alphabets_count)
    print(best_generator_alphabets)
    generator_alphabets_dict.pop(best_generator)
    generator_covered.append(best_generator)

After making some changes- My function still doesn't return the key with the most unique elements.

Updated code-

# create lists to store covered alphabets and processed generators

generator_covered=[]
# create a dict to store generators and the alphabets that they generate
generator_alphabets_dict={'generator_1':['p','q','r'], 'generator_2':['p', 'q','k','j','l','m','n','t'],
                          'generator_3':['x','w','b'], 'generator_4':['p','e','m','g']}
alphabets = set(['p','q','r','k','j','l','m','n','t','x','w','b','e','g'])

def get_next_best_generator():
    '''get the generator that generates the most unique alphabets that have not been covered'''
    most_alphabets_count=0
    best_generator_alphabets=[]
    best_generator=''

    #Loop through the dict to find the generator with most unique alphabets
    for generator ,alphabets in generator_alphabets_dict.items():
        unique_alphabets_list = []
        generator_alphabets = list(alphabets)
        for selected_alphabet in generator_alphabets :
            if selected_alphabet  in alphabets:
                unique_alphabets_list.append(selected_alphabet)

        # update all values if current generator's unique element list is greater than its predecessor's       
        if len(unique_alphabets_list) > most_alphabets_count:
            most_alphabets_count = len(unique_alphabets_list)
            best_generator_alphabets = unique_alphabets_list
            best_generator = generator

    return  most_alphabets_count, best_generator_alphabets , best_generator

# Loop until all alphabets have been covered
while alphabets:


    most_alphabets_count, best_generator_unique_alphabets, best_generator = get_next_best_generator()
    print(best_generator)
    print(most_alphabets_count)
    print(best_generator_unique_alphabets)
    generator_alphabets_dict.pop(best_generator)
    generator_covered.append(best_generator)
    alphabets -= set(best_generator_unique_alphabets)

Expected output- generator_2 8 ['p', 'q', 'k', 'j', 'l', 'm', 'n', 't'] generator_3 3 ['x', 'w', 'b'] generator_4 2 ['e','g'] generator_1 1 ['r']

3 answers

  • answered 2020-06-02 08:26 Hampus Larsson

    First of all, if your script crashes, please include the full stack-trace!

    Your problem occurs because your if-statement if len(unique_alphabets_list) > most_alphabets_count is never True on your second pass-through of the function.

    The reason for that is, inside of this for-loop:

    for selected_alphabet in generator_alphabets:
        if selected_alphabet not in alphabets_covered:
            unique_alphabets_list.append(selected_alphabet)
            alphabets_covered.append(selected_alphabet)
    

    There you write to alphabets_covered which after you've run the function once, will contain all of the unique letters already, so the second go-around the if-statement will never be True. If you instead change the scope of the variable by defining it inside of the function, you can still keep count on the unique values.

    And finally because you can no longer use alphabets_covered as your while-statement limiter, change it to just check if generator_alphabets_dict has any values inside of it or not.

    Here is the updated code:

    # create lists to store covered alphabets and processed generators
    generator_covered = []
    # create a dict to store generators and the alphabets that they generate
    generator_alphabets_dict = {'generator_1': ['p', 'q', 'r'], 'generator_2': ['p', 'q', 'k', 'j', 'l', 'm', 'n', 't'],
                                'generator_3': ['x', 'm'], 'generator_4': ['p', 'e', 'm', 'g']}
    
    
    def get_next_best_generator():
        '''get the generator that generates the most unique alphabets that have not been covered'''
        most_alphabets_count = 0
        best_generator_alphabets = []
        best_generator = ''
        alphabets_covered = [] # Moved into the function.
    
        #Loop through the dict to find the generator with most unique alphabets
        for generator, alphabets in generator_alphabets_dict.items():
            unique_alphabets_list = []
            generator_alphabets = list(alphabets)
            # Loop to store the unique alphabets for a generator
            for selected_alphabet in generator_alphabets:
                if selected_alphabet not in alphabets_covered:
                    unique_alphabets_list.append(selected_alphabet)
                    alphabets_covered.append(selected_alphabet)
            # update all values if current generator's unique element list is greater than its predecessor's
            if len(unique_alphabets_list) > most_alphabets_count:
                most_alphabets_count = len(unique_alphabets_list)
                best_generator_alphabets = alphabets
                best_generator = generator
    
        return most_alphabets_count, best_generator_alphabets, best_generator
    
    
    # Loop until all alphabets have been covered
    # the boolean value of an empty dictionary is False, so we can limit it on that.
    while generator_alphabets_dict: 
        most_alphabets_count, best_generator_alphabets, best_generator = get_next_best_generator()
        print(best_generator)
        print(most_alphabets_count)
        print(best_generator_alphabets)
        generator_alphabets_dict.pop(best_generator)
        generator_covered.append(best_generator)
    

    I haven't added any logic to your function, and looking at the output of it, then we can see generators are output in the following order:

    generator_2
    generator_1
    generator_4
    generator_3
    

    However, that isn't the order I would make, because generator_4 has more characters than generator_1 and thus should be more "unique" in my eyes.

    With that in mind, I have simplified your code to return the order that I would see would be more "unique":

    def get_ordered_unique_generator(values: dict) -> list:
        return sorted(values, key=lambda x: len(set(values[x])), reverse=True)
    
    for k in get_ordered_unique_generator(generator_alphabets_dict):
        print(f"Key: {k}, Value: {generator_alphabets_dict[k]}")
    

    Output:

    Key: generator_2, Value: ['p', 'q', 'k', 'j', 'l', 'm', 'n', 't']
    Key: generator_4, Value: ['p', 'e', 'm', 'g']
    Key: generator_1, Value: ['p', 'q', 'r']
    Key: generator_3, Value: ['x', 'm']
    

  • answered 2020-06-02 09:12 JMcK

    Most of your errors seem to come down to a confusion in the scope of local and global variables. For example, in your function get_next_best_generator() you have defined the variable best_generator as a string with value ''. You then attempt to reassign the string a new value with the operation best_generator = generator. This is throwing an error. so this needs to either be global or you need to rethink the format you're returning the data in. For example, you could store the variables most_alphabets_count, best_generator_alphabets and best_generator in a list. Then iterate through them when its returned and assign the values IF they've been updated from the last check. here's an example of what I mean based on your code:

    # create lists to store covered alphabets and processed generators
    alphabets_covered=[]
    generator_covered=[]
    # create a dict to store generators and the alphabets that they generate
    generator_alphabets_dict={
        'generator_1':['p','q','r'],
        'generator_2':['p','q','k','j','l','m','n','t'],
        'generator_3':['x','m'],
        'generator_4':['p','e','m','g']
        }
    
    
    def get_next_best_generator():
        '''get the generator that generates the most unique alphabets that have not been covered'''
        most_alphabets_count=0
        best_generator_alphabets=[]
        value_list = []
        print(generator_alphabets_dict)
        #Loop through the dict to find the generator with most unique alphabets
        for generator ,alphabets in generator_alphabets_dict.items():
            unique_alphabets_list = []
            for select_alphabet in alphabets:
                if select_alphabet not in alphabets_covered:
                    unique_alphabets_list.append(select_alphabet)
                    alphabets_covered.append(select_alphabet)
            # update all values if current generator's unique element list is greater than its predecessor's       
            if len(unique_alphabets_list) > most_alphabets_count:
                value_list.append(len(unique_alphabets_list))
                value_list.append(alphabets)
                value_list.append(generator)  
        return value_list
    
    # Loop until all alphabets have been covered
    def mainloop():
        counter = 0
        while len(alphabets_covered) < 13 and len(generator_alphabets_dict.keys()) > 0 and counter < 10:
            counter += 1
            print(f'Loop counter: {counter}')
            try:
                my_values = get_next_best_generator()
                most_alphabets_count = my_values[0] 
                best_generator_alphabets = my_values[1] 
                best_generator = my_values[2] 
            except Exception as e:
                print(f'Error in assigning variables values to most_alphabets_count, best_generator_alphabets, best_generator: {e}')
    
            print(f'best_generator: {best_generator}')
            print(f'most_alphabets_count: {most_alphabets_count}')
            print(f'best_generator_alphabets: {best_generator_alphabets}')
            try:
                if best_generator in generator_alphabets_dict.keys():
                    generator_alphabets_dict.pop((best_generator))
                    generator_covered.append(best_generator)
                else:
                    print(f'key not found in dict')
            except Exception as e:
                print(f"ERROR: {e}\n")
            else:
                print('process complete with no errors\n')
            finally:
                print('process complete')
    
    
    def print_dict(my_dict):
        for k, v in my_dict.items:
            print(f'Current values in dict:\n')
            print(f'key:{k}')
            print(f'value:{v}')
    
    if __name__ == '__main__':
        mainloop()
    

    I'm not sure this solves the greedy algorithm problem, but it does help see where the errors are coming from and what needs fixed. I've put a counter in on the while loop to stop the machine timing out in an infinite loop.

  • answered 2020-06-02 10:39 yash471994

    # create lists to store covered alphabets and processed generators
    
    generator_covered=[]
    # create a dict to store generators and the alphabets that they generate
    generator_alphabets_dict={'generator_1':['p','q','r'], 'generator_2':['p', 'q','k','j','l','m','n','t'],
                              'generator_3':['x','w','b'], 'generator_4':['p','e','m','g']}
    alphabets_set = set(['p','q','r','k','j','l','m','n','t','x','w','b','e','g'])
    
    def get_next_best_generator():
        '''get the generator that generates the most unique alphabets that have not been covered'''
        most_alphabets_count=0
        best_generator_alphabets=[]
        best_generator=''
    
        #Loop through the dict to find the generator with most unique alphabets
        for generator ,alphabets in generator_alphabets_dict.items():
            unique_alphabets_list = []
            generator_alphabets = list(alphabets)
            for selected_alphabet in generator_alphabets :
    
                if selected_alphabet in alphabets_set:
                    unique_alphabets_list.append(selected_alphabet)
    
            # update all values if current generator's unique element list is greater than its predecessor's       
            if len(unique_alphabets_list) > most_alphabets_count:
                most_alphabets_count = len(unique_alphabets_list)
                best_generator_alphabets = unique_alphabets_list
                best_generator = generator
    
        return  most_alphabets_count, best_generator_alphabets , best_generator
    
    # Loop until all alphabets have been covered
    while alphabets_set:
    
    
        most_alphabets_count, best_generator_unique_alphabets, best_generator = get_next_best_generator()
        print(best_generator)
        print(most_alphabets_count)
        print(best_generator_unique_alphabets)
        generator_alphabets_dict.pop(best_generator)
        generator_covered.append(best_generator)
        alphabets_set -= set(best_generator_unique_alphabets)
    

    I used a set of alphabets which was subtracted by the unique elements found in the next best generator and looped while there were elements in the alphabet set.