Consistancy of pairs in python

I have two lists of the same length, and I wish to find out what percentage of all possible pairs of list indices' values have the same relationship for the two lists, in my case greater than (or sign(list[index_1] - list[index_2])).

Here is my slow implementation:

from itertools import combinations
import random
import numpy as np

values_lists = []
for i in range(4):
    value_list = []
    for j in range(50):
        value_list.append(random.random())
    values_lists.append(value_list)
#

total = 0.
n = 0

for list_1, list_2 in combinations(values_lists, 2):
    for index_1, index_2 in combinations(range(len(list_1)), 2):
        if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
            total += 1
        
        n += 1

print(total / n)

I'm wondering if anyone has a faster solution, as this takes some time.

3 answers

  • answered 2022-05-04 11:02 Kelly Bundy

    Precomputing the signs for each list (and not using NumPy) makes it about 14 times faster (solution2), and doing the same with mostly NumPy makes it about 48 times faster (solution3).

    11.52 ms  solution1
     0.82 ms  solution2
     0.25 ms  solution3
    
    11.35 ms  solution1
     0.81 ms  solution2
     0.24 ms  solution3
    
    11.42 ms  solution1
     0.83 ms  solution2
     0.26 ms  solution3
    

    Code (Try it online!):

    def solution1(values_lists):
        total = 0.
        n = 0
    
        for list_1, list_2 in combinations(values_lists, 2):
            for index_1, index_2 in combinations(range(len(list_1)), 2):
                if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
                    total += 1
                n += 1
        return total / n
    
    def solution2(values_lists):
        signs_lists = [
            [-1 if a < b else 1 if b < a else 0
             for a, b in combinations(lst, 2)]
            for lst in values_lists
        ]
        total = 0.
        n = 0
        for signs_1, signs_2 in combinations(signs_lists, 2):
            n += len(signs_1)
            for sign_1, sign_2 in zip(signs_1, signs_2):
                if sign_1 == sign_2:
                    total += 1
        return total / n
    
    def solution3(values_lists):
        triu_indices = np.triu_indices(len(values_lists[0]), 1)
        signs_arrays = [
            prod_signs[triu_indices]
            for lst in values_lists
            for a in [np.array(lst)]
            for prod_signs in [np.sign(np.subtract.outer(a, a))]
        ]
        total = 0
        n = 0
        for signs1, signs2 in combinations(signs_arrays, 2):
            n += signs1.size
            total += (signs1 == signs2).sum()
        return total / n
    
    funcs = solution1, solution2, solution3
    
    from timeit import repeat
    from itertools import combinations
    from operator import eq
    import random
    import numpy as np
    
    values_lists = []
    for i in range(4):
        value_list = []
        for j in range(50):
            value_list.append(random.random())
        values_lists.append(value_list)
    
    for func in funcs:
        print(func(values_lists))
    
    for _ in range(3):
        print()
        for func in funcs:
            t = min(repeat(lambda: func(values_lists), number=10)) / 10
            print('%5.2f ms ' % (t * 1e3), func.__name__)
    

  • answered 2022-05-04 11:04 Mike Pennington

    I think the best help I can offer is with formatting and organization. You should break your functions into pieces that operate independently.

    I also simplified your nested list builder to use a list comprehension.

    This doesn't address your code speed concerns, but I couldn't find anything slow when I run the script in question...

    from itertools import combinations
    import random
    import numpy as np
    from loguru import logger
    
    @logger.catch
    def build_nested_lists(inner_list_size=50, outer_list_size=4):
        values_lists = []
        for ii in range(outer_list_size):
            value_list = [random.random() for jj in range(inner_list_size)]
            values_lists.append(value_list)
        return values_lists
    
    
    @logger.catch
    def handle_nested_lists(
        values_lists=None,
    ):
        assert isinstance(values_lists, (list, tuple))
        total = 0.0
        n = 0
    
        for list_1, list_2 in combinations(values_lists, 2):
            for index_1, index_2 in combinations(range(len(list_1)), 2):
                if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(
                    list_2[index_2] - list_2[index_1]
                ):
                    total += 1
                n += 1
        return total / n
    
    if __name__=="__main__":
        print(handle_nested_lists(build_nested_lists(50, 4)))
    

  • answered 2022-05-04 12:00 Nin17

    Expanding on @Kelly Bundy 's answer, you can make solution2 even faster by summing a generator expression rather than the nested for loop:

    def generatorway():
        signs_lists = (
            [-1 if a < b else 1 if b < a else 0
             for a, b in combinations(lst, 2)]
            for lst in values_lists)
        combos = list(combinations(signs_lists, 2))
        n = sum(len(i) for i, _ in combos)
        total = sum(1 for i, j in combos for k, m in zip(i, j) if k==m)
        return total/n
    

    Speed comparison with values_lists = [[random.random() for _ in range(500)] for _ in range(4)]:

    print(generatorway())
    print(solution2())
    %timeit generatorway()
    %timeit solution2()
    

    Output:

    0.4931048764195057
    0.4931048764195057
    66.9 ms ± 58.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    68.6 ms ± 38.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    

How many English words
do you know?
Test your English vocabulary size, and measure
how many words do you know
Online Test
Powered by Examplum