# Consistancy of pairs in python

I have two lists of the same length, and I wish to find out what percentage of all possible pairs of list indices' values have the same relationship for the two lists, in my case greater than (or sign(list[index_1] - list[index_2])).

Here is my slow implementation:

``````from itertools import combinations
import random
import numpy as np

values_lists = []
for i in range(4):
value_list = []
for j in range(50):
value_list.append(random.random())
values_lists.append(value_list)
#

total = 0.
n = 0

for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
total += 1

n += 1

print(total / n)
``````

I'm wondering if anyone has a faster solution, as this takes some time.

Precomputing the signs for each list (and not using NumPy) makes it about 14 times faster (`solution2`), and doing the same with mostly NumPy makes it about 48 times faster (`solution3`).

``````11.52 ms  solution1
0.82 ms  solution2
0.25 ms  solution3

11.35 ms  solution1
0.81 ms  solution2
0.24 ms  solution3

11.42 ms  solution1
0.83 ms  solution2
0.26 ms  solution3
``````

Code (Try it online!):

``````def solution1(values_lists):
total = 0.
n = 0

for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(list_2[index_2] - list_2[index_1]):
total += 1
n += 1

def solution2(values_lists):
signs_lists = [
[-1 if a < b else 1 if b < a else 0
for a, b in combinations(lst, 2)]
for lst in values_lists
]
total = 0.
n = 0
for signs_1, signs_2 in combinations(signs_lists, 2):
n += len(signs_1)
for sign_1, sign_2 in zip(signs_1, signs_2):
if sign_1 == sign_2:
total += 1

def solution3(values_lists):
triu_indices = np.triu_indices(len(values_lists), 1)
signs_arrays = [
prod_signs[triu_indices]
for lst in values_lists
for a in [np.array(lst)]
for prod_signs in [np.sign(np.subtract.outer(a, a))]
]
total = 0
n = 0
for signs1, signs2 in combinations(signs_arrays, 2):
n += signs1.size
total += (signs1 == signs2).sum()

funcs = solution1, solution2, solution3

from timeit import repeat
from itertools import combinations
from operator import eq
import random
import numpy as np

values_lists = []
for i in range(4):
value_list = []
for j in range(50):
value_list.append(random.random())
values_lists.append(value_list)

for func in funcs:
print(func(values_lists))

for _ in range(3):
print()
for func in funcs:
t = min(repeat(lambda: func(values_lists), number=10)) / 10
print('%5.2f ms ' % (t * 1e3), func.__name__)
``````

I think the best help I can offer is with formatting and organization. You should break your functions into pieces that operate independently.

I also simplified your nested list builder to use a list comprehension.

This doesn't address your code speed concerns, but I couldn't find anything slow when I run the script in question...

``````from itertools import combinations
import random
import numpy as np
from loguru import logger

@logger.catch
def build_nested_lists(inner_list_size=50, outer_list_size=4):
values_lists = []
for ii in range(outer_list_size):
value_list = [random.random() for jj in range(inner_list_size)]
values_lists.append(value_list)
return values_lists

@logger.catch
def handle_nested_lists(
values_lists=None,
):
assert isinstance(values_lists, (list, tuple))
total = 0.0
n = 0

for list_1, list_2 in combinations(values_lists, 2):
for index_1, index_2 in combinations(range(len(list_1)), 2):
if np.sign(list_1[index_2] - list_1[index_1]) == np.sign(
list_2[index_2] - list_2[index_1]
):
total += 1
n += 1

if __name__=="__main__":
print(handle_nested_lists(build_nested_lists(50, 4)))
``````

Expanding on @Kelly Bundy 's answer, you can make `solution2` even faster by summing a generator expression rather than the nested for loop:

``````def generatorway():
signs_lists = (
[-1 if a < b else 1 if b < a else 0
for a, b in combinations(lst, 2)]
for lst in values_lists)
combos = list(combinations(signs_lists, 2))
n = sum(len(i) for i, _ in combos)
total = sum(1 for i, j in combos for k, m in zip(i, j) if k==m)
``````

Speed comparison with `values_lists = [[random.random() for _ in range(500)] for _ in range(4)]`:

``````print(generatorway())
print(solution2())
%timeit generatorway()
%timeit solution2()
``````

Output:

``````0.4931048764195057
0.4931048764195057
66.9 ms ± 58.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
68.6 ms ± 38.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
``````