Test Driven:
def test_weightedMedian_n(self):
    """Check weightedMedian_n against weightedMedian_nlgn on random input.

    Each of the 10 rounds builds a list of distinct, non-negative integer
    weights that add up to 100, wraps them in Weighter objects whose
    position is the weight's index in that list, shuffles them, and
    asserts that both implementations return an equal element.
    """
    for _ in range(10):
        weights = []
        remaining = 100  # weight that has not been handed out yet
        while remaining > 5:
            # Keep the early draws small so that several weights are produced.
            upper = remaining // 4 if remaining > 40 else remaining
            unused = [value for value in range(upper + 1)
                      if value not in weights]
            if not unused:
                # Every value in [0, upper] is already taken.  Retrying
                # randint() until an unused value shows up would spin
                # forever here, so leave the rest to the merge step below.
                break
            drawn = unused[randint(0, len(unused) - 1)]
            weights.append(drawn)
            remaining -= drawn

        # Place what is left as one more weight.  If that value is already
        # present, absorb the duplicate (which doubles the value) and try
        # again, so the weights stay distinct and still add up to 100.
        while remaining in weights:
            weights.remove(remaining)
            remaining *= 2
        weights.append(remaining)

        weighters = [Weighter(position, weight)
                     for position, weight in enumerate(weights)]
        shuffle(weighters)

        # The reference result is computed first, on a shallow copy, so that
        # its sorting does not reorder the list handed to weightedMedian_n.
        contrast = weightedMedian_nlgn(weighters[:])
        median = weightedMedian_n(weighters)
        self.assertEqual(median, contrast)
Solutions:
class Weighter(object):
    """An element with a position ``x`` and a weight ``w``.

    Every comparison looks at ``x`` only; ``w`` never takes part in
    equality or ordering.  Comparing with an object that is not a
    ``Weighter`` returns ``NotImplemented`` so that Python can fall back to
    its default handling (``==`` gives ``False``, ``<`` raises
    ``TypeError``) instead of failing with an ``AttributeError``.
    """

    # Defining __eq__ already makes instances unhashable; spell it out so
    # the behaviour is visible to readers.
    __hash__ = None

    def __init__(self, x, w):
        super().__init__()
        self.x = x
        self.w = w

    def __repr__(self):
        return f'({self.x}, {self.w})'

    def __eq__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x == other.x

    def __ne__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x != other.x

    def __lt__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x < other.x

    def __le__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x <= other.x

    def __gt__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x > other.x

    def __ge__(self, other):
        if not isinstance(other, Weighter):
            return NotImplemented
        return self.x >= other.x
def weightedMedian_nlgn(array: list, total: float = 100) -> "Weighter":
    """Return the weighted (lower) median of ``array`` by sorting it first.

    ``heapSort`` is called on ``array`` and its return value is ignored, so
    it is expected to sort the list in place (ascending, presumably by the
    elements' own ordering -- confirm against ``heapSort``).  The sorted
    elements are then scanned while their weights are accumulated, and the
    first element at which the accumulated weight reaches half of ``total``
    is returned.

    Args:
        array: Elements exposing a weight attribute ``w``.  The list is
            passed to ``heapSort`` and may be reordered by it.
        total: The value the weights are expected to add up to.  Defaults
            to 100, the convention used by the rest of this file.

    Returns:
        The weighted median, or ``None`` when the scan ends without the
        accumulated weight reaching half of ``total`` (for example when
        ``array`` is empty).
    """
    heapSort(array)
    half = total / 2
    weight_before = 0  # combined weight of the elements scanned so far
    for weighter in array:
        if weight_before < half and weight_before + weighter.w >= half:
            return weighter
        weight_before += weighter.w
    return None
def weightedMedian_n(array: list, total: float = 100) -> "Weighter":
    """Return the weighted (lower) median of ``array`` using selection.

    The element at the middle rank is located with ``select`` and the
    weights in front of it are summed.  If the weighted median lies on one
    side, the weight of the other side is folded into the pivot and the
    search continues on the remaining half plus the pivot, so every
    sub-problem still carries a combined weight of ``total``.

    ``select`` is defined elsewhere; this function relies on it returning
    the element of rank ``medianIndex`` and leaving it at
    ``array[medianIndex]`` with the smaller elements in front of it
    (assumption -- confirm against ``select``).

    Args:
        array: Elements exposing a position ``x`` and a weight ``w``.  The
            list is handed to ``select`` and may be reordered by it; the
            weights of the elements are left unchanged on return.
        total: The value the weights are expected to add up to.  Defaults
            to 100, the convention used by the rest of this file.

    Returns:
        The weighted median, or ``None`` for an empty ``array``.
    """
    if len(array) == 0:
        return None
    if len(array) == 1:
        return array[0]
    if len(array) == 2:
        # Brute force is required here: the general step below would
        # recurse on the very same two elements forever.
        first, second = array
        if first.w == second.w:
            # Each element holds exactly half of the weight.  The lower
            # median is the one with the smaller position, whatever order
            # the two happen to be stored in.
            return first if first.x <= second.x else second
        return first if first.w > second.w else second

    half = total / 2
    medianIndex = len(array) // 2
    pivot = select(array, 0, len(array) - 1, medianIndex)
    weight_before = sum(array[i].w for i in range(medianIndex))
    if weight_before < half and weight_before + pivot.w >= half:
        return pivot

    # The pivot temporarily stands in for the whole half that is dropped.
    # Its own weight is restored afterwards so the caller's objects are not
    # left modified.
    original_weight = pivot.w
    try:
        if weight_before >= half:
            # Median is on the left: the pivot absorbs everything to its right.
            pivot.w = total - weight_before
            return weightedMedian_n(array[:medianIndex + 1], total)
        # Median is on the right: the pivot absorbs everything to its left.
        pivot.w += weight_before
        return weightedMedian_n(array[medianIndex:], total)
    finally:
        pivot.w = original_weight
The first algorithm runs in Θ(n lg n): it sorts the elements with a sort whose worst-case running time is Θ(n lg n), such as heap sort or merge sort, and then traverses the sorted array, accumulating the weights, until it reaches the weighted median.
The second algorithm runs in Θ(n): it uses Θ(n) order-statistic selection to find the median, which divides the array into two parts of roughly equal size, and then sums the weights on the left side to decide which part contains the weighted median. The cost is therefore
T(n) <= T(n/2) + cn <= cn(1 + 1/2 + 1/4 + ...) <= 2cn, so T(n) = Θ(n)
Finally, for convenience I use a total weight of 100 instead of 1, so that all the weights in the array can be integers.
Note that handling len(array) == 1 and len(array) == 2 by brute force is essential. Without it, an array with weights such as [0.82, 0.18] would recurse forever: the second element is selected as the median, the weight to its left (0.82) is not less than 1/2, and so the algorithm recurses on the left part together with the median, which is the same two-element array again.