The median of a set is the number at the central position of the ordered version
ID: 3819048 • Letter: T
Question
The median of a set is the number at the central position of the ordered version of the set (in simple words). If the set has even size (therefore not having a unique central number), its median is the average of the 2 central numbers. For example, the median of {5, 2, 7, 4, 6} is 5, and the median of {5, 2, 7, 4, 6, 8} is 5.5 (because (5+6)/2 = 5.5). (i) Design an efficient algorithm (pseudocode or a succinct verbal description) that keeps track of the median (basically calculates it at every change of the set) in a very large (and constantly changing) set of numbers. (ii) What is the worst-case running time and space for your solution? Explain.
*Tip: An efficient solution makes use of a more suitable data structure.
Explanation / Answer
(i) Efficient algorithm
Various data structures can help us with this problem, namely arrays, binary search trees, linked lists, min heaps and max heaps, etc.
We can use a max heap on the left side to hold the elements that are less than the effective median, and a min heap on the right side to hold the elements that are greater than the effective median.
After processing an incoming element, the numbers of elements in the two heaps differ by at most 1. When both heaps contain the same number of elements, we take the average of the two heaps' root values as the effective median. When the heaps are not balanced, we take the effective median from the root of the heap containing more elements.
Algorithm
#include <iostream> // cout / endl used by printMedian
using namespace std;
// Capacity, in ints, of the backing array allocated for each heap
#define MAX_HEAP_SIZE (128)
// Number of elements in a true array a (not meaningful for a pointer)
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// Exchanges the values of the two referenced integers.
inline void Exch(int &a, int &b)
{
    const int saved = b;
    b = a;
    a = saved;
}
// Greater and Smaller serve as the heap comparators.
// Greater: true only when a is strictly larger than b.
bool Greater(int a, int b)
{
    return b < a;
}
// Smaller: true only when a is strictly less than b.
bool Smaller(int a, int b)
{
    return b > a;
}
// Returns the integer mean of a and b, truncated toward zero
// (e.g. Average(5, 6) == 5 and Average(-3, 2) == 0).
// The sum is formed in long long so that a + b cannot overflow int when both
// operands are large and of the same sign; the halved result always lies
// between a and b, so narrowing it back to int is lossless.
int Average(int a, int b)
{
    const long long sum = static_cast<long long>(a) + b;
    return static_cast<int>(sum / 2);
}
// Three-way comparison of a against b:
//   -1 when a <  b  (left heap holds fewer elements than right)
//    0 when a == b  (heaps are balanced)
//    1 when a >  b  (left heap holds more elements than right)
int Signum(int a, int b)
{
    // Each comparison yields 0 or 1, so the difference is -1, 0 or 1.
    return (a > b) - (a < b);
}
// Heap implementation
// Array-backed binary heap whose ordering is chosen by a comparator, so the
// same code serves both MaxHeap and MinHeap. The element order is such that
// comp(parent, child) is never false for distinct-ordered keys: whichever key
// the comparator prefers sits closer to the root.
//
// NOTE: the destructor delete[]s the array and no copy operations are
// defined, so a copied Heap would release the same array twice. Pass heaps
// by reference or pointer.
class Heap
{
public:
    // b - backing array; this object takes ownership and releases it with
    //     delete[]. It is expected to hold MAX_HEAP_SIZE ints, because that
    //     is the bound insertHelper checks against.
    // c - comparator; c(x, y) == true means x must be nearer the root than y.
    Heap(int *b, bool (*c)(int, int)) : A(b), comp(c), heapSize(-1)
    {
    }

    // Frees the backing array (delete[] on a null pointer is a no-op).
    virtual ~Heap()
    {
        delete[] A;
    }

    // We need only these four interfaces of Heap ADT
    virtual bool Insert(int e) = 0;
    virtual int GetTop() = 0;
    virtual int ExtractTop() = 0;
    virtual int GetCount() = 0;

protected:
    // Index arithmetic for a heap stored from array location 0.
    int left(int i)
    {
        return 2 * i + 1;
    }

    int right(int i)
    {
        return 2 * (i + 1);
    }

    // Parent index of i, or -1 when i is the root (or not a valid index).
    int parent(int i)
    {
        if (i <= 0)
        {
            return -1;
        }
        return (i - 1) / 2;
    }

    // Heap array
    int *A;
    // Comparator
    bool (*comp)(int, int);
    // Index of the last used slot; -1 when the heap is empty
    int heapSize;

    // Returns the root element, or -1 when the heap is empty.
    // The -1 sentinel cannot be told apart from a stored key of -1, so
    // callers should consult count() first if that matters.
    int top(void)
    {
        return (heapSize >= 0) ? A[0] : -1;
    }

    // Returns number of elements in heap
    int count()
    {
        return heapSize + 1;
    }

    // Percolates the element at index i toward the root while the comparator
    // prefers it over its parent. Used after appending a new key.
    void heapify(int i)
    {
        for (int p = parent(i); p >= 0 && comp(A[i], A[p]); i = p, p = parent(i))
        {
            Exch(A[i], A[p]);
        }
    }

    // Pushes the element at index i away from the root until neither child
    // is preferred over it by the comparator.
    void siftDown(int i)
    {
        for (;;)
        {
            int best = i;
            const int l = left(i);
            const int r = right(i);
            if (l <= heapSize && comp(A[l], A[best]))
            {
                best = l;
            }
            if (r <= heapSize && comp(A[r], A[best]))
            {
                best = r;
            }
            if (best == i)
            {
                return;
            }
            Exch(A[i], A[best]);
            i = best;
        }
    }

    // Removes and returns the root, or returns -1 when the heap is empty.
    int deleteTop()
    {
        int del = -1;
        if (heapSize > -1)
        {
            del = A[0];
            // Move the last element into the root and shrink the heap. That
            // element may be out of order with respect to BOTH subtrees, so
            // it has to be sifted down from the root; percolating up from
            // the vacated slot's parent does not restore the heap property.
            A[0] = A[heapSize];
            heapSize--;
            siftDown(0);
        }
        return del;
    }

    // Appends key and restores heap order.
    // Returns false, leaving the heap unchanged, when there is no array or
    // all MAX_HEAP_SIZE slots are in use.
    bool insertHelper(int key)
    {
        // heapSize is the last used index, so the next write goes to
        // heapSize + 1, which must stay below the capacity.
        if (!A || heapSize + 1 >= MAX_HEAP_SIZE)
        {
            return false;
        }
        heapSize++;
        A[heapSize] = key;
        heapify(heapSize);
        return true;
    }
};
// Specilization of Heap to define MaxHeap
class MaxHeap : public Heap
{
private:
public:
MaxHeap() : Heap(new int[MAX_HEAP_SIZE], &Greater) { }
~MaxHeap() { }
// Wrapper to return root of Max Heap
int GetTop()
{
return top();
}
// Wrapper to delete and return root of Max Heap
int ExtractTop()
{
return deleteTop();
}
// Wrapper to return # elements of Max Heap
int GetCount()
{
return count();
}
// Wrapper to insert into Max Heap
bool Insert(int key)
{
return insertHelper(key);
}
};
// Specilization of Heap to define MinHeap
class MinHeap : public Heap
{
private:
public:
MinHeap() : Heap(new int[MAX_HEAP_SIZE], &Smaller) { }
~MinHeap() { }
// Wrapper to return root of Min Heap
int GetTop()
{
return top();
}
// Wrapper to delete and return root of Min Heap
int ExtractTop()
{
return deleteTop();
}
// Wrapper to return # elements of Min Heap
int GetCount()
{
return count();
}
// Wrapper to insert into Min Heap
bool Insert(int key)
{
return insertHelper(key);
}
};
// Function implementing algorithm to find median so far.
int getMedian(int e, int &m, Heap &l, Heap &r)
{
// Are heaps balanced? If yes, sig will be 0
int sig = Signum(l.GetCount(), r.GetCount());
switch(sig)
{
case 1: // There are more elements in left (max) heap
if( e < m ) // current element fits in left (max) heap
{
// Remore top element from left heap and
// insert into right heap
r.Insert(l.ExtractTop());
// current element fits in left (max) heap
l.Insert(e);
}
else
{
// current element fits in right (min) heap
r.Insert(e);
}
// Both heaps are balanced
m = Average(l.GetTop(), r.GetTop());
break;
case 0: // The left and right heaps contain same number of elements
if( e < m ) // current element fits in left (max) heap
{
l.Insert(e);
m = l.GetTop();
}
else
{
// current element fits in right (min) heap
r.Insert(e);
m = r.GetTop();
}
break;
case -1: // There are more elements in right (min) heap
if( e < m ) // current element fits in left (max) heap
{
l.Insert(e);
}
else
{
// Remove top element from right heap and
// insert into left heap
l.Insert(r.ExtractTop());
// current element fits in right (min) heap
r.Insert(e);
}
// Both heaps are balanced
m = Average(l.GetTop(), r.GetTop());
break;
}
// No need to return, m already updated
return m;
}
void printMedian(int A[], int size)
{
int m = 0; // effective median
Heap *left = new MaxHeap();
Heap *right = new MinHeap();
for(int i = 0; i < size; i++)
{
m = getMedian(A[i], m, *left, *right);
cout << m << endl;
}
// C++ more flexible, ensure no leaks
delete left;
delete right;
}
(ii) Time complexity
1) max heap and min heap
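Each update costs O(log n) in the worst case (one heap insertion plus at most one root extraction to rebalance), and the median is then read from the heap roots in O(1). Processing N elements therefore takes O(N log N) in total, due to heap insertions/deletions, and the two heaps together use O(n) space.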
2) Arrays
The array should be kept sorted, because we are going to read its median. Even though it only costs O(log n) time to find the insertion position with binary search, it costs O(n) time to insert a number into a sorted array, because O(n) numbers have to be moved if there are n numbers in the array. Getting the median is very efficient, since it only takes O(1) time to access a number in an array by index.
3) linked list
It takes O(n) time to find the appropriate position to insert a new number. Additionally, the time to get the median can be optimized to O(1) if we maintain two pointers that point to the central one or two elements.
4)Binary search tree
A binary search tree costs O(log n) on average to insert a new node. However, the time complexity is O(n) in the worst case, when the numbers are inserted in sorted (increasing or decreasing) order. To get the median from a binary search tree, each node needs auxiliary data recording the number of nodes in its subtree. Finding the median node then also takes O(log n) time on average, but O(n) time in the worst case.
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.