Implementazioni di algoritmi/Test Chi Quadrato
Con Test Chi Quadrato si intende uno dei test di verifica d'ipotesi usati in statistica che utilizzano la variabile casuale Chi Quadrato per verificare se l'ipotesi nulla è probabilisticamente compatibile con i dati. A seconda delle ipotesi di partenza usate per costruire il test, tali test vengono considerati a volte parametrici e altre volte non parametrici.
/**
* Calculates the chi-square value for N positive integers less than r
* Source: "Algorithms in C" - Robert Sedgewick - pp. 517
* NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
* since it could be wrong in about one out of ten times."
* @param randomNums a uniformly-randomly-generated array of integers
* @param r upper bound for the random range
* @return whether it is likely to be uniformly randomly generated
*/
public static boolean isRandom(int[] randomNums, int r)
{
//According to Sedgewick: "This is valid if N is greater than about 10r"
if (randomNums.length <= 10 * r)
return false;
//PART A: Get frequency of randoms
Map<Integer,Integer> ht = getFrequencies(randomNums);
//PART B: Calculate chi-square - this approach is in Sedgewick
double n_r = (double)randomNums.length / r;
double chiSquare = 0;
for (int v : ht.values())
{
double f = v - n_r;
chiSquare += f * f;
}
chiSquare /= n_r;
//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
//This is valid if N is greater than about 10r"
return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
}
/**
* @param nums an array of integers
* @return a Map, key being the number and value its frequency
*/
private static Map<Integer,Integer> getFrequencies(int[] nums)
{
Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();
for (int x : nums)
{
if (freqs.containsKey(x))
freqs.put(x, freqs.get(x) + 1);
else
freqs.put(x, 1);
}
return freqs;
}
import math # for sqrt
def isRandom(randomNums, r):
""" Calculates the chi-square value for N positive integers less than r
Source: "Algorithms in C" - Robert Sedgewick - pp. 517
NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
since it could be wrong in about one out of ten times." """
#Calculate the number of samples - N
N = len(randomNums)
#According to Sedgewick: "This is valid if N is greater than about 10r"
if N <= 10 * r:
return False
N_r = N / r
chi_square = 0
#PART A: Get frequency of randoms
HT = getFrequencies(randomNums)
#PART B: Calculate chi-square - this approach is in Sedgewick
for v in HT.itervalues():
f = v - N_r
chi_square += f * f
chi_square /= N_r
#PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
#This is valid if N is greater than about 10r"
return abs(chi_square - r) <= 2 * math.sqrt(r)
def getFrequencies(nums):
""" Gets the frequency of occurance of a randomly generated array of integers
Output: A dictionary, key being the random number and value its frequency """
freqs = {}
for x in nums:
if x in freqs:
freqs[x] += 1
else:
freqs[x] = 1
return freqs