Module documentation
[create]
The documentation for this module is missing. Click here to create it.
-- <nowiki>
--[[The MIT License (MIT)
Copyright (c) 2015 Robin Gertenbach
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
https://github.com/RobinGertenbach/Lua-Stats
TODO: Type checking
TODO: Sample Size
TODO: Chi-Square test
TODO: Add more quantile algorithms
use: http://127.0.0.1:11774/library/stats/html/quantile.html
]]
--[[ Generics ]]--
local function assertTables(...)
for _, t in pairs({...}) do
assert(type(t) == "table", "Argument must be a table")
end
end
-- Simple map function
local function map(t, f, ...)
assert(t ~= nil, "No table provided to map")
assert(f ~= nil, "No function provided to map")
local output = {}
for i, e in pairs(t) do
output[i] = f(e, ...)
end
return output
end
-- Simple reduce function
local function reduce(t, f)
assert(t ~= nil, "No table provided to reduce")
assert(f ~= nil, "No function provided to reduce")
local result
for i, value in ipairs(t) do
if i == 1 then
result = value
else
result = f(result, value)
end
end
return result
end
-- Returns a table consisting of all values looked up in the tables
local function multiSubscript(i, ...)
assertTables(...)
assert(#{...} > 0, "Must provide at least one table")
return map({...}, function(t, index) return t[index] end, i)
end
-- checks if a value is in a table as a value or key
local function in_table(value, t, key)
assert(type(t) == 'table', "The second argument must be a table")
key = key or false
for i, e in pairs(t) do
if value == (key and i or e) then
return true
end
end
return false
end
-- Concatenates tables and scalars into one list
local function unify(...)
local output = {}
for i, element in ipairs({...}) do
if type(element) == 'number' then
table.insert(output, element)
elseif type(element) == 'table' then
for j, row in ipairs(element) do
table.insert(output, row)
end
end
end
return output
end
-- Integrates a function from start to stop in delta sized steps
local function integral(f, start, stop, delta, ...)
local delta = delta or 1e-5
local area = 0
for i = start, stop, delta do
area = area + f(i, ...) * delta
end
return area
end
-- Integrates a function from a given start value until
-- it fills the absolute area a
local function integralUntil(f, a, start, delta, ...)
assert(f ~= nil, "No function provided")
assert(a > 0, "Target Area must be > 0")
start = start or 0
delta = delta or 0.001
local currentA = 0
while currentA < a do
currentA = currentA + math.abs(f(start, ...) * delta)
start = start + delta
end
return start - delta / 2
end
-- Calculates the factorial of a number n recursively
local function factorial(x)
assert(x >= 0, "x has to be a positive integer or 0")
if (x == 0) then
return 1
elseif (x == 1) then
return x
elseif (x % 1 == 0) then
return x * (factorial(x - 1))
else
return gamma(x - 1)
end
end
-- finds the x neded for a givnen function f
-- Need to find a way to pass min and max bounds for estimator
local function findX(y, f, accuracy, ...)
assert(y ~= nil, "No y value provided")
assert(f ~= nil, "No function provided")
accuracy = accuracy or 0.001
local minX, maxX, yVal, xVal = -100, 100, 0, 0
while (maxX - minX > accuracy) do
yVal = f(xVal, ...)
if (yVal > y) then
maxX = xVal
else
minX = xVal
end
xVal = (maxX + minX) / 2
end
return xVal
end
-- I have no idea what the hell is going on here. Taken from:
-- http://rosettacode.org/wiki/Gamma_function#Lua
local function gamma(x)
local gamma = 0.577215664901
local coeff = -0.65587807152056
local quad = -0.042002635033944
local qui = 0.16653861138228
local set = -0.042197734555571
local function recigamma(z)
return z + gamma * z^2 + coeff * z^3 + quad * z^4 + qui * z^5 + set * z^6
end
local function gammafunc(z)
if z == 1 then return 1
elseif math.abs(z) <= 0.5 then return 1 / recigamma(z)
else return (z - 1) * gammafunc(z - 1)
end
end
return gammafunc(x)
end
-- Beta function
local function beta(x, y)
assert(x > 0, "x must be positive")
assert(y > 0, "y must be positive")
return (gamma(x) * gamma(y)) / gamma(x + y)
end
-- p-value of a quantile q of a probability function f
local function pValue(q, f, ...)
assert(q ~= nil, "pValue needs a q-value")
assert(f ~= nil, "pValue needs a function")
return math.abs(1 - math.abs(f(q, ...) - f(-q, ...)))
end
--[[ Basic Arithmetic functions needed for aggregate functions ]]--
local function sum(...)
return reduce(unify(...), function(a, b) return a + b end)
end
local function count(...)
return #unify(...)
end
local function mean(...)
return sum(...) / count(...)
end
local function sumSquares(...)
local data = unify(...)
local mu = mean(data)
return sum(map(data, function(x) return (x - mu)^2 end))
end
local function var(...)
return sumSquares(...) / (count(...) - 1)
end
local function varPop(...)
return sumSquares(...) / count(...)
end
local function sd(...)
return math.sqrt(var(...))
end
local function sdPop(...)
return math.sqrt(varPop(...))
end
local function min(...)
local data = unify(...)
table.sort(data)
return data[1]
end
local function max(...)
local data = unify(...)
table.sort(data)
return data[#data]
end
-- Covariance
local function cov(t1, t2)
assertTables(t1, t2)
assert(#t1 == #t2, "The tables have to be of equal length")
mu1, mu2 = mean(t1), mean(t2)
dev1 = map(t1, function(x) return x-mu1 end)
dev2 = map(t2, function(x) return x-mu2 end)
ss = 0
for i, v in ipairs(t1) do
ss = ss + v * t2[i]
end
return ss / (#t1 - 1)
end
--Correlation
local function cor(t1, t2, method)
assertTables(t1, t2)
assert(#t1 == #t2, "The tables have to be of equal length")
method = method or "Pearson"
if method == "Pearson" then
return cov(t1, t2) / (sd(t1) * sd(t2))
else
return nil -- Implement spearman and kendall
end
end
local function unique(t)
assertTables(t)
local uniqueValues = {}
for _, value in ipairs(t) do
if in_table(value, uniqueValues) == false then
table.insert(uniqueValues, value)
end
end
return uniqueValues
end
local function frequency(t)
assertTables(t)
local counts = {}
for _, value in ipairs(t) do
if in_table(value, counts, true) then
counts[value] = counts[value] + 1
else
counts[value] = 1
end
end
return counts
end
-- Pooled standard deviation for two already calculated standard deviations
-- Seconds return is the new sample size adjusted for degrees of freedom
local function sdPooled(sd1, n1, sd2, n2)
return math.sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
end
-- Calculates the quantile
-- Currently uses the weighted mean of the two values the position is inbetween
local function quantile(t, q)
assert(t ~= nil, "No table provided to quantile")
assert(q >= 0 and q <= 1, "Quantile must be between 0 and 1")
table.sort(t)
local position = #t * q + 0.5
local mod = position % 1
if position < 1 then
return t[1]
elseif position > #t then
return t[#t]
elseif mod == 0 then
return t[position]
else
return mod * t[math.ceil(position)] +
(1 - mod) * t[math.floor(position)]
end
end
local function median(t)
assert(t ~= nil, "No table provided to median")
return quantile(t, 0.5)
end
local function quartile(t, i)
local quartiles = {0, 0.25, 0.5, 0.75, 1}
assert(in_table(i, {1,2,3,4,5}), "i must be an integer between 1 and 5")
if i == 1 then
return min(t)
elseif i== 5 then
return max(t)
else
return quantile(t, quartiles[i])
end
end
--[[ Normal Distribution Functions ]]--
-- Probability Density function of a Normal Distribution
local function dNorm(x, mu, sd)
assert(type(x) == "number", "x must be a number")
local mu = mu or 0
local sd = sd or 1
return (1 /
(sd * math.sqrt(2 * math.pi))) *
math.exp(-(((x - mu) * (x - mu)) / (2 * sd^2)))
end
-- CDF of a normal distribution
local function pNorm(q, mu, sd, accuracy)
assert(type(q) == "number", "q must be a number")
mu = mu or 0
sd = sd or 1
accuracy = accuracy or 1e-3
return 0.5 +
(q > 0 and 1 or -1) * integral(dNorm, 0, math.abs(q), accuracy, mu, sd)
end
-- Quantile function of the Normal distribution
-- Calculates the Z-Score based on the cumulative probabiltiy
local function qNorm(p, accuracy)
accuracy = accuracy or 0.01
return integralUntil(dNorm, p, -20, 0.0001)
end
--[[ T-Distribution Functions ]]--
-- Probability Density function of a T Distribution
local function dT(x, df)
return gamma((df + 1) / 2) /
(math.sqrt(df * math.pi) * gamma(df / 2)) *
(1 + x^2 / df)^(-(df + 1) / 2)
end
-- CDF of the T-Distribution
local function pT(q, df, accuracy)
assert(df > 0, "More at least 1 degree of freedom needed")
accuracy = accuracy or 1e-4
return 0.5 + (q > 0 and 1 or -1) * integral(dT, 0, math.abs(q), accuracy, df)
end
-- Finds T-Ratio for a given p-value.
local function qT(p, df, accuracy)
accuracy = accuracy or 0.01
return integralUntil(dT, p, -20, 0.0001, df)
end
--[[ Chi-Square Distribution Functions ]]--
-- Probability density of the chi square distribution.
local function dChisq(x, df)
return 1 / (2^(df / 2) * gamma(df / 2)) * x^(df / 2 - 1) * math.exp(-x / 2)
end
-- CDF of the Chi square distribution.
local function pChisq(q, df)
return integral(dChisq, 0, q, 1e-4, df)
end
-- Quantile function of the Chi-Square Distribution.
local function qChisq(p, df, accuracy)
accuracy = accuracy or 0.001
return integralUntil(dChisq, p, 0, 0.001, df)
end
--[[ F Distribution Functions ]]--
-- Probability Density of the F distribution
local function dF(x, df1, df2)
return math.sqrt(((df1 * x)^df1 * df2^df2) /
((df1 * x + df2)^(df1 + df2))) /
(x * beta(df1 / 2, df2 / 2))
end
-- CDF of the F distribution
local function pF(x, df1, df2)
return integral(dF, 0.0001, x, 1e-4, df1, df2)
end
-- Quantile function of the F Distribution
local function qF(p, df1, df2, accuracy)
if p == 0 then
return 0
elseif p == 1 then
return math.huge
else
assert(p > 0 and p < 1, "p must be between 0 and 1")
end
accuracy = accuracy or 0.001
return integralUntil(dF, p, 0.00001, accuracy, df1, df2)
end
--[[ Tests ]]--
-- Calculates the Z-Score for one or two samples.
-- Assumes non equivalent Variance.
local function zValue(y1, sd1, n1, y2, sd2, n2, sameVar)
assert(sd1 > 0, "Standard Deviation has to be positive")
assert(n1 > 1, "Sample Size has to be at least 2")
y2 = y2 or 0
sameVar = sameVar or false
local z
local d = y1 - y2
if n2 == nil then
z = d / (sd1 / math.sqrt(n1))
elseif sameVar then
z = d / math.sqrt(sd1^2 / n1 + sd2^2 / n2)
else
z = d / (sdPooled(sd1, n1, sd2, n2) * math.sqrt(1 / n1 + 1 / n2))
end
return z
end
-- Performs a z-test on two tables and returns z-statistic.
local function zTest(t1, t2, sameVar)
assertTables(t1, t2)
return zValue(mean(t1), sd(t1), #t1, mean(t2), sd(t2), #t2, sameVar)
end
-- Calculates the p-value of a two sample zTest.
local function zTestP(t1, t2, sameVar)
assertTables(t1, t2)
return pValue(zTest(t1, t2, sameVar), pNorm)
end
-- Calculates the t-value of one or two means, assuming non equivalent variance.
local function tValue(y1, sd1, n1, y2, sd2, n2, sameVar)
return zValue(v1, sd1, n1, v2, sd2, n2, sameVar)
end
-- Performs a t-test on two tables and returns t-statistic.
local function tTest(t1, t2, sameVar)
return zTest(t1, t2, sameVar)
end
-- Calculates the p-value of a two sample tTest.
local function tTestP(t1, t2, sameVar)
assertTables(t1, t2)
return pValue(zTest(t1, t2, sameVar), pT, count(t1, t2) - 2)
end
-- Returns the f-value of two variances
local function fValue(s1, s2)
return s1 / s2
end
-- Performs an f-test on two tables
local function fTest(t1, t2)
assertTables(t1, t2)
return var(t1) / var(t2)
end
-- Returns the p-value of an f-test on two tables
local function fTestP(t1, t2)
assertTables(t1, t2)
return pValue(fTest(t1, t2), pF, #t1, #t2)
end