Fandom Developers Wiki
Register
Advertisement
Documentation icon Module documentation

The documentation for this module is missing. Click here to create it.

-- <nowiki>
--[[The MIT License (MIT)

Copyright (c) 2015 Robin Gertenbach

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
https://github.com/RobinGertenbach/Lua-Stats
TODO: Type checking
TODO: Sample Size
TODO: Chi-Square test
TODO: Add more quantile algorithms 
      use: http://127.0.0.1:11774/library/stats/html/quantile.html
]]

--[[ Generics ]]--

local function assertTables(...)
  for _, t in pairs({...}) do
    assert(type(t) == "table", "Argument must be a table")
  end
end


-- Simple map function
local function map(t, f, ...)
  assert(t ~= nil, "No table provided to map")
  assert(f ~= nil, "No function provided to map")
  local output = {}

  for i, e in pairs(t) do
    output[i] = f(e, ...)
  end
  return output
end 


-- Simple reduce function
local function reduce(t, f)
  assert(t ~= nil, "No table provided to reduce")
  assert(f ~= nil, "No function provided to reduce")
  local result

  for i, value in ipairs(t) do
    if i == 1 then
      result = value
    else
      result = f(result, value)
    end 
  end
  return result
end 


-- Returns a table consisting of all values looked up in the tables
local function multiSubscript(i, ...)
  assertTables(...)
  assert(#{...} > 0, "Must provide at least one table")
  return map({...}, function(t, index) return t[index] end, i)
end 


-- checks if a value is in a table as a value or key
local function in_table(value, t, key)
  assert(type(t) == 'table', "The second argument must be a table")
  key = key or false
  for i, e in pairs(t) do
    if value == (key and i or e) then
      return true
    end
  end
  return false
end


-- Concatenates tables and scalars into one list
local function unify(...)
  local output = {}
  for i, element in ipairs({...}) do
    if type(element) == 'number' then
      table.insert(output, element)
    elseif type(element) == 'table' then
      for j, row in ipairs(element) do
        table.insert(output, row)
      end
    end 
  end 
  return output
end 


-- Integrates a function from start to stop in delta sized steps
local function integral(f, start, stop, delta, ...)
  local delta = delta or 1e-5
  local area = 0
  for i = start, stop, delta do
    area = area + f(i, ...) * delta
  end
  return area
end


-- Integrates a function from a given start value until
-- it fills the absolute area a
local function integralUntil(f, a, start, delta, ...)
  assert(f ~= nil, "No function provided")
  assert(a > 0, "Target Area must be > 0")
  start = start or 0
  delta = delta or 0.001
  local currentA = 0

  while currentA < a do
    currentA = currentA + math.abs(f(start, ...) * delta)
    start = start + delta
  end
  return start - delta / 2
end


-- Calculates the factorial of a number n recursively
local function factorial(x)
  assert(x >= 0, "x has to be a positive integer or 0")
  if (x == 0) then
    return 1
  elseif (x == 1) then
    return x
  elseif (x % 1 == 0) then
    return x * (factorial(x - 1))
  else 
    return gamma(x - 1)
  end
end

-- finds the x neded for a givnen function f
-- Need to find a way to pass min and max bounds for estimator
local function findX(y, f, accuracy, ...)
  assert(y ~= nil, "No y value provided")
  assert(f ~= nil, "No function provided")
  accuracy = accuracy or 0.001

  local minX, maxX, yVal, xVal = -100, 100, 0, 0

  while (maxX - minX > accuracy) do 
    yVal = f(xVal, ...)
    if (yVal > y) then
      maxX = xVal
    else
      minX = xVal
    end 
    xVal = (maxX + minX) / 2
  end 
  return xVal
end 


-- I have no idea what the hell is going on here. Taken from:
-- http://rosettacode.org/wiki/Gamma_function#Lua
local function gamma(x)
  local gamma =  0.577215664901
  local coeff = -0.65587807152056
  local quad  = -0.042002635033944
  local qui   =  0.16653861138228
  local set   = -0.042197734555571

  local function recigamma(z)
    return z + gamma * z^2 + coeff * z^3 + quad * z^4 + qui * z^5 + set * z^6
  end
   
  local function gammafunc(z)
    if z == 1 then return 1
    elseif math.abs(z) <= 0.5 then return 1 / recigamma(z)
    else return (z - 1) * gammafunc(z - 1)
    end
  end

  return gammafunc(x)
end 


-- Beta function 
local function beta(x, y)
  assert(x > 0, "x must be positive")
  assert(y > 0, "y must be positive")
  return (gamma(x) * gamma(y)) / gamma(x + y)
end


-- p-value of a quantile q of a probability function f
local function pValue(q, f, ...)
  assert(q ~= nil, "pValue needs a q-value")
  assert(f ~= nil, "pValue needs a function")
  return math.abs(1 - math.abs(f(q, ...) - f(-q, ...)))
end


--[[ Basic Arithmetic functions needed for aggregate functions ]]--
local function sum(...) 
  return reduce(unify(...), function(a, b) return a + b end)
end 


local function count(...) 
  return #unify(...) 
end 


local function mean(...) 
  return sum(...) / count(...) 
end 


local function sumSquares(...)
  local data = unify(...)
  local mu = mean(data)

  return sum(map(data, function(x) return (x - mu)^2 end))  
end 


local function var(...)
  return sumSquares(...) / (count(...) - 1)
end


local function varPop(...)
  return sumSquares(...) / count(...)
end 


local function sd(...)
  return math.sqrt(var(...))
end 


local function sdPop(...)
  return math.sqrt(varPop(...))
end


local function min(...)
  local data = unify(...)
  
  table.sort(data)
  return data[1]
end 


local function max(...)
  local data = unify(...)
  table.sort(data)
  return data[#data]
end 


-- Covariance
local function cov(t1, t2)
  assertTables(t1, t2)
  assert(#t1 == #t2, "The tables have to be of equal length")

  mu1, mu2 = mean(t1), mean(t2)
  dev1 = map(t1, function(x) return x-mu1 end)
  dev2 = map(t2, function(x) return x-mu2 end)
  
  ss = 0
  for i, v in ipairs(t1) do
    ss = ss + v * t2[i]
  end 

  return ss / (#t1 - 1)
end 


--Correlation
local function cor(t1, t2, method)
  assertTables(t1, t2)
  assert(#t1 == #t2, "The tables have to be of equal length")
  method = method or "Pearson"

  if method == "Pearson" then
    return cov(t1, t2) / (sd(t1) * sd(t2))
  else 
    return nil -- Implement spearman and kendall
  end 
end

local function unique(t)
  assertTables(t)
  local uniqueValues = {}
  for _, value in ipairs(t) do
    if in_table(value, uniqueValues) == false then
      table.insert(uniqueValues, value)
    end 
  end
  return uniqueValues
end


local function frequency(t)
  assertTables(t)
  local counts = {}
  for _, value in ipairs(t) do
    if in_table(value, counts, true) then
      counts[value] = counts[value] + 1
    else 
      counts[value] = 1
    end
  end 
  return counts
end


-- Pooled standard deviation for two already calculated standard deviations
-- Seconds return is the new sample size adjusted for degrees of freedom
local function sdPooled(sd1, n1, sd2, n2)
  return math.sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
end 


-- Calculates the quantile
-- Currently uses the weighted mean of the two values the position is inbetween
local function quantile(t, q)
  assert(t ~= nil, "No table provided to quantile")
  assert(q >= 0 and q <= 1, "Quantile must be between 0 and 1")
  table.sort(t)
  local position = #t * q + 0.5
  local mod = position % 1

  if position < 1 then 
    return t[1]
  elseif position > #t then
    return t[#t]
  elseif mod == 0 then
    return t[position]
  else
    return mod * t[math.ceil(position)] +
           (1 - mod) * t[math.floor(position)] 
  end 
end 


local function median(t)
  assert(t ~= nil, "No table provided to median")
  return quantile(t, 0.5)
end


local function quartile(t, i)
  local quartiles = {0, 0.25, 0.5, 0.75, 1}
  assert(in_table(i, {1,2,3,4,5}), "i must be an integer between 1 and 5")
  if i == 1 then 
    return min(t)
  elseif i== 5 then
    return max(t)
  else 
    return quantile(t, quartiles[i])
  end 
end 


--[[ Normal Distribution Functions ]]--

-- Probability Density function of a Normal Distribution
local function dNorm(x, mu, sd)
  assert(type(x) == "number", "x must be a number")
  local mu = mu or 0
  local sd = sd or 1

  return (1 / 
         (sd * math.sqrt(2 * math.pi))) * 
         math.exp(-(((x - mu) * (x - mu)) / (2 * sd^2)))
end


-- CDF of a normal distribution
local function pNorm(q, mu, sd, accuracy)
  assert(type(q) == "number", "q must be a number")
  mu = mu or 0
  sd = sd or 1
  accuracy = accuracy or 1e-3

  return 0.5 + 
    (q > 0 and 1 or -1) * integral(dNorm, 0, math.abs(q), accuracy, mu, sd)
end

-- Quantile function of the Normal distribution
-- Calculates the Z-Score based on the cumulative probabiltiy
local function qNorm(p, accuracy)
  accuracy = accuracy or 0.01
  return  integralUntil(dNorm, p, -20, 0.0001)
end 


--[[ T-Distribution Functions ]]--

-- Probability Density function of a T Distribution
local function dT(x, df)
  return gamma((df + 1) / 2) / 
         (math.sqrt(df * math.pi) * gamma(df / 2)) * 
         (1 + x^2 / df)^(-(df + 1) / 2)
end 


-- CDF of the T-Distribution
local function pT(q, df, accuracy)
  assert(df > 0, "More at least 1 degree of freedom needed")
  accuracy = accuracy or 1e-4

  return 0.5 + (q > 0 and 1 or -1) * integral(dT, 0, math.abs(q), accuracy, df)
end 

-- Finds T-Ratio for a given p-value.
local function qT(p, df, accuracy)
  accuracy = accuracy or 0.01
  return integralUntil(dT, p, -20, 0.0001, df)
end 


--[[ Chi-Square Distribution Functions ]]--

-- Probability density of the chi square distribution.
local function dChisq(x, df)
  return 1 / (2^(df / 2) * gamma(df / 2)) * x^(df / 2 - 1) * math.exp(-x / 2)
end


-- CDF of the Chi square distribution.
local function pChisq(q, df)
  return integral(dChisq, 0, q, 1e-4, df)
end 


-- Quantile function of the Chi-Square Distribution.
local function qChisq(p, df, accuracy)
    accuracy = accuracy or 0.001
    return integralUntil(dChisq, p, 0, 0.001, df)
end 


--[[ F Distribution Functions ]]--

-- Probability Density of the F distribution
local function dF(x, df1, df2)
  return math.sqrt(((df1 * x)^df1 * df2^df2) / 
                   ((df1 * x + df2)^(df1 + df2))) / 
                  (x * beta(df1 / 2, df2 / 2))
end


-- CDF of the F distribution
local function pF(x, df1, df2)
  return integral(dF, 0.0001, x, 1e-4, df1, df2)
end


-- Quantile function of the F Distribution
local function qF(p, df1, df2, accuracy)
  if p == 0 then
    return 0
  elseif p == 1 then
    return math.huge
  else
    assert(p > 0 and p < 1, "p must be between 0 and 1")
  end
  accuracy = accuracy or 0.001
  return integralUntil(dF, p, 0.00001, accuracy, df1, df2)
end


--[[ Tests ]]--

-- Calculates the Z-Score for one or two samples. 
-- Assumes non equivalent Variance.
local function zValue(y1, sd1, n1, y2, sd2, n2, sameVar)
  assert(sd1 > 0, "Standard Deviation has to be positive")
  assert(n1  > 1, "Sample Size has to be at least 2")
  
  y2 = y2 or 0
  sameVar = sameVar or false
  local z
  local d = y1 - y2

  if n2 == nil then
    z = d / (sd1 / math.sqrt(n1))
  elseif sameVar then
    z = d / math.sqrt(sd1^2 / n1 + sd2^2 / n2)
  else
    z = d / (sdPooled(sd1, n1, sd2, n2) * math.sqrt(1 / n1 + 1 / n2))
  end

  return z
end


-- Performs a z-test on two tables and returns z-statistic.
local function zTest(t1, t2, sameVar)
  assertTables(t1, t2)
  return zValue(mean(t1), sd(t1), #t1, mean(t2), sd(t2), #t2, sameVar)
end 


-- Calculates the p-value of a two sample zTest.
local function zTestP(t1, t2, sameVar)
  assertTables(t1, t2)
  return pValue(zTest(t1, t2, sameVar), pNorm)
end


-- Calculates the t-value of one or two means, assuming non equivalent variance.
local function tValue(y1, sd1, n1, y2, sd2, n2, sameVar)
  return zValue(v1, sd1, n1, v2, sd2, n2, sameVar)
end


-- Performs a t-test on two tables and returns t-statistic.
local function tTest(t1, t2, sameVar)
  return zTest(t1, t2, sameVar)
end 


-- Calculates the p-value of a two sample tTest.
local function tTestP(t1, t2, sameVar)
  assertTables(t1, t2)
  return pValue(zTest(t1, t2, sameVar), pT, count(t1, t2) - 2)
end


-- Returns the f-value of two variances
local function fValue(s1, s2)
  return s1 / s2
end


-- Performs an f-test on two tables
local function fTest(t1, t2)
  assertTables(t1, t2)

  return var(t1) / var(t2)
end


-- Returns the p-value of an f-test on two tables
local function fTestP(t1, t2)
  assertTables(t1, t2)
  return pValue(fTest(t1, t2), pF, #t1, #t2)
end
Advertisement