Source code for spectrum.criteria

"""Criteria for parametric methods.

.. topic:: This module provides criteria to automatically select order in
    parametric PSD estimate or pseudo spectrum estimates (e.g, music).

    Some criteria such as the AIC criterion helps to chose the order of PSD
    models such as the ARMA model. Nevertheless, it is difficult to estimate
    correctly the order of an ARMA model even by using these criteria. The
    reason being that even the Akaike criteria (AIC) does not provide the
    proper order with a probability of 1 with infinite samples.

    The order choice is related to an expertise of the signal. There is no
    exact criteria. However, they may provide useful information.

    AIC, AICc, KIC and AKICc are based on information theory.  They  attempt
    to balance the complexity (or length) of the model against how well the
    model fits the data.  AIC and KIC are biased estimates of the asymmetric
    and the symmetric Kullback-Leibler divergence respectively.  AICc and
    AKICc attempt to correct the bias.

    There are also criteria related to eigen analysis, which takes as input
    the eigen values of any PSD estimate method.

.. rubric:: Example

.. plot::
    :width: 80%
    :include-source:

    from spectrum import aryule, AIC, marple_data
    from pylab import plot, arange
    order = arange(1, 25)
    rho = [aryule(marple_data, i, norm='biased')[1] for i in order]
    plot(order, AIC(len(marple_data), rho, order), label='AIC')



:References: bd-Krim Seghouane and Maiza Bekara
   "A small sample model selection criterion based on Kullback's symmetric
   divergence", IEEE Transactions on Signal Processing,
   Vol. 52(12), pp 3314-3323, Dec. 2004

"""


[docs]class Criteria(object): """Criteria class for an automatic selection of ARMA order. Available criteria are ======= ===================== ======= ===================== AIC see :func:`AIC` AICc see :func:`AICc` KIC see :func:`KIC` AKICc see :func:`AKICc` FPE see :func:`FPE` MDL see :func:`MDL` CAT see :func:`_CAT` ======= ===================== """ valid_criteria_names = ['AIC', 'AICc', 'KIC', 'FPE', 'AKICc', 'MDL'] error_incorrect_name = 'Invalid name provided. Correct names are %s ' \ % valid_criteria_names error_no_criteria_found = 'No names match the valid criteria names (%s)' \ % valid_criteria_names def __init__(self, name, N): """Create a criteria object :param name: a string or list of strings containing valid criteria method's name :param int N: size of the data sample. """ #valid attributes self.__name = None self.name = name self.__N = N self.__rho = 0 self.__k = None self.__old_data = None self.__data = None self.__norm = True def _getName(self): return self.__name def _setName(self, name): assert isinstance(name, str), 'name must be a string' if name in self.valid_criteria_names: self.__name = name else: raise ValueError(self.error_no_criteria_found) name = property(fget=_getName, fset=_setName, doc="Getter/Setter for the criteria name") def _getData(self): return self.__data def _setData(self, data): # save the data value in old_data is there is something to save if self.data is None: self.__data = data self.__old_data = 2.*data else: self.__old_data = self.data self.__data = data data = property(fget=_getData, fset=_setData, doc="Getter/Setter for the criteria output") def _getOldData(self): return self.__old_data old_data = property(fget=_getOldData, doc="Getter/Setter for the previous value") def _getK(self): return self.__k k = property(fget=_getK, doc="Getter for k the order of evaluation") def _getN(self): return self.__N def _setN(self, N): assert N > 0, 'N must be positive' self.__N = N N = property(fget=_getN, fset=_setN, doc="Getter/Setter for N") def _getRho(self): return self.__rho def _setRho(self, rho): self.__rho = rho rho = property(fget=_getRho, fset=_setRho, doc="Getter/Setter for rho") def __call__(self, rho=None, k=None, N=None, norm=True): """Call the criteria function corresponding to :attr:`name`.""" self.__norm = norm if N is not None: self.N = N # we update rho only if it is needed (input different from self.rho) # if such case, we also update k if rho is not None: self.rho = rho if k is not None: self.__k = k self.__norm = norm #used to check if the criteria is reached or not f = eval(self.name) self.data = f(self.N, self.rho, self.k) # compare the new data with the previous one and return # False if the new value is larger so as to stop the iteration if self.old_data is not None and self.data is not None: if self.data > self.old_data: return False else: return True return True
[docs]def AIC(N, rho, k): r"""Akaike Information Criterion :param rho: rho at order k :param N: sample size :param k: AR order. If k is the AR order and N the size of the sample, then Akaike criterion is .. math:: AIC(k) = \log(\rho_k) + 2\frac{k+1}{N} :: AIC(64, [0.5,0.3,0.2], [1,2,3]) :validation: double checked versus octave. """ from numpy import log, array #k+1 #todo check convention. agrees with octave res = N * log(array(rho)) + 2.* (array(k)+1) return res
[docs]def AICc(N, rho, k, norm=True): r"""corrected Akaike information criterion .. math:: AICc(k) = log(\rho_k) + 2 \frac{k+1}{N-k-2} :validation: double checked versus octave. """ from numpy import log, array p = k #todo check convention. agrees with octave res = log(rho) + 2. * (p+1) / (N-p-2) return res
[docs]def KIC(N, rho, k): r"""Kullback information criterion .. math:: KIC(k) = log(\rho_k) + 3 \frac{k+1}{N} :validation: double checked versus octave. """ from numpy import log, array res = log(rho) + 3. * (k+1.) /float(N) return res
[docs]def AKICc(N, rho, k): r"""approximate corrected Kullback information .. math:: AKICc(k) = log(rho_k) + \frac{p}{N*(N-k)} + (3-\frac{k+2}{N})*\frac{k+1}{N-k-2} """ from numpy import log, array p = k res = log(rho) + p/N/(N-p) + (3.-(p+2.)/N) * (p+1.) / (N-p-2.) return res
[docs]def FPE(N,rho, k=None): r"""Final prediction error criterion .. math:: FPE(k) = \frac{N + k + 1}{N - k - 1} \rho_k :validation: double checked versus octave. """ #k #todo check convention. agrees with octave fpe = rho * (N + k + 1.) / (N- k -1) return fpe
[docs]def MDL(N, rho, k): r"""Minimum Description Length .. math:: MDL(k) = N log \rho_k + p \log N :validation: results """ from numpy import log #p = arange(1, len(rho)+1) mdl = N* log(rho) + k * log(N) return mdl
[docs]def CAT(N, rho, k): r"""Criterion Autoregressive Transfer Function : .. math:: CAT(k) = \frac{1}{N} \sum_{i=1}^k \frac{1}{\rho_i} - \frac{\rho_i}{\rho_k} .. todo:: validation """ from numpy import zeros, arange cat = zeros(len(rho)) for p in arange(1, len(rho)+1): rho_p = float(N)/(N-p)*rho[p-1] s = 0 for j in range(1, p+1): rho_j = float(N)/(N-j)*rho[j-1] s = s + 1./rho_j #print(s, s/float(N), 1./rho_p) cat[p-1] = s/float(N) - 1./rho_p return cat
[docs]def aic_eigen(s, N): r"""AIC order-selection using eigen values :param s: a list of `p` sorted eigen values :param N: the size of the input data. To be defined precisely. :return: * an array containing the AIC values Given :math:`n` sorted eigen values :math:`\lambda_i` with :math:`0 <= i < n`, the proposed criterion from Wax and Kailath (1985) is: .. math:: AIC(k) = -2(n-k)N \ln \frac{g(k)}{a(k)} + 2k(2n-k) where the arithmetic sum :math:`a(k)` is: .. math:: a(k) = \sum_{i=k+1}^{n}\lambda_i and the geometric sum :math:`g(k)` is: .. math:: g(k) = \prod_{i=k+1}^{n} \lambda_i^{-(n-k)} The number of relevant sinusoids in the signal subspace is determined by selecting the minimum of `AIC`. .. seealso:: :func:`~spectrum.eigenfreq.eigen` .. todo:: define precisely the input parameter N. Should be the input data length but when using correlation matrix (SVD), I suspect it should be the length of the correlation matrix rather than the original data. :References: * [Marple]_ Chap 13, * [Wax]_ """ import numpy as np kaic = [] n = len(s) for k in range(0, n-1): ak = 1./(n-k) * np.sum(s[k+1:]) gk = np.prod(s[k+1:]**(1./(n-k))) kaic.append( -2.*(n-k)*N * np.log(gk/ak) + 2.*k*(2.*n-k)) return kaic
[docs]def mdl_eigen(s, N): r"""MDL order-selection using eigen values :param s: a list of `p` sorted eigen values :param N: the size of the input data. To be defined precisely. :return: * an array containing the AIC values .. math:: MDL(k) = (n-k)N \ln \frac{g(k)}{a(k)} + 0.5k(2n-k) log(N) .. seealso:: :func:`aic_eigen` for details :References: * [Marple]_ Chap 13, * [Wax]_ """ import numpy as np kmdl = [] n = len(s) for k in range(0, n-1): ak = 1./(n-k) * np.sum(s[k+1:]) gk = np.prod(s[k+1:]**(1./(n-k))) kmdl.append( -(n-k)*N * np.log(gk/ak) + 0.5*k*(2.*n-k)*np.log(N)) return kmdl