Source code for yatel.weight.euclidean

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# "THE WISKEY-WARE LICENSE":
# <utn_kdd@googlegroups.com> wrote this file. As long as you retain this notice
# you can do whatever you want with this stuff. If we meet some day, and you
# think this stuff is worth it, you can buy us a WISKEY us return.


#==============================================================================
# DOCS
#==============================================================================

"""Euclidean distance implementation of Yatel.

- http://en.wikipedia.org/wiki/Euclidean_distance


"""

#==============================================================================
# IMPORTS
#==============================================================================

import numpy as np

from yatel.weight import core


#==============================================================================
# EUCLIDEAN
#==============================================================================

[docs]class Euclidean(core.BaseWeight): """Calculates "ordinary" distance/weight between two haplotypes given by the Pythagorean formula. Every attribute value is converted to a number by a ``to_num`` function. The default behavior of ``to_num`` is a sumatory of base64 ord value of every attribute value. Example: .. code-block:: python def to_num(attr): value = 0 for c in str(attr).encode("base64"): value += ord(c) return value to_num("h") # 294 For more info about euclidean distance: http://en.wikipedia.org/wiki/Euclidean_distance """ @classmethod
[docs] def names(cls): """Synonims names to call this weight calculation. """ return "euclidean", "euc", "ordinary"
def __init__(self, to_num=None): """Creates a new instance. Parameters ---------- to_num : callable Convert to a number an haplotype attribute value. The default behavior of ``to_num`` is a sumatory of base64 ord value of every attribute value. .. code-block:: python def to_num(attr): value = 0 for c in str(attr).encode("base64"): value += ord(c) return value to_num("h") # 294 """ self.to_num = to_num_default if to_num is None else to_num
[docs] def weight(self, hap0, hap1): """A ``float`` distance between 2 :py:class:`yatel.dom.Haplotype` instances""" s = 0.0 for name in set(hap0.keys() + hap1.keys()): v0 = self.to_num(hap0.get(name, "")) v1 = self.to_num(hap1.get(name, "")) s += (v1 - v0) ** 2 return np.sqrt(s) #============================================================================== # FUNCTION #==============================================================================
[docs]def to_num_default(attr): """The default behavior of ``to_num`` is a sumatory of base64 ord value of every attribute value. """ value = 0 for c in str(attr).encode("base64"): value += ord(c) return value #============================================================================== # MAIN #==============================================================================
if __name__ == "__main__": print(__doc__)