
    v5`x                     X   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ ddl	m
Z 	 d dlZn# e$ r dZY nw xY wg dZ	 eefZn# e$ r efZY nw xY w G d d	e          Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z e            Z e            Z e            Z e            Z  e            Z! e            Z" e            Z# e            Z$dS )    N)Counter)Fraction)groupbypermutations   )Base)ArithNCDLZMANCDBZ2NCDRLENCD	BWTRLENCDZLIBNCDSqrtNCD
EntropyNCDbz2_ncdlzma_ncd	arith_ncdrle_ncd
bwtrle_ncdzlib_ncdsqrt_ncdentropy_ncdc                   0    e Zd ZdZdZddZd Zd Zd ZdS )	_NCDBasezNormalized compression distance (NCD)

    https://articles.orsinium.dev/other/ncd/
    https://en.wikipedia.org/wiki/Normalized_compression_distance#Normalized_compression_distance
    r   c                     || _         d S Nqvalselfr   s     Ilib/python3.11/site-packages/textdistance/algorithms/compression_based.py__init__z_NCDBase.__init__)       			    c                     dS )Nr    )r    	sequencess     r!   maximumz_NCDBase.maximum,   s    qr$   c                 F    t          |                     |                    S r   )len	_compressr    datas     r!   	_get_sizez_NCDBase._get_size/   s    4>>$''(((r$   c                      |sdS   j         | }t          d          } t          |d                               }t          |          D ]g}t	          |t
          t          f          r|                    |          }nt          ||          }t          | 
                    |                    }h fd|D             }t          |          }|dk    rdS |t          |          t          |          dz
  z  z
  |z  S )Nr   Infc                 :    g | ]}                     |          S r&   )r.   ).0sr    s     r!   
<listcomp>z%_NCDBase.__call__.<locals>.<listcomp>@   s%    @@@4>>!,,@@@r$   r   )_get_sequencesfloattyper   
isinstancestrbytesjoinsumminr.   maxr*   )r    r'   
concat_lenemptyr-   compressed_lensmax_lens   `      r!   __call__z_NCDBase.__call__2   s    	1'D'3	5\\
"Yq\""$$ ++ 	? 	?D%#u.. (zz$''4''Z)=)=>>JJ@@@@i@@@o&&a<<1S11S^^a5GHHGSSr$   Nr   )	__name__
__module____qualname____doc__r   r"   r(   r.   rC   r&   r$   r!   r   r   !   sk         
 D     ) ) )T T T T Tr$   r   c                   $     e Zd Zd Z fdZ xZS )_BinaryNCDBasec                     d S r   r&   )r    s    r!   r"   z_BinaryNCDBase.__init__I   s    r$   c                     |sdS t          |d         t                    rd |D             } t                      j        | S )Nr   c                 8    g | ]}|                     d           S )zutf-8)encode)r2   r3   s     r!   r4   z+_BinaryNCDBase.__call__.<locals>.<listcomp>P   s$    >>>q'**>>>r$   )r8   string_typessuperrC   )r    r'   	__class__s     r!   rC   z_BinaryNCDBase.__call__L   sM     	1ilL11 	?>>I>>>Iuww++r$   )rE   rF   rG   r"   rC   __classcell__rQ   s   @r!   rJ   rJ   G   sG          , , , , , , , , ,r$   rJ   c                   2    e Zd ZdZd
dZd Zd Zd Zd	 ZdS )r	   zArithmetic coding

    https://github.com/gw-c/arith
    http://www.drdobbs.com/cpp/data-compression-with-arithmetic-encodin/240169251
    https://en.wikipedia.org/wiki/Arithmetic_coding
       Nr   c                 0    || _         || _        || _        d S r   )base
terminatorr   )r    rW   rX   r   s       r!   r"   zArithNCD.__init__\   s    	$			r$   c                 T    | j         | } | j        | }| j        
d|| j        <   t          |                                          }i }d}t          |                                d d          }|D ]-\  }}t          ||          t          ||          f||<   ||z  }.||k    sJ |S )zD
        https://github.com/gw-c/arith/blob/master/arith.py
        Nr   r   c                 "    | d         | d         fS )Nr   r   r&   )xs    r!   <lambda>z&ArithNCD._make_probs.<locals>.<lambda>m   s    qtQqTl r$   T)keyreverse)_get_counters_sum_countersrX   r<   valuessorteditemsr   )r    r'   countstotal_letters
prob_pairscumulative_countcharcurrent_counts           r!   _make_probszArithNCD._make_probsa   s     'D&	2	##Y/?&&'F4?#FMMOO,,
,B,BDQQQ#) 	. 	.D-)=9966 Jt -=0000r$   c                     | j         .| j         |v r|                    | j         d          }|| j         z  }t          dd          }t          dd          }|D ]}||         \  }}|||z  z  }||z  }|||z   fS )N r   r   )rX   replacer   )r    r-   probsstartwidthrh   
prob_start
prob_widths           r!   
_get_rangezArithNCD._get_rangew   s    ?&$&&||DOR88DO#DAA 	  	 D%*4["J
Z%''EZEEeem##r$   c                    |                      |          }|                     ||          \  }}t          dd          }d}||cxk    r|k     s<n d|j        |z  |j        z  z   }t          ||          }|dz  }||cxk    r|k     7n :|S )N)r-   rn   r   r   rU   )rj   rs   r   	numeratordenominator)r    r-   rn   ro   endoutput_fractionoutput_denominatoroutput_numerators           r!   r+   zArithNCD._compress   s      &&__$e_<<
s"1a..O1111c1111 U_7I%IeN_$_`&'79KLLO!# O1111c11111 r$   c                     |                      |          j        }|dk    rdS t          j        t          j        || j                            S )Nr   )r+   ru   mathceillogrW   )r    r-   ru   s      r!   r.   zArithNCD._get_size   sB    NN4((2	>>1y)TY77888r$   )rU   Nr   )	rE   rF   rG   rH   r"   rj   rs   r+   r.   r&   r$   r!   r	   r	   T   sn            
  ,$ $ $	 	 	9 9 9 9 9r$   r	   c                       e Zd ZdZd ZdS )r   zORun-length encoding

    https://en.wikipedia.org/wiki/Run-length_encoding
    c                 R   g }t          |          D ]\  }}t          t          |                    }|dk    r&|                    t	          |          |z              M|dk    r|                    |           i|                    d|z             d                    |          S )NrU   r   rl   )r   r*   listappendr9   r;   )r    r-   new_datakgns         r!   r+   zRLENCD._compress   s    DMM 	' 	'DAqDGGA1uuA
++++a""""A&&&&wwx   r$   NrE   rF   rG   rH   r+   r&   r$   r!   r   r      s-         

! 
! 
! 
! 
!r$   r   c                   *     e Zd ZdZddZ fdZ xZS )r   z
    https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
    https://en.wikipedia.org/wiki/Run-length_encoding
     c                     || _         d S r   )rX   )r    rX   s     r!   r"   zBWTRLENCD.__init__   s    $r$   c                 *   s| j         ng| j         vr^| j         z  t          fdt          t                              D                       }d                    d |D                       t                                                    S )Nc              3   B   K   | ]}|d          d |         z   V  d S r   r&   )r2   ir-   s     r!   	<genexpr>z&BWTRLENCD._compress.<locals>.<genexpr>   s8      LLad122hbqb1LLLLLLr$   rl   c                     g | ]
}|d          S )r&   )r2   subdatas     r!   r4   z'BWTRLENCD._compress.<locals>.<listcomp>   s    @@@GGBK@@@r$   )rX   rb   ranger*   r;   rP   r+   )r    r-   modifiedrQ   s    ` r!   r+   zBWTRLENCD._compress   s     	B?DD_D((DO#DLLLL5T;K;KLLLLLH77@@x@@@AADww  &&&r$   )r   )rE   rF   rG   rH   r"   r+   rR   rS   s   @r!   r   r      sV         % % % %' ' ' ' ' ' ' ' 'r$   r   c                   &    e Zd ZdZddZd Zd ZdS )r   zSquare Root based NCD

    Size of compressed data equals to sum of square roots of counts of every
    element in the input sequence.
    r   c                     || _         d S r   r   r   s     r!   r"   zSqrtNCD.__init__   r#   r$   c                 X    d t          |                                          D             S )Nc                 >    i | ]\  }}|t          j        |          S r&   )r|   sqrt)r2   elementcounts      r!   
<dictcomp>z%SqrtNCD._compress.<locals>.<dictcomp>   s(    VVVngu5))VVVr$   )r   rc   r,   s     r!   r+   zSqrtNCD._compress   s(    VV@S@S@U@UVVVVr$   c                 j    t          |                     |                                                    S r   )r<   r+   ra   r,   s     r!   r.   zSqrtNCD._get_size   s(    4>>$''..00111r$   NrD   rE   rF   rG   rH   r"   r+   r.   r&   r$   r!   r   r      sS         
   W W W2 2 2 2 2r$   r   c                   &    e Zd ZdZddZd Zd ZdS )	r   zEntropy based NCD

    Get Entropy of input secueance as a size of compressed data.

    https://en.wikipedia.org/wiki/Entropy_(information_theory)
    https://en.wikipedia.org/wiki/Entropy_encoding
    r   rU   c                 0    || _         || _        || _        d S r   )r   coefrW   )r    r   r   rW   s       r!   r"   zEntropyNCD.__init__   s    					r$   c                     t          |          }d}t          |                                          D ]'}||z  }||t          j        || j                  z  z  }(|dk    sJ |S )Ng        r   )r*   r   ra   r|   r~   rW   )r    r-   total_countentropyelement_countps         r!   r+   zEntropyNCD._compress   sn    $ii$T]]1133 	2 	2M+Aq48Aty1111GG!||||r$   c                 <    | j         |                     |          z   S r   )r   r+   r,   s     r!   r.   zEntropyNCD._get_size   s    y4>>$////r$   N)r   r   rU   r   r&   r$   r!   r   r      sP            
  0 0 0 0 0r$   r   c                       e Zd ZdZd ZdS )r   z-
    https://en.wikipedia.org/wiki/Bzip2
    c                 <    t          j        |d          dd          S )N	bz2_codec   codecsrN   r,   s     r!   r+   zBZ2NCD._compress   s    }T;//44r$   Nr   r&   r$   r!   r   r      -         5 5 5 5 5r$   r   c                       e Zd ZdZd ZdS )r
   z,
    https://en.wikipedia.org/wiki/LZMA
    c                 f    t           st          d          t          j        |          dd          S )Nz$Please, install the PylibLZMA module   )lzmaImportErrorcompressr,   s     r!   r+   zLZMANCD._compress   s3     	FDEEE}T""233''r$   Nr   r&   r$   r!   r
   r
      s-         ( ( ( ( (r$   r
   c                       e Zd ZdZd ZdS )r   z,
    https://en.wikipedia.org/wiki/Zlib
    c                 <    t          j        |d          dd          S )N
zlib_codecrU   r   r,   s     r!   r+   zZLIBNCD._compress  s    }T<0044r$   Nr   r&   r$   r!   r   r     r   r$   r   )%r   r|   collectionsr   	fractionsr   	itertoolsr   r   rW   r   _Baser   r   __all__r9   unicoderO   	NameErrorr   rJ   r	   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   r   r&   r$   r!   <module>r      s                 + + + + + + + +       KKKK   DDD  >LL   7LLL#T #T #T #T #Tu #T #T #TL
, 
, 
, 
, 
,X 
, 
, 
,@9 @9 @9 @9 @9x @9 @9 @9F! ! ! ! !X ! ! !&' ' ' ' ' ' ' '*2 2 2 2 2h 2 2 2 0 0 0 0 0 0 0 0D5 5 5 5 5^ 5 5 5( ( ( ( (n ( ( (5 5 5 5 5n 5 5 5 HJJ	Y[[

&((799
&((799799jlls   ) 33A   A
A