
    DUf#                        d dl mZ 	 d dlmZ n# e$ r	 d dlmZ Y nw xY wd dlmZm	Z	m
Z
mZmZ 	 d dlmZ n# e$ r	 d dlmZ Y nw xY wd dlZd dlZd dlZd dlmZ ddgZ ed	          d
z  Ze G d d                      Zdej        fdZ	 	 ddede
ee	eed         f                  de
ee	eed         f                  defdZdS )    )	dataclass)files)DictListOptionalTupleUnion)LiteralN)make_viewframeassemblies_availableassembly_infozbioframe.iodatac                      e Zd ZU dZeed<   eed<   eed<   eed<   ej        ed<   dZej        ed<   dZ	eed	<   dZ
eeef         ed
<   d Zedej        fd            Zedee         fd            Zedej        fd            ZdefdZdS )GenomeAssemblyzR
    A dataclass containing information about sequences in a genome assembly.
    organismproviderprovider_buildrelease_yearseqinfoN	cytobandsurl
alias_dictc                     i | _         | j        d         j                            d          }| j        d         }t	          ||          D ]\  }}|D ]}|| j         |<   d S )Naliases,name)r   r   strsplitzip)selfalias_listsnamesr   r   aliass         Q/var/www/html/software/conda/lib/python3.11/site-packages/bioframe/io/assembly.py__post_init__zGenomeAssembly.__post_init__)   s|    l9-177<<V$ e44 	. 	.MGT  . .)-&&.	. 	.    returnc                 B    | j                             d          d         S )Nr   length)r   	set_indexr    s    r$   
chromsizeszGenomeAssembly.chromsizes1   s    |%%f--h77r&   c                 @    | j         d                                         S )Nr   )r   tolistr+   s    r$   
chromnameszGenomeAssembly.chromnames5   s    |F#**,,,r&   c                 N    t          | j                                                  S )N)r   r,   to_dictr+   s    r$   	viewframezGenomeAssembly.viewframe9   s    do5577888r&   c           	      H    d| j          d| j         d| j         d| j         d	S )NzGenomeAssembly(organism='z', provider='z', provider_build='z', release_year='z', ...))r   r   r   r   r+   s    r$   __repr__zGenomeAssembly.__repr__=   sM    8 8 8DM 8 8#28 8!.8 8 8	
r&   )__name__
__module____qualname____doc__r   __annotations__pd	DataFramer   r   r   r   r%   propertySeriesr,   r   r/   r2   r4    r&   r$   r   r      s8          MMMMMM\"Ir|"""COOO!%JS#X%%%. . . 8BI 8 8 8 X8 -DI - - - X- 92< 9 9 9 X9
# 
 
 
 
 
 
r&   r   r'   c                      t          t          dz            5 } t          j        |           }ddd           n# 1 swxY w Y   t          j                            |          S )a@  
    Get a list of available genome assembly metadata in local storage.

    Returns
    -------
    pandas.DataFrame
        A dataframe with metadata fields for available assemblies, including
        'provider', 'provider_build', 'default_roles', 'default_units',
        and names of seqinfo and cytoband files.
    z_assemblies.ymlN)openASSEMBLY_METADATA_ROOTyaml	safe_loadr:   r;   from_records)f
assembliess     r$   r   r   E   s     
$'88	9	9 'Q^A&&
' ' ' ' ' ' ' ' ' ' ' ' ' ' '<$$Z000s   9= =r   rolesallunitsc           
         t                      }d}d| v r-|                     dd          \  }} |                                }|d|  d}n	d| d|  d}|                    |          }t	          |          dk    rt          d	|            t	          |          dk    rt          d
|           |j        d                             t          j	        gdg          
                                }|d         }|d         }	|d         }
t          j        t          |
z            }t          j        t	          |          t                    }|||d                             |          z  }nht#          |t$          t&          f          r||d                             |          z  }n-t#          |t(                    r|dk    rt          d|           |||d                             |	          z  }nht#          |t$          t&          f          r||d                             |          z  }n-t#          |t(                    r|dk    rt          d|           |j        |         }d}|d         }|t          j        t          |z            }t-          |d         |d         |d         |d         |||d                   S )a  
    Get information about a genome assembly.

    Parameters
    ----------
    name : str
        Name of the assembly. If the name contains a dot, it is interpreted as
        a provider name and a build, e.g. "hg38". Otherwise, the provider
        is inferred if the build name is unique.
    roles : list or tuple or "all", optional
        Sequence roles to include in the assembly info. If not specified, only
        sequences with the default sequence roles for the assembly are shown.
        e.g. "assembled", "unlocalized", "unplaced"
    units : list or tuple or "all", optional
        Assembly units to include in the assembly info. If not specified, only
        sequences from the default units for the assembly are shown.
        e.g. "primary", "non-nuclear", "decoy"

    Returns
    -------
    GenomeAssembly
        A dataclass containing information about the assembly.

    Raises
    ------
    ValueError
        If the assembly name is not found or is not unique.

    Examples
    --------
    >>> hg38 = assembly_info("hg38")
    >>> hg38.chromsizes
    name
    chr1    248956422
    chr2    242193529
    chr3    198295559
    ...     ...

    >>> assembly_info("hg38", roles=("assembled", "non-nuclear"))

    >>> assembly_info("ucsc.hg38", units=("unplaced",))

    N.   zprovider_build == ''zprovider == 'z' and provider_build == 'r   zAssembly not found: zAssembly identifer not unique: default_rolesdefault_unitsr   )dtyperolerH   z$roles must be a tuple or 'all', not unitz$units must be a tuple or 'all', not r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   lowerquerylen
ValueErrorilocreplacenpnanr1   r:   
read_tablerA   onesboolisin
isinstancetuplelistr   locr   )r   rG   rI   rF   r   qresultassemblyrN   rO   seqinfo_pathr   maskr   cytobands_paths                  r$   r   r   U   s   ` &''JH
d{{C++$>>##)$)))FHFFtFFFa  F
6{{a666777	VqC6CCDDD{1~%%rvh77??AAH_-M_-MI&Lm2\ABBG73w<<t,,,D}$$]333	EE4=	)	) I$$U+++	E3		 IEUNNGGGHHH}$$]333	EE4=	)	) I$$U+++	E3		 IEUNNGGGHHHk$GIk*N!M"8>"IJJ	*%*% 01n-UO   r&   )NN)dataclassesr   importlib.resourcesr   resource_pathImportErrorimportlib_resourcestypingr   r   r   r   r	   r
   typing_extensionsnumpyrY   pandasr:   rB   bioframer   __all__rA   r   r;   r   r   r   r>   r&   r$   <module>rt      s   ! ! ! ! ! !;::::::: ; ; ;::::::::; 6 5 5 5 5 5 5 5 5 5 5 5 5 5* * * *))))))))*          # # # # # #!?
3&}55>  '
 '
 '
 '
 '
 '
 '
 '
T1bl 1 1 1 1$ ;?:>c c
cE$wu~567c E$wu~567c 	c c c c c cs    6 AA