
    >ie                        d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZ 	 	 	 	 	 	 ddZd ZdS )    )annotationsN)OpenFileget_fs_token_paths)infer_compression
read_block)tokenize)delayed)
is_integerparse_bytesF128 MiB10 kiBc           	        t          | t          t          t          t          j        f          st          d          t          | d|          \  }}	t          |	          dk    rt          d| z            |Qt          |t                    rt          |          }t          |          st          d          t          |          }|*dggt          |	          z  }
dggt          |	          z  }nvg }
g }|	D ]ndk    rt                    }n}|t          d	                                        d
         }|t          d          |dk    r+|
                    g            |                    g            ||z  r||k    r	|||z  z  }n|}d}dg}g }||z
  |dz  dz
  k    rZ||z  }|                    t          |                     |                    |d         |d         z
             ||z
  |dz  dz
  k    Z|                    ||d         z
             |rd|d<   |dxx         dz  cc<   |
                    |           |                    |           pt#          t$                    g }t'          |	|
|          D ]q\  }}t)          |                              |          fd|D             }fdt'          |||          D             }|                    |           r|r|du rd}t          |t                    rt          |          }t-          |	d                   5 }|                    |          }n]|                    |          }	 |                    |          }|sn-|v r#||                    d          d         z   z   }n||z   }E|}ddd           n# 1 swxY w Y   |r|||	fS ||fS )aU	  Given a path or paths, return delayed objects that read from those paths.

    The path may be a filename like ``'2015-01-01.csv'`` or a globstring
    like ``'2015-*-*.csv'``.

    The path may be preceded by a protocol, like ``s3://`` or ``hdfs://`` if
    those libraries are installed.

    This cleanly breaks data by a delimiter if given, so that block boundaries
    start directly after a delimiter and end on the delimiter.

    Parameters
    ----------
    urlpath : string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    delimiter : bytes
        An optional delimiter, like ``b'\n'`` on which to split blocks of
        bytes.
    not_zero : bool
        Force seek of start-of-file delimiter, discarding header.
    blocksize : int, str
        Chunk size in bytes, defaults to "128 MiB"
    compression : string or None
        String like 'gzip' or 'xz'.  Must support efficient random access.
    sample : int, string, or boolean
        Whether or not to return a header sample.
        Values can be ``False`` for "no sample requested"
        Or an integer or string value like ``2**20`` or ``"1 MiB"``
    include_path : bool
        Whether or not to include the path with the bytes representing a particular file.
        Default is False.
    **kwargs : dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> sample, blocks = read_bytes('2015-*-*.csv', delimiter=b'\n')  # doctest: +SKIP
    >>> sample, blocks = read_bytes('s3://bucket/2015-*-*.csv', delimiter=b'\n')  # doctest: +SKIP
    >>> sample, paths, blocks = read_bytes('2015-*-*.csv', include_path=True)  # doctest: +SKIP

    Returns
    -------
    sample : bytes
        The sample header
    blocks : list of lists of ``dask.Delayed``
        Each list corresponds to a file, and each delayed object computes to a
        block of bytes from that file.
    paths : list of strings, only included if include_path is True
        List of same length as blocks, where each item is the path to the file
        represented in the corresponding block.

    z3Path should be a string, os.PathLike, list or tuplerb)modestorage_optionsr   z%s resolved to no filesNzblocksize must be an integerinferzHCannot do chunked reads on compressed files. To read, set blocksize=NonesizezfBacking filesystem couldn't determine file size, cannot do chunked reads. To read, set blocksize=None.      c                     g | ]
}d | d S )zread-block-- ).0otokens     /lib/python3.11/site-packages/dask/bytes/core.py
<listcomp>zread_bytes.<locals>.<listcomp>   s*    :::a)a))%)):::    c           	     X    g | ]&\  }}} t                     |||          'S )compression)dask_key_name)r   )	r   r   keylr#   delayed_read	delimiterfspaths	       r   r   zread_bytes.<locals>.<listcomp>   s`     	
 	
 	
 3 LT{;;;!  	
 	
 	
r    Tr   r"   )
isinstancestrlisttupleosPathLike	TypeErrorr   lenOSErrorr   r
   intr   
ValueErrorinfoappendr	   read_block_from_filezipr   ukeyr   readsplit)urlpathr(   not_zero	blocksizesampler#   include_pathkwargsfs_tokenpathsoffsetslengthscompr   
blocksize1placeofflengthoutoffsetkeysvaluesfsample_buffnewr'   r)   r*   r   s    `   `                   @@@@r   
read_bytesrS      s   D gT5"+>?? OMNNN,W4QWXXXB%
5zzQ/'9:::i%% 	/#I..I)$$ 	<:;;;	NN	3%#e**$6(SZZ' *	' *	'Dg%%(.." 2   774==(D| E  
 r"""r"""" )# +y(8(8!%):!;JJ!*Jc Ulj1n%999Z'EJJs5zz***MM#b'CG"3444 Ulj1n%999 dSWn--- #CF1IIINIIIs###v&&&&/00L
C #E7G < <  ff9dBGGDMM;PVWW::::6:::	
 	
 	
 	
 	
 	
 	
 	
 !v66	
 	
 	
 	

6 %T>>Ffc"" 	) ((Fb%(<<< 	% ffVnn	4&&..C  C'''#))Iq*A*A!*DDyP $ "-"3K	4 %!	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%"  "sE!!3;s   A6OOOc                    t          j         |           5 }|dk    r"| |                                cd d d            S t          ||||          cd d d            S # 1 swxY w Y   d S )Nr   )copyr;   r   )	lazy_filerJ   bsr(   rP   s        r   r8   r8      s    	9		 1!88
66881 1 1 1 1 1 1 1 !S"i001 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1s   AAA #A )NFr   r   NF)
__future__r   rU   r/   fsspec.corer   r   fsspec.utilsr   r   	dask.baser   dask.delayedr	   
dask.utilsr
   r   rS   r8   r   r    r   <module>r^      s    " " " " " "  				 4 4 4 4 4 4 4 4 6 6 6 6 6 6 6 6                   . . . . . . . .
 m m m m`1 1 1 1 1r    