
    >iem              	          d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZ  ed
          Zddeddd
dddf	dZddZd Zd ZdS )    )annotationsN)partial)
open_files)concat)from_delayed)
read_bytes)delayed)parse_bytessystem_encodingT)pureinferstrictFc
           
       	 ||t          d          t          |t                    rt          |          }|ǉdv r}
dnd}
t	          | fd||
d|pi }|	fd|D             }n;g }t          dt          |          |          D ]r}||||z            } t          t                     t          t                    t          t          		          |                    }|                    |           snt          | f                                nd
|d|	d|pi }|d         fdt                    D             }	rUt          t          fdt!          |d                   D                                 }d t#          ||          D             }|st          d|           |rt%          |          }|S )a\	  Read lines from text files

    Parameters
    ----------
    urlpath : string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    blocksize: None, int, or str
        Size (in bytes) to cut up larger files.  Streams by default.
        Can be ``None`` for streaming, an integer number of bytes, or a string
        like "128MiB"
    compression: string
        Compression format like 'gzip' or 'xz'.  Defaults to 'infer'
    encoding: string
    errors: string
    linedelimiter: string or None
    collection: bool, optional
        Return dask.bag if True, or list of delayed values if false
    storage_options: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.
    files_per_partition: None or int
        If set, group input files into partitions of the requested size,
        instead of one partition per file. Mutually exclusive with blocksize.
    include_path: bool
        Whether or not to include the path in the bag.
        If true, elements are tuples of (line, path).
        Default is False.

    Examples
    --------
    >>> b = read_text('myfiles.1.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('myfiles.*.txt.gz')  # doctest: +SKIP
    >>> b = read_text('s3://bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('s3://key:secret@bucket/myfiles.*.txt')  # doctest: +SKIP
    >>> b = read_text('hdfs://namenode.example.com/myfiles.*.txt')  # doctest: +SKIP

    Parallelize a large file by providing the number of uncompressed bytes to
    load into each partition.

    >>> b = read_text('largefile.txt', blocksize='10MB')  # doctest: +SKIP

    Get file paths of the bag by setting include_path=True

    >>> b = read_text('myfiles.*.txt', include_path=True) # doctest: +SKIP
    >>> b.take(1) # doctest: +SKIP
    (('first line of the first file', '/home/dask/myfiles.0.txt'),)

    Returns
    -------
    dask.bag.Bag or list
        dask.bag.Bag if collection is True or list of Delayed lists otherwise.

    See Also
    --------
    from_sequence: Build bag from Python sequence
    Nz7Only one of blocksize or files_per_partition can be setN 
z
r   rt)modeencodingerrorscompressionnewlinec                    g | ]J} t          t                     t          t          t                               |                    KS )	delimiter)r	   listr   file_to_blocks).0filinclude_pathlinedelimiters     -lib/python3.11/site-packages/dask/bag/text.py
<listcomp>zread_text.<locals>.<listcomp>n   sm         GVVV         r   r      
F)r   	blocksizesampler   r!      c                P    g | ]"} t          t                    |          #S  )r	   decode)r   br   r   r"   s     r#   r$   zread_text.<locals>.<listcomp>   s?     
 
 
 GFOOAx??
 
 
r%   c                H    g | ]\  }}|gt          |                   z  S r+   )len)r   ipath
raw_blockss      r#   r$   zread_text.<locals>.<listcomp>   s0    SSS4Z]!3!33SSSr%      c                P    g | ]#\  }} t          t                    ||          $S r+   )r	   attach_path)r   entryr1   s      r#   r$   zread_text.<locals>.<listcomp>   s>       6AeT$$$UD11  r%   zNo files found)
ValueError
isinstancestrr
   r   ranger/   r	   r   mapr   r   appendr   encoder   	enumeratezipr   )urlpathr'   r   r   r   r"   
collectionstorage_optionsfiles_per_partitionr!   r   filesblocksstartblock_filesblock_linesopathsr2   s      ```   `        @r#   	read_textrK      s   P !4!@RSSS)S!! +	**	:::#G MMG
#
 
 $"
 
 &     !  FF Fq#e**.ABB + +#EU5H-H$IJ-gfoo GCLLVVV#   k****+ 
0=0Im**,,,u#%
 
 $"
 
 qT

 
 
 
 
 
J''
 
 
  	SSSS9QqT??SSSTT E EHQVEWEW  F  4)7333 &f%%Mr%   c              #  N   K   5 }n|                                 }|sg cd d d            S |                              } fdfd|d d         D             |dd          z   D             E d {V  n|D ]} r	|j        fn|V  d d d            d S # 1 swxY w Y   d S )Nc              3  2   K   | ]}r	|j         fn|V  d S N)r1   )r   liner!   	lazy_files     r#   	<genexpr>z!file_to_blocks.<locals>.<genexpr>   sH         +7@y~&&D     r%   c                    g | ]}|z   S r+   r+   )r   rO   r   s     r#   r$   z"file_to_blocks.<locals>.<listcomp>   s    EEE$TI-EEEr%   )readsplitr1   )r!   rP   r   ftextpartsrO   s   ```    r#   r   r      s     	 Ga 6688D 	G G G G G G G G
 JJy))E    EEEE%*EEEbcc
R          
  G G0<FtY^,,$FFFFG G G G G G G G G G G G G G G G G Gs   BABB!Bc              #      K   | D ]}||fV  	d S rN   r+   )blockr1   ps      r#   r5   r5      s.        $i r%   c                (   |                      ||          }dv r%t          j        |          }t          |          S |sg S |                              }fd|d d         D             |                              s
|dd          ng z   }|S )Nr   )r   c                    g | ]}|z   S r+   r+   )r   tline_delimiters     r#   r$   zdecode.<locals>.<listcomp>   s    666aq>!666r%   rS   )r,   ioStringIOr   rU   endswith)rZ   r   r   r_   rW   linesrX   outs      `    r#   r,   r,      s    <<&))D777D.999E{{ 	I

>**66665":666"mmN;;CE"##JJ
 
r%   rN   )
__future__r   r`   	functoolsr   fsspec.corer   tlzr   dask.bag.corer   
dask.bytesr   dask.delayedr	   
dask.utilsr
   r   rK   r   r5   r,   r+   r%   r#   <module>rm      s,   " " " " " " 				       " " " " " "       & & & & & & ! ! ! ! ! !             3 3 3 3 3 3 3 3
't



 P P P PfG G G G"  
    r%   