
    &Vf+                         d Z ddlZddlZddlZddlmZ ddlmZ  ed           G d de                      Z	 ed          dd
            Z
 ed          	 	 	 	 	 	 dd            ZdS )z4Deprecated sequence preprocessing APIs from Keras 1.    N)keras_export)	PyDatasetz8keras._legacy.preprocessing.sequence.TimeseriesGeneratorc                   @    e Zd ZdZ	 	 	 	 	 	 	 ddZd Zd	 Zd
 Zd ZdS )TimeseriesGeneratoral  Utility class for generating batches of temporal data.

    DEPRECATED.

    This class takes in a sequence of data-points gathered at
    equal intervals, along with time series parameters such as
    stride, length of history, etc., to produce batches for
    training/validation.

    Arguments:
        data: Indexable generator (such as list or Numpy array)
            containing consecutive data points (timesteps).
            The data should be at 2D, and axis 0 is expected
            to be the time dimension.
        targets: Targets corresponding to timesteps in `data`.
            It should have same length as `data`.
        length: Length of the output sequences (in number of timesteps).
        sampling_rate: Period between successive individual timesteps
            within sequences. For rate `r`, timesteps
            `data[i]`, `data[i-r]`, ... `data[i - length]`
            are used for create a sample sequence.
        stride: Period between successive output sequences.
            For stride `s`, consecutive output samples would
            be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
        start_index: Data points earlier than `start_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        end_index: Data points later than `end_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        shuffle: Whether to shuffle output samples,
            or instead draw them in chronological order.
        reverse: Boolean: if `true`, timesteps in each output sample will be
            in reverse chronological order.
        batch_size: Number of timeseries samples in each batch
            (except maybe the last one).

    Returns:
        A PyDataset instance.
       r   NF   c                    t          |          t          |          k    r/t          dt          |           dt          |                     || _        || _        || _        || _        || _        ||z   | _        |t          |          dz
  }|| _        || _	        |	| _
        |
| _        | j        | j        k    r t          d| j         d| j         d          d S )Nz;Data and targets have to be of same length. Data length is z while target length is r   z`start_index+length=z > end_index=zU` is disallowed, as no part of the sequence would be left to be used as current step.)len
ValueErrordatatargetslengthsampling_ratestridestart_index	end_indexshufflereverse
batch_size)selfr   r   r   r   r   r   r   r   r   r   s              d/var/www/html/software/conda/lib/python3.11/site-packages/keras/src/legacy/preprocessing/sequence.py__init__zTimeseriesGenerator.__init__7   s    t99G$$925d))9 9*-g,,9 9   	*&/D		AI"$dn,,<t'7 < <#~< < <   -,    c                 `    | j         | j        z
  | j        | j        z  z   | j        | j        z  z  S )N)r   r   r   r   )r   s    r   __len__zTimeseriesGenerator.__len__`   s3    NT--$+0MMo+- 	-r   c                      j         r5t          j                             j         j        dz    j                  }n[ j         j         j        z  |z  z   }t          j        |t          | j         j        z  z    j        dz              j                  }t          j
         fd|D                       }t          j
         fd|D                       } j        r|d d d d ddf         |fS ||fS )Nr   )sizec                 J    g | ]}j         |j        z
  |j                  S  )r   r   r   .0rowr   s     r   
<listcomp>z3TimeseriesGenerator.__getitem__.<locals>.<listcomp>s   sA        	#+cD4FFG  r   c                 *    g | ]}j         |         S r   )r   r    s     r   r#   z3TimeseriesGenerator.__getitem__.<locals>.<listcomp>x   s     >>>#DL->>>r   .)r   nprandomrandintr   r   r   r   arangeminarrayr   )r   indexrowsisamplesr   s   `     r   __getitem__zTimeseriesGenerator.__getitem__e   s*   < 
	9$$ $.1"44? %  DD  4?T[#@5#HHA9A$+55t~7IJJ D (     
 
 (>>>>>>>??< 	2111dddC<('11r   c                 t   | j         }t          | j                   j        t          j        k    r| j                                         }	 t          j        |          }n%# t          $ r}t          d|           |d}~ww xY w| j	        }t          | j	                  j        t          j        k    r| j	                                        }	 t          j        |          }n%# t          $ r}t          d|           |d}~ww xY w||| j
        | j        | j        | j        | j        | j        | j        | j        d
S )zReturns the TimeseriesGenerator configuration as Python dictionary.

        Returns:
            A Python dictionary with the TimeseriesGenerator configuration.
        zData not JSON Serializable: NzTargets not JSON Serializable: )
r   r   r   r   r   r   r   r   r   r   )r   type
__module__r&   __name__tolistjsondumps	TypeErrorr   r   r   r   r   r   r   r   r   )r   r   	json_dataer   json_targetss         r   
get_configzTimeseriesGenerator.get_config~   sS    y	??%449##%%D	J
4((II 	J 	J 	JA4AABBI	J ,(BK77l))++G	P:g..LL 	P 	P 	PGgGGHHaO	P #k!/k+||/
 
 	
s0   	A 
B (A;;B C   
D*C==Dc                 l    |                                  }| j        j        |d}t          j        |fi |S )a  Returns a JSON string containing the generator's configuration.

        Args:
            **kwargs: Additional keyword arguments to be passed
                to `json.dumps()`.

        Returns:
            A JSON string containing the tokenizer configuration.
        )
class_nameconfig)r<   	__class__r4   r6   r7   )r   kwargsr?   timeseries_generator_configs       r   to_jsonzTimeseriesGenerator.to_json   sF     "".1'
 '
# z5@@@@@r   )r   r   r   NFFr   )	r4   r3   __qualname____doc__r   r   r0   r<   rC   r   r   r   r   r      s        ' '\ ' ' ' 'R- - -
     2!
 !
 !
FA A A A Ar   r   z8keras._legacy.preprocessing.sequence.make_sampling_tableh㈵>c                     d}t          j        |           }d|d<   |t          j        |          |z   z  dz   dd|z  z  z
  }||z  }t          j        d|t          j        |          z            S )aC  Generates a word rank-based probabilistic sampling table.

    DEPRECATED.

    Used for generating the `sampling_table` argument for `skipgrams`.
    `sampling_table[i]` is the probability of sampling
    the word i-th most common word in a dataset
    (more common words should be sampled less frequently, for balance).

    The sampling probabilities are generated according
    to the sampling distribution used in word2vec:

    ```
    p(word) = (min(1, sqrt(word_frequency / sampling_factor) /
        (word_frequency / sampling_factor)))
    ```

    We assume that the word frequencies follow Zipf's law (s=1) to derive
    a numerical approximation of frequency(rank):

    `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))`
    where `gamma` is the Euler-Mascheroni constant.

    Args:
        size: Int, number of possible words to sample.
        sampling_factor: The sampling factor in the word2vec formula.

    Returns:
        A 1D Numpy array of length `size` where the ith entry
        is the probability that a word of rank i should be sampled.
    gX9v?r   r   g      ?      ?g      (@)r&   r)   logminimumsqrt)r   sampling_factorgammarankinv_fqfs         r   make_sampling_tablerQ      sr    B E9T??DDGRVD\\E)*S03$+3FFF& A:c1rwqzz>***r   z.keras._legacy.preprocessing.sequence.skipgrams   rH   TFc                    g }g }	t          |           D ]\  }
}|s|||         t          j                    k     r(t          d|
|z
            }t          t	          |           |
|z   dz             }t          ||          D ]Y}||
k    rQ| |         }|s|                    ||g           |r|	                    ddg           D|	                    d           Z|dk    rut          t	          |	          |z            }d |D             t          j                   |fdt          |          D             z  }|r|	ddgg|z  z  }	n	|	dg|z  z  }	|rg|t          j	        dd          }t          j
        |           t          j        |           t          j
        |           t          j        |	           ||	fS )a  Generates skipgram word pairs.

    DEPRECATED.

    This function transforms a sequence of word indexes (list of integers)
    into tuples of words of the form:

    - (word, word in the same window), with label 1 (positive samples).
    - (word, random word from the vocabulary), with label 0 (negative samples).

    Read more about Skipgram in this gnomic paper by Mikolov et al.:
    [Efficient Estimation of Word Representations in
    Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf)

    Args:
        sequence: A word sequence (sentence), encoded as a list
            of word indices (integers). If using a `sampling_table`,
            word indices are expected to match the rank
            of the words in a reference dataset (e.g. 10 would encode
            the 10-th most frequently occurring token).
            Note that index 0 is expected to be a non-word and will be skipped.
        vocabulary_size: Int, maximum possible word index + 1
        window_size: Int, size of sampling windows (technically half-window).
            The window of a word `w_i` will be
            `[i - window_size, i + window_size+1]`.
        negative_samples: Float >= 0. 0 for no negative (i.e. random) samples.
            1 for same number as positive samples.
        shuffle: Whether to shuffle the word couples before returning them.
        categorical: bool. if False, labels will be
            integers (eg. `[0, 1, 1 .. ]`),
            if `True`, labels will be categorical, e.g.
            `[[1,0],[0,1],[0,1] .. ]`.
        sampling_table: 1D array of size `vocabulary_size` where the entry i
            encodes the probability to sample a word of rank i.
        seed: Random seed.

    Returns:
        couples, labels: where `couples` are int pairs and
            `labels` are either 0 or 1.

    Note:
        By convention, index 0 in the vocabulary is
        a non-word and will be skipped.
    Nr   r   c                     g | ]
}|d          S )r   r   )r!   cs     r   r#   zskipgrams.<locals>.<listcomp>,  s    '''!1'''r   c                 p    g | ]2}|t                    z           t          j        d d z
            g3S )r   )r
   r'   r(   )r!   r.   vocabulary_sizewordss     r   r#   zskipgrams.<locals>.<listcomp>/  sN     
 
 
 1s5zz>"FN1o6I$J$JK
 
 
r   g    cA)	enumerater'   maxr*   r
   rangeappendintr   r(   seed)sequencerW   window_sizenegative_samplesr   categoricalsampling_tabler^   coupleslabelsr.   wiwindow_start
window_endjwjnum_negative_samplesrX   s    `               @r   	skipgramsrl      s0   n GF8$$ % %2 	%b!FMOO331a+o..XK!(;<<
|Z00 		% 		%AAvva[ Bx((( %MM1a&))))MM!$$$		% !"3v;;1A#ABB''w'''u 
 
 
 
 
/00
 
 
 	
  	11vh!555FFqc000F <>!T**DDwDvF?r   )rF   )rR   rH   TFNN)rE   r6   r'   numpyr&   keras.src.api_exportr   3keras.src.trainers.data_adapters.py_dataset_adapterr   r   rQ   rl   r   r   r   <module>rp      s    : :       - - - - - - I I I I I I HIIcA cA cA cA cA) cA cA JIcAL HII&+ &+ &+ JI&+R >?? 	b b b @?b b br   