/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1997--2023  The R Core Team
 *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  https://www.R-project.org/Licenses/
 *
 * EXPORTS:
 *
 *  OneIndex()        -- used for "[[<-" in ./subassign.c
 *  get1index()       -- used for "[["   in ./subassign.c & subset.c
 *  vectorIndex()     -- used for "[[" and "[[<-" with a vector arg

 *  mat2indsub()      -- for "mat[i]"     "    "            "
 *  strmat2intmat()   -- for "mat[<ch>]"  "    "            "

 *  makeSubscript()   -- for "[" and "[<-" in ./subset.c and ./subassign.c,
 *			 and "[[<-" with a scalar in ./subassign.c
 *  arraySubscript()  -- for "[i,j,..." and "[<-..." in ./subset.c, ./subassign.c
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <Defn.h>

#include <R_ext/Itermacros.h>

/* interval at which to check interrupts, a guess (~subsecond on current hw) */
#define NINTERRUPT 10000000

/* We might get a call with R_NilValue from subassignment code */
#define ECALL(call, yy)     if(call == R_NilValue) error(yy);    else errorcall(call, yy);
#define ECALL3(call, yy, A) if(call == R_NilValue) error(yy, A); else errorcall(call, yy, A);
#define ECALL4(call, yy, A, B) if(call == R_NilValue) error(yy, A, B); else errorcall(call, yy, A, B);

NORET static void ECALL_OutOfBounds(SEXP x, int subscript,
				    R_xlen_t index, SEXP call)
{
    if (call == R_NilValue)
	call = R_CurrentExpression; /* default behaves like error() */
    SEXP sindex = ScalarReal((double) index);
    PROTECT(sindex);
    SEXP cond = R_makeOutOfBoundsError(x, subscript, sindex, call, NULL);
    PROTECT(cond);
    R_signalErrorCondition(cond, call);
    UNPROTECT(2); /* sindex, cond; not reached */
}

NORET static void ECALL_MissingSubs(SEXP call) // no x
{
    if (call == R_NilValue)
	call = R_CurrentExpression;
    SEXP cond = R_makeMissingSubscriptError1(call);
    PROTECT(cond);
    R_signalErrorCondition(cond, call);
    UNPROTECT(1); /* cond; not reached */
}

NORET static void ECALL_OutOfBoundsCHAR(SEXP x, int subscript,
					SEXP sindex, SEXP call)
{
    if (call == R_NilValue)
	call = R_CurrentExpression; /* default behaves like error() */
    sindex = ScalarString(sindex);
    PROTECT(sindex);
    SEXP cond = R_makeOutOfBoundsError(x, subscript, sindex, call, NULL);
    PROTECT(cond);
    R_signalErrorCondition(cond, call);
    UNPROTECT(2); /* sindex, cond; not reached */
}

/* This allows for the unusual case where x is of length 2,
   and x[[-m]] selects one element for m = 1, 2.
   So 'len' is only used if it is 2 and i is negative.
*/
static R_INLINE int integerOneIndex(int i, R_xlen_t len, SEXP call)
{
    int indx = -1;

    if (i > 0) /* a regular 1-based index from R */
	indx = i - 1;
    else if (i == 0 || len < 2) {
	ECALL3(call, _("attempt to select less than one element in %s"), "integerOneIndex");
    } else if (len == 2 && i > -3)
	indx = 2 + i;
    else {
	ECALL3(call, _("attempt to select more than one element in %s"), "integerOneIndex");
    }
    return indx;
}

/* Utility used (only in) do_subassign2_dflt(), i.e. "[[<-" in ./subassign.c : */
R_xlen_t attribute_hidden
OneIndex(SEXP x, SEXP s, R_xlen_t nx, int partial, SEXP *newname,
	 int pos, SEXP call)
{
    SEXP names;
    R_xlen_t i, indx;
    const void *vmax;

    if (pos < 0 && length(s) > 1) {
	ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex");
    }
    if (pos < 0 && length(s) < 1) {
	ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex");
    }

    if(pos < 0) pos = 0;

    indx = -1;
    *newname = R_NilValue;
    switch(TYPEOF(s)) {
    case LGLSXP:
    case INTSXP:
	indx = integerOneIndex(INTEGER_ELT(s, pos), nx, call);
	break;
    case REALSXP:
    {
	double dblind = REAL_ELT(s, pos);
	if(!ISNAN(dblind)) {
	    /* see comment above integerOneIndex */
	    if (dblind >= 1) indx = (R_xlen_t)(dblind - 1);
	    else if (dblind > -1 || nx < 2) {
		ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex <real>");
	    } else if (nx == 2 && dblind > -3)
		indx = 2 + (R_xlen_t) dblind;
	    else {
		ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex <real>");
	    }
	}
	break;
    }
    case STRSXP:
	vmax = vmaxget();
	names = getAttrib(x, R_NamesSymbol);
	if (names != R_NilValue) {
	    PROTECT(names);
	    /* Try for exact match */
	    for (i = 0; i < nx; i++) {
		const char *tmp = translateChar(STRING_ELT(names, i));
		if (!tmp[0]) continue;
		if (streql(tmp, translateChar(STRING_ELT(s, pos)))) {
		    indx = i;
		    break;
		}
	    }
	    // Try for partial match -- not ever used in current R (partial is 0)
	    if (partial && indx < 0) {
		size_t l = strlen(translateChar(STRING_ELT(s, pos)));
		for(i = 0; i < nx; i++) {
		    const char *tmp = translateChar(STRING_ELT(names, i));
		    if (!tmp[0]) continue;
		    if(!strncmp(tmp, translateChar(STRING_ELT(s, pos)), l)) {
			if(indx == -1 )
			    indx = i;
			else
			    indx = -2;
		    }
		}
	    }
	    UNPROTECT(1); /* names */
	}
	if (indx == -1)
	    indx = nx;
	*newname = STRING_ELT(s, pos);
	vmaxset(vmax);
	break;
    case SYMSXP:
	vmax = vmaxget();
	names = getAttrib(x, R_NamesSymbol);
	if (names != R_NilValue) {
	    PROTECT(names);
	    for (i = 0; i < nx; i++)
		if (streql(translateChar(STRING_ELT(names, i)),
			   CHAR(PRINTNAME(s)))) {
		    indx = i;
		    break;
		}
	    UNPROTECT(1); /* names */
	}
	if (indx == -1)
	    indx = nx;
	*newname = PRINTNAME(s);
	vmaxset(vmax);
	break;
    default:
	ECALL3(call, _("invalid subscript type '%s'"), R_typeToChar(s));
    }
    return indx;
}

/* used here and in subset.c and subassign.c */
R_xlen_t attribute_hidden
get1index(SEXP s, SEXP names, R_xlen_t len, int pok, int pos, SEXP call)
{
/* Get a single index for the [[ and [[<- operators.
   Checks that only one index is being selected.
   Returns -1 for no match.

   s is the subscript
   len is the length of the object or dimension, with names its (dim)names.
   pos is len-1 or -1 for [[, -1 for [[<-
     -1 means use the only element of length-1 s.
   pok : is "partial ok" ?
	 if pok is -1, warn if partial matching occurs, but allow.
*/
    const char *ss, *cur_name;
    const void *vmax;

    int warn_pok = (pok == -1);
    if (warn_pok)
	pok = 1;

    if (pos < 0 && length(s) != 1) {
	if (length(s) > 1) {
	    ECALL3(call, _("attempt to select more than one element in %s"), "get1index");
	} else {
	    ECALL3(call, _("attempt to select less than one element in %s"), "get1index");
	}
    } else if(pos >= length(s)) {
	ECALL(call, _("internal error in use of recursive indexing"));
    }
    if(pos < 0) pos = 0;
    R_xlen_t indx = -1;
    switch (TYPEOF(s)) {
    case LGLSXP:
    case INTSXP:
    {
	int i = INTEGER_ELT(s, pos);
	if (i != NA_INTEGER)
	    indx = integerOneIndex(i, len, call);
	break;
    }
    case REALSXP:
    {
	double dblind = REAL_ELT(s, pos);
	if(!ISNAN(dblind)) {
	    /* see comment above integerOneIndex */
	    if (dblind >= 1) {
		if(R_FINITE(dblind)) indx = (R_xlen_t)(dblind - 1);
	    } else if (dblind > -1 || len < 2) {
		ECALL3(call,
		       (dblind <= -1) ? _("invalid negative subscript in %s")
		       : _("attempt to select less than one element in %s"),
		       "get1index <real>");
	    } else if (len == 2 && dblind > -3) // dblind = -2 or -1 {why exception ?}
		indx = 2 + (R_xlen_t) dblind;
	    else {
		ECALL3(call,
		       (dblind <= -1) ? _("invalid negative subscript in %s")
		       : _("attempt to select more than one element in %s"),
		       "get1index <real>");
	    }
	}
	break;
    }
    case STRSXP:
	/* NA matches nothing */
	if(STRING_ELT(s, pos) == NA_STRING) break;
	/* "" matches nothing: see names.Rd */
	if(!CHAR(STRING_ELT(s, pos))[0]) break;

	/* Try for exact match */
	vmax = vmaxget();
	ss = translateChar(STRING_ELT(s, pos));
	for (R_xlen_t i = 0; i < xlength(names); i++)
	    if (STRING_ELT(names, i) != NA_STRING) {
		if (streql(translateChar(STRING_ELT(names, i)), ss)) {
		    indx = i;
		    break;
		}
	    }
	/* Try for partial match */
	if (pok && indx < 0) {
	    size_t len = strlen(ss);
	    for(R_xlen_t i = 0; i < xlength(names); i++) {
		if (STRING_ELT(names, i) != NA_STRING) {
		    cur_name = translateChar(STRING_ELT(names, i));
		    if(!strncmp(cur_name, ss, len)) {
			if(indx == -1) {/* first one */
			    indx = i;
			    if (warn_pok) {
				if (call == R_NilValue)
				    warning(_("partial match of '%s' to '%s'"),
					    ss, cur_name);
				else
				    warningcall(call,
						_("partial match of '%s' to '%s'"),
						ss, cur_name);
			    }
			}
			else {
			    indx = -2;/* more than one partial match */
			    if (warn_pok) /* already given context */
				warningcall(R_NilValue,
					    _("further partial match of '%s' to '%s'"),
					    ss, cur_name);
			    break;
			}
		    }
		}
	    }
	}
	vmaxset(vmax);
	break;
    case SYMSXP:
	if (s == R_MissingArg) {
	    ECALL_MissingSubs(call);
	}
	vmax = vmaxget();
	for (R_xlen_t i = 0; i < xlength(names); i++)
	    if (STRING_ELT(names, i) != NA_STRING &&
		streql(translateChar(STRING_ELT(names, i)),
		       CHAR(PRINTNAME(s)))) {
		indx = i;
		vmaxset(vmax);
		break;
	    }
	break;
    default:
	ECALL3(call, _("invalid subscript type '%s'"), R_typeToChar(s));
    }
    return indx;
}

/* This is used for [[ and [[<- with a vector of indices of length > 1 .
   x is a list or pairlist, and it is indexed recusively from
   level start to level stop-1.  ( 0...len-1 or 0..len-2 then len-1).
   For [[<- it needs to duplicate if substructure might be shared.
 */
attribute_hidden SEXP
vectorIndex(SEXP x, SEXP thesub, int start, int stop, int pok, SEXP call,
	    Rboolean dup)
{
    int i;
    R_xlen_t offset;
    SEXP cx;

    /* sanity check */
    if (dup && MAYBE_SHARED(x))
	error("should only be called in an assignment context.");

    for(i = start; i < stop; i++) {
	if(!isVectorList(x) && !isPairList(x)) {
	    if (i)
		errorcall(call, _("recursive indexing failed at level %d\n"), i+1);
	    else
		errorcall(call, _("attempt to select more than one element in %s"), "vectorIndex");
	}
	PROTECT(x);
	SEXP names = PROTECT(getAttrib(x, R_NamesSymbol));
	offset = get1index(thesub, names,
			   xlength(x), pok, i, call);
	UNPROTECT(2); /* x, names */
	if(offset < 0 || offset >= xlength(x))
	    errorcall(call, _("no such index at level %d\n"), i+1);
	if(isPairList(x)) {
#ifdef LONG_VECTOR_SUPPORT
	    if (offset > R_SHORT_LEN_MAX)
		error("invalid subscript for pairlist");
#endif
	    cx = nthcdr(x, (int) offset);
	    RAISE_NAMED(CAR(x), NAMED(x));
	    x = CAR(cx);
	    if (dup && MAYBE_SHARED(x)) {
		PROTECT(cx);
		x = shallow_duplicate(x);
		SETCAR(cx, x);
		UNPROTECT(1); /* cx */
	    }
	} else {
	    cx = x;
	    x = VECTOR_ELT(x, offset);
	    RAISE_NAMED(x, NAMED(cx));
	    if (dup && MAYBE_SHARED(x)) {
		PROTECT(cx);
		x = shallow_duplicate(x);
		SET_VECTOR_ELT(cx, offset, x);
		UNPROTECT(1); /* cx */
	    }
	}
    }
    return x;
}

/* Special Matrix Subscripting: Handles the case x[i] where
   x is an n-way array and i is a matrix with n columns.
   This code returns a vector containing the subscripts
   to be extracted when x is regarded as unravelled.

   Negative indices are not allowed.

   A zero/NA anywhere in a row will cause a zero/NA in the same
   position in the result.
*/
/*
  Called from `[`   VectorSubset()    ./subset.c	and
              `[<-` VectorSubassign() ./subassign.c

  only if(isMatrix(s) && isArray(x) && ncols(s) == "length(dim(x))"),
  where dims = getAttrib(x, R_DimSymbol);
*/
attribute_hidden SEXP mat2indsub(SEXP dims, SEXP s, SEXP call, SEXP x)
{
    int nrs = nrows(s);
    R_xlen_t NR = nrs;
    SEXP rvec;
    int ndim = LENGTH(dims);
    const int *pdims = INTEGER_RO(dims);

    if (ncols(s) != ndim) {
	ECALL(call, _("incorrect number of columns in matrix subscript"));
    }

#ifdef LONG_VECTOR_SUPPORT
    /* Check if it is a long vector we need to index */
    R_xlen_t len = 1;
    for (int j = 0; j < ndim; j++)  len *= pdims[j];

    if(len > R_SHORT_LEN_MAX) {
	PROTECT(rvec = allocVector(REALSXP, nrs));
	double *rv = REAL(rvec);
	for (int i = 0; i < nrs; i++) rv[i] = 1.; // 1-based.
	if (TYPEOF(s) == REALSXP) {
	    for (int i = 0; i < nrs; i++) {
		R_xlen_t tdim = 1;
		const double *ps = REAL_RO(s);
		for (int j = 0; j < ndim; j++) {
		    double k = ps[i + j * NR];
		    if(ISNAN(k)) {rv[i] = NA_REAL; break;}
		    k = trunc(k); // = "as.integer(k)" for large k
		    if(k < 0) {
			ECALL(call, _("negative values are not allowed in a matrix subscript"));
		    }
		    if(k == 0.) {rv[i] = 0.; break;}
		    if (k > pdims[j]) {
			ECALL_OutOfBounds(x, j, (R_xlen_t)k, call);
		    }
		    rv[i] += (k - 1.) * tdim;
		    tdim *= pdims[j];
		}
	    }
	} else {
	    s = coerceVector(s, INTSXP);
	    const int *ps = INTEGER_RO(s);
	    for (int i = 0; i < nrs; i++) {
		R_xlen_t tdim = 1;
		for (int j = 0; j < ndim; j++) {
		    int k = ps[i + j * NR];
		    if(k == NA_INTEGER) {rv[i] = NA_REAL; break;}
		    if(k < 0) {
			ECALL(call, _("negative values are not allowed in a matrix subscript"));
		    }
		    if(k == 0) {rv[i] = 0.; break;}
		    if (k > pdims[j]) {
			ECALL_OutOfBounds(x, j, k, call);
		    }
		    rv[i] += (double) ((k - 1) * tdim);
		    tdim *= pdims[j];
		}
	    }
	}
    } else
#endif
    {
	PROTECT(rvec = allocVector(INTSXP, nrs));
	int *iv = INTEGER(rvec);
	for (int i = 0; i < nrs; i++) iv[i] = 1; // 1-based.
	s = coerceVector(s, INTSXP);
	int *ps = INTEGER(s);
	for (int i = 0; i < nrs; i++) {
	    int tdim = 1;
	    for (int j = 0; j < ndim; j++) {
		int k = ps[i + j * NR];
		if(k == NA_INTEGER) {iv[i] = NA_INTEGER; break;}
		if(k < 0) {
		    ECALL(call, _("negative values are not allowed in a matrix subscript"));
		}
		if(k == 0) {iv[i] = 0; break;}
		if (k > pdims[j]) {
		    ECALL_OutOfBounds(x, j, k, call);
		}
		iv[i] += (k - 1) * tdim;
		tdim *= pdims[j];
	    }
	}
    }

    UNPROTECT(1);
    return rvec;
}

/*
Special Matrix Subscripting: For the case x[i] where x is an n-way
array and i is a character matrix with n columns, this code converts i
to an integer matrix by matching against the dimnames of x. NA values
in any row of i propagate to the result.  Unmatched entries result in
a subscript out of bounds error.  */
/*
  Called from `[`   VectorSubset()    ./subset.c	and
              `[<-` VectorSubassign() ./subassign.c

  only if(isMatrix(s) && isArray(x) && ncols(s) == "length(dim(x))"),
  where dnamelist = PROTECT(GetArrayDimnames(x))
 */
attribute_hidden SEXP strmat2intmat(SEXP s, SEXP dnamelist, SEXP call, SEXP x)
{
    /* XXX: assumes all args are protected */
    SEXP dim = getAttrib(s, R_DimSymbol);
    int nr = INTEGER(dim)[0], // = nrow(s)
	nc = INTEGER(dim)[1]; // = ncol(s)
    if(isNull(dnamelist)) { // && nr > 0)
#if 1
	ECALL(call, _("no 'dimnames' attribute for array")); // as in  arraySubscript()
#else
	ECALL4(call,
          _("%d x %d character matrix subscript illegal for NULL dimnames"),
	       nr, nc);
#endif
    }
#if 0 /* unneeded as only called  if (length(dnamelist) == nc) : */
    if(length(dnamelist) != nc) {
	ECALL4(call,
          _("subscripting by character matrix of %d columns does not match length(dimnames(x)) == %d"),
	       nc, length(dnamelist));
    }
#endif
    SEXP snames = PROTECT(allocVector(STRSXP, nr));
    // result si := an integer matrix of same dimension as 's'
    SEXP si = PROTECT(allocVector(INTSXP, xlength(s)));
    dimgets(si, dim);
    int *psi = INTEGER(si);
    memset(psi, 0, XLENGTH(si) * sizeof(int));
    for (int i = 0; i < nc; i++) {
	R_xlen_t iNR = i * (R_xlen_t) nr;
	for (int j = 0; j < nr; j++)
	    SET_STRING_ELT(snames, j, STRING_ELT(s, j + iNR));
	SEXP dnames = VECTOR_ELT(dnamelist, i);
	SEXP sicol = PROTECT(match(dnames, snames, 0));
	for (int j = 0; j < nr; j++) {
	    int v = INTEGER_ELT(sicol, j);
	    R_xlen_t idx = j + iNR;
	    SEXP s_elt = STRING_ELT(s, idx);
	    if (s_elt == NA_STRING) v = NA_INTEGER;
	    else if (!CHAR(s_elt)[0]) v = 0; /* disallow "" match */
	    if (v == 0) ECALL_OutOfBoundsCHAR(x, i, s_elt, call);
	    psi[idx] = v;
	}
	UNPROTECT(1);
    }
    UNPROTECT(2);
    return si;
}

static SEXP nullSubscript(R_xlen_t n)
{
    SEXP indx;
#ifdef LONG_VECTOR_SUPPORT
    if (n > R_SHORT_LEN_MAX) {
	indx = allocVector(REALSXP, n);
	double *pindx = REAL(indx);
	for (R_xlen_t i = 0; i < n; i++)
	    pindx[i] = (double)(i + 1);
    } else
#endif
    {
	indx = allocVector(INTSXP, n);
	int *pindx = INTEGER(indx);
	for (int i = 0; i < n; i++)
	    pindx[i] = i + 1;
    }
    return indx;
}


static SEXP
logicalSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call)
{
    R_xlen_t count, i, nmax, i1, i2;
    int canstretch;
    SEXP indx;
    canstretch = *stretch > 0;
    if (!canstretch && ns > nx) {
	ECALL(call, _("(subscript) logical subscript too long"));
    }
    nmax = (ns > nx) ? ns : nx;
    *stretch = (ns > nx) ? ns : 0;
    if (ns == 0) return(allocVector(INTSXP, 0));
    const int *ps = LOGICAL_RO(s);    /* Calling LOCICAL_RO here may force a
					 large allocation, but no larger than
					 the one made by R_alloc below. This
					 could use rewriting to better handle
					 a sparse logical index. */
#ifdef LONG_VECTOR_SUPPORT
    if (nmax > R_SHORT_LEN_MAX) {
	if (ns == nmax) { /* no recycling - use fast single-index code */
	    const void *vmax = vmaxget();
	    double *buf = (double *) R_alloc(nmax, sizeof(double));
	    count = 0;
	    R_ITERATE_CHECK(NINTERRUPT, nmax, i,
		if (ps[i]) {
		    if (ps[i] == NA_LOGICAL)
			buf[count++] = NA_REAL;
		    else
			buf[count++] = (double)(i + 1);
		});
	    PROTECT(indx = allocVector(REALSXP, count));
	    memcpy(REAL(indx), buf, sizeof(double) * count);
	    vmaxset(vmax);
	    UNPROTECT(1);
	    return indx;
	}
	count = 0;
	/* we only need to scan s once even if we recycle,
	   just remember the total count as well as
	   the count for the last incomplete chunk (if any) */
	i1 = (ns < nmax) ? (nmax % ns) : 0;
	if (i1 > 0) { /* last recycling chunk is incomplete -
			 we have to get the truncated count as well */
	    R_xlen_t rem = 0;
	    for (i = 0; i < ns; i++) {
		if (i == i1) rem = count;
		if (ps[i]) count++;
	    }
	    count = count * (nmax / ns) + rem;
	} else { /* nested recycling, total is sufficient */
	    for (i = 0; i < ns; i++)
		if (ps[i]) count++;
	    count *= nmax / ns;
	}
	PROTECT(indx = allocVector(REALSXP, count));
	double *pindx = REAL(indx);
	count = 0;
	MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2,
	    if (ps[i1]) {
		if (ps[i1] == NA_LOGICAL)
		    pindx[count++] = NA_REAL;
		else
		    pindx[count++] = (double)(i + 1);
	    });

	UNPROTECT(1);
	return indx;
    }
#endif
// else --- the same code for  non-long vectors --------------------------
    if (ns == nmax) {  /* no recycling - use fast single-index code */
	const void *vmax = vmaxget();
	int *buf = (int *) R_alloc(nmax, sizeof(int));
	count = 0;
	R_ITERATE_CHECK(NINTERRUPT, nmax, i,
	    if (ps[i]) {
		if (ps[i] == NA_LOGICAL)
		    buf[count++] = NA_INTEGER;
		else
		    buf[count++] = (int)(i + 1);
	    });
	PROTECT(indx = allocVector(INTSXP, count));
	memcpy(INTEGER(indx), buf, sizeof(int) * count);
	vmaxset(vmax);
	UNPROTECT(1);
	return indx;
    }

    count = 0;
    /* we only need to scan s once even if we recycle,
       just remember the total count as well as
       the count for the last incomplete chunk (if any) */
    i1 = (ns < nmax) ? (nmax % ns) : 0;
    if (i1 > 0) { /* last recycling chunk is incomplete -
		     we have to get the truncated count as well */
	R_xlen_t rem = 0;
	for (i = 0; i < ns; i++) {
	    if (i == i1) rem = count;
	    if (ps[i]) count++;
	}
	count = count * (nmax / ns) + rem;
    } else { /* nested recycling, total is sufficient */
	for (i = 0; i < ns; i++)
	    if (ps[i]) count++;
	count *= nmax / ns;
    }
    PROTECT(indx = allocVector(INTSXP, count));
    int *pindx = INTEGER(indx);
    count = 0;
    MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2,
	if (ps[i1]) {
	    if (ps[i1] == NA_LOGICAL)
		pindx[count++] = NA_INTEGER;
	    else
		pindx[count++] = (int)(i + 1);
	});

    UNPROTECT(1);
    return indx;
}

static SEXP negativeSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP call)
{
    SEXP indx;
    R_xlen_t stretch = 0;
    R_xlen_t i;
    PROTECT(indx = allocVector(LGLSXP, nx));
    int *pindx = LOGICAL(indx);
    for (i = 0; i < nx; i++)
	pindx[i] = 1;
    const int *ps = INTEGER_RO(s);
    for (i = 0; i < ns; i++) {
	int ix = ps[i];
	if (ix != 0 && ix != NA_INTEGER && -ix <= nx)
	    pindx[-ix - 1] = 0;
    }
    s = logicalSubscript(indx, nx, nx, &stretch, call);
    UNPROTECT(1);
    return s;
}

static SEXP positiveSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx)
{
    SEXP indx;
    R_xlen_t i, zct = 0;
    const int *ps = INTEGER_RO(s);
    for (i = 0; i < ns; i++) if (ps[i] == 0) zct++;
    if (zct) {
	indx = allocVector(INTSXP, (ns - zct));
	int *pindx = INTEGER(indx);
	for (i = 0, zct = 0; i < ns; i++)
	    if (ps[i] != 0)
		pindx[zct++] = ps[i];
	return indx;

    } else return s;
}

static SEXP
integerSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch,
		 SEXP call, SEXP x)
{
    R_xlen_t i;
    int ii, neg, max, canstretch;
    Rboolean isna = FALSE;
    canstretch = *stretch > 0;
    *stretch = 0;
    neg = FALSE;
    max = 0;
    const int *ps = INTEGER_RO(s);
    for (i = 0; i < ns; i++) {
	ii = ps[i];
	if (ii < 0) {
	    if (ii == NA_INTEGER)
		isna = TRUE;
	    else
		neg = TRUE;
	}
	else if (ii > max)
	    max = ii;
    }
    if (max > nx) {
	if(canstretch) *stretch = max;
	else {
	    ECALL_OutOfBounds(x, -1, max, call);
	}
    }
    if (neg) {
	if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call);
	else {
	    ECALL(call, _("only 0's may be mixed with negative subscripts"));
	}
    }
    else return positiveSubscript(s, ns, nx);
    return R_NilValue;
}

static SEXP
realSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch,
	      SEXP call, SEXP x)
{
    int canstretch = *stretch > 0;
    *stretch = 0;
    double min = 0, max = 0;
    const double *ps = REAL_RO(s);
    Rboolean isna = FALSE;
    for (R_xlen_t i = 0; i < ns; i++) {
	double ii = ps[i];
	if (R_FINITE(ii)) {
	    if (ii < min) min = ii;
	    if (ii > max) max = ii;
	} else isna = TRUE;
    }
    if (max >= nx+1.) {
#ifndef LONG_VECTOR_SUPPORT
	if (max > INT_MAX) {
	    ECALL(call, _("subscript too large for 32-bit R"));
	}
#endif
	if(canstretch) *stretch = (R_xlen_t) max;
	else {
	    ECALL_OutOfBounds(x, -1, (R_xlen_t) max, call);
	}
    }
    if (min <= -1) {
	if (max < 1 && !isna) {
	    R_xlen_t i, stretch = 0;
	    SEXP indx = PROTECT(allocVector(LGLSXP, nx));
	    int *pindx = LOGICAL(indx);
	    for (i = 0; i < nx; i++) pindx[i] = 1;
	    for (i = 0; i < ns; i++) {
		double dx = ps[i];
		if (R_FINITE(dx) && dx <= -1  && -dx < nx+1.) {
		    R_xlen_t ix = (R_xlen_t)(-dx - 1);
		    pindx[ix] = 0;
		}
	    }
	    s = logicalSubscript(indx, nx, nx, &stretch, call);
	    UNPROTECT(1);
	    return s;
	} else {
	    ECALL(call, _("only 0's may be mixed with negative subscripts"));
	}
    } else {
	/* Only return a REALSXP index if we need to */
	SEXP indx;
	R_xlen_t i, cnt = 0;
	Rboolean int_ok = TRUE;
	/* NB, indices will be truncated eventually,
	   so need to do that to take '0' into account */
	for (i = 0; i < ns; i++) {
	    double ds = ps[i];
#ifdef OLDCODE_LONG_VECTOR
	    if (!R_FINITE(ds)) {
		if (ds > INT_MAX) int_ok = FALSE;
		cnt++;
	    } else if ((R_xlen_t) ds != 0) cnt++;
#else
	    if (R_FINITE(ds) && ds > INT_MAX) int_ok = FALSE;
	    if (!R_FINITE(ds) || (R_xlen_t) ds != 0) cnt++;
#endif
	}
	if (int_ok) {
	    indx = allocVector(INTSXP, cnt);
	    int *pindx = INTEGER(indx);
	    for (i = 0, cnt = 0; i < ns; i++) {
		double ds = ps[i];
		int ia;
		if (!R_FINITE(ds)) ia = NA_INTEGER;
		else ia = (int) ds;
		if (ia != 0) pindx[cnt++] = ia;
	    }
	} else {
	    indx = allocVector(REALSXP, cnt);
	    double *pindx = REAL(indx);
	    for (i = 0, cnt = 0; i < ns; i++) {
		double ds = ps[i];
		if (!R_FINITE(ds) || (R_xlen_t) ds != 0) pindx[cnt++] = ds;
	    }
	}
	return indx;
    }
    return R_NilValue;
}

/* This uses a couple of horrible hacks in conjunction with
 * VectorAssign (in subassign.c).  If subscripting is used for
 * assignment, it is possible to extend a vector by supplying new
 * names, and we want to give the extended vector those names, so they
 * are returned as the use.names attribute. Also, unset elements of the vector
 * of new names (places where a match was found) are indicated by
 * setting the element of the newnames vector to NULL.
*/

/* The original code (pre 2.0.0) used a ns x nx loop that was too
 * slow.  So now we hash.  Hashing is expensive on memory (up to 32nx
 * bytes) so it is only worth doing if ns * nx is large.  If nx is
 * large, then it will be too slow unless ns is very small.
 */

static SEXP
stringSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP names,
		R_xlen_t *stretch, SEXP call, SEXP x, int dim)
{
    SEXP indx, indexnames = R_NilValue;
    R_xlen_t i, j, nnames, extra, sub;
    int canstretch = *stretch > 0;
    /* product may overflow, so check factors as well. */
    Rboolean usehashing = ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) );
    int nprotect = 0;

    PROTECT(s);
    PROTECT(names);
    nprotect += 2;
    nnames = nx;
    extra = nnames;

    /* Process each of the subscripts. First we compare with the names
     * on the vector and then (if there is no match) with each of the
     * previous subscripts, since (if assigning) we may have already
     * added an element of that name. (If we are not assigning, any
     * nonmatch will have given an error.)
     */

    if(usehashing) {
	/* must be internal, so names contains a character vector */
	/* NB: this does not behave in the same way with respect to ""
	   and NA names: they will match */
	PROTECT(indx = match(names, s, 0)); /**** guaranteed to be fresh???*/
	nprotect++;
	/* second pass to correct this */
	int *pindx = INTEGER(indx);
	for (i = 0; i < ns; i++)
	    if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0])
		pindx[i] = 0;
    } else {
	PROTECT(indx = allocVector(INTSXP, ns));
	nprotect++;
	int *pindx = INTEGER(indx);
	for (i = 0; i < ns; i++) {
	    sub = 0;
	    if (names != R_NilValue) {
		for (j = 0; j < nnames; j++) {
		    SEXP names_j = STRING_ELT(names, j);
		    if (NonNullStringMatch(STRING_ELT(s, i), names_j)) {
			sub = j + 1;
			break;
		    }
		}
	    }
	    pindx[i] = (int) sub;
	}
    }

    int *pindx = INTEGER(indx);
    SEXP sindx = NULL;
    for (i = 0; i < ns; i++) {
	sub = pindx[i];
	if (sub == 0) {
	    if (sindx == NULL) {
		sindx = PROTECT(match(s, s, 0));
		indexnames = PROTECT(allocVector(VECSXP, ns));
		nprotect += 2;
		for (int z = 0; z < ns; z++)
		    SET_VECTOR_ELT(indexnames, z, R_NilValue);
	    }
	    int j = INTEGER(sindx)[i] - 1;
	    if(STRING_ELT(s, i) != NA_STRING && CHAR(STRING_ELT(s, i))[0]) {
		sub = pindx[j];
		SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j));
	    }
	}
	if (sub == 0) {
	    if (!canstretch) {
		ECALL_OutOfBoundsCHAR(x, dim, STRING_ELT(s, i), call);
	    }
	    extra += 1;
	    sub = extra;
	    SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i));
	}
	pindx[i] = (int) sub;
    }
    /* We return the new names as the names attribute of the returned
       subscript vector. */
    if (extra != nnames)
	setAttrib(indx, R_UseNamesSymbol, indexnames);
    if (canstretch)
	*stretch = extra;
    UNPROTECT(nprotect);
    return indx;
}

/* Array Subscripts.
    dim is the dimension (0 to k-1)
    s is the subscript list,
    dims is the dimensions of x
    dng is a function (usually getAttrib) that obtains the dimnames
    x is the array to be subscripted.
*/

attribute_hidden SEXP
int_arraySubscript(int dim, SEXP s, SEXP dims, SEXP x, SEXP call)
{
    int nd, ns;
    R_xlen_t stretch = 0;
    SEXP dnames, tmp;
    ns = length(s);
    nd = INTEGER_ELT(dims, dim);

    switch (TYPEOF(s)) {
    case NILSXP:
	return allocVector(INTSXP, 0);
    case LGLSXP:
	return logicalSubscript(s, ns, nd, &stretch, call);
    case INTSXP:
	return integerSubscript(s, ns, nd, &stretch, call, x);
    case REALSXP:
	/* We don't yet allow subscripts > R_SHORT_LEN_MAX */
	PROTECT(tmp = coerceVector(s, INTSXP));
	tmp = integerSubscript(tmp, ns, nd, &stretch, call, x);
	UNPROTECT(1);
	return tmp;
    case STRSXP:
	dnames = getAttrib(x, R_DimNamesSymbol);
	if (dnames == R_NilValue) {
	    ECALL(call, _("no 'dimnames' attribute for array"));
	}
	dnames = VECTOR_ELT(dnames, dim);
	return stringSubscript(s, ns, nd, dnames, &stretch, call, x, dim);
    case SYMSXP:
	if (s == R_MissingArg)
	    return nullSubscript(nd);
    default:
	ECALL3(call, _("invalid subscript type '%s'"), R_typeToChar(s));
    }
    return R_NilValue;
}

/* This is used by packages arules, cba, proxy and seriation. */
typedef SEXP AttrGetter(SEXP x, SEXP data);
typedef SEXP (*StringEltGetter)(SEXP x, int i);

SEXP
arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng,
	       StringEltGetter strg, SEXP x)
{
    return int_arraySubscript(dim, s, dims, x, R_NilValue);
}

/* Subscript creation.  The first thing we do is check to see */
/* if there are any user supplied NULL's, these result in */
/* returning a vector of length 0. */
/* if stretch is zero on entry then the vector x cannot be
   "stretched",
   otherwise, stretch returns the new required length for x
*/

attribute_hidden SEXP
makeSubscript(SEXP x, SEXP s, R_xlen_t *stretch, SEXP call)
{
    if (! (isVector(x) || isList(x) || isLanguage(x))) {
	ECALL(call, _("subscripting on non-vector"));
    }

    R_xlen_t nx = xlength(x);

    /* special case for simple indices -- does not duplicate */
    if (IS_SCALAR(s, INTSXP)) {
	int i = SCALAR_IVAL(s);
	if (0 < i && i <= nx) {
	    *stretch = 0;
	    return s;
	}
    }
    else if (IS_SCALAR(s, REALSXP)) {
	double di = SCALAR_DVAL(s);
	if (1 <= di && di <= nx) {
	    *stretch = 0;
	    /* We could only return a REALSXP if the value is too
	       large for an INTSXP, but, as the calling code can
	       handle REALSXP indices, returning the REALSXP
	       avoids an allocation. */
	    return s;
	}
    }

    R_xlen_t ns = xlength(s);
    SEXP ans = R_NilValue;
    switch (TYPEOF(s)) {
    case NILSXP:
	*stretch = 0;
	ans = allocVector(INTSXP, 0);
	break;
    case LGLSXP:
	ans = logicalSubscript(s, ns, nx, stretch, call);
	break;
    case INTSXP:
	ans = integerSubscript(s, ns, nx, stretch, call, x);
	break;
    case REALSXP:
	ans = realSubscript(s, ns, nx, stretch, call, x);
	break;
    case STRSXP:
    {
	SEXP names = PROTECT(getAttrib(x, R_NamesSymbol));
	/* *stretch = 0; */
	ans = stringSubscript(s, ns, nx, names, stretch, call, x, -1);
	UNPROTECT(1); /* names */
	break;
    }
    case SYMSXP:
	*stretch = 0;
	if (s == R_MissingArg) {
	    ans = nullSubscript(nx);
	    break;
	}
    default:
	ECALL3(call, _("invalid subscript type '%s'"), R_typeToChar(s));
    }
    return ans;
}
