/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 2001--2021  The R Core Team
 *  Copyright (C) 1995, 1996  Robert Gentleman and Ross Ihaka
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  https://www.R-project.org/Licenses/
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <Parse.h> // -> IOStuff.h, Defn.h
#include <Internal.h>
#include <Fileio.h>
#include <Rconnections.h>

attribute_hidden SEXP getParseContext(void)
{
    int i, last = PARSE_CONTEXT_SIZE;
    char context[PARSE_CONTEXT_SIZE+1];

    SEXP ans = R_NilValue, ans2;
    int nn, nread;
    char c;

    context[last] = '\0';
    for (i=R_ParseContextLast; last>0 ; i += PARSE_CONTEXT_SIZE - 1) {
	i = i % PARSE_CONTEXT_SIZE;
	context[--last] = R_ParseContext[i];
	if (!context[last]) {
	    last++;
	    break;
	}
    }

    nn = 16; /* initially allocate space for 16 lines */
    PROTECT(ans = allocVector(STRSXP, nn));
    c = context[last];
    nread = 0;
    while(c) {
	nread++;
	if(nread >= nn) {
	    ans2 = allocVector(STRSXP, 2*nn);
	    for(i = 0; i < nn; i++)
		SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
	    nn *= 2;
	    UNPROTECT(1); /* old ans */
	    PROTECT(ans = ans2);
	}
	i = last;
	while((c = context[i++])) {
	    if(c == '\n') break;
	}
	context[i-1] = '\0';
	SET_STRING_ELT(ans, nread-1, mkChar(context + last));
	last = i;
    }
    /* get rid of empty line after last newline */
    if (nread && !length(STRING_ELT(ans, nread-1))) {
	nread--;
	R_ParseContextLine--;
    }
    PROTECT(ans2 = allocVector(STRSXP, nread));
    for(i = 0; i < nread; i++)
	SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
    UNPROTECT(2);
    return ans2;
}

static void getParseFilename(char* buffer, size_t buflen)
{
    buffer[0] = '\0';
    if (R_ParseErrorFile) {
	if (isEnvironment(R_ParseErrorFile)) {
	    SEXP filename;
	    PROTECT(filename = findVar(install("filename"), R_ParseErrorFile));
	    if (isString(filename) && length(filename)) {
		strncpy(buffer, CHAR(STRING_ELT(filename, 0)), buflen - 1);
		buffer[buflen - 1] = '\0';
	    }
	    UNPROTECT(1);
	} else if (isString(R_ParseErrorFile) && length(R_ParseErrorFile)) {
	    strncpy(buffer, CHAR(STRING_ELT(R_ParseErrorFile, 0)), buflen - 1);
	    buffer[buflen - 1] = '\0';
	}
    }
}

static SEXP tabExpand(SEXP strings)
{
    int i;
    char buffer[200], *b;
    const char *input;
    SEXP result;
    PROTECT(strings);
    PROTECT(result = allocVector(STRSXP, length(strings)));
    for (i = 0; i < length(strings); i++) {
	input = CHAR(STRING_ELT(strings, i));
	for (b = buffer; *input && (b-buffer < 192); input++) {
	    if (*input == '\t') do {
		*b++ = ' ';
	    } while (((b-buffer) & 7) != 0);
	    else *b++ = *input;
	}
	*b = '\0';
	SET_STRING_ELT(result, i, mkCharCE(buffer, Rf_getCharCE(STRING_ELT(strings, i))));
    }
    UNPROTECT(2);
    return result;
}

NORET void parseError(SEXP call, int linenum)
{
    SEXP context;
    int len, width;
    char filename[128], buffer[10];
    PROTECT(context = tabExpand(getParseContext()));
    len = length(context);
    if (linenum) {
	getParseFilename(filename, sizeof(filename)-2);
	if (strlen(filename)) strcpy(filename + strlen(filename), ":");

	switch (len) {
	case 0:
	    error("%s%d:%d: %s",
		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg);
	    break;
	case 1: // replaces use of %n
	    width = snprintf(buffer, 10, "%d: ", R_ParseContextLine);
	    error("%s%d:%d: %s\n%d: %s\n%*s",
		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
		  R_ParseContextLine, CHAR(STRING_ELT(context, 0)),
		  width+R_ParseErrorCol+1, "^");
	    break;
	default:
	    width = snprintf(buffer, 10, "%d:", R_ParseContextLine);
	    error("%s%d:%d: %s\n%d: %s\n%d: %s\n%*s",
		  filename, linenum, R_ParseErrorCol, R_ParseErrorMsg,
		  R_ParseContextLine-1, CHAR(STRING_ELT(context, len-2)),
		  R_ParseContextLine, CHAR(STRING_ELT(context, len-1)),
		  width+R_ParseErrorCol+1, "^");
	    break;
	}
    } else {
	switch (len) {
	case 0:
	    error("%s", R_ParseErrorMsg);
	    break;
	case 1:
	    error(_("%s in \"%s\""),
		  R_ParseErrorMsg, CHAR(STRING_ELT(context, 0)));
	    break;
	default:
	    error(_("%s in:\n\"%s\n%s\""),
		  R_ParseErrorMsg, CHAR(STRING_ELT(context, len-2)),
		  CHAR(STRING_ELT(context, len-1)));
	    break;
	}
    }
    UNPROTECT(1);
}

typedef struct parse_info {
    Rconnection con;
    Rboolean old_latin1;
    Rboolean old_utf8;
}  parse_cleanup_info;

static void parse_cleanup(void *data)
{
    parse_cleanup_info *pci = (parse_cleanup_info *)data;
    Rconnection con = pci->con;
    if(con && con->isopen)
	con->close(con);
    known_to_be_latin1 = pci->old_latin1;
    known_to_be_utf8 = pci->old_utf8;
}

/* "do_parse" - the user interface input/output to files.

 The internal R_Parse.. functions are defined in ./gram.y (-> gram.c)

 .Internal( parse(file, n, text, prompt, srcfile, encoding) )
 If there is text then that is read and the other arguments are ignored.
*/
attribute_hidden SEXP do_parse(SEXP call, SEXP op, SEXP args, SEXP env)
{
    checkArity(op, args);
    if(!inherits(CAR(args), "connection"))
	error(_("'file' must be a character string or connection"));
    R_ParseError = 0;
    R_ParseErrorMsg[0] = '\0';

    int ifile = asInteger(CAR(args));                   args = CDR(args);
    Rconnection con = getConnection(ifile);
    Rboolean wasopen = con->isopen;
    int num = asInteger(CAR(args));			args = CDR(args);
    if (num == 0)
	return(allocVector(EXPRSXP, 0));

    SEXP text = PROTECT(coerceVector(CAR(args), STRSXP));
    if(length(CAR(args)) && !length(text))
	error(_("coercion of 'text' to character was unsuccessful"));
    args = CDR(args);
    SEXP prompt = CAR(args);				args = CDR(args);
    SEXP source = CAR(args);				args = CDR(args);
    if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
	error(_("invalid '%s' value"), "encoding");
    const char *encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */

    parse_cleanup_info pci;
    pci.con = NULL;
    pci.old_latin1 = known_to_be_latin1;
    pci.old_utf8 = known_to_be_utf8;
    RCNTXT cntxt;
    /* set up context to recover known_to_be_* and to close connection on
       error if opened by do_parse */
    begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
		 R_NilValue, R_NilValue);
    cntxt.cend = &parse_cleanup;
    cntxt.cenddata = &pci;

    known_to_be_latin1 = known_to_be_utf8 = FALSE;
    Rboolean allKnown = TRUE;
    /* allow 'encoding' to override declaration on 'text'. */
    if(streql(encoding, "latin1")) {
	if (!mbcslocale) {
	    known_to_be_latin1 = TRUE;
	    allKnown = FALSE;
	} else
	    warning(_("argument encoding=\"latin1\" is ignored in MBCS locales"));
    } else if(streql(encoding, "UTF-8"))  {
	if (!mbcslocale || utf8locale) {
	    known_to_be_utf8 = TRUE;
	    allKnown = FALSE;
	} else
	    /* the input may be invalid or not parseable when interpreted as
	       in different multi-byte encoding; related to PR#16819 */
	    warning(_("argument encoding=\"UTF-8\" is ignored in MBCS locales"));
    } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc"))
	warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding);

    if (prompt == R_NilValue)
	PROTECT(prompt);
    else
	PROTECT(prompt = coerceVector(prompt, STRSXP));

    ParseStatus status;
    SEXP s;
    if (length(text) > 0) {
	/* If 'text' has known encoding then we can be sure it will be
	   correctly re-encoded to the current encoding by
	   translateChar in the parser and so could mark the result in
	   a Latin-1 or UTF-8 locale.

	   A small complication is that different elements could have
	   different encodings, but all that matters is that all
	   non-ASCII elements have known encoding.
	*/
	if(allKnown)
	  for(int i = 0; i < length(text); i++)
	    if(!ENC_KNOWN(STRING_ELT(text, i)) &&
	       ! IS_ASCII(STRING_ELT(text, i))) {
		allKnown = FALSE;
		break;
	    }
	if(allKnown) {
	    known_to_be_latin1 = pci.old_latin1;
	    known_to_be_utf8 = pci.old_utf8;
	}
	if (num == NA_INTEGER) num = -1;
	s = R_ParseVector(text, num, &status, source);
    }
    else if (ifile >= 3) {/* file != "" */
	if (num == NA_INTEGER) num = -1;
	if(!wasopen) {
	    if(!con->open(con)) error(_("cannot open the connection"));
	    pci.con = con; /* close the connection on error */
	}
	if(!con->canread) error(_("cannot read from this connection"));
	s = R_ParseConn(con, num, &status, source);
	if(!wasopen) {
	    PROTECT(s);
	    pci.con = NULL;
	    con->close(con);
	    UNPROTECT(1);
	}
    }
    else {
	if (num == NA_INTEGER) num = 1;
	s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source);
    }
    if (status != PARSE_OK) parseError(call, R_ParseError);

    known_to_be_latin1 = pci.old_latin1;
    known_to_be_utf8 = pci.old_utf8;
    PROTECT(s);
    endcontext(&cntxt);
    UNPROTECT(3);
    return s;
}