// Copyright 2022-2024 Luminary Cloud, Inc. All Rights Reserved.
import * as FileSaver from 'file-saver';

import { unquote, wordsToList } from './text';

// Simple CSV writer to write CSV files according to RFC 4180.
// Quotes fields only as required (allowed by the RFC), and
// supports custom delimiter, quote characters and line breaks.
//
// TODO(bamo): This writer constructs the CSV contents entirely in-memory.
// This means that it is not suitable for very large datasets (for example,
// if the data for the CSV is streamed from the server rather than held
// in browser memory). If we need to write such large CSVs in the future, we
// should implement this with StreamSaver as opposed to FileSaver, which would
// let us write chunks of the file at a time.
export class CsvWriter {
  // Accumulated file contents: the header row followed by every added row, each
  // terminated by the configured line break.
  private contents: string = '';
  private delim: string;
  private quote: string;
  private br: string;

  // Create a new CSV writer based on the provided headers, with the
  // option of using custom delimiter, quote character or line break
  // character.  The header row is written immediately.
  // headers must be nonzero length, or a valid CSV cannot be created;
  // an Error is thrown otherwise.
  constructor(
    private readonly headers: string[],
    delim: string = ',',
    quote: string = '"',
    br: string = '\n',
  ) {
    if (this.headers.length === 0) {
      throw new Error('Cannot create CSV with 0 columns.');
    }
    this.delim = delim;
    this.quote = quote;
    this.br = br;
    this.addRow(this.headers);
  }

  // Add a row to the CSV.  Throws an error if the row is not the
  // same length as the header row.
  public addRow(values: string[]) {
    if (values.length !== this.headers.length) {
      throw new Error(`Wrong number of values ${values.length}, expected ${this.headers.length}`);
    }
    const fields = values.map((value) => this.maybeQuote(value));
    this.contents += fields.join(this.delim) + this.br;
  }

  // Save the CSV through the browser, suggesting the requested filename.
  // Should be called after all desired rows have been added.
  public saveFile(filename: string) {
    const blob = new Blob(
      [this.contents],
      { type: 'text/csv;charset=utf-8' },
    );
    FileSaver.saveAs(blob, filename);
  }

  // Add quotes around a single field if necessary: only if the field contains the
  // delimiter, the quote character, the configured line break, or any bare carriage
  // return / line feed.
  private maybeQuote(value: string): string {
    // A lone CR or LF must be quoted even when it differs from the configured line
    // break (e.g. a '\n' inside a field while rows end in '\r\n'); otherwise readers
    // would see the field as the end of the record.
    const needsQuoting =
      value.includes(this.delim) ||
      value.includes(this.quote) ||
      value.includes(this.br) ||
      value.includes('\n') ||
      value.includes('\r');
    if (!needsQuoting) {
      return value;
    }
    // If the value contains quotes, they must be doubled according to the RFC.
    const escaped = value.split(this.quote).join(this.quote.repeat(2));
    return this.quote + escaped + this.quote;
  }

  // Should not be used outside unit tests.  Get contents of file that would be written.
  public testOnlyContents(): string {
    return this.contents;
  }
}

// Describes how the cells of a single CSV column are interpreted by parseLine.
export interface ColumnTemplate {
  // 'number': non-empty cells are converted with Number(); a value that is not
  // a number is reported as an error.
  // 'string': cells are returned as text after being trimmed and passed through
  // unquote().
  format: 'number' | 'string';
}

// The expected columns of a row, in order.  Its length is the expected number of
// cells per row.  When an empty template is passed to parseLine, it is filled in
// place with one 'number' column per cell of the parsed line.
export type RowTemplate = ColumnTemplate[];

// The parsed cells of one row: numbers for cells that were converted, strings
// otherwise.
export type RowData = (number | string)[];

// Options shared by parse() and parseLine().  All fields are optional.
export interface ParseOptions {
  // Optionally define a delimiter (defaults to ',').  The parser compares the
  // delimiter against one character of the line at a time, so it should be a
  // single character.
  delimiter?: string;
  // Optionally define a set of quote characters (defaults to [', "]).  A quoted
  // section is closed only by the same character that opened it.
  quoteChars?: string[];
  // Optionally allow generic tables with arbitrary uniform row length.  In
  // parseLine, this selects a generic "empty columns or formatting errors"
  // message (instead of the expected-column-count message) when a row's length
  // doesn't match the template.
  generic?: boolean;
  // Optionally parse and include the first line if it is detected as a header
  header?: boolean;
  // Optionally show the expected column names when the number of parsed columns is incorrect
  columnNames?: string[];
}

// Options for parsing a single line: everything in ParseOptions plus per-line
// flags.  parse() sets these flags itself for each line it processes.
export interface ParseLineOptions extends ParseOptions {
  // Set to false for lines that may be discarded if the parsed content doesn't
  // match the template (such as a file's first line, which might contain header
  // content).  When a row doesn't match the template, an Error will be thrown
  // if strict is true, and null will be returned if strict is false.
  // Defaults to true.
  strict?: boolean;
  // Set to true if the currently parsed line is the first line of the CSV and
  // we want to keep this line if it is detected to be a header.  Only consulted
  // when strict is false: a row whose cells fail number conversion is then
  // returned instead of discarded.
  isHeader?: boolean;
}

// Strip leading and trailing whitespace from `text` without ever removing the
// delimiter itself.  A plain trim() would delete leading/trailing delimiters
// when the delimiter is a whitespace character (such as a tab), silently
// dropping empty cells at the start or end of a row.  For non-whitespace
// delimiters this is exactly text.trim().
function trimPreservingDelimiter(text: string, delimiter: string): string {
  if (delimiter.trim() !== '') {
    // The delimiter is not whitespace, so a plain trim cannot remove it.
    return text.trim();
  }
  // A character is strippable if trim() would remove it and it is not the delimiter.
  const isStrippable = (char: string) => char !== delimiter && char.trim() === '';
  let start = 0;
  let end = text.length;
  while (start < end && isStrippable(text.charAt(start))) {
    start += 1;
  }
  while (end > start && isStrippable(text.charAt(end - 1))) {
    end -= 1;
  }
  return text.slice(start, end);
}

// Parse a single CSV line into cells, honoring quoted sections, and cast each
// cell according to `rowTemplate`.
//
// - line: the raw text of one row.  Surrounding whitespace is ignored, except
//   for the delimiter itself when the delimiter is a whitespace character.
// - rowTemplate: the expected columns.  If it is empty, it is populated in place
//   with one 'number' column per parsed cell, and cells that fail number
//   conversion are returned as their original text.
// - options: see ParseLineOptions.  `strict` defaults to true.
//
// Returns the parsed cells, or null when `strict` is false and the row is
// rejected (wrong number of cells, or a failed number conversion on a row that
// is not flagged with `isHeader`).
// Throws an Error when `strict` is true and the row has the wrong number of
// cells or contains a value that is not a number.
export function parseLine(
  line: string,
  rowTemplate: RowTemplate,
  options?: ParseLineOptions,
): RowData | null {
  const {
    delimiter = ',',
    quoteChars = ['"', '\''],
    strict = true,
  } = options || {};

  const cellStack: string[] = [];
  let cellIndex = 0;
  let inQuotes = false;
  let quoteChar = '';

  Array.from(trimPreservingDelimiter(line, delimiter)).forEach((char: string) => {
    // Because entries can be quoted, it's not sufficient to split on the
    // delimiter.  Instead, we walk the line, character by character, to track
    // quote context and push the right cell content onto the stack.
    let isCellContent = false;

    if (quoteChars.includes(char)) {
      // Possible quote character encountered
      if (inQuotes) {
        if (char === quoteChar) {
          // Matching quote means we're closing the quote context
          inQuotes = false;
          quoteChar = '';
        }
      } else {
        // Open a quoting context and save the actual quote character so we know
        // when to close the context
        inQuotes = true;
        quoteChar = char;
      }
      // Note that we push any character here to the stack.  If it's a true
      // quote, we'll want to keep it around to preserve any internal white
      // space (it will be unquoted in post-processing).  Otherwise, it's just
      // character data inside a quote.
      isCellContent = true;
    } else if (char === delimiter) {
      if (inQuotes) {
        // A delimiter inside a quote is just character data
        isCellContent = true;
      } else {
        // A delimiter outside a quote means we should initialize a new cell.
        // Make sure the current cell is well defined in cases where the cell is empty.
        cellStack[cellIndex] = cellStack[cellIndex] || '';
        cellIndex += 1;
        cellStack[cellIndex] = '';
      }
    } else {
      isCellContent = true;
    }

    if (isCellContent) {
      cellStack[cellIndex] = cellStack[cellIndex] || '';
      cellStack[cellIndex] += char;
    }
  });

  // Sanity check row length based on template
  const hasRowTemplate = !!rowTemplate.length;
  if (hasRowTemplate) {
    if (cellStack.length !== rowTemplate.length) {
      if (!strict) {
        return null;
      }
      if (options?.generic) {
        throw new Error(
          'The CSV provided contains empty columns or formatting errors. ' +
          'Please correct and reupload.',
        );
      }
      // otherwise, assume the header column count is well defined
      const columnText = rowTemplate.length === 1 ? 'column' : 'columns';
      const columnOrder = options?.columnNames ?
        ` in this order: ${wordsToList(options.columnNames)}` : '';
      throw new Error(
        `Please upload a CSV file with ${rowTemplate.length} ${columnText}${columnOrder}`,
      );
    }
  } else {
    // Auto generate rowTemplate based on the current row size.  Assume the table
    // has number values so the rest of the table is parsed as numbers.  This
    // intentionally mutates the caller's array so later lines see the template.
    cellStack.forEach((_) => {
      rowTemplate.push({ format: 'number' });
    });
  }

  // Cast cell values to types described in rowTemplate
  let error = '';
  const cells = rowTemplate.map((columnTemplate, j) => {
    const { format } = columnTemplate;
    const cellValue = unquote(cellStack[j].trim());
    // Empty cells are never converted; they are returned as-is.
    if ((format === 'number' || !hasRowTemplate) && cellValue) {
      // cast as number if specified or attempt it when no template given
      const value = Number(cellValue.trim());
      const isNan = Number.isNaN(value);
      if (isNan) {
        error = `Value '${cellValue}' is not a number`;
      }
      // if no template and was unsuccessful, then return original cell
      return !hasRowTemplate && isNan ? cellValue : value;
    }
    return cellValue;
  });

  if (error) {
    if (strict) {
      throw new Error(error);
    } else if (!options?.isHeader) {
      // this row may be a header, by default we discard it unless the option is set
      return null;
    }
  }
  return cells;
}

// Given the contents of a file, assume it's a CSV and attempt to parse it into
// rows of equal numbers of cells.  By default, if the first row looks like
// header cells, discard it (set `options.header` to keep it instead).  The
// 'rowTemplate' argument provides the expected row length as well as the
// expected format for each cell.  If the `rowTemplate` is empty, then it is
// populated based on the length of the first row of the table.
//
// Lines are split on any run of CR/LF characters, so blank lines are skipped.
// Throws an Error if any line after the first doesn't match the template.
export function parse(
  content: string,
  rowTemplate: RowTemplate,
  options?: ParseOptions,
): RowData[] {
  const { delimiter = ',' } = options || {};
  // Drop surrounding whitespace and blank lines, but keep leading/trailing
  // delimiters so empty first/last cells survive when the delimiter is whitespace.
  const lines = trimPreservingDelimiter(content, delimiter).split(/[\r\n]+/);
  const rows: RowData[] = [];

  // Attempt to parse each line, considering quoted text
  lines.forEach((line: string, i: number) => {
    // Only the first line is parsed leniently, since it may be a header.
    const cells = parseLine(line, rowTemplate, {
      ...options,
      strict: i !== 0,
      isHeader: options?.header && i === 0,
    });

    if (cells) {
      rows.push(cells);
    }
  });
  return rows;
}
