import { removeUrls } from "./datasetManager";

// This file contains several routines that support our search capability.
// The routine that actually filters our dataset is filterRecords
// in tableManager.js.

// Search option flags. Each non-zero constant sets a single, distinct bit,
// so options can be combined with bitwise OR and tested with bitwise AND
// (filterRecords tests SEARCH_OPTION_IGNORE_URL this way).
export const SEARCH_OPTION_NONE       = 0x0000; // no options selected
export const SEARCH_OPTION_WHOLE_WORD = 0x0001; // a match must not touch an adjacent alphanumeric character
export const SEARCH_OPTION_IGNORE_URL = 0x0010; // field values go through removeUrls before matching
export const SEARCH_OPTION_4          = 0x0100; // not referenced in this file; presumably reserved - TODO confirm
export const SEARCH_OPTION_5          = 0x1000; // not referenced in this file; presumably reserved - TODO confirm

//////////////////////////////////////////////////////////////////////////

// Reports whether the input consists solely of ASCII letters and digits
// (one or more of them). Callers in this file pass a single character,
// but any non-empty run of such characters also yields true. An empty
// string yields false.
// TBD: needs a unit test.

function isAlphanumeric(char) {
  return /^[a-zA-Z0-9]+$/.test(char);
}

//////////////////////////////////////////////////////////////////////////

// Reports whether the input contains at least one whitespace character
// (anything the regex class \s matches). Callers in this file use it both
// on single characters and on whole strings.
// TBD: needs a unit test.

function isWhitespace(string) {
  return /\s/.test(string);
}

//////////////////////////////////////////////////////////////////////////

// Decides whether a substring (typically one term parsed out of a search
// string) matches a main string (typically one field value of a dataset
// record) under the given search options. The substring matches when it
// occurs at least once in the main string, as reported by
// substringPositions. A substring that is empty or whitespace-only
// matches everything.
// See unit test in testManager.js.

const isMatch = (mainString, substring, searchOptions) => {
  const isBlank = substring.trim() === "";
  return (
    isBlank ||
    substringPositions(mainString, substring, searchOptions).length > 0
  );
}; // end isMatch

//////////////////////////////////////////////////////////////////////////

// Returns the start positions of a substring (such as we might parse
// from a search string) within a main string (such as we might find
// in a dataset record), case-insensitive, and subject to the given
// search options. The substring is "as-is"; that is, any quote parsing,
// whitespace trimming, etc. has already been done elsewhere. We just
// want to know where the substring occurs in the main string. (In the
// future, our tests might become more sophisticated, e.g. Soundex,
// stemming, wildcards, etc.)
//
// Details:
// - Reported occurrences never overlap one another.
// - searchOptions is treated as a bit mask: whole-word matching applies
//   whenever the SEARCH_OPTION_WHOLE_WORD bit is set, even when other
//   option bits are set too.
// - With whole-word matching, an occurrence only counts when the
//   characters directly before and after it (if any) are not
//   alphanumeric.
// - An empty substring has no meaningful positions; the result is [].
// - Positions index into the lowercased copy of mainString.
//   NOTE(review): lowercasing can change the length of some non-ASCII
//   text, so positions may not line up with the original string there.
// TBD: needs a unit test, but currently tested indirectly via isMatch.

const substringPositions = (mainString, substring, searchOptions) => {
  const haystack = mainString.toLowerCase();
  const needle = substring.toLowerCase();
  const positions = [];

  // indexOf("") succeeds at every index without ever advancing, so an
  // empty needle would otherwise keep the scan below running forever.
  if (needle.length === 0) {
    return positions;
  }

  const wholeWordOnly = (searchOptions & SEARCH_OPTION_WHOLE_WORD) !== 0;

  let index = haystack.indexOf(needle); // TBD: convert to use localeCompare
  while (index !== -1) {
    const end = index + needle.length;

    let accepted = true;
    if (wholeWordOnly) {
      const startIsOK = index === 0 || !isAlphanumeric(haystack[index - 1]);
      const endIsOK =
        end === haystack.length || !isAlphanumeric(haystack[end]);
      accepted = startIsOK && endIsOK;
    }

    if (accepted) {
      positions.push(index);
      // Continue after this occurrence so results do not overlap.
      index = haystack.indexOf(needle, end);
    } else {
      // A rejected candidate may overlap a valid occurrence that starts
      // inside it, so only step forward by one character.
      index = haystack.indexOf(needle, index + 1);
    }
  }

  return positions;
}; // end substringPositions


//////////////////////////////////////////////////////////////////////////

// Returns true if a search string matches a set of field values (such
// as we might find in a dataset record), given the input search options.
// The search string is split into substrings by substringsIn; a match
// means that every one of those substrings matches at least one field
// value (different substrings may be found in different fields).
//
// A missing (null/undefined) search string, or one that is empty or
// whitespace-only, is treated as "no filter" and matches everything.
// See unit test in testManager.js.

const isMatchMulti = (fieldValues, searchString, searchOptions) => {
  if (searchString == null) {
    return true; // no search string at all: nothing to filter on
  }
  if (typeof searchString === "string" && searchString.trim() === "") {
    return true; // an empty search string matches everything
  }

  // Parse once, then test each substring against the fields.
  const substrings = substringsIn(searchString);

  // True as soon as any field contains the substring.
  const foundInAnyField = (substring) => {
    for (const fieldValue of fieldValues) {
      if (isMatch(fieldValue, substring, searchOptions)) {
        return true;
      }
    }
    return false;
  };

  // every() stops at the first substring that no field contains.
  return substrings.every(foundInAnyField);
}; // end isMatchMulti

//////////////////////////////////////////////////////////////////////////

// Quoted words search support - PRISM-41
//
// quotedWordsRegex matches a pair of ASCII double quotes with at least
// one non-quote character between them. It is global, so String.match
// returns every such pair in the string.
const quotedWordsRegex = /"([^"]+)"/gm;

// Returns an array with every quoted section of searchStr (quote
// characters included), or null when there is none.
const getQuotedWordsMatches = (searchStr) => searchStr.match(quotedWordsRegex);

// Returns true when searchStr holds at least one non-empty quoted section.
const containsQuotedWords = (searchStr) => {
  const quotedSections = getQuotedWordsMatches(searchStr);
  return quotedSections !== null;
};

//////////////////////////////////////////////////////////////////////////

// Prepares a raw search string for use inside a regular expression.
//
// Steps:
// 1. If searchStr is a string, each of the characters
//    * . ? [ ] | & ; $ % @ " < > ( ) + ,
//    is prefixed with a backslash. The backslash itself is not in that
//    set, so an input consisting of a lone backslash is still a lone
//    backslash afterwards; that case returns "".
// 2. If the SEARCH_OPTION_WHOLE_WORD bit is set in searchOptions (it is
//    tested as a bit mask, so it may be combined with other options),
//    every escaped quote (\") is removed and the remaining string is
//    returned as-is, unsplit, regardless of returnAsArray.
// 3. Otherwise the string is split on single spaces into non-empty
//    words. Escaped quotes are removed first only when the string has
//    no paired quotes; when it has paired quotes they are kept.
//    The words are returned as an array when returnAsArray is truthy,
//    or joined with "|" otherwise.
//
// NOTE(review): a non-string searchStr skips step 1 but is still handed
// to getQuotedWordsMatches, which calls .match on it.
// TBD: needs a unit test.

const sanitizeSearchString = (
  searchStr,
  searchOptions,
  returnAsArray = false
) => {
  let escaped = searchStr;
  if (typeof searchStr === "string") {
    escaped = searchStr.replace(/([*.?[\]|&;$%@"<>()+,])/g, "\\$1");
    // A lone backslash would be an incomplete escape sequence.
    if (escaped === "\\") {
      return "";
    }
  }

  const hasQuotedPairs = getQuotedWordsMatches(escaped) !== null;

  if ((searchOptions & SEARCH_OPTION_WHOLE_WORD) !== 0) {
    // Clean up quotes
    return escaped.replaceAll('\\"', "");
  }

  // If no paired quotes are found, clean any single (unpaired) quotes.
  const source = hasQuotedPairs ? escaped : escaped.replaceAll('\\"', "");
  const words = source.split(" ").filter((word) => word !== "");

  return returnAsArray ? words : words.join("|");
}; // end sanitizeSearchString

//////////////////////////////////////////////////////////////////////////

// Returns the regex source string for the selected search type and the
// input string, which may use quotes for exact match. The result is ""
// when no words are found in searchString.
//
// Three cases, checked in this order:
// 1. searchString contains paired quotes (PRISM-41): each substring
//    reported by substringsIn becomes "\b(<word>\s?)\b", and those
//    pieces are joined with a separator group built from all the words.
// 2. The SEARCH_OPTION_WHOLE_WORD bit is set in searchOptions (tested as
//    a bit mask, so it may be combined with other options): every
//    character that is not an ASCII letter or digit becomes a space.
//    - One keyword yields "\b(<keyword>)\b".
//    - Several keywords yield an alternation with one branch per
//      ordered arrangement of two or more keywords (from
//      getAllCombinations), each branch being the sequence
//      "\b(<keyword>)\b.*" for the keywords in that arrangement.
// 3. Otherwise: an alternation of the keywords, "(<k1>)|(<k2>)|...".
//
// NOTE(review): words are inserted into the pattern verbatim; this
// function does no regex escaping of its own.

const getMatchRegex = (searchString, searchOptions) => {
  // Checks for quotes in the search string
  if (containsQuotedWords(searchString)) {
    const quotedWordsArray = substringsIn(searchString);
    // Bails out if we got no words back
    if (quotedWordsArray.length === 0) {
      return "";
    }
    // PRISM-41 - Keep spaces in between words when matching words are
    // found one after the other
    const wordPatterns = quotedWordsArray.map(
      (word) => `\\b(${word}\\s?)\\b`
    );
    // Join the pieces with a group that catches any other text between
    // the matching words.
    const separator = `((?!.*${quotedWordsArray.join("?!.*")}).*)`;
    return wordPatterns.join(separator);
  }

  if ((searchOptions & SEARCH_OPTION_WHOLE_WORD) !== 0) {
    const normalized = searchString.replace(nonWordsRegex, " ").trim();
    const keywords = substringsIn(normalized);
    // Bails out if we got no words back
    if (keywords.length === 0) {
      return "";
    }
    // Deals with multiple keywords search: after trimming, any remaining
    // whitespace means there is more than one keyword.
    if (isWhitespace(normalized)) {
      return getAllCombinations(keywords)
        .filter((combo) => combo.length > 1)
        .map((combo) =>
          combo.map((keyword) => `\\b(${keyword})\\b.*`).join("")
        )
        .join("|");
    }

    return `\\b(${keywords.join("")})\\b`;
  }

  const keywords = substringsIn(searchString);
  // Bails out if we got no words back
  if (keywords.length === 0) {
    return "";
  }
  return `(${keywords.join(")|(")})`;
}; // end getMatchRegex

// Matches every character that is not an ASCII letter or digit. The
// global flag makes String.prototype.replace substitute all occurrences.
// getMatchRegex uses it to turn such characters into spaces before
// splitting a whole-word search string into keywords.

const nonWordsRegex = /[^a-zA-Z0-9]/g;
// const quotesRegex = /\'|\"/gi; // unused, and has unnecessary escapes

//////////////////////////////////////////////////////////////////////////

// Returns a copy of inputString in which the last occurrence of
// targetChar is replaced by replacementChar. When targetChar does not
// occur, inputString is returned unchanged. (We use it to replace an
// unpaired quote with a space character.)
// Exactly one character is removed at the found position, so targetChar
// is expected to be a single character.
// TBD: needs a unit test.

function replaceLastOccurrence(inputString, targetChar, replacementChar) {
  const at = inputString.lastIndexOf(targetChar);
  if (at === -1) {
    return inputString; // nothing to replace
  }

  const head = inputString.substring(0, at);
  const tail = inputString.substring(at + 1);
  return head + replacementChar + tail;
} // end replaceLastOccurrence

//////////////////////////////////////////////////////////////////////////

// This routine parses an input string into substrings, for use
// in subsequent filtering. The parsing rules are as follows:
// - Substrings are delimited by runs of whitespace, except within quotes.
// - Leading and trailing whitespace is ignored, except within quotes.
// - Quotes occur in pairs; nesting quotes is not supported.
// - Quote characters (paired or unpaired) are substring boundaries:
//   text touching a quote on either side is kept as its own substring,
//   e.g. foo"bar baz"qux gives ["foo", "bar baz", "qux"].
// - An unpaired (odd) quote is handled by replacing the LAST quote in
//   the string with a space and parsing again.
// - Quoted substrings are returned without the quote characters and
//   without trimming; empty ones ("") are dropped.
// - The only quote character supported is the ASCII double-quote.
// See unit test in testManager.js.

function substringsIn(inputString) {
  const quoteChar = '"';

  let quoteCount = 0;
  for (const char of inputString) {
    if (char === quoteChar) {
      quoteCount++;
    }
  }

  if (quoteCount === 0) {
    // No quote chars. Split the string at whitespace and keep the
    // non-empty pieces (leading/trailing whitespace yields empty ones).
    return inputString.split(/\s+/).filter((piece) => piece !== "");
  }

  if (quoteCount % 2 === 1) {
    // Odd number of quotes. Replace the last one with a space character
    // and then reparse. (An alternative might be to append a quote char
    // at the end and reparse.)
    return substringsIn(replaceLastOccurrence(inputString, quoteChar, " "));
  }

  // Even number of quotes, so they are all paired.
  const substrings = [];
  let currentSubstring = "";
  let insideQuotes = false;

  // Ends the substring being accumulated, keeping it if it is not empty.
  const flush = () => {
    if (currentSubstring !== "") {
      substrings.push(currentSubstring);
    }
    currentSubstring = "";
  };

  for (const char of inputString) {
    if (char === quoteChar) {
      // A quote is a boundary in both directions: a closing quote ends
      // the quoted substring, and an opening quote ends any unquoted
      // text that runs right up to it.
      flush();
      insideQuotes = !insideQuotes;
    } else if (!insideQuotes && isWhitespace(char)) {
      // Whitespace outside quotes terminates the current substring.
      flush();
    } else {
      // Ordinary char, or whitespace inside quotes: accumulate it.
      currentSubstring += char;
    }
  }

  // No more characters: keep whatever is still pending.
  flush();

  return substrings;
} // end substringsIn

//////////////////////////////////////////////////////////////////////////

// Returns every ordered arrangement of every non-empty selection of
// elements from inputArray (i.e. all permutations of all non-empty
// subsets). Elements are told apart by position, not by value, so
// duplicate values produce duplicate arrangements.
//
// Order of the result: depth-first, with each arrangement listed after
// all of its longer extensions. For example:
//   ["a", "b"]  ->  [["a", "b"], ["a"], ["b", "a"], ["b"]]
//
// An empty inputArray yields []. The number of arrangements grows
// factorially with the input length (1, 4, 15, 64, 325, ...), so this
// is only suitable for short inputs.
// TBD: needs a unit test.

const getAllCombinations = (inputArray) => {
  const resultArray = [];

  // usedIndices is the arrangement built so far, as positions into
  // inputArray. Extend it with every unused position first, then record
  // the arrangement itself.
  const extend = (usedIndices) => {
    for (let i = 0; i < inputArray.length; i++) {
      if (!usedIndices.includes(i)) {
        extend([...usedIndices, i]);
      }
    }
    if (usedIndices.length > 0) {
      resultArray.push(usedIndices.map((index) => inputArray[index]));
    }
  };

  extend([]);
  return resultArray;
}; // end getAllCombinations

//////////////////////////////////////////////////////////////////////////

/**
 * This is our central filtering routine. Given a dataset's metadata
 * and content, it returns an array of the dataset records that match
 * the input filter settings (a search string and search options).
 *
 * Which fields are searched: if the metadata has a fieldsToSearch
 * property, every field whose header name appears in that list is
 * searched (all instances, if a header name is repeated). Otherwise
 * all fields are searched.
 *
 * @param {Object} datasetMetadata normally from Datasets.js, but can be undefined
 * @param {Object} datasetContent i.e. headers and records
 * @param {string} searchString raw; parsed into substrings by isMatchMulti
 * @param {number} searchOptions SEARCH_OPTION_* flags; defaults to SEARCH_OPTION_NONE
 * @returns Object (filtered table.records); [] when datasetContent is missing
 */
const filterRecords = (
  datasetMetadata,
  datasetContent,
  searchString,
  searchOptions = SEARCH_OPTION_NONE
) => {
  if (!datasetContent) return [];
  const { headers, records } = datasetContent;

  // Work out the column positions to search. Positions (not names) are
  // tracked so that repeated header names are all covered.
  const fieldIndicesToSearch = [];
  const wantedNames = datasetMetadata?.fieldsToSearch;
  if (wantedNames) {
    // Search only the specified fields.
    for (let w = 0; w < wantedNames.length; w++) {
      for (let h = 0; h < headers.length; h++) {
        if (headers[h] === wantedNames[w]) {
          fieldIndicesToSearch.push(h);
        }
      }
    }
  } else {
    // Default: search all fields.
    for (let h = 0; h < headers.length; h++) {
      fieldIndicesToSearch.push(h);
    }
  }

  // When URLs are ignored, each field value is cleaned before matching.
  const ignoreUrls = searchOptions & SEARCH_OPTION_IGNORE_URL;

  // Keep each record whose fields of interest match the search string.
  return records.filter((record) => {
    const fieldValues = fieldIndicesToSearch.map((fieldIndex) =>
      ignoreUrls ? removeUrls(record[fieldIndex]) : record[fieldIndex]
    );
    return isMatchMulti(fieldValues, searchString, searchOptions);
  }); // array of array of strings
}; // end filterRecords

//////////////////////////////////////////////////////////////////////////

export {
  sanitizeSearchString,
  isMatch,
  isMatchMulti,
  getMatchRegex,
  containsQuotedWords,
  substringsIn,
  filterRecords,
};