// Utilities for scanning text into a list of words, counting how often each
// word occurs, and updating the "known" status of words from stored records.
// The text passed to generateListOfWords is the text content of the input file.

import { WordStatistics, WordItem, DBRecord } from "../types";

/**
 * Collects every word found in the supplied text.
 *
 * A word is a run of ASCII letters, optionally continued by one or more
 * apostrophe-like characters followed by more letters (for example "it's").
 *
 * @param text - The text to scan; may be null.
 * @returns The matched words in order of appearance, or null when the text is
 *   null or empty, or when it contains no words.
 */
function generateListOfWords(text: string | null) {
  // Null and the empty string both produce no result.
  if (!text) {
    return null;
  }

  // Letters, optionally joined by in-word apostrophe variants.
  const wordPattern = /\b[a-zA-Z]+(?:['’`‘’‛ʼ'ʾ]+?[a-zA-Z]+)*\b/g;
  return text.match(wordPattern);
}

/**
 * Counts the number of occurrences of each word, ignoring letter case.
 *
 * Words are keyed by their lowercased form. Each record stores the running
 * count, the spelling of the first occurrence (`origin`), and a `known` flag
 * that starts as null. Empty strings are skipped.
 *
 * @param wordsList - The words to count.
 * @returns An object mapping each lowercased word to its record.
 */
function generateWordStatistics(wordsList: string[]) {
  const wordCount: WordStatistics = {};
  const hasOwn = Object.prototype.hasOwnProperty;

  wordsList.forEach((word) => {
    const lowercaseWord = word.toLowerCase();
    if (lowercaseWord === "") {
      return;
    }

    // Check own properties only: a plain truthiness test would also see
    // members inherited from Object.prototype, so a word such as
    // "constructor" would be treated as already counted and never recorded.
    if (hasOwn.call(wordCount, lowercaseWord)) {
      wordCount[lowercaseWord].count += 1;
    } else {
      wordCount[lowercaseWord] = {
        count: 1,
        origin: word,
        known: null,
      };
    }
  });

  return wordCount;
}

/**
 * Copies the `known` value of each record in `dbList` onto the matching entry
 * of `wordsList`.
 *
 * Matching compares the record's `word` with the entry's first element,
 * ignoring letter case. When several entries share the same lowercased word,
 * only the first one is updated. When several records match the same entry,
 * the last record wins. Records without a matching entry are ignored.
 *
 * The entries are modified in place and the same array is returned.
 *
 * @param wordsList - Entries whose second element carries the `known` field.
 * @param dbList - Records providing a `word` and its `known` value.
 * @returns The `wordsList` array that was passed in.
 */
function updateWordKnownStatus(
  wordsList: WordItem[],
  dbList: DBRecord[]
): WordItem[] {
  // Index the entries once by lowercased word so each record is resolved with
  // a single lookup instead of a scan over the whole list. Only the first
  // entry per key is kept, which is the entry a front-to-back search finds.
  const firstByWord = new Map<string, (typeof wordsList)[number]>();
  wordsList.forEach((item) => {
    const key = item[0].toLowerCase();
    if (!firstByWord.has(key)) {
      firstByWord.set(key, item);
    }
  });

  dbList.forEach(({ word, known }) => {
    const wordObj = firstByWord.get(word.toLowerCase());
    if (wordObj) {
      wordObj[1].known = known;
    }
  });

  return wordsList;
}

export { generateListOfWords, generateWordStatistics, updateWordKnownStatus };
