import Sentiment from "sentiment";
import { modifiedEnBase } from "./sentiment/updated-language";

/**
 * Creates a sentiment scorer that uses the custom "reviews" language.
 *
 * A new `Sentiment` instance is constructed and the "reviews" language is
 * registered on it every time this function is called.
 *
 * @returns An object exposing `calculate`, which maps a piece of text to a
 *   score limited to the range [-4.7, 4.7].
 */
export const useSentiment = () => {
  const analyzer = new Sentiment();
  analyzer.registerLanguage("reviews", reviewLanguage);

  // Symmetric bound applied to the returned score.
  const limit = 4.7;

  /**
   * Scores `untrimmed` after stripping surrounding whitespace.
   *
   * @param untrimmed Text to analyze.
   * @returns Raw score divided by (non-neutral word count + 1), clamped to ±4.7.
   */
  const calculate = (untrimmed: string) => {
    const rawResult = analyzer.analyze(untrimmed.trim(), { language: "reviews" });

    // The standard algorithm tends to penalize for neutral words. We do not:
    // the raw score is divided by the number of positive and negative words
    // only. The extra 1 also keeps the divisor from ever being zero.
    const scoredWordCount = rawResult.negative.length + rawResult.positive.length;
    const perScoredWord = rawResult.score / (scoredWordCount + 1);

    // Which bound applies is decided by the sign of the library's own
    // comparative value, not by the recomputed one.
    let outOfFive: number;
    if (rawResult.comparative < 0) {
      outOfFive = Math.max(perScoredWord, -limit);
    } else {
      outOfFive = Math.min(perScoredWord, limit);
    }

    console.log({ rawResult, outOfFive });
    return outOfFive;
  };

  return { calculate };
};

// TODO: This can get way way better, but a bunch of heuristics cobbled together for now can do the trick
/**
 * Language definition registered under the "reviews" code.
 *
 * `labels` maps a token to its base score. It starts from `modifiedEnBase`
 * and then overrides or adds the entries listed below.
 * `scoringStrategy.apply` adjusts a token's base score using the one or two
 * tokens that precede it (negation and intensifiers).
 */
const reviewLanguage = {
  labels: {
    ...modifiedEnBase,

    // Star ratings, from one star up to five in half-star steps.
    "⭐️": -4,
    "⭐️½": -3,
    "⭐️⭐️": -2,
    "⭐️⭐️½": 0,
    "⭐️⭐️⭐️": 1,
    "⭐️⭐️⭐️½": 2,
    "⭐️⭐️⭐️⭐️": 4,
    "⭐️⭐️⭐️⭐️½": 8, // Values above 5 are allowed here on purpose
    "⭐️⭐️⭐️⭐️⭐️": 10,

    // Words that carry more weight in a review/recommendation context than
    // they do in everyday language.
    best: 5,
    unbelievable: 5,
    exceptional: 5,
    exquisite: 5,
    worst: -5,
    incredible: 4,
    sucks: -4,
    mediocre: -2,

    // Mild signals.
    excited: 1,
    fan: 1,
    less: -1,
  },
  scoringStrategy: {
    /**
     * Adjusts the base score of the token at `cursor` based on what precedes it.
     *
     * @param tokens Tokens of the text being scored.
     * @param cursor Index in `tokens` of the token being scored.
     * @param tokenScore Base score of that token.
     * @returns The adjusted score; `tokenScore` unchanged when no rule matches.
     */
    apply: (tokens: string[], cursor: number, tokenScore: number) => {
      const word = tokens[cursor];
      // Without a preceding token none of the rules below can match.
      const before = cursor > 0 ? tokens[cursor - 1] : undefined;

      // "not X": flip the sign; a few strong words are only partially flipped.
      if (before === "not") {
        const partiallyFlipped = word === "perfect" || word === "amazing" || word === "terrible";
        return partiallyFlipped ? -0.3 * tokenScore : -tokenScore;
      }

      // "not the best" / "not the worst": treated as neutral. Any other
      // "not the X" keeps its base score.
      if (before === "the" && tokens[cursor - 2] === "not") {
        const neutralized = word === "worst" || word === "best";
        return neutralized ? 0 : tokenScore;
      }

      // Intensifiers amplify whatever follows them.
      switch (before) {
        case "really":
        case "very":
        case "exceptionally":
        case "totally":
          return tokenScore * 1.5;
        default:
          return tokenScore;
      }
    },
  },
};
