import { buildSearchResultsParams } from "../../lib/queries";
import escapeStringRegexp from "escape-string-regexp";
import { gql } from "@apollo/client";
import intersection from "lodash/intersection";
import orderBy from "lodash/orderBy";
import { searchResultsAggregations } from "../../graphql/queries";

/**
 * Get the project's stakeholders, either all of them or only those present in the results of a filter.
 *
 * @param {Object} args
 * @param {string} args.entityType people/organisations; key of the stakeholder list on `projectData` and name of the
 *   aggregated field
 * @param {string} args.stakeholderType all/followed/tagged
 * @param {?Object} args.filter filter object (its `query` is a JSON string), or null for all results
 * @param {*} args.clientId passed through to the search params
 * @param {*} args.projectId passed through to the search params
 * @param {Object} args.projectData project data holding the stakeholder lists
 * @param {*} args.maxDate passed through to the search params
 * @param {Object} args.apollo client whose `query()` runs the aggregation; only used when a filter is given
 * @returns {Promise<Array<Object>>} the matching stakeholders
 */
export const getStakeholdersInResults = async ({
  // people/organisations
  entityType,
  // all/followed/tagged
  stakeholderType,
  // filter object or null for all results
  filter,
  // supporting stuff
  clientId,
  projectId,
  projectData,
  maxDate,
  apollo,
}) => {
  // get stakeholders of entityType from the whole project, an entityType missing from the project has none
  let stakeholders = projectData[entityType] || [];

  // filter for followed/tagged: "followed" keeps stakeholders without tags, "tagged" the ones with tags
  if (stakeholderType === "followed") stakeholders = stakeholders.filter((x) => !x.tags);
  if (stakeholderType === "tagged") stakeholders = stakeholders.filter((x) => x.tags);

  // if no filter, we're done
  if (!filter) return stakeholders;

  // get all people/organisations present on results from the filter
  const filterParams = {
    clientId,
    projectId,
    maxDate,
    query: JSON.parse(filter.query),
  };
  const searchAggs = {
    [entityType]: {
      terms: {
        field: `${entityType}.keyword`,
        size: 10000,
      },
    },
  };
  const res = await apollo.query({
    query: gql(searchResultsAggregations),
    variables: {
      searchParams: JSON.stringify(
        buildSearchResultsParams({ source: "METABASE", filterParams, aggs: searchAggs, projectData })
      ),
    },
  });

  // lowercased names found on the results; a Set as there can be up to 10000 buckets to look up per stakeholder
  const { buckets } = JSON.parse(res.data.searchResultsAggregations.aggregations)[entityType];
  const entitiesInResults = new Set(buckets.map((x) => x.key.toLowerCase()));

  // keep the intersection of people/organisations found on the results from the filter, and the stakeholders on the project
  return stakeholders.filter(
    (stakeholder) =>
      // name matches exactly
      entitiesInResults.has(stakeholder.name.toLowerCase()) ||
      // or is one of the alternative names
      (stakeholder.alternativeNames || []).some((x) => entitiesInResults.has(x.toLowerCase()))
  );
};

/**
 * Calculate the most likely organisations for a person based on proximity: how many characters lie between
 * whole-word mentions of the person and of each organisation inside generated ES fragments.
 *
 * @param {string[]} personNames name and alternativeNames of the person
 * @param {string[]} fragments text fragments to scan
 * @param {string[]} organisations candidate organisation names
 * @returns {Array<{organisation: string, score: number}>} organisations mentioned in the same fragment as the
 *   person, closest (lowest geometric mean of character distances) first. Organisations that never share a
 *   fragment with the person, or whose score is 0, are left out.
 */
export const suggestOrganisationsFromPersonFragments = (personNames, fragments, organisations) => {
  // an empty name would become an empty regex alternative, which matches at every word boundary
  const names = personNames.filter(Boolean);
  const candidates = organisations.filter(Boolean);

  // nothing to do if there are no fragments, no person names, or no organisations
  if (!fragments.length || !names.length || !candidates.length) return [];

  // one entry per organisation, keyed by lowercase name as the regex is case insensitive; the first spelling is kept
  // as display name. A Map, so names such as "__proto__" cannot clash with Object.prototype.
  const distancesByOrganisation = new Map();
  for (const organisation of candidates) {
    const key = organisation.toLowerCase();
    if (!distancesByOrganisation.has(key)) distancesByOrganisation.set(key, { organisation, distances: [] });
  }

  // setup regex
  // the alternatives must be grouped, otherwise \b only binds to the first and the last alternative
  // TODO we're assuming a latin language wordBoundary here as we don't have a result language here as looking at multiple?
  const wholeWordRegex = (alternatives) =>
    new RegExp(`\\b(?:${alternatives.map((x) => escapeStringRegexp(x)).join("|")})\\b`, "gi");
  const personRegex = wholeWordRegex(names);
  const organisationRegex = wholeWordRegex(candidates);

  for (const fragment of fragments) {
    // match of all variants of person name in this fragment
    const personMatches = [...fragment.matchAll(personRegex)];

    // match of all organisations in this fragment
    const organisationMatches = [...fragment.matchAll(organisationRegex)];

    // for each person match...
    for (const personMatch of personMatches) {
      // for each organisation match...
      for (const organisationMatch of organisationMatches) {
        // find the organisation we matched
        const entry = distancesByOrganisation.get(organisationMatch[0].toLowerCase());
        // regex case folding and toLowerCase() can disagree on unusual characters, skip rather than crash
        if (!entry) continue;
        // add the distance between the person/org index
        entry.distances.push(Math.abs(organisationMatch.index - personMatch.index));
      }
    }
  }

  // for each organisation, calculate geometric mean
  // https://towardsdatascience.com/on-average-youre-using-the-wrong-average-geometric-harmonic-means-in-data-analysis-2a703e21ea0
  const result = [];
  for (const { organisation, distances } of distancesByOrganisation.values()) {
    if (!distances.length) continue;
    // take the n-th root of every distance before multiplying: multiplying the raw distances first overflows to
    // Infinity after a few hundred matches, which would rank the most frequently mentioned organisation last
    const exponent = 1 / distances.length;
    const score = distances.reduce((acc, distance) => acc * Math.pow(distance, exponent), 1);
    result.push({ organisation, score });
  }

  // remove any that are 0 score, sort by lowest score first
  return orderBy(
    result.filter((x) => x.score),
    ["score"],
    ["asc"]
  );
};
