import { distance } from 'fastest-levenshtein';

/**
 * Merges entries of `data` whose `field` values are considered duplicates and
 * sums their counts.
 *
 * Values are compared in a normalised form: lower-cased, trimmed, with every
 * underscore replaced by a space. An item is folded into the first already-kept
 * entry for which any of these holds:
 *   1. the normalised values are identical;
 *   2. `useLevenshtein` is truthy and `distance()` between them is below 2;
 *   3. both share the same leading token (split on space or hyphen) and
 *      `checkIfStringHasNumberVariant` is true for at least one of them.
 *
 * Items whose normalised value is empty or shorter than `minLength` are dropped.
 *
 * @param {Object} options
 * @param {Iterable<Object>} options.data - Items to deduplicate; `item[field]` must be a string.
 * @param {string} options.field - Name of the property holding the value to compare.
 * @param {string} [options.countField='doc_count'] - Name of the property holding the count.
 *     A missing or falsy count is treated as 1.
 * @param {boolean} [options.useLevenshtein] - Enables the edit-distance rule (2).
 * @param {number} [options.minLength=2] - Minimum length of the normalised value.
 * @returns {Object[]} New array of shallow copies (input items are not mutated). Each kept
 *     entry retains the properties of its first occurrence, with `countField` holding the
 *     summed count.
 */
export const deduplicate = ({ data, field, countField = 'doc_count', useLevenshtein, minLength = 2 }) => {
    // Global flag: every underscore must become a space, not just the first one.
    const normalize = (raw) => raw.toLowerCase().trim().replace(/_/g, ' ');
    const leadingToken = (text) => text.split(/ |-/)[0].trim();

    const ret = [];
    // Index-aligned with `ret`: the normalised value and leading token of each kept
    // entry, cached so they are not recomputed for every comparison.
    const seen = [];

    for (const item of data) {
        const value = normalize(item[field]);
        if (!value || value.length < minLength) {
            continue;
        }
        const head = leadingToken(value);
        const increment = item[countField] || 1;

        const matchIndex = seen.findIndex((known) => {
            if (known.value === value) {
                return true;
            }
            if (useLevenshtein && distance(known.value, value) < 2) {
                return true;
            }
            return (
                known.head === head &&
                (checkIfStringHasNumberVariant(known.value) || checkIfStringHasNumberVariant(value))
            );
        });

        if (matchIndex === -1) {
            // Seed the count under `countField` itself so later `+=` merges always
            // operate on a number, whatever the field is called.
            ret.push({ ...item, [countField]: increment });
            seen.push({ value, head });
        } else {
            ret[matchIndex][countField] += increment;
        }
    }
    return ret;
};

/**
 * Reports whether the final segment of `string` — after splitting on single
 * spaces or hyphens and trimming that segment — consists only of digits.
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean} `true` when the trimmed last segment is one or more digits.
 */
const checkIfStringHasNumberVariant = (string) => {
    // split() always yields at least one element, so pop() never returns undefined.
    const lastSegment = string.split(/ |-/).pop();
    return /^\d+$/.test(lastSegment.trim());
};
