Adding Search to a Hugo Site With Match Sorter

December - 2020

I'm adding search to the garden. I was originally going to make a call to either DuckDuckGo or Google. I didn't like the Google option because I'm moving away from their tracking on my site. DuckDuckGo has privacy going for it, but it would mean the search results page would kick over to their site. Bleck.

Luckily, maxellw was in chat and introduced me to an excellent client-side JavaScript solution called match-sorter.

It's TypeScript and designed as an npm module. Hugo deals with neither. So, we took a hacksaw and blowtorch to it and turned it into this plain-old, vanilla JavaScript:

"use strict";
/**
 * @name match-sorter
 * @license MIT license.
 * @copyright (c) 2020 Kent C. Dodds
 * @author Kent C. Dodds <me@kentcdodds.com> (https://kentcdodds.com)
 */
// Object-merge helper: copies the own enumerable properties of every argument
// after the first onto the first argument, then returns that first argument.
// An already-present `this.__assign` is reused when there is one.
var __assign = (this && this.__assign) || function () {
    // The first call rebinds `__assign` to the native Object.assign when it
    // exists, otherwise to the manual copy loop, so later calls go straight
    // to the chosen implementation.
    __assign = Object.assign || function (target) {
        var total = arguments.length;
        for (var idx = 1; idx < total; idx += 1) {
            var source = arguments[idx];
            for (var prop in source) {
                // for...in also walks inherited keys; copy own ones only
                if (Object.prototype.hasOwnProperty.call(source, prop)) {
                    target[prop] = source[prop];
                }
            }
        }
        return target;
    };
    return __assign.apply(this, arguments);
};
/*
exports.__esModule = true;
exports.defaultBaseSortFn = exports.rankings = exports.matchSorter = void 0;
*/

// Match-quality tiers, from best (7) to worst (0). getMatchRanking returns
// these values (or, for fuzzy matches, a fraction above MATCHES), and
// matchSorter keeps only items whose rank reaches the configured threshold.
var rankings = {
    // candidate and query are identical before any lower-casing
    CASE_SENSITIVE_EQUAL: 7,
    // candidate and query are identical once both are lower-cased
    EQUAL: 6,
    // lower-cased candidate begins with the query
    STARTS_WITH: 5,
    // lower-cased candidate contains a space immediately followed by the query
    WORD_STARTS_WITH: 4,
    // lower-cased candidate contains the query anywhere
    CONTAINS: 3,
    // the candidate's acronym (see getAcronym) contains the query
    ACRONYM: 2,
    // base score for fuzzy matches: query characters found in order
    // (see getClosenessRanking)
    MATCHES: 1,
    // the query does not match the candidate at all
    NO_MATCH: 0
};
// exports.rankings = rankings;
// Expose the tiers on the function itself so callers can write
// matchSorter.rankings.CONTAINS. This works before the function's definition
// because matchSorter is a hoisted function declaration.
matchSorter.rankings = rankings;
// Default tie-breaker for two ranked entries: compares their `rankedValue`
// fields as strings using locale-aware ordering.
var defaultBaseSortFn = function (a, b) {
    var left = String(a.rankedValue);
    var right = String(b.rankedValue);
    return left.localeCompare(right);
};
// exports.defaultBaseSortFn = defaultBaseSortFn;
/**
 * Filters `items` down to the ones that match `value` and returns them in a
 * new array, best match first.
 * @param {Array} items - the candidates to search
 * @param {String} value - the search text every candidate is ranked against
 * @param {Object} options - optional settings, also forwarded to the ranking
 *   helpers: `keys` (which item properties to rank), `threshold` (minimum rank
 *   to keep, default rankings.MATCHES) and `baseSort` (tie-breaker, default
 *   defaultBaseSortFn)
 * @return {Array} - the matching items, sorted
 */
function matchSorter(items, value, options) {
    if (options === undefined) {
        options = {};
    }
    var keys = options.keys;
    var requestedThreshold = options.threshold;
    var requestedBaseSort = options.baseSort;
    var threshold = requestedThreshold === undefined ? rankings.MATCHES : requestedThreshold;
    var baseSort = requestedBaseSort === undefined ? defaultBaseSortFn : requestedBaseSort;

    // Rank every item and keep the ones that reach their cut-off.
    var ranked = [];
    items.forEach(function (item, index) {
        var info = getHighestRanking(item, keys, value, options);
        // a key may carry its own threshold; otherwise use the global one
        var cutoff = info.keyThreshold === undefined ? threshold : info.keyThreshold;
        if (info.rank >= cutoff) {
            ranked.push(__assign({}, info, { item: item, index: index }));
        }
    });

    ranked.sort(function (left, right) {
        return sortRankedValues(left, right, baseSort);
    });

    // Strip the ranking metadata again; callers only get the items back.
    return ranked.map(function (entry) {
        return entry.item;
    });
}
// exports.matchSorter = matchSorter;
/**
 * Works out the best rank `value` achieves against `item`, considering every
 * value the given keys produce for that item.
 * @param {*} item - the item to rank
 * @param {Array} keys - the keys whose values are ranked; when falsy the item
 *   itself is ranked
 * @param {String} value - the value to rank against
 * @param {Object} options - options to control the ranking
 * @return {{rankedValue: *, rank: Number, keyIndex: Number, keyThreshold: Number}}
 *   the winning ranking; `keyIndex` is the position of the winning value in the
 *   flattened value list (-1 when nothing beat NO_MATCH or no keys were given)
 */
function getHighestRanking(item, keys, value, options) {
    if (!keys) {
        // No keys given: the item itself is the thing to compare.
        return {
            // duplicates `item` in the final match entry, kept for a uniform shape
            rankedValue: item,
            rank: getMatchRanking(item, value, options),
            keyIndex: -1,
            keyThreshold: options.threshold
        };
    }
    var candidates = getAllValuesToRank(item, keys);
    var best = {
        rankedValue: item,
        rank: rankings.NO_MATCH,
        keyIndex: -1,
        keyThreshold: options.threshold
    };
    for (var position = 0; position < candidates.length; position++) {
        var candidate = candidates[position];
        var limits = candidate.attributes;
        var candidateRank = getMatchRanking(candidate.itemValue, value, options);
        // Clamp into the key's [minRanking, maxRanking] window; a non-match is
        // never lifted up to minRanking.
        if (candidateRank < limits.minRanking && candidateRank >= rankings.MATCHES) {
            candidateRank = limits.minRanking;
        }
        else if (candidateRank > limits.maxRanking) {
            candidateRank = limits.maxRanking;
        }
        // Strictly greater: on a tie the earlier value keeps the win.
        if (candidateRank > best.rank) {
            best = {
                rankedValue: candidate.itemValue,
                rank: candidateRank,
                keyIndex: position,
                keyThreshold: limits.threshold
            };
        }
    }
    return best;
}
/**
 * Scores how well `stringToRank` (the query) matches `testString` (the
 * candidate), trying the match kinds from strictest to loosest.
 * @param {String} testString - the string to test against
 * @param {String} stringToRank - the string to rank
 * @param {Object} options - options for the match, passed on to prepareValueForComparison
 * @returns {Number} one of the `rankings` values, or the fractional score from
 *   getClosenessRanking when only a fuzzy match is possible
 */
function getMatchRanking(testString, stringToRank, options) {
    var candidate = prepareValueForComparison(testString, options);
    var query = prepareValueForComparison(stringToRank, options);

    // a query longer than the candidate can never match
    if (query.length > candidate.length) {
        return rankings.NO_MATCH;
    }
    // exact match, case included
    if (candidate === query) {
        return rankings.CASE_SENSITIVE_EQUAL;
    }

    // every remaining check ignores case
    var lowerCandidate = candidate.toLowerCase();
    var lowerQuery = query.toLowerCase();

    if (lowerCandidate === lowerQuery) {
        return rankings.EQUAL;
    }
    if (lowerCandidate.startsWith(lowerQuery)) {
        return rankings.STARTS_WITH;
    }
    // a later word starts with the query
    if (lowerCandidate.includes(" " + lowerQuery)) {
        return rankings.WORD_STARTS_WITH;
    }
    if (lowerCandidate.includes(lowerQuery)) {
        return rankings.CONTAINS;
    }
    // A single character that is not contained in the candidate cannot match
    // as an acronym or fuzzily either.
    if (lowerQuery.length === 1) {
        return rankings.NO_MATCH;
    }
    if (getAcronym(lowerCandidate).includes(lowerQuery)) {
        return rankings.ACRONYM;
    }
    // Fuzzy fallback: a value between rankings.MATCHES and rankings.MATCHES + 1
    // depending on how tightly the characters are packed, or NO_MATCH.
    return getClosenessRanking(lowerCandidate, lowerQuery);
}
/**
 * Builds an acronym from the first character of every space- or
 * hyphen-separated part of a string.
 *
 * @param {String} string the string for which to produce the acronym
 * @returns {String} the acronym
 */
function getAcronym(string) {
    return string
        .split(' ')
        .map(function (word) {
            // hyphenated words contribute one letter per hyphen-separated part
            return word
                .split('-')
                .map(function (part) {
                    return part.charAt(0);
                })
                .join('');
        })
        .join('');
}
/**
 * Fuzzy score: checks that every character of `stringToRank` occurs in
 * `testString` in the same order and rates how tightly packed they are.
 * A result near rankings.MATCHES means the characters are far apart; a result
 * near rankings.MATCHES + 1 means they sit close together.
 * @param {String} testString - the string to test against
 * @param {String} stringToRank - the string to rank
 * @returns {Number} rankings.NO_MATCH when a character cannot be found in
 *   order, otherwise a number between rankings.MATCHES and rankings.MATCHES + 1
 */
function getClosenessRanking(testString, stringToRank) {
    var matchedCount = 0;

    // Looks for `wanted` in `haystack` starting at `start`. Returns the index
    // just past the hit (and counts the hit), or -1 when there is none.
    var scanFrom = function (wanted, haystack, start) {
        var pos = start;
        while (pos < haystack.length) {
            if (haystack[pos] === wanted) {
                matchedCount += 1;
                return pos + 1;
            }
            pos += 1;
        }
        return -1;
    };

    var firstIndex = scanFrom(stringToRank[0], testString, 0);
    if (firstIndex < 0) {
        return rankings.NO_MATCH;
    }

    // Each remaining character must appear after the previous hit.
    var cursor = firstIndex;
    var next = 1;
    while (next < stringToRank.length) {
        cursor = scanFrom(stringToRank[next], testString, cursor);
        if (cursor < 0) {
            return rankings.NO_MATCH;
        }
        next += 1;
    }

    // Distance between the end of the first hit and the end of the last hit;
    // a smaller spread gives a higher score.
    var spread = cursor - firstIndex;
    return rankings.MATCHES + (matchedCount / stringToRank.length) * (1 / spread);
}
/**
 * Comparator for ranked entries: higher rank first, then lower keyIndex,
 * and finally whatever `baseSort` decides.
 * @param {Object} a - the first item to sort
 * @param {Object} b - the second item to sort
 * @param {Function} baseSort - tie-breaker called with (a, b) when rank and keyIndex are both equal
 * @return {Number} -1 if a should come first, 1 if b should come first,
 *   otherwise the result of baseSort
 */
function sortRankedValues(a, b, baseSort) {
    var rankA = a.rank;
    var indexA = a.keyIndex;
    var rankB = b.rank;
    var indexB = b.keyIndex;

    if (rankA !== rankB) {
        return rankA > rankB ? -1 : 1;
    }
    if (indexA !== indexB) {
        return indexA < indexB ? -1 : 1;
    }
    // same rank and same key position: let the base sort break the tie
    return baseSort(a, b);
}
/**
 * Prepares a value for comparison by turning it into a string.
 *
 * Diacritic removal is disabled in this build (the `removeAccents` step was
 * commented out), so the `keepDiacritics` option currently has no effect and
 * accented characters are compared as they are.
 * @param {*} value - the value to clean; it may not be a string yet
 * @param {Object} [options] - accepted for signature compatibility; not read
 * @return {String} the prepared value
 */
function prepareValueForComparison(value, options) {
    // Callers do not guarantee a string (we don't get to choose), so coerce.
    // Concatenation also copes with null and undefined, unlike value.toString().
    return "" + value;
}
/**
 * Reads the value(s) a key selects from an item.
 * @param {Object} item - the item
 * @param {Object|Function|String} key - a dot-separated keypath, a callback
 *   that receives the item, or an object whose `key` property is one of those
 * @return {Array|null} - the value(s) wrapped in an array, or null when the
 *   lookup produced null or undefined
 */
function getItemValues(item, key) {
    // unwrap the { key: ... } form first
    var accessor = typeof key === 'object' ? key.key : key;
    var value = typeof accessor === 'function'
        ? accessor(item)
        : getNestedValue(accessor, item);
    if (value == null) {
        return null;
    }
    // concat flattens one level, so a single value and an array of values
    // both end up as a flat array
    return [].concat(value);
}
/**
 * Walks a dot-separated keypath through an object.
 * Given key: "foo.bar.baz"
 * And obj: {foo: {bar: {baz: 'buzz'}}}
 *   -> 'buzz'
 * When a step is reached on a falsy intermediate value, the result for that
 * step is null.
 * @param key a dot-separated set of keys
 * @param obj the object to get the value from
 */
function getNestedValue(key, obj) {
    var segments = key.split('.');
    var current = obj;
    for (var depth = 0; depth < segments.length; depth++) {
        current = current ? current[segments[depth]] : null;
    }
    return current;
}
/**
 * Collects every value the given keys produce for an item, each paired with
 * the attributes of the key it came from.
 * @param item - the item from which the values will be retrieved
 * @param keys - the keys to use to retrieve the values
 * @return a flat array of objects with {itemValue, attributes}
 */
function getAllValuesToRank(item, keys) {
    var collected = [];
    keys.forEach(function (key) {
        var found = getItemValues(item, key);
        if (!found) {
            // this key produced nothing for the item
            return;
        }
        found.forEach(function (itemValue) {
            collected.push({
                itemValue: itemValue,
                attributes: getKeyAttributes(key)
            });
        });
    });
    return collected;
}
// Attributes used for keys that do not specify their own: an unbounded
// ranking window, so no clamping takes place.
var defaultKeyAttributes = {
    maxRanking: Infinity,
    minRanking: -Infinity
};
/**
 * Resolves the ranking attributes that apply to a key.
 * @param key - the key from which the attributes will be retrieved
 * @return the shared defaults for a plain string key; otherwise a new object
 *   made of the defaults overlaid with the key's own properties
 */
function getKeyAttributes(key) {
    var isPlainName = typeof key === 'string';
    if (!isPlainName) {
        return __assign({}, defaultKeyAttributes, key);
    }
    return defaultKeyAttributes;
}

Include that file, then use it like this.

NOTE: This code has been pulled out so it'll work with the MDX.

TODO: Figure out how to get this code working with MDX.

<script>

    const search_objects = []

    {{ range (.Paginate .Pages).Pages -}}
        search_objects.push({'title': '{{ .Render "search_title" }}', 'url': '{{ .Render "search_link" }}', 'page_content': '{{ .Content }}'});
    {{- end }}

    const selectElement = document.getElementById('try_to_get_this');
    
    selectElement.addEventListener('keypress', (event) => {
        title_matches_live = matchSorter(search_objects, event.target.value, {keys: ['title', 'page_content'], threshold: matchSorter.rankings.CONTAINS });
        const result = document.getElementById('result');
        let result_lines = "";
        let i;
        for (i=0; i<title_matches_live.length; i++) {
            result_lines = `${result_lines} <br /> <a href="${title_matches_live[i]['url']}">${title_matches_live[i]['title']}</a>`;
        }
        result.innerHTML = `${result_lines}`;
    });
</script>

The client-side part kinda freaks me out. Like, you have to load in all the content that you want to search. I've got a thousand or so posts here. That's a lot of content. So, I'm still nervous about it, but given how huge most websites are these days, I guess it won't be out of the norm. That said, I'm still going to set it up so the content is lazy loaded.

Hmmm, it also makes me wonder about localStorage, like, could I store all the site content in there?