<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="style.css" />
<script data-require="jquery@2.1.3" data-semver="2.1.3" src="http://code.jquery.com/jquery-2.1.3.min.js"></script>
</head>
<body>
<h1>TrieScore</h1>
<p>
Construct a <a href="http://de.wikipedia.org/wiki/Trie" target="_blank">trie</a> from arrays of objects (multiple series),
using an attribute (<code>obj.name</code>) as the stem.
</p>
<p class="sub">
A score is given to each series based on the number of times the same node occurs in any series,
multiplied by the commonality (the number of prefix nodes common across series).
</p>
<p class="sub">
It's based off <a href="https://github.com/mikedeboer/trie" target="_blank">mikedeboer/trie</a> (Github),
which made this a trivial exercise.
</p>
<div>
<form>
<input type="text" id="q" name="q" placeholder="coffee, banana, aardvark" value="" autocomplete="off"/>
</form>
</div>
<div class="output">
<div class="console"><code><pre></pre></code></div>
<div class="matches"><code><pre></pre></code></div>
</div>
<script src="triescore.js"></script>
<script src="data.js"></script>
<script src="script.js"></script>
</body>
</html>
var c, m;
// Emulate console.log in a div
c = (function() {
return {
el: '.console > code > pre',
log: function() {
var args = Array.prototype.slice.apply(arguments),
$el = $(this.el),
log = $el.html();
if(log) log = log + '\n';
$el.html(log + args.join('\n'));
setTimeout(function() { $el.scrollTop(9999); }, 0);
},
clear: function() {
$(this.el).html('');
}
};
}.bind(c)());
// Extend the above to print matches.log to a different div
m = Object.create(console, {
el: {value: '.matches > code > pre' },
log: {
value: function() {
$('.matches').show();
c.log.apply(this, arguments);
}
},
clear: {
value: function() {
$(this.el).html('');
$('.matches').hide();
}
}
});
var seriess = [],
trie, timer;
function setUp() {
c.log("Building a trie...", '');
trie = new Trie();
var i = 0,
lapse = 0,
l = data.length,
t = performance.now(),
x, index;
// add data to the trie
for (; i < l; ++i) {
seriess.push(trie.add(data[i]));
}
t = performance.now() - t;
c.log(printSeriess(Trie.compact(data, 'name')), '');
c.log(i + " series added: " + t.toPrecision(3) + " ms, avg: " + (t / l).toPrecision(3) + " ms/series");
t = performance.now();
x = trie.index;
t = performance.now() - t;
l = 0;
index = Object.keys(x).map(function(key) {
var count = trie.index[key].length;
l += count;
return '[' + key + ']{' + count + '}';
}).join(', ');
c.log(l + "/" + trie.nodeCount + " nodes indexed: " + t.toPrecision(3) + " ms, avg: " + (t / l).toPrecision(3) + " ms/node\n", index);
}
function onSearch() {
var q = $(this).val().replace(/(^\s+)|([\s,]+$)/g, ''),
list = q.split(/[\s,]+/).map(function(word) {
return {name: word};
}),
tail, stem, nodes, seriess, meta;
m.clear();
if (!q || !list.length) return;
tail = list[list.length - 1];
stem = trie.find(list);
nodes = trie.nodes(tail.name);
m.log('Search:', list.map(function(word) {
return '{name: "' + word.name + '"}';
}).join(', '));
seriess = stem ? stem.getDistinctSeries() : null;
if(stem && stem.isComplete) {
metas = stem.meta;
m.log('\nComplete series:', metas.map(function(meta) {
return printSeries({series: meta.series});
}).join('\n'));
}
if (seriess && seriess.length) {
m.log('\nPossible matches:');
m.log(printSeriess(seriess));
}
if (nodes && nodes.length) {
var s = [],
l = nodes.length,
i = 0,
ns;
for(; i < l; ++i) {
ns = nodes[i].getSeries();
if(ns) s = s.concat(ns);
}
s = Trie.compact(s);
m.log('\nAll series containing \'' + tail.name + '\':');
m.log(printSeriess(s));
}
if(stem) m.log('\nStem: ', stem);
if(nodes && nodes.length) m.log('\nAll occurrences of \'' + tail.name + '\': ', nodes.join('\n'));
}
function printSeriess(matches) {
if (!matches) return;
return matches.map(printSeries).join('\n');
}
function printSeries(meta) {
if(!meta || !meta.series) return;
var series = meta.series,
score = trie.getScore(series),
dupes = meta.count,
list;
list = series.map(function(item) {
return item.name;
}).join(', ');
if (dupes > 1) {
return '(' + dupes + 'x, s: ' + score.toPrecision(3) + ') ' + list;
} else {
return '(score: ' + score.toPrecision(3) + ') ' + list;
}
}
//////////////////////////////////////////////////
///// CONSTRUCT A TRIE AND SEARCH THE THINGS /////
setUp();
$('#q').on('keyup', function() {
setTimeout(onSearch.bind(this), 0);
});
html, body {
font-family: "Helvetica Neue", Helvetica, sans-serif;
color: #555;
line-height: 1.25em;
}
* {
font-weight: normal;
}
h1 {
margin-bottom: 0;
}
input {
width: 100%;
padding: 0.125em;
font-size: 2em;
box-sizing: border-box;
margin-bottom: 0;
border: 1px solid #ccc;
border-bottom: none;
}
.sub {
font-size: 12px;
line-height: 1.25em;
}
.foot {
font-size: 12px;
}
.output {
position: relative;
}
.console {
margin-top: 0;
min-height: 100px;
max-height: 600px;
padding: 10px;
font-size: 10px;
background-color: #fafafa;
border: 1px solid #ddd;
box-sizing: border-box;
box-shadow: inset 0 2px 2px rgba(0,0,0,0.025);
overflow: auto;
}
.console > code,
.console > code > pre {
margin: 0;
padding: 0;
line-height: 1em;
}
.matches {
display: none;
position: absolute;
top: 0;
left: 0;
right: 0;
max-height: 600px;
padding: 10px;
font-size: 11px;
line-height: 1em;
background-color: #fff;
border: 1px solid #ddd;
box-sizing: border-box;
box-shadow: 0 2px 2px rgba(0,0,0,0.05);
overflow: auto;
}
.matches > code,
.matches > code > pre {
margin: 0;
padding: 0;
}
.show {
display: block;
}
TrieScore
=========
## Construct a [trie](http://de.wikipedia.org/wiki/Trie) from arrays of objects (multiple series), using an attribute (`obj.name`) as the stem.
A score is given to each series based on the number of times the same node occurs in any series, multiplied by the commonality (the number of prefix nodes common across series).
It's based off [mikedeboer/trie](https://github.com/mikedeboer/trie) (Github), which made this a trivial exercise.
This example uses jQuery to make life easier, but `trie.js` itself has no external dependencies.
## License
MIT
/*
* The MIT License
*
* Copyright (c) 2015 Ian White
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* The MIT License
*
* Copyright (c) 2010 Mike de Boer
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* A trie is an efficient search tree. (See [Knuth1972] for more details on
* digital search trees.)
* [Fredkin1960] introduced the trie terminology, which is abbreviated from
* "Retrieval".
* [Knuth1972] Knuth, D. E. The Art of Computer Programming Vol. 3, Sorting and
* Searching. Addison-Wesley. 1972.
* [Fredkin1960] Fredkin, E. Trie Memory. Communication of the ACM. Vol. 3:9
* (Sep 1960). pp. 490-499.
* @see <a href="http://linux.thai.net/~thep/datrie/datrie.html">source</a>
* @see <a href="http://en.wikipedia.org/wiki/Trie">Wikipedia article</a>
*
* The trie implementation of Mike de Boer served as a starting point for this,
* which in turn was inspired by Dennis Byrne:
* @link https://github.com/mikedeboer/trie
* @link http://notdennisbyrne.blogspot.com/2008/12/javascript-trie-implementation.html
*
* @param {Object} stem object having a string attribute which represents the node prefix
* @default {name:""}
* @param {String} attr attribute on {stem} whose value represents the node prefix
* @default "name"
* @param {Number} sorting sort method ({@link SORT_ASC} or {@link SORT_DESC})
* @default SORT_DESC
* @license MIT
* @constructor
*/
var Trie = (function() {
/** @ignore */
function Trie(stem, attr, sorting) {
this.attr = attr || 'name';
this.stem = stem || {};
if (!this.stem[attr]) this.stem[attr] = '';
this.nstem = this.stem[attr];
this.sorting = sorting || Trie.SORT_DESC;
this.inCommonCount = 0;
this.visitCount = 0;
this.seriesCount = 0;
this.branchCount = 0;
this.isComplete = false;
this.children = [];
this.meta = [];
}
Trie.SORT_ASC = 0x0001;
Trie.SORT_DESC = 0x0002;
Trie.SORT_NONE = 0x0004;
var STATIC_PROPS = ["stem", "nstem", "sorting", "inCommonCount", "visitsCount", "seriesCount", "branchCount", "isComplete", "meta"];
Trie.compact = compact;
Trie.prototype = {
/**
* Add an object having attribute {attr} to the existing dictionary. If a
* trie node doesn't exist yet, it is created with that object as its stem.
* Since an 'add' is an expensive action, compared to adding nodes to native
* Javascript containers like Array or Object, inserting a trie node in
* lexical order is relatively cheap.
*
* @param {Array} series series of objects to add to the trie
* @param {String} attr name of attribute to pluck as the node prefix
* @default [this.attr|'name']
* @param {Object} meta additional metadata to associate with a series
* @default {}
* @param {Trie} parent parent node for reverse traversal
* @default this
* @return {Trie} the start of the added series
*/
add: function (series, attr, meta, parent) {
// End of the line, add series info to node metadata
if (!series.length) {
if (!meta || !meta.series) throw "metadata missing!";
++this.seriesCount;
this.isComplete = true;
this.meta.push(meta);
return;
}
var isRoot = !parent,
k = series[0],
c = this.children,
l = c.length,
s = this.sorting,
i = 0,
v, t;
attr = attr || this.attr || 'name';
meta = meta || {};
if (isRoot) {
this.isRoot = true;
}
if (!meta.series) meta.series = series;
v = k[attr];
// check if a child with stem 'k' already exists:
for (; i < l; ++i) {
if (c[i].nstem == v) {
t = c[i];
++t.inCommonCount;
break;
}
}
// if not, add a branch:
if (!t) {
++this.branchCount;
t = new Trie(k, attr, s);
t.parent = this;
if (!s || !c.length || s & Trie.SORT_NONE) {
c.push(t);
} else if (s & Trie.SORT_DESC) {
i = l;
do {
if (--i < 0) {
c.unshift(t);
break;
}
} while (c[i].nstem > v);
if (i >= 0) c.splice(i + 1, 0, t);
} else {
i = 0, --l;
do {
if (++i > l) {
c.unshift(t);
break;
}
} while (c[i].nstem > v);
if (i <= l) c.splice(i, 0, t);
}
}
++t.visitCount;
t.add(series.slice(1), attr, meta, this);
return t;
},
/**
* Retrieve a descendent trie matching the stem.
*
* @param {Array} stem the prefix to look for
* @type {Trie} the matching node
*/
find: function (stem) {
if (!stem || stem[this.attr] == this.nstem) return this;
var res = walk(stem, this);
return res ? res.trie : null;
},
/**
* Remove a branch from the trie.
*
* @param {Array} stem the prefix to remove
* @type {void}
*/
remove: function (stem) {
stem = stem || [this.parent];
var res = walk(stem, this);
if (!res) return false;
res.parent.children.remove(res.idx);
return true;
},
/**
* Replace a stem. This is implemented like a filesystem rename: add a
* node with the new name and remove the 'old' version.
*
* @param {Array} replace the stem to replace
* @param {Array} series the series that will prelace the stem
* @param {Object} meta metadata associated with the series
* @return {Trie} the trie representing the start of the new series
*/
replace: function(replace, series, meta) {
this.remove(replace);
return this.add(series, this.attr, meta, this);
},
/**
* Retrieve all occurrences of value in the trie.
*
* @param {String} value the value of the node
* @type {Trie} the matching node
*/
nodes: function(value) {
if(!this.index) return null;
return this.index[value] || null;
},
/**
* Number of nodes in the trie. Triggers an index of nodes
* (Could be more efficient if it's something that's often used)
*/
get nodeCount() {
var k = this.index,
c = 0, i;
for (i in this.index) {
c += this.index[i].length;
}
return c;
},
/**
* Reverse traverse up to the root node
*
* @type {Trie} the root node
*/
get root() {
if(this.isRoot) return null;
var p = this.parent;
while(p && !p.isRoot) {
p = p.parent;
if(!p) return null;
}
return p || null;
},
/**
* Retrieve a list of all decentent nodes, indexed by value
* This is expensive and generated lazily
*
* @type {Object} Object with node values as keys, and an array of
* occurrences for values
*/
get index() {
if(!this.nodeIndex) return this.reindex();
return this.nodeIndex;
},
reindex: function() {
var c = this.children,
l = c.length,
i = 0,
ci, k;
this.nodeIndex = {};
for (; i < l; ++i) {
// add child nodes
if(!this.nodeIndex[c[i].nstem]) this.nodeIndex[c[i].nstem] = [];
this.nodeIndex[c[i].nstem].push(c[i]);
// reindex child and add them too
ci = c[i].reindex();
for(k in ci) {
if(!this.nodeIndex[k]) this.nodeIndex[k] = [];
this.nodeIndex[k] = this.nodeIndex[k].concat(c[i].index[k]);
}
}
return this.nodeIndex;
},
/**
* Retrieve a direct child with matching value
*
* @param {String} value child value
* @type {Trie} matching child node
*/
getChild: function(value) {
var i = 0,
c = this.children,
l = c.length;
for (; i < l; ++i) {
if (c[i].nstem == value) return c[i];
}
return null;
},
/**
* Returns true if this trie has any direct children matching prefix
*
* @param {String} value child value
* @type {Boolean}
*/
hasChild: function(value) {
return this.getChild(stem) !== null;
},
/**
* Sort in lexical order, either in an ascending or descending direction.
* Since it uses the native {@link Array#sort}, sorting speed can be
* considered linear O(n) to the size of the trie, i.e. the series count.
*
* @param {Number} direction sorting direction. Possible values:
* {@link Trie#SORT_ASC}
* {@link Trie#SORT_DESC}
* @type {void}
*/
sort: function(direction) {
if (typeof direction == "undefined")
direction = Trie.SORT_DESC;
if (!this.branchCount || this.sorting === direction) return;
this.sorting = direction;
if (direction & Trie.SORT_NONE) return;
var c = this.children,
i = c.length - 1,
m = direction & Trie.SORT_ASC ? sortAsc : sortDesc;
c.sort(m);
for (; i >= 0; --i)
c[i].sort(direction);
},
/**
* Retrieve metadata from all descendents
*
* @type {Array}
*/
getMeta: function() {
var metas = [],
c = this.children,
i = 0,
l = c.length;
if(this.seriesCount) {
metas = metas.concat(this.meta);
}
for (; i < l; ++i) {
metas = metas.concat(c[i].getMeta());
}
return metas;
},
/**
* Retrieve all series (plural) that terminate in any descendent. The main
* use-case for this function is for implementing such fancy things as
* type-ahead, or node graphs. The performance of this function still needs
* to be profiled against alternatives, like pre-caching series per-Trie on
* construction.
*
* @type {Array}
*/
getSeries: function() {
var seriess = [],
m = this.getMeta(),
l = m.length,
i = 0;
for (; i < l; ++i) {
seriess.push(m[i].series);
}
return seriess;
},
/**
* Retrieve an array of unique series, where each element contains the
* distinct series and a duplicate count.
*
* @type {Array} formatted as `[{series: [...], count: i}]`
*/
getDistinctSeries: function() {
var seriess = this.getSeries();
if(!seriess.length) return null;
return compact(seriess, this.attr);
},
/**
* Retrieve the branch count at the matching stem
*
* @param {Array} stem the prefix to
* @type {Number}
*/
getBranchCount: function(stem) {
if(!stem) return this.branchCount;
var res = walk(stem, this);
return res ? res.trie.branchCount : 0;
},
/**
* Retrieve the series count at the matching stem
*
* @param {Array} stem the series-completing stem
* @type {Number}
*/
getSeriesCount: function(stem) {
if(!stem) return this.seriesCount;
var res = walk(stem, this);
return res ? res.trie.seriesCount : 0;
},
/**
* Retrieve the computed score of the matching series.
* The score is calculated by summing `viewCount` and `inCommonCount` for
* all nodes, then dividing the aggregate common count by the number of
* nodes in the series, and finally adding the aggregate view count plus
* node count. Several series may cross the same node, which increases the
* view count for that node, and that helps. But the main multiplier is the
* multiplier of common series and the length of the common run.
*
* @param {Array} stem the series-completing stem
* @type {Number}
*/
getScore: function(stem) {
var n = stem ? this.find(stem) : this,
s = 0, nc = 0, cc = 0, vc = 0;
if(!n) return s;
do {
cc += n.inCommonCount;
vc += n.visitCount * n.inCommonCount;
n = n.parent;
++nc;
} while(n && !n.isRoot);
// Add it all up
return (((cc/nc) * nc + vc + nc) / this.nodeCount) * 100;
},
/**
* Override toString for prettier representation of a Trie.
*
* @type {String}
*/
toString: function() {
return "Trie{'" + this.nstem + "'}: {\n" +
" attr: " + this.attr + ",\n" +
((this.isRoot) ? " isRoot: true,\n" : "") +
" isComplete: " + this.isComplete + ",\n" +
" visitCount: " + this.visitCount + ",\n" +
" inCommonCount: " + this.inCommonCount + ",\n" +
" branchCount: " + this.branchCount + ",\n" +
" seriesCount: " + this.seriesCount + ",\n" +
((this.parent) ? " parent: Trie{'" + this.parent.nstem + "'},\n" : "") +
" children: [Trie]{" + this.children.length + "}\n" +
" meta: " + JSON.stringify(this.meta) + ",\n" +
"}";
}
};
function walk(stem, trie) {
if (!stem || !trie) return null;
var route = [],
ch, c, l, i, prev;
while (stem.length) {
ch = stem[0],
c = trie.children,
l = c.length,
i = 0;
for (; i < l; ++i) {
if (ch[trie.attr] == c[i].nstem) break;
}
if (i == l) return null; // not found
route.push(trie);
prev = trie;
trie = c[i];
stem = stem.slice(1);
}
return {
trie: trie,
parent: prev,
idx: i,
route: route.concat(trie)
};
}
function compact(seriess, attr) {
var s = {},
l = seriess.length,
i = 0,
k;
attr = attr || 'name';
for (; i < l; ++i) {
k = hashSeries(seriess[i], attr);
if (!s[k]) s[k] = {series: seriess[i], count: 0};
s[k].count += 1;
}
return Object.keys(s).map(function(hash) {
return s[hash];
});
}
/**
* Simple string hash used to index a series.
* (I'm sure there's a better way to do this)
*/
function hash(str) {
var hashed = 0, len = str.length, i = 0;
if (!len) return hash;
for (; i < len; ++i) {
hashed = hashed * 31 + str.charCodeAt(i);
}
return hashed;
}
/**
* Simple series hash used to index a series. I've separated the individual
* attribute hashes just for debugging, this will need some work to optimize.
*/
function hashSeries(series, attr) {
var hashed = [], len = series.length, i = 0;
if (!len) return hashed;
for (; i < len; ++i) {
hashed.push(hash(series[i][attr]));
}
return hashed.join(':');
}
/**
* Sorting helper function that can be passed to Array.sort().
* The result of this helper will be that all nodes will be sorted in
* ascending lexical order.
*
* @param {Trie} a first element for comparison
* @param {Trie} b second element for comparison
* @type {Number}
* @memberOf Trie
*/
function sortAsc(a, b) {
var s1 = a.nstem,
s2 = b.nstem;
return (s1 < s2) ? 1 : (s1 > s2) ? -1 : 0;
}
/**
* Sorting helper function that can be passed to Array.sort().
* The result of this helper will be that all nodes will be sorted in
* descending lexical order.
*
* @param {Trie} a first element for comparison
* @param {Trie} b second element for comparison
* @type {Number}
* @memberOf Trie
*/
function sortDesc(a, b) {
var s1 = a.nstem,
s2 = b.nstem;
return (s1 > s2) ? 1 : (s1 < s2) ? -1 : 0;
}
return Trie;
})();
var data = [
[
{name: "aardvark"},
{name: "banana"},
{name: "coffee"},
{name: "coffee"},
{name: "derp"},
{name: "especial"},
{name: "fork"},
{name: "fork"}
],
[
{name: "aardvark"},
{name: "aardvark"},
{name: "aardvark"},
{name: "derp"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "apple"},
{name: "especial"},
{name: "banana"}
],
[
{name: "apple"},
{name: "banana"},
{name: "especial"},
{name: "banana"},
{name: "coffee"}
],
[
{name: "apple"},
{name: "banana"},
{name: "coffee"},
{name: "aardvark"},
{name: "coffee"}
],
[
{name: "apple"},
{name: "banana"},
{name: "coffee"},
{name: "aardvark"},
{name: "coffee"}
],
[
{name: "banana"},
{name: "coffee"},
{name: "especial"},
{name: "fork"}
],
[
{name: "apple"},
{name: "banana"},
{name: "coffee"},
{name: "aardvark"},
{name: "aardvark"},
{name: "aardvark"},
{name: "especial"},
{name: "fork"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"},
{name: "fork"}
],
[
{name: "coffee"},
{name: "banana"},
{name: "aardvark"},
{name: "fork"},
{name: "especial"}
],
[
{name: "fork"}
],
[
{name: "fork"}
],
[
{name: "fork"}
],
[
{name: "fork"}
],
[
{name: "fork"}
],
[
{name: "fork"}
]
];