-
Notifications
You must be signed in to change notification settings - Fork 38
Add minimization algorithm for Acyclic FST #49
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -329,4 +329,121 @@ Graph viterbiPath(const Graph& g) { | |
return detail::shortestPath(g); | ||
} | ||
|
||
/**
 * Build a reduced copy of the acyclic graph `g` by merging equivalent nodes.
 *
 * The graph is processed bottom-up. Nodes without outgoing arcs are merged
 * first, grouped by their (start, accept) flags. After that, a node becomes
 * a merge candidate once every destination of its outgoing arcs has been
 * processed. Two candidates are merged when they agree on their start and
 * accept flags and their outgoing arcs match one-to-one on input label,
 * output label and (already merged) destination node.
 *
 * Arc weights are NOT part of the equivalence test: a merged node receives
 * the arcs, including the weights, of the first node of its group.
 * TODO: take weights into account to support weighted graphs.
 *
 * `g` must be acyclic. A node that lies on a cycle, or that can reach one,
 * never has all of its destinations processed and is therefore left out of
 * the returned graph.
 *
 * Review notes carried over from the pull-request discussion:
 *  - GTN generally assumes graphs are "trim" (no paths leading to dangling
 *    nodes). A non-accepting node with no outgoing arcs contributes nothing
 *    to the set of accepted paths and arguably should not be part of a
 *    minimal graph; such nodes are currently still kept and merged.
 *  - A rename of this function was suggested; the name is kept here so that
 *    existing callers continue to work.
 *
 * @param g The acyclic graph to minimize.
 * @return A new graph in which equivalent nodes of `g` have been merged.
 */
Graph minimizeAcyclicFST(const Graph& g) {
  Graph graph;
  // oldToNew[n] is the node of `graph` that node n of `g` was merged into,
  // or -1 while n has not been processed yet.
  std::vector<int> oldToNew(g.numNodes(), -1);
  // processed[n] is true once node n of `g` has a node in `graph`.
  std::vector<bool> processed(g.numNodes(), false);
  // Sources of arcs entering processed nodes; a subset of these becomes the
  // candidates of the next round.
  std::set<int> predecessors;

  auto addPredecessors = [&predecessors, &g](int node) {
    for (auto inArc : g.in(node)) {
      predecessors.insert(g.srcNode(inArc));
    }
  };

  // Strict ordering of arcs by (ilabel, olabel, merged destination). Sorting
  // with it gives a canonical arc order, so that the comparison of two nodes
  // does not depend on the order in which their arcs were added to `g`.
  auto arcLess = [&g, &oldToNew](int a1, int a2) {
    if (g.ilabel(a1) != g.ilabel(a2)) {
      return g.ilabel(a1) < g.ilabel(a2);
    }
    if (g.olabel(a1) != g.olabel(a2)) {
      return g.olabel(a1) < g.olabel(a2);
    }
    return oldToNew[g.dstNode(a1)] < oldToNew[g.dstNode(a2)];
  };

  auto arcSame = [&g, &oldToNew](int a1, int a2) {
    return g.ilabel(a1) == g.ilabel(a2) && g.olabel(a1) == g.olabel(a2) &&
        oldToNew[g.dstNode(a1)] == oldToNew[g.dstNode(a2)];
  };

  // Initialization: merge all nodes without outgoing arcs into at most four
  // nodes, one per (start, accept) combination. New nodes are created lazily
  // the first time a combination is encountered.
  int leafNode[2][2] = {{-1, -1}, {-1, -1}};
  const int numNodes = static_cast<int>(g.numNodes());
  for (int n = 0; n < numNodes; ++n) {
    if (g.numOut(n) != 0) {
      continue;
    }
    const bool start = g.isStart(n);
    const bool accept = g.isAccept(n);
    int& merged = leafNode[start][accept];
    if (merged < 0) {
      merged = graph.addNode(start, accept);
    }
    oldToNew[n] = merged;
    processed[n] = true;
    addPredecessors(n);
  }

  // A group of mutually equivalent candidate nodes of `g`.
  struct Candidate {
    std::vector<int> sortedArcs; // out arcs of members[0] in canonical order
    std::vector<int> members; // nodes of `g` belonging to the group
  };
  std::vector<Candidate> candidates;

  while (!predecessors.empty()) {
    candidates.clear();

    // Partition the ready predecessors into groups with the same start flag,
    // accept flag and set of outgoing arcs.
    for (auto predNode : predecessors) {
      const auto& outArcs = g.out(predNode);
      // A node is ready only when all of its destinations are processed.
      const bool ready = std::all_of(
          outArcs.begin(), outArcs.end(), [&g, &processed](int a) {
            return processed[g.dstNode(a)];
          });
      if (!ready) {
        // It is re-inserted into `predecessors` when one of its remaining
        // destinations gets processed.
        continue;
      }

      std::vector<int> arcs(outArcs.begin(), outArcs.end());
      std::sort(arcs.begin(), arcs.end(), arcLess);

      auto it = std::find_if(
          candidates.begin(),
          candidates.end(),
          [&g, &arcs, &arcSame, predNode](const Candidate& c) {
            const int rep = c.members[0];
            return g.isStart(rep) == g.isStart(predNode) &&
                g.isAccept(rep) == g.isAccept(predNode) &&
                c.sortedArcs.size() == arcs.size() &&
                std::equal(
                       c.sortedArcs.begin(),
                       c.sortedArcs.end(),
                       arcs.begin(),
                       arcSame);
          });

      if (it == candidates.end()) {
        candidates.emplace_back();
        candidates.back().sortedArcs.swap(arcs);
        candidates.back().members.push_back(predNode);
      } else {
        it->members.push_back(predNode);
      }
    }

    predecessors.clear();

    // Create one node per group and collect the next round of predecessors.
    for (const auto& candidate : candidates) {
      const int rep = candidate.members[0];
      const int mergedNode = graph.addNode(g.isStart(rep), g.isAccept(rep));
      for (auto n : candidate.members) {
        addPredecessors(n);
        processed[n] = true;
        oldToNew[n] = mergedNode;
      }
      // Re-attach the arcs of the representative, in their original order,
      // to the merged node.
      for (auto a : g.out(rep)) {
        graph.addArc(
            mergedNode,
            oldToNew[g.dstNode(a)],
            g.ilabel(a),
            g.olabel(a),
            g.weight(a)); // TODO: weights of the other members are dropped
      }
    }
  }

  return graph;
}
|
||
} // namespace gtn |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,5 +151,8 @@ Graph viterbiScore(const Graph& g); | |
*/ | ||
Graph viterbiPath(const Graph& g); | ||
|
||
/** Minimize an Acyclic FST */ | ||
Graph minimizeAcyclicFST(const Graph& g); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's just call this |
||
|
||
/** @} */ | ||
} // namespace gtn |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should make this a `vector<bool> processed` (the name `old` is redundant). Make it the size of the old graph's number of nodes and initialize every element to `false`. Then, to check if a node is already processed, check `processed[n]`.