Skip to content

Commit

Permalink
Ellipsis nodes with added token are now split as two nodes, one being…
Browse files Browse the repository at this point in the history
… an empty ellipsis node and the other being a dependent node with all the morphology.
  • Loading branch information
lauma committed Apr 4, 2019
1 parent ae0969e commit e0ccf48
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 12 deletions.
6 changes: 6 additions & 0 deletions LVTB2UD/src/lv/ailab/lvtb/universalizer/LvtbToUdUI.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ protected static boolean updateParamsFormArgs(String[] args)
}
else return false;
break;
case "split_ellipsis":
if (isBool) params.SPLIT_NONEMPTY_ELLIPSIS = value;
else return false;
break;
case "debug":
if (isBool) params.DEBUG = value;
else return false;
Expand Down Expand Up @@ -226,6 +230,8 @@ protected static void printMan()
"Possible key values (case insensitive):\n" +
" add_node_ids [bool, mandatory] - should Misc column contain node IDs from\n" +
" LVTB?\n" +
" split_ellipsis [bool, true by default] - treat ellipsis node with morphology\n" +
" as two nodes.\n" +
" debug [bool, false by default] - print debug message for each node.\n" +
" warn_ellipsis [bool, false by default] - warn on ellipsis.\n" +
" warn_omissions [bool, true by default] - warn if a sentence is omitted.\n" +
Expand Down
1 change: 1 addition & 0 deletions LVTB2UD/src/lv/ailab/lvtb/universalizer/pml/LvtbRoles.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,5 @@ public class LvtbRoles

// other
public final static String REPEAT = "repeat";
public final static String ELLIPSIS_TOKEN = "ellipsisTok";
}
24 changes: 21 additions & 3 deletions LVTB2UD/src/lv/ailab/lvtb/universalizer/pml/PmlANode.java
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,31 @@ public interface PmlANode
* @return descendant list sorted by ord values
*/
public List<PmlANode> getDescendantsWithOrdAndM();

/**
 * Split a nonempty ellipsis node (an ellipsis node that has morphology)
 * into an empty ellipsis node and a dependent child node.
 * @param idPostfix string to append to this node's ID to create the ID for
 *                  the new node.
 * @return true if an actual split was done, false otherwise
 */
public boolean splitMorphoEllipsis(String idPostfix);

/**
 * Find pure ellipsis nodes (ellipsis nodes with no corresponding token) in
 * the subtree headed by this node. The parameter selects between all such
 * nodes and leaf nodes only.
 * @param leafsOnly if true, only leaf nodes are returned
 * @return list of ellipsis nodes in no particular order
 */
public List<PmlANode> getPureEllipsisDescendants(boolean leafsOnly);
/**
* Find ellipsis in the subtree headed by this node. Parameter allows to
* find either all ellipsis or only leaf nodes.
* Find ellipsis nodes with corresponding token in the subtree headed by
* this node. Parameter allows to find either all ellipsis or only leaf
* nodes.
* @param leafsOnly if true, only leaf nodes are returned
* @return list of ellipsis nodes in no particular order
*/
public List<PmlANode> getEllipsisDescendants(boolean leafsOnly);
public List<PmlANode> getMorphoEllipsisDescendants(boolean leafsOnly);
/**
* Find parent or the closest ancestor, that is not coordination phrase or
* crdPart node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import lv.ailab.lvtb.universalizer.pml.PmlANode;
import lv.ailab.lvtb.universalizer.pml.utils.PmlANodeListUtils;
import lv.ailab.lvtb.universalizer.transformator.morpho.AnalyzerWrapper;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

Expand Down Expand Up @@ -675,13 +676,13 @@ public ArrayList<PmlANode> getDescendantsWithOrdAndM()
}

/**
* Find ellipsis in the subtree headed by this node. Parameter allows to
* find either all ellipsis or only leaf nodes.
* Find pure ellipsis (no corresponding token) in the subtree headed by this
* node. Parameter allows to find either all ellipsis or only leaf nodes.
* @param leafsOnly if true, only leaf nodes are returned
* @return list of ellipsis nodes in no particular order
*/
@Override
public ArrayList<PmlANode> getEllipsisDescendants(boolean leafsOnly)
public ArrayList<PmlANode> getPureEllipsisDescendants(boolean leafsOnly)
{
String pattern = leafsOnly
? ".//node[reduction and not(m.rf) and not(children)]"
Expand All @@ -698,6 +699,88 @@ public ArrayList<PmlANode> getEllipsisDescendants(boolean leafsOnly)
}
}

/**
 * Collect the ellipsis nodes that carry a token, i.e. descendant
 * {@code node} elements having both a {@code reduction} and an {@code m.rf}
 * child, from the subtree headed by this node.
 * @param leafsOnly if true, only nodes without a {@code children} element
 *                  are returned; if false, all matching nodes are returned
 * @return list of matching ellipsis nodes in no particular order
 * @throws IllegalArgumentException if the XPath evaluation fails
 */
@Override
public List<PmlANode> getMorphoEllipsisDescendants(boolean leafsOnly)
{
    // Choose the query up front: the leaf variant additionally requires
    // that the node has no children container.
    String query;
    if (leafsOnly)
    {
        query = ".//node[reduction and m.rf and not(children)]";
    }
    else
    {
        query = ".//node[reduction and m.rf]";
    }

    try
    {
        Object found = XPathEngine.get().evaluate(
                query, domNode, XPathConstants.NODESET);
        NodeList matches = (NodeList) found;
        return XmlDomANode.asList(matches);
    }
    catch (XPathExpressionException failure)
    {
        throw new IllegalArgumentException(failure);
    }
}

/**
 * Split a nonempty ellipsis node into an empty ellipsis node and a dependent
 * child. The morphology reference ({@code m.rf}) and, when present, the
 * {@code ord} element are moved from this node to a newly created child
 * node, which gets the role {@link LvtbRoles#ELLIPSIS_TOKEN}. The child is
 * appended to this node's {@code children} container, which is created if
 * it does not exist yet.
 * <p>
 * Everything needed is looked up before the tree is modified, so a node
 * that cannot be split is left untouched.
 * @param idPostfix string to append to the node ID to create ID for new
 *                  node.
 * @return true if an actual split was done; false if this node is a pure
 *         reduction node, has no reduction value, or has no morphology
 * @throws IllegalArgumentException if an XPath evaluation fails
 */
@Override
public boolean splitMorphoEllipsis(String idPostfix)
{
    if (isPureReductionNode()) return false;
    String reductionField = getReduction();
    if (reductionField == null || reductionField.isEmpty()) return false;
    try
    {
        // Resolve everything that has to be moved first; bail out before
        // any mutation if there is no morphology to split off.
        Node mDom = (Node) XPathEngine.get().evaluate(
                "./m.rf", domNode, XPathConstants.NODE);
        if (mDom == null) return false;
        Node ord = (Node) XPathEngine.get().evaluate(
                "./ord", domNode, XPathConstants.NODE);

        // Children container
        Node childrenNode = (Node) XPathEngine.get().evaluate(
                "./children", domNode, XPathConstants.NODE);
        if (childrenNode == null)
        {
            childrenNode = domNode.getOwnerDocument().createElement("children");
            domNode.appendChild(childrenNode);
        }

        // Node itself
        Element newTokenNode = domNode.getOwnerDocument().createElement("node");
        childrenNode.appendChild(newTokenNode);

        // id attribute
        newTokenNode.setAttribute("id", getId() + idPostfix);

        // Move morphology. appendChild() detaches the node from its current
        // parent, so no explicit removeChild() is needed.
        newTokenNode.appendChild(mDom);

        // Move ord, if this node has one.
        if (ord != null) newTokenNode.appendChild(ord);

        // Role.
        Node roleNode = domNode.getOwnerDocument().createElement("role");
        newTokenNode.appendChild(roleNode);
        roleNode.appendChild(
                domNode.getOwnerDocument().createTextNode(LvtbRoles.ELLIPSIS_TOKEN));

        return true;
    }
    catch (XPathExpressionException e)
    {
        throw new IllegalArgumentException(e);
    }
}

/**
* Find parent or the closest ancestor, that is not coordination phrase or
* crdPart node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ public class Sentence
*/
public HashSet<String> ellipsisWithOrphans = new HashSet<>();

/**
* Postfix used to append to the ellipsis node ID during the non-empty
* ellipsis split to create ID for new node.
*/
public static final String ID_POSTFIX = "-SPLIT";

public Sentence(PmlANode pmlTree)
{
this.pmlTree = pmlTree;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public void transform()
{
if (params.DEBUG) System.out.printf("Working on sentence \"%s\".\n", s.id);

if (params.SPLIT_NONEMPTY_ELLIPSIS) ellipPreproc.splitTokenEllipsis();
morphoTransf.transformTokens();
StandardLogger.l.flush();
morphoTransf.extractSendenceText();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ public class TransformationParams
*/
public Boolean UD_STANDARD_NULLNODES = true; // NB! Check this before each UD release!

/**
* To fit the UD standard, this must be true. When true, each ellipsis node
* with morphology is split into an empty ellipsis node and a dependent node
* carrying the morphology.
*/
public Boolean SPLIT_NONEMPTY_ELLIPSIS = true; // NB! Check this before each UD release!

// ===== Enhanced.
/**
* To fit UD standard, this must be true. If this is false, enhanced graph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,28 @@ public void transformTokens()
prevW = currentWs.get(currentWs.size() - 1);
prevOrd = currentOrd;
}
if (params.ADD_NODE_IDS && params.SPLIT_NONEMPTY_ELLIPSIS) cleanupIds();
}

/**
 * Check all references to original nodes and remove the artificially added
 * node ID postfix ({@link Sentence#ID_POSTFIX}) from them.
 * <p>
 * The affected IDs are collected first and the set is rewritten afterwards,
 * because changing a {@code HashSet} while iterating over it may throw
 * {@code ConcurrentModificationException}.
 */
protected void cleanupIds()
{
    for (Token t : s.conll)
    {
        HashSet<String> misc = t.misc.get(MiscKeys.LVTB_NODE_ID);
        // Tokens without any recorded node ID have nothing to clean up.
        if (misc == null || misc.isEmpty()) continue;

        // Pass 1: find IDs carrying the postfix, leaving the set untouched.
        HashSet<String> postfixed = new HashSet<>();
        for (String id : misc)
        {
            if (id.endsWith(Sentence.ID_POSTFIX)) postfixed.add(id);
        }

        // Pass 2: replace each of them with the original node ID.
        for (String id : postfixed)
        {
            String newId = id.substring(0, id.length() - Sentence.ID_POSTFIX.length());
            misc.remove(id);
            misc.add(newId);
        }
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public static Tuple<UDv2Relations, String> depToUDLogic(
// Other
// TODO wait for answer in https://github.com/UniversalDependencies/docs/issues/594
case LvtbRoles.REPEAT : return Tuple.of(UDv2Relations.REPARANDUM, null);
case LvtbRoles.ELLIPSIS_TOKEN: return ellipsisTokToUD(node, parent);
default : return Tuple.of(UDv2Relations.DEP, null);
}
}
Expand Down Expand Up @@ -608,6 +609,13 @@ public static Tuple<UDv2Relations, String> insToUD(PmlANode node, PmlANode paren
return Tuple.of(UDv2Relations.DISCOURSE, null); // Washington (CNN) is left unidentified.
}

/**
 * Obtain the UD relation for a node with the role
 * {@link LvtbRoles#ELLIPSIS_TOKEN}. Currently the generic {@code dep}
 * relation is returned in every case; tags starting with "z" are checked
 * separately but map to the same result.
 * @param node   node for which the UD dependency should be obtained; its
 *               tag must be available
 * @param parent parent of the node (not used at the moment)
 * @return tuple of the {@code dep} relation and a null second element
 */
public static Tuple<UDv2Relations, String> ellipsisTokToUD(PmlANode node, PmlANode parent)
{
    String morphoTag = node.getAnyTag();
    if (!morphoTag.matches("z.*"))
    {
        return Tuple.of(UDv2Relations.DEP, null);
    }
    // Tags starting with "z" get the same relation for now.
    return Tuple.of(UDv2Relations.DEP, null);
}

/**
* Print out the warning that role was not tranformed.
* @param node node for which UD dependency should be obtained (use
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import lv.ailab.lvtb.universalizer.pml.LvtbXTypes;
import lv.ailab.lvtb.universalizer.pml.PmlANode;
import lv.ailab.lvtb.universalizer.transformator.Sentence;
import lv.ailab.lvtb.universalizer.utils.Logger;

import java.util.List;

Expand All @@ -20,6 +19,16 @@ public EllipsisPreprocessor(Sentence sent)
s = sent;
}

/**
 * Split every ellipsis node with morphology found in the sentence's PML
 * tree, using {@link Sentence#ID_POSTFIX} to form the IDs of the new nodes.
 * Does nothing if no such nodes are found.
 */
public void splitTokenEllipsis()
{
    List<PmlANode> morphoEllipses = s.pmlTree.getMorphoEllipsisDescendants(false);
    if (morphoEllipses != null && !morphoEllipses.isEmpty())
    {
        for (PmlANode candidate : morphoEllipses)
        {
            candidate.splitMorphoEllipsis(Sentence.ID_POSTFIX);
        }
    }
}

/**
* Remove the childless ellipsis nodes assuming they can be ignored in
* latter processing. Replace empty xPreds with just ellipsis nodes.
Expand All @@ -29,7 +38,7 @@ public EllipsisPreprocessor(Sentence sent)
public boolean removeAllChildlessEllipsis()
{
// Childless, empty reductions are removed.
List<PmlANode> ellipsisChildren = s.pmlTree.getEllipsisDescendants(true);
List<PmlANode> ellipsisChildren = s.pmlTree.getPureEllipsisDescendants(true);
while (ellipsisChildren != null && !ellipsisChildren.isEmpty())
{
for (PmlANode ellipsisChild : ellipsisChildren)
Expand All @@ -48,11 +57,11 @@ public boolean removeAllChildlessEllipsis()
parent.delete();
}
}
ellipsisChildren = s.pmlTree.getEllipsisDescendants(true);
ellipsisChildren = s.pmlTree.getPureEllipsisDescendants(true);
}

// Check if there is other reductions.
ellipsisChildren = s.pmlTree.getEllipsisDescendants(false);
ellipsisChildren = s.pmlTree.getPureEllipsisDescendants(false);
return ellipsisChildren == null || ellipsisChildren.size() <= 0;
}

Expand All @@ -69,7 +78,7 @@ public boolean removeNonpredChildlessEllipsis()
boolean searchForMore = true;
while (searchForMore)
{
List<PmlANode> ellipsisChildren = s.pmlTree.getEllipsisDescendants(true);
List<PmlANode> ellipsisChildren = s.pmlTree.getPureEllipsisDescendants(true);
searchForMore = false;
for (PmlANode ellipsisChild : ellipsisChildren)
{
Expand Down Expand Up @@ -98,7 +107,7 @@ public boolean removeNonpredChildlessEllipsis()
}

// Check if there is other reductions.
List<PmlANode> ellipsisChildren = s.pmlTree.getEllipsisDescendants(false);
List<PmlANode> ellipsisChildren = s.pmlTree.getPureEllipsisDescendants(false);
return ellipsisChildren == null || ellipsisChildren.size() <= 0;
}

Expand Down

0 comments on commit e0ccf48

Please sign in to comment.