nodeList) {
+ boolean onlyBrTag = true;
+ Node lastBrTag = null;
+ for (Node child : nodeList) {
+ final boolean isEmptyTextChild = child instanceof TextNode && text((TextNode) child).trim().isEmpty();
+ final boolean isBrChild = BR_TAG.equals(child.nodeName());
+ onlyBrTag = onlyBrTag && (isBrChild || isEmptyTextChild);
+ if (isBrChild) {
+ lastBrTag = child;
+ } else if (lastBrTag != null && !isEmptyTextChild) {
+ lastBrTag = null;
+ }
+ }
+ if (!onlyBrTag && lastBrTag != null) {
+ toRemove.add(lastBrTag);
+ }
+ }
+
/**
* Initializes SVG to prevent JSoup's HTML parser from breaking case sensitive SVG.
*
@@ -758,7 +973,7 @@ private void parseHead(URI baseURI, Element headElement) {
*/
private void walkNodeTree(MParagraph parent, Context context, Node node, Element lastElement) {
final Context contextCopy = context.copy();
- if (node instanceof Element && !isHidden(node)) {
+ if (node instanceof Element) {
if ("table".equals(node.nodeName())) {
Node tHeader = null;
for (Node child : node.childNodes()) {
@@ -1332,7 +1547,7 @@ private MParagraph startElement(MParagraph parent, Context context, Element elem
final String nodeName = element.nodeName();
boolean isNumbering = false;
- if ("p".equals(nodeName)) {
+ if (P_TAG.equals(nodeName)) {
res = createMParagraph(context, parent, element, null, null);
} else if (BLOCKQUOTE_TAG.equals(nodeName)) {
if (element.childNodeSize() > 0 && element.childNode(0) instanceof TextNode) {
@@ -1347,91 +1562,92 @@ private MParagraph startElement(MParagraph parent, Context context, Element elem
} else {
res = parent;
}
- } else if ("strong".equals(nodeName) || "b".equals(nodeName)) {
+ } else if (STRONG_TAG.equals(nodeName) || B_TAG.equals(nodeName)) {
setModifiers(context.style, MStyle.FONT_BOLD);
res = parent;
- } else if ("em".equals(nodeName) || "i".equals(nodeName) || "var".equals(nodeName) || "cite".equals(nodeName)) {
- setModifiers(context.style, MStyle.FONT_ITALIC);
- res = parent;
- } else if ("s".equals(nodeName) || "strike".equals(nodeName) || "del".equals(nodeName)) {
- setModifiers(context.style, MStyle.FONT_STRIKE_THROUGH);
- res = parent;
- } else if ("u".equals(nodeName) || "ins".equals(nodeName)) {
- setModifiers(context.style, MStyle.FONT_UNDERLINE);
- res = parent;
- } else if ("sub".equals(nodeName)) {
- setModifiers(context.style, MStyle.SUBSCRIPT);
- res = parent;
- } else if ("sup".equals(nodeName)) {
- setModifiers(context.style, MStyle.SUPERSCRIPT);
- res = parent;
- } else if ("font".equals(nodeName)) {
- if (element.hasAttr("color")) {
- context.style.setForegroundColor(htmlToColor(element.attr("color").toLowerCase()));
- }
- if (element.hasAttr("face")) {
- // TODO double check this
- context.style.setFontName(element.attr("face"));
- }
- if (element.hasAttr(SIZE_ATTR)) {
- context.style.setFontSize(fontSizeToPoint(element.attr(SIZE_ATTR)));
- }
- res = parent;
- } else if ("a".equals(nodeName)) {
- context.linkTargetURI = toURI(context.baseURI, element.attr(HREF_ATTR));
- if (element.hasAttr("title")) {
- context.linkTitle = element.attr("title");
- }
- res = parent;
- } else if ("br".equals(nodeName)) {
- final MList parentContents = (MList) parent.getContents();
- parentContents.add(MPagination.ligneBreak);
- res = parent;
- } else if ("li".equals(nodeName)) {
- res = createMParagraph(context, parent, element, context.numberingID.longValue(),
- context.numberingLevel - 1);
- isNumbering = true;
- } else if (OL_TAG.equals(nodeName)) {
- setOrderedListNumbering(context, element);
- isNumbering = true;
- res = parent;
- } else if (UL_TAG.equals(nodeName)) {
- setUnorderedListNumbering(context, element);
- isNumbering = true;
- res = parent;
- } else if ("img".equals(nodeName)) {
- final MImage mImage = createMImage(context, element);
- final MList parentContents = (MList) parent.getContents();
- parentContents.add(mImage);
- res = parent;
- } else if ("big".equals(nodeName)) {
- setBigFont(context);
- res = parent;
- } else if ("small".equals(nodeName)) {
- setSmallFont(context);
- res = parent;
- } else
- if ("tt".equals(nodeName) || "code".equals(nodeName) || "samp".equals(nodeName) || "kbd".equals(nodeName)) {
- context.style.setFontName(COURIER_NEW_FONT);
+ } else if (EM_TAG.equals(nodeName) || I_TAG.equals(nodeName) || VAR_TAG.equals(nodeName)
+ || CITE_TAG.equals(nodeName)) {
+ setModifiers(context.style, MStyle.FONT_ITALIC);
res = parent;
- } else if ("h1".equals(nodeName)) {
- res = createHeading(parent, context, element, H1_FONT_SIZE);
- } else if ("h2".equals(nodeName)) {
- res = createHeading(parent, context, element, H2_FONT_SIZE);
- } else if ("h3".equals(nodeName)) {
- res = createHeading(parent, context, element, H3_FONT_SIZE);
- } else if ("h4".equals(nodeName)) {
- res = createHeading(parent, context, element, H4_FONT_SIZE);
- } else if ("h5".equals(nodeName)) {
- res = createHeading(parent, context, element, H5_FONT_SIZE);
- } else if ("h6".equals(nodeName)) {
- res = createHeading(parent, context, element, H6_FONT_SIZE);
- } else if (CENTER_TAG.equals(nodeName)) {
- res = createMParagraph(context, parent, element, null, null);
- res.setHAlignment(HAlignment.CENTER);
- } else {
+ } else if (S_TAG.equals(nodeName) || STRIKE_TAG.equals(nodeName) || DEL_TAG.equals(nodeName)) {
+ setModifiers(context.style, MStyle.FONT_STRIKE_THROUGH);
res = parent;
- }
+ } else if (U_TAG.equals(nodeName) || INS_TAG.equals(nodeName)) {
+ setModifiers(context.style, MStyle.FONT_UNDERLINE);
+ res = parent;
+ } else if (SUB_TAG.equals(nodeName)) {
+ setModifiers(context.style, MStyle.SUBSCRIPT);
+ res = parent;
+ } else if (SUP_TAG.equals(nodeName)) {
+ setModifiers(context.style, MStyle.SUPERSCRIPT);
+ res = parent;
+ } else if (FONT_TAG.equals(nodeName)) {
+ if (element.hasAttr("color")) {
+ context.style.setForegroundColor(htmlToColor(element.attr("color").toLowerCase()));
+ }
+ if (element.hasAttr("face")) {
+ // TODO double check this
+ context.style.setFontName(element.attr("face"));
+ }
+ if (element.hasAttr(SIZE_ATTR)) {
+ context.style.setFontSize(fontSizeToPoint(element.attr(SIZE_ATTR)));
+ }
+ res = parent;
+ } else if (A_TAG.equals(nodeName)) {
+ context.linkTargetURI = toURI(context.baseURI, element.attr(HREF_ATTR));
+ if (element.hasAttr("title")) {
+ context.linkTitle = element.attr("title");
+ }
+ res = parent;
+ } else if (BR_TAG.equals(nodeName)) {
+ final MList parentContents = (MList) parent.getContents();
+ parentContents.add(MPagination.ligneBreak);
+ res = parent;
+ } else if ("li".equals(nodeName)) {
+ res = createMParagraph(context, parent, element, context.numberingID.longValue(),
+ context.numberingLevel - 1);
+ isNumbering = true;
+ } else if (OL_TAG.equals(nodeName)) {
+ setOrderedListNumbering(context, element);
+ isNumbering = true;
+ res = parent;
+ } else if (UL_TAG.equals(nodeName)) {
+ setUnorderedListNumbering(context, element);
+ isNumbering = true;
+ res = parent;
+ } else if ("img".equals(nodeName)) {
+ final MImage mImage = createMImage(context, element);
+ final MList parentContents = (MList) parent.getContents();
+ parentContents.add(mImage);
+ res = parent;
+ } else if (BIG_TAG.equals(nodeName)) {
+ setBigFont(context);
+ res = parent;
+ } else if (SMALL_TAG.equals(nodeName)) {
+ setSmallFont(context);
+ res = parent;
+ } else if ("tt".equals(nodeName) || "code".equals(nodeName) || "samp".equals(nodeName)
+ || "kbd".equals(nodeName)) {
+ context.style.setFontName(COURIER_NEW_FONT);
+ res = parent;
+ } else if ("h1".equals(nodeName)) {
+ res = createHeading(parent, context, element, H1_FONT_SIZE);
+ } else if ("h2".equals(nodeName)) {
+ res = createHeading(parent, context, element, H2_FONT_SIZE);
+ } else if ("h3".equals(nodeName)) {
+ res = createHeading(parent, context, element, H3_FONT_SIZE);
+ } else if ("h4".equals(nodeName)) {
+ res = createHeading(parent, context, element, H4_FONT_SIZE);
+ } else if ("h5".equals(nodeName)) {
+ res = createHeading(parent, context, element, H5_FONT_SIZE);
+ } else if ("h6".equals(nodeName)) {
+ res = createHeading(parent, context, element, H6_FONT_SIZE);
+ } else if (CENTER_TAG.equals(nodeName)) {
+ res = createMParagraph(context, parent, element, null, null);
+ res.setHAlignment(HAlignment.CENTER);
+ } else {
+ res = parent;
+ }
if (!isNumbering) {
context.numbering = null;
@@ -1688,13 +1904,13 @@ private void setOrderedListNumbering(Context context, Element element) {
} else if ("A".equals(typeStr)
|| CSS_PARSER.hasCSS(context.cssProperties, M2DocCSSParser.CSS_LIST_STYLE_TYPE, "upper-alpha")) {
type = STNumberFormat.UPPER_LETTER;
- } else if ("a".equals(typeStr)
+ } else if (A_TAG.equals(typeStr)
|| CSS_PARSER.hasCSS(context.cssProperties, M2DocCSSParser.CSS_LIST_STYLE_TYPE, "lower-alpha")) {
type = STNumberFormat.LOWER_LETTER;
} else if ("I".equals(typeStr)
|| CSS_PARSER.hasCSS(context.cssProperties, M2DocCSSParser.CSS_LIST_STYLE_TYPE, "upper-roman")) {
type = STNumberFormat.UPPER_ROMAN;
- } else if ("i".equals(typeStr) || CSS_PARSER.hasCSS(context.cssProperties,
+ } else if (I_TAG.equals(typeStr) || CSS_PARSER.hasCSS(context.cssProperties,
M2DocCSSParser.CSS_LIST_STYLE_TYPE, "lower-roman")) {
type = STNumberFormat.LOWER_ROMAN;
} else
diff --git a/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.OK.docx b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.OK.docx
new file mode 100644
index 000000000..4aa7a930a
Binary files /dev/null and b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.OK.docx differ
diff --git a/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.docx b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.docx
index 912d6f006..806562ce8 100644
Binary files a/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.docx and b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/br-expected-generation.docx differ
diff --git a/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/doc.html b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/doc.html
index 7c7165279..87e9988ed 100644
--- a/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/doc.html
+++ b/tests/org.obeonetwork.m2doc.html.tests/resources/html/br/doc.html
@@ -2,9 +2,69 @@
-
-To break lines
in a text,
use the br element.
-
+ To break lines
in a text,
use the br element.
+ ********************
+
+ ********************
+
+
+ ********************
+
+
+
+ ********************
+ A Line
+ ********************
+
A Line
+ ********************
+
A Line
+ ********************
+ A Line
+
+ - Item1
+ - Item2
+ - Item3
+
+ ********************
+
+ To break lines
in a text,
use the br element.
+
+ ********************
+
+
+
+ ********************
+
+
+
+
+ ********************
+
+
+
+
+
+ ********************
+
+ A Line
+
+ ********************
+
+
A Line
+
+ ********************
+
+
A Line
+
+ ********************
+
+ A Line
+
+ - Item1
+ - Item2
+ - Item3
+
+