Skip to content

Commit

Permalink
1.0.3
Browse files Browse the repository at this point in the history
improvements to display of output
  • Loading branch information
d-shapiro committed Apr 24, 2019
1 parent b669030 commit ef76200
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "wikiClade"

version := "1.0.2"
version := "1.0.3"

scalaVersion := "2.12.8"

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/cladograms/Cladogram.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Cladogram (val clade: Clade, var children: Set[Cladogram]) {
def prominentDescendants(verbosity: Int): Set[Cladogram] = for {
child <- children
descendant =
if (child.children.size == 1 && !child.children.head.clade.shouldDisplay(verbosity))
if (child.children.size == 1 && !child.clade.shouldDisplay(verbosity))
child.prominentDescendants(verbosity).head //guaranteed to have exactly one element
else child
} yield descendant
Expand Down
21 changes: 12 additions & 9 deletions src/main/scala/cladograms/EnWikipediaClade.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@ import org.jsoup.nodes.{Document, Element}
import org.jsoup.select.Elements

import scala.util.{Failure, Success, Try}

/**
* Created by Daniel on 4/8/2019.
*/
case class EnWikipediaClade(val name: String, val path: Option[String], val priorityOverride: Double = 100) extends Clade {
val baseUrl = "https://en.wikipedia.org"
val ignorableCladeTypes = Set("Clade", "(unranked)")
val importantCladeTypes = Set("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")

lazy val meta: WikiCladeMetadata = getMeta

/** Ancestor clades of this clade, read from the lazily initialised `meta`. */
def ancestors: List[Clade] = meta.ancestors
def priority: Double = Math.min(priorityOverride, meta.docPriority)
/**
 * Display priority of this clade.
 *
 * The result is the smallest of three values: the explicit `priorityOverride`,
 * the priority stored in the metadata (`meta.docPriority`), and a ceiling that
 * is 20 when the clade type is one of `importantCladeTypes` and 100 otherwise.
 */
def priority: Double = {
  val basePriority = Math.min(priorityOverride, meta.docPriority)
  val rankCeiling = if (importantCladeTypes contains meta.cladeType) 20 else 100
  Math.min(basePriority, rankCeiling)
}

/**
 * Decides whether this clade is shown at the given verbosity level.
 *
 * @param verbosity the requested verbosity level
 * @return true when `priority` does not exceed `verbosity`
 */
override def shouldDisplay(verbosity: Int): Boolean = verbosity >= priority

override def DOTDefinition: Option[String] = {
val cladeTypeStr = if (meta.cladeType.isEmpty) "" else meta.cladeType + " <br/>"
val cladeTypeStr = if (meta.cladeType.isEmpty) "" else s"""<FONT POINT-SIZE=\"10\">${meta.cladeType}</FONT><br/>"""
Some(s""""$name" [label=<$cladeTypeStr<B>$name</B>>]""")
}

Expand All @@ -37,7 +40,7 @@ case class EnWikipediaClade(val name: String, val path: Option[String], val prio
else new EnWikipediaClade(details.name, Some(details.path))
}
val docPriority = priorityBasedOnDoc(docOpt)
WikiCladeMetadata(ancestors, cladeType, docPriority)
WikiCladeMetadata(ancestors, sanitizeCladeType(cladeType), docPriority)
}

private def getDoc: Option[Document] = path match {
Expand Down Expand Up @@ -92,15 +95,10 @@ case class EnWikipediaClade(val name: String, val path: Option[String], val prio
val details = parseRow(row)
if (details.path.nonEmpty) {
val pagetry = Try(Jsoup.connect(baseUrl + details.path).get().select("title").text())
// val pagetry = doctry match {
// case Success(doc) => Try(doc.select("title").text())
// case Failure(e) => Failure(e)
// }
pagetry match {
case Success(page) => if (knownPages contains page) {
iter(i + 1, started, knownPages, TaxonDetails(details.name, details.cladeType, "") :: taxList)
} else {
//val pri = priorityBasedOnDoc(doctry.get)
iter(i + 1, started, knownPages + page, details :: taxList)
}
case Failure(_) =>
Expand All @@ -116,6 +114,11 @@ case class EnWikipediaClade(val name: String, val path: Option[String], val prio
iter(0, false, Set(), List())
}

/**
 * Normalises a raw clade-type label.
 *
 * Every colon is removed and surrounding whitespace is trimmed. If the
 * resulting label is a member of `ignorableCladeTypes`, the empty string is
 * returned instead.
 *
 * @param cladeType the raw clade-type text
 * @return the cleaned label, or "" when the label is ignorable
 */
private def sanitizeCladeType(cladeType: String): String = {
  val withoutColons = cladeType.replace(":", "")
  withoutColons.trim match {
    case ignorable if ignorableCladeTypes(ignorable) => ""
    case kept => kept
  }
}

def priorityBasedOnDoc(docOpt: Option[Document]): Double = docOpt match {
case Some(doc) => Math.min (99, Math.max (1, 100 - (15 * (Math.log (doc.text ().length) - 7) ) ) )
case None => 99
Expand Down

0 comments on commit ef76200

Please sign in to comment.