Skip to content

Commit

Permalink
Improved auto-scraping accuracy - #101
Browse files Browse the repository at this point in the history
  • Loading branch information
Asnivor committed Jun 27, 2017
1 parent 9de1b5b commit 2151ee6
Showing 1 changed file with 28 additions and 31 deletions.
59 changes: 28 additions & 31 deletions MedLaunch/Classes/Scraper/ScraperSearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -166,39 +166,36 @@ public string StripSymbols(string i)
// remove all (xxx), [xxx]
string regex = "(\\[.*\\])|(\\(.*\\))";
string s = Regex.Replace(i, regex, "").Replace("()", "").Replace("[]", "").ToLower().Trim();

// add this to the class
SearchString = s;
// remove all - : _ '
s = s.Replace(" - ", " ").Replace("_", " ").Replace(": ", " ").Replace(" : ", " ").Replace(":", "").Replace("'", "").Replace("-", " ").Replace(".", "").Trim();
// remove all roman numerals
/*
s.Replace(" I", " ");
s.Replace(" II ", " ").Replace(" II", " ");
s.Replace(" III ", " ").Replace(" III", " ");
s.Replace(" IV ", " ").Replace(" IV", " ");
s.Replace(" V ", " ");
s.Replace(" VI ", " ").Replace(" VI", " ");
s.Replace(" VII ", " ").Replace(" VII", " ");
s.Replace(" VIII ", " ").Replace(" VIII", " ");
s.Replace(" IX ", " ").Replace(" IX", " ");
s.Replace(" X ", " ");
s.Replace(" XI ", " ").Replace(" XI", " ");
s.Replace(" XII ", " ").Replace(" XII", " ");
// replace ending numbers
string[] arr = BuildArray(s);
string l = arr[arr.Length - 1];
foreach (char c in l)
{
if (char.IsDigit(c))
{
arr = arr.Take(arr.Count() - 1).ToArray();
break;
}
}
s = BuildSearchString(arr);
*/
return s;

// remove all extraneous symbols
s = s.Replace(" - ", " ").Replace("_", " ").Replace(": ", " ")
.Replace(" : ", " ").Replace(":", "").Replace("'", "")
.Replace("-", " ").Replace(".", "").Replace(" ", " ").Trim();

// convert all digits to roman numerals
s = s.Replace(" 2", " II")
.Replace(" 3", " III")
.Replace(" 4", " IV")
.Replace(" 5", " V")
.Replace(" 6", " VI")
.Replace(" 7", " VII")
.Replace(" 8", " VIII")
.Replace(" 9", " IX")
.Replace(" 10", " X")
.Replace(" 11", " XI")
.Replace(" 12", " XII")
.Replace(" 13", " XIII")
.Replace(" 14", " XIV")
.Replace(" 15", " XV")
.Replace(" 16", " XVI")
.Replace(" 17", " XVII")
.Replace(" 18", "XVIII");


return s.ToLower();
}

/// <summary>
Expand Down

0 comments on commit 2151ee6

Please sign in to comment.