Skip to content

Commit

Permalink
I've made a huge mistake
Browse files Browse the repository at this point in the history
fix all parser accuracy regressions with this one weird trick!
(+ several other experimental changes)
  • Loading branch information
Nate Parrott committed Jan 16, 2015
1 parent 0065a7a commit 87f6103
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ - (NSArray *)ps_tokenize {
NSMutableArray *tokens = [NSMutableArray new];
NSLinguisticTagger *tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:@[NSLinguisticTagSchemeTokenType] options:0];
tagger.string = self;
__block NSString *prevText = nil;
[tagger enumerateTagsInRange:NSMakeRange(0, self.length) scheme:NSLinguisticTagSchemeTokenType options:0 usingBlock:^(NSString *tag, NSRange tokenRange, NSRange sentenceRange, BOOL *stop) {
NSString *text = [self substringWithRange:tokenRange];
if ([tag isEqualToString:NSLinguisticTagWhitespace]) {
Expand All @@ -43,7 +44,9 @@ - (NSArray *)ps_tokenize {
} else {
PSToken *token = [PSToken new];
token.original = text;
token.features = @[token.original];
NSString *bigram = [NSString stringWithFormat:@"%@-%@", prevText, text];
prevText = text;
token.features = @[token.original, bigram];
[tokens addObject:token];
}
}];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@
- (void)addItem:(id)item;
- (NSEnumerator *)allItems;
- (double)smoothedLogProbForItem:(id)item;
- (double)specialTextProbabilityForItem:(id)item;

@end
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,10 @@ - (void)ps_mergeWith:(PSProbabilityCounter *)other allowUnmergeableTypes:(BOOL)a
}
}

/// Log-probability score for `item` derived from how often it has been counted.
///
/// The stored count c is mapped to c / (c + 1), written here as 1 - 1 / (c + 1),
/// so an item with no recorded count maps to 0 and the value approaches 1 as the
/// count grows. That value is then rescaled into the interval
/// [PSMinimalProbability, 1] before being handed to PSLogProb.
///
/// @param item Key looked up in `countsForItems`.
/// @return The result of PSLogProb applied to the rescaled value.
- (double)specialTextProbabilityForItem:(id)item {
    // A missing entry yields nil, and messaging nil returns 0.0 for doubleValue.
    NSNumber *storedCount = self.countsForItems[item];
    double occurrences = [storedCount doubleValue];

    double saturation = 1 - 1.0 / (occurrences + 1);
    double flooredProbability = PSMinimalProbability + saturation * (1 - PSMinimalProbability);
    return PSLogProb(flooredProbability);
}

@end
4 changes: 2 additions & 2 deletions FlashlightApp/FlashlightKit/FlashlightKit/Parser/Parsnip.m
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,12 @@ - (void)addNewCandidatesToDictionary:(NSMutableDictionary *)dict withCandidate:(

- (double)logProbOfEmissionOfToken:(PSToken *)token fromTerminalNodeNamed:(NSString *)tagName {
if ([PSTerminalNode isNameOfFreeTextNode:tagName]) {
return PSSmoothLogProb(PSLogProb(PSMinimalProbability));
return PSSmoothLogProb(PSLogProb(PSFreeTextProbability));
} else {
PSProbabilityCounter *counter = self.emissionProbs[tagName];
double logProb = 0;
for (id feature in token.features) {
logProb += [counter smoothedLogProbForItem:feature];
logProb += [counter specialTextProbabilityForItem:feature];
}
return logProb;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
objects = {

/* Begin PBXBuildFile section */
5F249B291A5CFCA3001FEF8F /* FlashlightKit.framework in CopyFiles */ = {isa = PBXBuildFile; fileRef = 5FC13E5D1A4E13B8008A0FE3 /* FlashlightKit.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
431E20151A68906E00C2BD39 /* FlashlightKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 431E20101A68905E00C2BD39 /* FlashlightKit.framework */; };
5F7BBDB71A5E490F00377A9F /* Icon.icns in Resources */ = {isa = PBXBuildFile; fileRef = 5F7BBDB61A5E490F00377A9F /* Icon.icns */; };
5F7BBDB91A5E503E00377A9F /* bundle.icns in Resources */ = {isa = PBXBuildFile; fileRef = 5F7BBDB81A5E503E00377A9F /* bundle.icns */; };
5F7BBDBB1A5E515800377A9F /* say.bundle in Resources */ = {isa = PBXBuildFile; fileRef = 5F7BBDBA1A5E515800377A9F /* say.bundle */; };
Expand All @@ -16,10 +16,30 @@
5FC13D4B1A4CD847008A0FE3 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5FC13D491A4CD847008A0FE3 /* MainMenu.xib */; };
5FC13D571A4CD847008A0FE3 /* FlashlightToolTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 5FC13D561A4CD847008A0FE3 /* FlashlightToolTests.m */; };
5FC13DE31A4E0705008A0FE3 /* WebKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5FC13DE21A4E0705008A0FE3 /* WebKit.framework */; };
5FC13E5E1A4E13B8008A0FE3 /* FlashlightKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5FC13E5D1A4E13B8008A0FE3 /* FlashlightKit.framework */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
431E200F1A68905E00C2BD39 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = 5FC13DEE1A4E12DB008A0FE3;
remoteInfo = FlashlightKit;
};
431E20111A68905E00C2BD39 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = 5FC13DF91A4E12DB008A0FE3;
remoteInfo = FlashlightKitTests;
};
431E20131A68906A00C2BD39 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = 5FC13DED1A4E12DB008A0FE3;
remoteInfo = FlashlightKit;
};
5FC13D511A4CD847008A0FE3 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 5FC13D351A4CD847008A0FE3 /* Project object */;
Expand All @@ -36,13 +56,13 @@
dstPath = "";
dstSubfolderSpec = 10;
files = (
5F249B291A5CFCA3001FEF8F /* FlashlightKit.framework in CopyFiles */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = FlashlightKit.xcodeproj; path = ../FlashlightKit/FlashlightKit.xcodeproj; sourceTree = "<group>"; };
5F7BBDB61A5E490F00377A9F /* Icon.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = Icon.icns; sourceTree = "<group>"; };
5F7BBDB81A5E503E00377A9F /* bundle.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; path = bundle.icns; sourceTree = "<group>"; };
5F7BBDBA1A5E515800377A9F /* say.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = say.bundle; sourceTree = "<group>"; };
Expand All @@ -56,15 +76,14 @@
5FC13D551A4CD847008A0FE3 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
5FC13D561A4CD847008A0FE3 /* FlashlightToolTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = FlashlightToolTests.m; sourceTree = "<group>"; };
5FC13DE21A4E0705008A0FE3 /* WebKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = WebKit.framework; path = System/Library/Frameworks/WebKit.framework; sourceTree = SDKROOT; };
5FC13E5D1A4E13B8008A0FE3 /* FlashlightKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = FlashlightKit.framework; path = "../../../../../Library/Developer/Xcode/DerivedData/Flashlight-ecbbmgoifutwxzgzjgruzjyizltl/Build/Products/Debug/FlashlightKit.framework"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
5FC13D3A1A4CD847008A0FE3 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
5FC13E5E1A4E13B8008A0FE3 /* FlashlightKit.framework in Frameworks */,
431E20151A68906E00C2BD39 /* FlashlightKit.framework in Frameworks */,
5FC13DE31A4E0705008A0FE3 /* WebKit.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand All @@ -79,10 +98,19 @@
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
431E200B1A68905E00C2BD39 /* Products */ = {
isa = PBXGroup;
children = (
431E20101A68905E00C2BD39 /* FlashlightKit.framework */,
431E20121A68905E00C2BD39 /* FlashlightKitTests.xctest */,
);
name = Products;
sourceTree = "<group>";
};
5FC13D341A4CD847008A0FE3 = {
isa = PBXGroup;
children = (
5FC13E5D1A4E13B8008A0FE3 /* FlashlightKit.framework */,
431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */,
5FC13DE21A4E0705008A0FE3 /* WebKit.framework */,
5FC13D3F1A4CD847008A0FE3 /* FlashlightTool */,
5FC13D531A4CD847008A0FE3 /* FlashlightToolTests */,
Expand Down Expand Up @@ -154,6 +182,7 @@
buildRules = (
);
dependencies = (
431E20141A68906A00C2BD39 /* PBXTargetDependency */,
);
name = FlashlightTool;
productName = FlashlightTool;
Expand Down Expand Up @@ -207,6 +236,12 @@
mainGroup = 5FC13D341A4CD847008A0FE3;
productRefGroup = 5FC13D3E1A4CD847008A0FE3 /* Products */;
projectDirPath = "";
projectReferences = (
{
ProductGroup = 431E200B1A68905E00C2BD39 /* Products */;
ProjectRef = 431E200A1A68905E00C2BD39 /* FlashlightKit.xcodeproj */;
},
);
projectRoot = "";
targets = (
5FC13D3C1A4CD847008A0FE3 /* FlashlightTool */,
Expand All @@ -215,6 +250,23 @@
};
/* End PBXProject section */

/* Begin PBXReferenceProxy section */
431E20101A68905E00C2BD39 /* FlashlightKit.framework */ = {
isa = PBXReferenceProxy;
fileType = wrapper.framework;
path = FlashlightKit.framework;
remoteRef = 431E200F1A68905E00C2BD39 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
431E20121A68905E00C2BD39 /* FlashlightKitTests.xctest */ = {
isa = PBXReferenceProxy;
fileType = wrapper.cfbundle;
path = FlashlightKitTests.xctest;
remoteRef = 431E20111A68905E00C2BD39 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
/* End PBXReferenceProxy section */

/* Begin PBXResourcesBuildPhase section */
5FC13D3B1A4CD847008A0FE3 /* Resources */ = {
isa = PBXResourcesBuildPhase;
Expand Down Expand Up @@ -257,6 +309,11 @@
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
431E20141A68906A00C2BD39 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = FlashlightKit;
targetProxy = 431E20131A68906A00C2BD39 /* PBXContainerItemProxy */;
};
5FC13D521A4CD847008A0FE3 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = 5FC13D3C1A4CD847008A0FE3 /* FlashlightTool */;
Expand Down

0 comments on commit 87f6103

Please sign in to comment.