-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchunk.php
85 lines (65 loc) · 2.23 KB
/
chunk.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
<?php
include('myParseTree.php');
function chunkAfile($inputFileName, $outputFileName){
$inputFile = fopen($inputFileName, "r") or die("Can't open input file!");
$outputFile = fopen($outputFileName, "a") or die("Can't open output file!");
// read the input file by line
while (($input = fgets($inputFile)) !== false) {
$input = str_replace("\n", "", $input);
$input = substr($input, 2);
$input = substr($input, 0, -2);
$input = str_replace("((", "( (", $input);
$input = str_replace("((", "( (", $input);
$input = str_replace("))", ") )", $input);
$input = str_replace("))", ") )", $input);
$tokens = explode(" ", $input);
unset($rootNode);
unset($currentNode);
foreach($tokens as $token){
if(strcmp(substr($token, 0, 1), "(") == 0){
//got a new phrase, create a new leaf
$tokenCategory = trim(substr($token, 1));
if(!isset($rootNode)){
$rootNode = new Node($tokenCategory);
$currentNode = $rootNode;
$currentNode->level = 0;
}else{
$newNode = new Node($tokenCategory);
$newNode->setParent($currentNode);
$newNode->level = $currentNode->level + 1;
if(!$rootNode->hasChildren())
$rootNode->addChild($newNode);
else
$currentNode->addChild($newNode);
$currentNode = $newNode;
}
}elseif(strcmp(substr($token, -1, 1), ")") == 0){
//phrase ended
//if it was a word, add to the current leaf, else go to the parent
$tokenWord = substr($token, 0, -1);
if(strlen($tokenWord) > 0){
$currentNode->setWord($tokenWord);
}
if($currentNode->getParent() != null)
$currentNode = $currentNode->getParent();
}
}
$wordCount = str_word_count($rootNode->traverse('inorder', ''));
$chunkSize = ceil($wordCount/4);
$finalChunks = array();
$rootNode->getChunksToSize($rootNode, $chunkSize, $finalChunks);
while(count($finalChunks) > 10){
$finalChunks = array();
$rootNode->clearInnerChunks($rootNode);
$chunkSize = $chunkSize * 1.5;
$rootNode->getChunksToSize($rootNode, $chunkSize, $finalChunks);
}
$finalChunks = array_reverse($finalChunks);
foreach($finalChunks as $finalChunk){
fwrite($outputFile, $finalChunk."\n");
}
fwrite($outputFile, "\n");
}
fclose($inputFile);
fclose($outputFile);
}