diff --git a/Service/Analyzer.php b/Service/Analyzer.php index 1c8e276..98ba856 100644 --- a/Service/Analyzer.php +++ b/Service/Analyzer.php @@ -10,24 +10,22 @@ class Analyzer public function __construct() { + $this->tree = new \stdClass(); $this->tree->dictionary = new Dictionary(); } - public function all_parents($domElement, $function) + private function analyze_all_parents(&$domElement) { - if ($parent = $domElement->parentNode) { - + $parent = $domElement->parentNode; + if ($parent) { //As long as the top element isn't the document if ($parent->nodeName != "#document") - $this->$function($parent); - - return $this->all_parents($parent, $function); - + $this->log_count($parent); + return $this->analyze_all_parents($parent ); } - } - - public function log_count($node) + + public function log_count(&$node) { $count = $node->getAttribute('count') ? $node->getAttribute('count') : 0; $node->setAttribute('count', $count + 1); @@ -39,13 +37,14 @@ public function get_category($word) $first_letter = substr($word, 0, 1); $first_letter = ctype_upper($first_letter) ? $first_letter : strtoupper($first_letter) ; - // $tags = $this->tree->alpha[$first_letter]; - - //foreach ($tags as $term) { - // if (preg_match('/^(' . $term->nodeValue . ')$/i', $word)) { - // return $term; - // } - //} + $tags = $this->tree->dictionary->getTermsByLetter($first_letter); + + foreach ($tags as $term) { + + if (preg_match('/^(' . $term->nodeValue . ')$/i', $word)) { + return $term; + } + } } @@ -60,26 +59,25 @@ public function analyze($input) $this->words_input = count($tokens); $this->words_analyzed = 0; $this->input_text = $input; - - foreach ($tokens as $token) - - if ($this->get_category($token)) { - $node = $this->get_category($token); - $this->words_analyzed++; - $this->all_parents($node, 'log_count'); + + foreach ($tokens as $token){ + $node = $this->get_category($token); + if ($node) { + $this->words_analyzed++; + $this->analyze_all_parents($node); } - + } if ($this->words_analyzed > 0) return true; } public function retrieve_data( $nodepath = array() ) { - $xpath = new \DOMXpath($this->tree->dictionary); + $xpath = new \DOMXpath($this->tree->dictionary->records); $path = is_array($nodepath) ? "/" . implode($nodepath, "/") : "/*"; - $query = $path . "/*[@count]"; - - $nodes = $xpath->query($query); - + $query = '/'.$path . "/*[@count]"; + + $nodes = $xpath->query($query); + $data['columns'] = array( array( 'name' => 'Category', @@ -88,12 +86,10 @@ public function retrieve_data( $nodepath = array() ) 'name' => 'Count', 'type' => 'number') ); - + foreach ($nodes as $node) { $data['rows'][] = array($node->nodeName, $node->getAttribute('count')); } - return $data; - - } + } } diff --git a/Service/Dictionary.php b/Service/Dictionary.php index 9ae76da..5ffdc9b 100644 --- a/Service/Dictionary.php +++ b/Service/Dictionary.php @@ -2,7 +2,7 @@ namespace Cam5\RidPhp\Service; -class Dictionary implements DictionaryInterface +class Dictionary implements DictionaryInterface { const DEFAULT_RID = 'RID.CAT'; @@ -39,14 +39,20 @@ public function __construct($input = null) ); } + public function getRecords(){ + return $this->records; + } + public function initTemporaryValues() { $this->temporaryValues = new \stdClass; } - public function initRecords() + private function initRecords() { $this->records = new \DomDocument; + $this->records->appendChild($this->records->createElement('Dictionary')); + $this->records->formatOutput = true; return $this->records; @@ -76,12 +82,12 @@ public function readTabs($input, $maxTabs = 3) public function normalizeWord($word) { //EXAMPLE* (1) --> EXAMPLE.* + $word = trim($word); $word = preg_replace( - array('/(\(1\))|\s/', '/\*/'), - array('', '.*'), + array('/(\s\(1\))/', '/\*/', '/\s/'), + array('', '.*', '-'), $word ); - return $word; } @@ -89,8 +95,8 @@ public function processLine($line) { $tabs = $this->readTabs($line); $word = $this->normalizeWord($line); - $category = $this->fixTabRead(self::$enum[$tabs], $line); - + $category = $this->fixTabRead(self::$enum[$tabs], $line); + switch ($category) { case 'Primary' : case 'Secondary' : @@ -105,15 +111,16 @@ public function processLine($line) $this->handleTermNode($node, $word, $category, $originalCategory); break; } + } - public function handleCategoryNode(\DOMNode $node, $category) + private function handleCategoryNode(\DOMNode $node, $category) { $parentCategory = self::$parentEnum[$category]; - + // Append to root if primary, else last parent-level node. if ('None' === $parentCategory) { - $this->temporaryValues->$category = $this->records->appendChild( + $this->temporaryValues->$category = $this->records->getElementsByTagName('Dictionary')->item(0)->appendChild( $this->records->importNode($node) ); } else { @@ -126,27 +133,28 @@ public function handleCategoryNode(\DOMNode $node, $category) return $this->temporaryValues->$category; } - public function handleTermNode(\DOMNode $node, $word, $category, $originalCategory) + private function handleTermNode(\DOMNode $node, $word, $category, $originalCategory) { $targetCategory = $this->getTargetCategory($word, $category, $originalCategory); - $this->temporaryValues->$targetCategory->appendChild($node); $firstLetter = substr($word, 0, 1); - $this->alphadex[$firstLetter][] = $word; + $this->alphadex[$firstLetter][] = $node; } + public function getTermsByLetter($letter){ + return $this->alphadex[$letter]; + } + public function getTargetCategory($word, $category, $originalCategory) { - $parentCategory = self::$parentEnum[$category]; - if ($category !== $originalCategory) { $targetCategory = ('Tertiary' === $originalCategory) - ? 'Primary' - : 'Secondary'; + ? 'Secondary' + : 'Primary'; } else { - $targetCategory = $parentCategory; + $targetCategory = self::$parentEnum[$category]; } return $targetCategory;