Tengo la siguiente clase:
http://www.phpclasses.org/package/3245-PHP-Automatically-suggest-keywords-from-content-text.html<?php
class autokeyword {
//declare variables
//the site contents
var $contents;
var $encoding;
//the generated keywords
var $keywords;
//minimum word length for inclusion into the single word
//metakeys
var $wordLengthMin;
var $wordOccuredMin;
//minimum word length for inclusion into the 2 word
//phrase metakeys
var $word2WordPhraseLengthMin;
var $phrase2WordLengthMinOccur;
//minimum word length for inclusion into the 3 word
//phrase metakeys
var $word3WordPhraseLengthMin;
//minimum phrase length for inclusion into the 2 word
//phrase metakeys
var $phrase2WordLengthMin;
var $phrase3WordLengthMinOccur;
//minimum phrase length for inclusion into the 3 word
//phrase metakeys
var $phrase3WordLengthMin;
function autokeyword($params, $encoding)
{
//get parameters
$this->encoding = $encoding;
$this->contents = $this->replace_chars($params['content']);
// single word
$this->wordLengthMin = $params['min_word_length'];
$this->wordOccuredMin = $params['min_word_occur'];
// 2 word phrase
$this->word2WordPhraseLengthMin = $params['min_2words_length'];
$this->phrase2WordLengthMin = $params['min_2words_phrase_length'];
$this->phrase2WordLengthMinOccur = $params['min_2words_phrase_occur'];
// 3 word phrase
$this->word3WordPhraseLengthMin = $params['min_3words_length'];
$this->phrase3WordLengthMin = $params['min_3words_phrase_length'];
$this->phrase3WordLengthMinOccur = $params['min_3words_phrase_occur'];
//parse single, two words and three words
}
function get_keywords()
{
$keywords = $this->parse_words().$this->parse_2words().$this->parse_3words();
return substr($keywords, 0, -2); }
//turn the site contents into an array
//then replace common html tags.
function replace_chars($content)
{
//convert all characters to lower case
//$content = mb_strtolower($content, "UTF-8");
$punctuations = array(',', ')', '(', '.', "'", '"', '<', '>', '!', '?', '“', '”', '/', '-',
'_', '[', ']', ':', ';', '+', '=', '#',
'$', '"', '©', '039;', 'nbsp;', 'nbsp',
'á', 'é', '•', 'bull;', 'aacute', 'eacute', 'uacute', 'ntilde', 'Ntilde', 'oacute', 'iacute', 'í',
'ó', 'ú', ' ', '¿', '…', 'hellip', '¡', 'iexcl;', 'iquest;', 'iquest', 'iexcl', 'nbsp;', 'ñ', 'mdash;', '—', 'mdash', '&mdash', 'ldquo;', '“', 'ldquo', '&ldquo', 'rdquo;', '”', 'rdquo', '&rdquo', 'Ñ', '&039;', '&', '>', '<',
// replace multiple gaps
return $content;
}
//single words META KEYWORDS
function parse_words()
{
//list of commonly used words
// this can be edited to suit your needs
$common = array("más", "039;m"); //create an array out of the site contents
$s = split(" ", $this->contents); //initialize array
//iterate inside the array
foreach( $s as $key=>$val ) {
//delete single or two letter words and
//Add it to the list if the word is not
//contained in the common words list.
}
}
//count the words
//sort the words from
//highest count to the
//lowest.
$occur_filtered = $this->occure_filter($k, $this->wordOccuredMin);
$imploded = $this->implode(", ", $occur_filtered);
//release unused variables
return $imploded;
}
function parse_2words()
{
//create an array out of the site contents
$x = split(" ", $this->contents); //initilize array
//$y = array();
for ($i=0; $i < count($x)-1; $i++) { //delete phrases lesser than 5 characters
if( (mb_strlen(trim($x[$i])) >= $this->word2WordPhraseLengthMin ) && (mb_strlen(trim($x[$i+1])) >= $this->word2WordPhraseLengthMin) ) {
}
}
//count the 2 word phrases
$occur_filtered = $this->occure_filter($y, $this->phrase2WordLengthMinOccur);
//sort the words from highest count to the lowest.
$imploded = $this->implode(", ", $occur_filtered);
//release unused variables
return $imploded;
}
function parse_3words()
{
//create an array out of the site contents
$a = split(" ", $this->contents); //initilize array
for ($i=0; $i < count($a)-2; $i++) { //delete phrases lesser than 5 characters
{
}
}
//count the 3 word phrases
//sort the words from
//highest count to the
//lowest.
$occur_filtered = $this->occure_filter($b, $this->phrase3WordLengthMinOccur);
$imploded = $this->implode(", ", $occur_filtered);
//release unused variables
return $imploded;
}
function occure_filter($array_count_values, $min_occur)
{
$occur_filtered = array(); foreach ($array_count_values as $word => $occured) {
if ($occured >= $min_occur) {
$occur_filtered[$word] = $occured;
}
}
return $occur_filtered;
}
{
$c = "";
foreach($array as $key=>$val) {
@$c .= $key.$gule;
}
return $c;
}
}
?>
Lo que hace la clase es buscar palabras repetidas y mostrarlas
Tengo el siguiente texto
"La convocatoria de reunión es una notificación. Reunión. Reunión. Reunión"
Deberia marcarme Reunión, pero lo que me marca es solamente reuni y se corta.
Alguien sabe a que se debe el problema, o como lo podria arreglar?
La palabra es reunión y no reunión solo
Gracias
Salu2