elhacker.net cabecera Bienvenido(a), Visitante. Por favor Ingresar o Registrarse
¿Perdiste tu email de activación?.


Tema destacado: Arreglado, de nuevo, el registro del warzone (wargame) de EHN


+  Foro de elhacker.net
|-+  Programación
| |-+  Desarrollo Web
| | |-+  PHP (Moderador: #!drvy)
| | | |-+  Clase de auto keywords, acentos.
0 Usuarios y 1 Visitante están viendo este tema.
Páginas: [1] Ir Abajo Respuesta Imprimir
Autor Tema: Clase de auto keywords, acentos.  (Leído 1,565 veces)
LuffyFF


Desconectado Desconectado

Mensajes: 754


Final Fantasy XIII


Ver Perfil WWW
Clase de auto keywords, acentos.
« en: 20 Julio 2011, 21:03 pm »

Tengo la siguiente clase: http://www.phpclasses.org/package/3245-PHP-Automatically-suggest-keywords-from-content-text.html

Código
  1. <?php
  2. class autokeyword {
  3.  
  4. //declare variables
  5. //the site contents
  6. var $contents;
  7. var $encoding;
  8. //the generated keywords
  9. var $keywords;
  10. //minimum word length for inclusion into the single word
  11. //metakeys
  12. var $wordLengthMin;
  13. var $wordOccuredMin;
  14. //minimum word length for inclusion into the 2 word
  15. //phrase metakeys
  16. var $word2WordPhraseLengthMin;
  17. var $phrase2WordLengthMinOccur;
  18. //minimum word length for inclusion into the 3 word
  19. //phrase metakeys
  20. var $word3WordPhraseLengthMin;
  21. //minimum phrase length for inclusion into the 2 word
  22. //phrase metakeys
  23. var $phrase2WordLengthMin;
  24. var $phrase3WordLengthMinOccur;
  25. //minimum phrase length for inclusion into the 3 word
  26. //phrase metakeys
  27. var $phrase3WordLengthMin;
  28.  
  29. function autokeyword($params, $encoding)
  30. {
  31.  
  32. //get parameters
  33. $this->encoding = $encoding;
  34.  
  35.  
  36. $this->contents = $this->replace_chars($params['content']);
  37.  
  38. // single word
  39. $this->wordLengthMin = $params['min_word_length'];
  40. $this->wordOccuredMin = $params['min_word_occur'];
  41.  
  42. // 2 word phrase
  43. $this->word2WordPhraseLengthMin = $params['min_2words_length'];
  44. $this->phrase2WordLengthMin = $params['min_2words_phrase_length'];
  45. $this->phrase2WordLengthMinOccur = $params['min_2words_phrase_occur'];
  46.  
  47. // 3 word phrase
  48. $this->word3WordPhraseLengthMin = $params['min_3words_length'];
  49. $this->phrase3WordLengthMin = $params['min_3words_phrase_length'];
  50. $this->phrase3WordLengthMinOccur = $params['min_3words_phrase_occur'];
  51.  
  52. //parse single, two words and three words
  53.  
  54. }
  55.  
  56. function get_keywords()
  57. {
  58. $keywords = $this->parse_words().$this->parse_2words().$this->parse_3words();
  59. return substr($keywords, 0, -2);
  60. }
  61.  
  62. //turn the site contents into an array
  63. //then replace common html tags.
  64. function replace_chars($content)
  65. {
  66. //convert all characters to lower case
  67. $content = mb_strtolower($content);
  68. //$content = mb_strtolower($content, "UTF-8");
  69. $content = strip_tags($content);
  70.  
  71. $punctuations = array(',', ')', '(', '.', "'", '"',
  72. '<', '>', '!', '?', '&ldquo;', '&rdquo;', '/', '-',
  73. '_', '[', ']', ':', ';', '+', '=', '#',
  74. '$', '&quot;', '&copy;', '039;', 'nbsp;', 'nbsp',
  75. '&aacute;', '&eacute;', '&bull;', 'bull;', 'aacute', 'eacute', 'uacute', 'ntilde', 'Ntilde', 'oacute', 'iacute', '&iacute;',
  76. '&oacute;', '&uacute', '&nbsp;', '&iquest;', '&hellip;', 'hellip', '&iexcl;', 'iexcl;', 'iquest;', 'iquest', 'iexcl', 'nbsp;', '&ntilde', 'mdash;', '&mdash;', 'mdash', '&mdash', 'ldquo;', '&ldquo;', 'ldquo', '&ldquo', 'rdquo;', '&rdquo;', 'rdquo', '&rdquo', '&Ntilde', '&039;', '&', '&gt;', '&lt;',
  77. chr(10), chr(13), chr(9));
  78.  
  79. $content = str_replace($punctuations, " ", $content);
  80. // replace multiple gaps
  81. $content = preg_replace('/ {2,}/si', " ", $content);
  82.  
  83. return $content;
  84. }
  85.  
  86. //single words META KEYWORDS
  87. function parse_words()
  88. {
  89. //list of commonly used words
  90. // this can be edited to suit your needs
  91. $common = array("m&aacute;s", "039;m");
  92. //create an array out of the site contents
  93. $s = split(" ", $this->contents);
  94. //initialize array
  95. $k = array();
  96. //iterate inside the array
  97. foreach( $s as $key=>$val ) {
  98. //delete single or two letter words and
  99. //Add it to the list if the word is not
  100. //contained in the common words list.
  101. if(mb_strlen(trim($val)) >= $this->wordLengthMin  && !in_array(trim($val), $common)  && !is_numeric(trim($val))) {
  102. $k[] = trim($val);
  103. }
  104. }
  105. //count the words
  106. //sort the words from
  107. //highest count to the
  108. //lowest.
  109. $occur_filtered = $this->occure_filter($k, $this->wordOccuredMin);
  110. arsort($occur_filtered);
  111.  
  112. $imploded = $this->implode(", ", $occur_filtered);
  113. //release unused variables
  114. unset($k);
  115. unset($s);
  116.  
  117. return $imploded;
  118. }
  119. function parse_2words()
  120. {
  121. //create an array out of the site contents
  122. $x = split(" ", $this->contents);
  123. //initilize array
  124.  
  125. //$y = array();
  126. for ($i=0; $i < count($x)-1; $i++) {
  127. //delete phrases lesser than 5 characters
  128. if( (mb_strlen(trim($x[$i])) >= $this->word2WordPhraseLengthMin ) && (mb_strlen(trim($x[$i+1])) >= $this->word2WordPhraseLengthMin) )
  129. {
  130. $y[] = trim($x[$i])." ".trim($x[$i+1]);
  131. }
  132. }
  133.  
  134. //count the 2 word phrases
  135. $y = @array_count_values($y);
  136.  
  137. $occur_filtered = $this->occure_filter($y, $this->phrase2WordLengthMinOccur);
  138. //sort the words from highest count to the lowest.
  139. arsort($occur_filtered);
  140.  
  141. $imploded = $this->implode(", ", $occur_filtered);
  142. //release unused variables
  143. unset($y);
  144. unset($x);
  145.  
  146. return $imploded;
  147. }
  148.  
  149. function parse_3words()
  150. {
  151. //create an array out of the site contents
  152. $a = split(" ", $this->contents);
  153. //initilize array
  154. $b = array();
  155.  
  156. for ($i=0; $i < count($a)-2; $i++) {
  157. //delete phrases lesser than 5 characters
  158. if( (mb_strlen(trim($a[$i])) >= $this->word3WordPhraseLengthMin) && (mb_strlen(trim($a[$i+1])) > $this->word3WordPhraseLengthMin) && (mb_strlen(trim($a[$i+2])) > $this->word3WordPhraseLengthMin) && (mb_strlen(trim($a[$i]).trim($a[$i+1]).trim($a[$i+2])) > $this->phrase3WordLengthMin) )
  159. {
  160. $b[] = trim($a[$i])." ".trim($a[$i+1])." ".trim($a[$i+2]);
  161. }
  162. }
  163.  
  164. //count the 3 word phrases
  165. //sort the words from
  166. //highest count to the
  167. //lowest.
  168. $occur_filtered = $this->occure_filter($b, $this->phrase3WordLengthMinOccur);
  169. arsort($occur_filtered);
  170.  
  171. $imploded = $this->implode(", ", $occur_filtered);
  172. //release unused variables
  173. unset($a);
  174. unset($b);
  175.  
  176. return $imploded;
  177. }
  178.  
  179. function occure_filter($array_count_values, $min_occur)
  180. {
  181. $occur_filtered = array();
  182. foreach ($array_count_values as $word => $occured) {
  183. if ($occured >= $min_occur) {
  184. $occur_filtered[$word] = $occured;
  185. }
  186. }
  187.  
  188. return $occur_filtered;
  189. }
  190.  
  191. function implode($gule, $array)
  192. {
  193. $c = "";
  194. foreach($array as $key=>$val) {
  195. @$c .= $key.$gule;
  196. }
  197. return $c;
  198. }
  199. }
  200. ?>

Lo que hace la clase es buscar palabras repetidas y mostrarlas
Tengo el siguiente texto
"La convocatoria de reuni&oacute;n es una notificaci&oacute;n. Reuni&oacute;n. Reuni&oacute;n. Reuni&oacute;n"
Deberia marcarme Reuni&oacute;n, pero lo que me marca es solamente reuni y se corta.

Alguien sabe a que se debe el problema, o como lo podria arreglar?

La palabra es reuni&oacute;n y no reunión solo

Gracias
Salu2


« Última modificación: 20 Julio 2011, 21:06 pm por Lightning » En línea

Páginas: [1] Ir Arriba Respuesta Imprimir 

Ir a:  

Mensajes similares
Asunto Iniciado por Respuestas Vistas Último mensaje
se puede dar herencia de una clase anidada a una clase normal.
Java
kev4142 0 3,431 Último mensaje 7 Agosto 2009, 20:46 pm
por kev4142
Problema de interacion entre una clase Interfaz y la clase que la llama
Java
yeah69 4 4,618 Último mensaje 21 Mayo 2010, 06:30 am
por we4rt
Diferencia de instancia (CLASE oObjeto; y CLASE *oObjeto = new CLASE();)
Programación C/C++
ahkbar87 2 2,873 Último mensaje 14 Junio 2011, 23:17 pm
por Don Pollo
Creando nuevas keywords para C
Programación C/C++
MAFUS 2 2,120 Último mensaje 30 Julio 2019, 19:32 pm
por MAFUS
WAP2 - Aviso Legal - Powered by SMF 1.1.21 | SMF © 2006-2008, Simple Machines