12 while ($arLanguage = $rsLanguages->Fetch())
19 if ($sLang !==
false && !isset(
$arStemFunc[$sLang]))
21 $stemming_function_suf = $sLang;
23 if (!function_exists(
'stemming_' . $sLang))
25 $strFileName =
$_SERVER[
'DOCUMENT_ROOT'] . BX_PERSONAL_ROOT .
'/php_interface/' . $sLang .
'/search/stemming.php';
26 if (file_exists($strFileName))
28 @include $strFileName;
30 if (!function_exists(
'stemming_' . $sLang))
32 $strFileName =
$_SERVER[
'DOCUMENT_ROOT'] .
'/bitrix/modules/search/tools/' . $sLang .
'/stemming.php';
33 if (file_exists($strFileName))
35 if (\
Bitrix\Main\Localization\Translation::allowConvertEncoding())
37 \Bitrix\Main\Localization\StreamConverter::include($strFileName, $sLang);
41 @include $strFileName;
44 if (!function_exists(
'stemming_' . $sLang))
46 $stemming_function_suf =
'default';
51 $stemming_stop_function =
'stemming_stop_' . $sLang;
52 if (!function_exists($stemming_stop_function))
54 $stemming_stop_function =
'stemming_stop_default';
57 $stemming_upper_function =
'stemming_upper_' . $sLang;
58 if (!function_exists($stemming_upper_function))
60 $stemming_upper_function =
'stemming_upper_default';
64 $stemming_letter_function =
'stemming_letter_' . $sLang;
65 if (function_exists($stemming_letter_function))
67 $letters .= $stemming_letter_function();
72 if (function_exists($stemming_letter_function))
74 $abc = $stemming_letter_function();
87 'stem' =>
'stemming_' . $stemming_function_suf,
88 'stop' => $stemming_stop_function,
89 'upper' => $stemming_upper_function,
90 'letters' => $letters,
91 'pcre_letters' =>
'\\w\\d' . str_replace(
92 [
'\\' ,
'-' ,
'^' ,
']' ,
'/'],
93 [
'\\\\',
'\\-',
'\\^',
'\\]',
'\\/'],
97 'pcre_abc' =>
'\\w\\d' . str_replace(
98 [
'\\' ,
'-' ,
'^' ,
']' ,
'/'],
99 [
'\\\\',
'\\-',
'\\^',
'\\]',
'\\/'],
105 if ($sLang ===
false)
/**
 * Splits $sText into words and collects their stems into $stems, keyed by stem.
 *
 * NOTE(review): only part of this function is visible in this listing; the
 * comments below describe the visible statements only. $arStemInfo, $tok,
 * $pos, $guess and $stems are set up on lines that are not shown here.
 *
 * @param string $sText            Text to tokenize and stem.
 * @param string $sLang            Language key used to look up $arStemInfo[$sLang] (defaults to 'ru').
 * @param bool   $bIgnoreStopWords When true, stems are recorded without consulting the stop function.
 * @param bool   $bReturnPositions When true, '.', '!' and '?' are preserved during cleanup and
 *                                 each token is further split on '.'.
 */
148function stemming($sText, $sLang=
'ru', $bIgnoreStopWords =
false, $bReturnPositions =
false)
// Per-request memo of stop-function results, one bucket per $sLang.
150 static $STOP_CACHE = [];
151 if (!isset($STOP_CACHE[$sLang]))
153 $STOP_CACHE[$sLang] = [];
// Bound by reference so writes below land in the static cache.
155 $stop_cache = &$STOP_CACHE[$sLang];
// Language info not loaded yet; the handling branch is not visible here
// (presumably it triggers initialisation — TODO confirm).
163 if (!isset($arStemInfo[$sLang]))
// Name of the callable that stems a single word for this language.
168 $stem_func = $arStemInfo[$sLang][
'stem'];
// Matches any run of characters that fall OUTSIDE the language's 'pcre_abc' class.
169 $pcre_abc =
'/[^' . $arStemInfo[$sLang][
'pcre_abc'] .
']+/u';
174 if ($bReturnPositions)
// Replace everything except letters and . ! ? with the $tok separator ...
176 $sText = preg_replace(
'/[^' . $arStemInfo[$sLang][
'pcre_letters'] .
'.!?]+/u', $tok, $sText);
// ... then collapse every run of ! / ? into a single '.'.
177 $sText = preg_replace(
'/[!?]+/u',
'.', $sText);
// Presumably the else branch (not visible): replace every non-letter run with $tok.
181 $sText = preg_replace(
'/[^' . $arStemInfo[$sLang][
'pcre_letters'] .
']+/u', $tok, $sText);
// Start tokenizing on the $tok separator; strtok() returns false when exhausted.
185 $words = strtok($sText, $tok);
187 while ($words !==
false)
189 if ($bReturnPositions)
// A token may still contain '.' characters here; split it on them.
191 $words = explode(
'.', $words);
198 foreach ($words as
$i => $word)
// Only the first 50 characters (multibyte-aware) of a word are considered.
200 $word = mb_substr($word, 0, 50);
202 if ($bReturnPositions)
// NOTE(review): meaning of the second argument (1) is defined by the stem callable — confirm.
216 $stem = $stem_func($word, 1);
// True when the word contains at least one character outside the language's alphabet.
224 && preg_match($pcre_abc, $word)
// NOTE(review): $guess is produced on a line not shown; index 1 is used as the stop-word language.
232 $stop_lang = $guess[1];
236 if ($bIgnoreStopWords)
// No stop-word filtering: add $pos to an existing entry or create it with $pos.
240 foreach ($stem as $st)
242 $stems[$st] = isset($stems[$st]) ? $stems[$st] + $pos : $pos;
// Same accumulation with $stem used directly as the key.
247 $stems[$stem] = isset($stems[$stem]) ? $stems[$stem] + $pos : $pos;
// Stop function is taken from $stop_lang.
// NOTE(review): $stop_cache is keyed by $sLang, not $stop_lang — verify results cannot mix when they differ.
252 $stop_func = $arStemInfo[$stop_lang][
'stop'];
255 foreach ($stem as $st)
// Call the stop function once per distinct stem and memoize its result.
257 if (!isset($stop_cache[$st]))
259 $stop_cache[$st] = $stop_func($st);
// A truthy cached result means the stem is kept.
262 if ($stop_cache[$st])
264 $stems[$st] = isset($stems[$st]) ? $stems[$st] + $pos : $pos;
// Same memoized check with $stem used directly as the key.
270 if (!isset($stop_cache[$stem]))
272 $stop_cache[$stem] = $stop_func($stem);
275 if ($stop_cache[$stem])
277 $stems[$stem] = isset($stems[$stem]) ? $stems[$stem] + $pos : $pos;
282 if ($bReturnPositions)
// Advance to the next token of the string given to the first strtok() call.
288 $words = strtok($tok);
static GetOptionString($module_id, $name, $def="", $site=false)
if(!defined('SITE_ID')) $lang