tsvector关键字抽取时,会从给定的关键字中提取中英文及数字,屏蔽其他内容

This commit is contained in:
2024-11-15 10:35:20 +08:00
parent ab3cc1aeea
commit 3be2c77008
2 changed files with 20 additions and 2 deletions

View File

@ -1,6 +1,6 @@
{
"name": "@yizhi/postgres",
"version": "1.0.7",
"version": "1.0.8",
"main": "dist/index.js",
"types": "typing/index.d.ts",
"scripts": {},

View File

@ -84,7 +84,7 @@ export class TSVector {
}
//处理关键词
keywords = keywords.map(s => s.trim()).filter(s => !!s);
keywords = this.#resolveKeywords(keywords);// keywords.map(s => s.trim()).filter(s => !!s);
//屏蔽纯字母数字的关键字
keywords = keywords.filter(kw => {
@ -121,6 +121,24 @@ export class TSVector {
//完成
return new this(items.map(s => s.replaceAll("'", "")));
}
//处理关键字
/**
 * Extracts searchable tokens from raw keyword strings.
 *
 * Every maximal run of ASCII letters, ASCII digits, or characters in the
 * range U+4E00–U+9FA5 becomes one token; any other character acts as a
 * separator and is discarded. Tokens are de-duplicated (first occurrence
 * wins) and returned ordered from longest to shortest.
 *
 * Single-character tokens are dropped only when at least one longer token
 * exists. When every token is a single character they are all kept, so the
 * input never collapses to an empty list just because it contained several
 * one-character keywords.
 *
 * @param keywords Raw keyword strings, possibly containing punctuation,
 *                 whitespace or other characters that should be ignored.
 * @returns The unique extracted tokens, longest first.
 */
static #resolveKeywords(keywords: string[]): string[] {
    // matchAll works on an internal clone of the regex, so sharing a single
    // global-flagged instance across iterations is safe.
    const tokenPattern = /[a-zA-Z0-9\u4e00-\u9fa5]+/g;

    // A Set de-duplicates in O(1) per token while preserving insertion order.
    const unique = new Set<string>();
    for (const kw of keywords) {
        for (const match of kw.matchAll(tokenPattern)) {
            unique.add(match[0]);
        }
    }

    // Longest first; Array.prototype.sort is stable, so tokens of equal
    // length keep their order of first appearance.
    const result = [...unique].sort((a, b) => b.length - a.length);

    // Prefer multi-character tokens, but only discard the single-character
    // ones when something longer actually exists.
    if (result.some(s => s.length > 1)) {
        return result.filter(s => s.length > 1);
    }
    return result;
}
}