当前位置：首页 > JavaScript

js实现fuzzywuzzy

2026-02-01 02:25:06 · JavaScript

实现模糊字符串匹配（FuzzyWuzzy）的 JavaScript 方法

FuzzyWuzzy 是一个流行的字符串模糊匹配库，最初用 Python 编写。在 JavaScript 中，可以通过以下方式实现类似功能：

使用 `fuzzball.js` 库

fuzzball.js 是 FuzzyWuzzy 的 JavaScript 移植版本，提供类似功能：

// Demonstrates four fuzzball scoring functions on sample string pairs.
const fuzz = require('fuzzball');

// Simple ratio match
const ratio = fuzz.ratio("hello world", "hello worl");
console.log(ratio); // prints the similarity score

// Partial ratio match
const partialRatio = fuzz.partial_ratio("hello world", "worl");
console.log(partialRatio);

// Token sort ratio
const tokenSortRatio = fuzz.token_sort_ratio("hello world", "world hello");
console.log(tokenSortRatio);

// Token set ratio
const tokenSetRatio = fuzz.token_set_ratio("hello world", "hello small world");
console.log(tokenSetRatio);

自定义实现 Levenshtein 距离

基础相似度算法可以通过实现 Levenshtein 距离来计算：

js实现fuzzywuzzy

/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other. Characters are compared with `charAt`,
 * i.e. as UTF-16 code units.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance; 0 when the strings are equal.
 */
function levenshteinDistance(a, b) {
  const width = a.length;
  const height = b.length;

  // Row 0 of the table: distance from the empty prefix of `b` to each prefix of `a`.
  let above = Array.from({ length: width + 1 }, (_, col) => col);

  for (let row = 1; row <= height; row++) {
    const bChar = b.charAt(row - 1);
    // Column 0: distance from the first `row` characters of `b` to the empty string.
    const current = [row];

    for (let col = 1; col <= width; col++) {
      const sameChar = bChar === a.charAt(col - 1);
      // Equal characters carry the diagonal value over unchanged; otherwise
      // take the cheapest of substitution, insertion and deletion.
      current[col] = sameChar
        ? above[col - 1]
        : 1 + Math.min(above[col - 1], current[col - 1], above[col]);
    }

    // Each row only depends on the one directly above it.
    above = current;
  }

  return above[width];
}

/**
 * Converts the Levenshtein distance between two strings into a similarity
 * percentage, normalised by the length of the longer string.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} A score between 0 and 100; 100 means the strings are identical.
 */
function similarityScore(str1, str2) {
  const maxLength = Math.max(str1.length, str2.length);

  // Two empty strings are identical. Without this guard the formula below
  // evaluates 0 / 0 and the function returns NaN.
  if (maxLength === 0) {
    return 100;
  }

  const distance = levenshteinDistance(str1, str2);
  return (1 - distance / maxLength) * 100;
}

使用字符串相似度库

string-similarity 是另一个流行的 JavaScript 库：

// Demonstrates pairwise comparison and best-match lookup with string-similarity.
const stringSimilarity = require('string-similarity');

const similarity = stringSimilarity.compareTwoStrings("hello", "helloo");
console.log(similarity); // prints 0.888... (bigram Dice coefficient: 2 * 4 / (4 + 5))

const matches = stringSimilarity.findBestMatch("hello", [
  "hell", "world", "hello", "helloo"
]);
console.log(matches.bestMatch); // prints the best match

实现部分匹配功能

对于部分字符串匹配，可以扩展基础算法：

js实现fuzzywuzzy

/**
 * Scores how well the shorter of two strings matches any equally long window
 * of the longer one, by sliding the shorter string across the longer string
 * and keeping the best `similarityScore`.
 *
 * The arguments may be given in either order: if `subStr` is longer than
 * `mainStr`, the roles are swapped instead of returning 0.
 *
 * @param {string} mainStr - The string to search in.
 * @param {string} subStr - The string to search for.
 * @returns {number} The best window score (0-100); 0 if either string is empty.
 */
function partialMatchScore(mainStr, subStr) {
  const [shorter, longer] =
    subStr.length <= mainStr.length ? [subStr, mainStr] : [mainStr, subStr];
  const windowLength = shorter.length;

  // An empty string matches nothing. Return 0 explicitly rather than relying
  // on how similarityScore happens to treat two empty strings.
  if (windowLength === 0) {
    return 0;
  }

  let bestScore = 0;
  for (let start = 0; start <= longer.length - windowLength; start++) {
    const segment = longer.slice(start, start + windowLength);
    const score = similarityScore(segment, shorter);
    if (score > bestScore) {
      bestScore = score;
      // A perfect window cannot be beaten; stop scanning.
      if (bestScore === 100) {
        break;
      }
    }
  }

  return bestScore;
}

实现令牌排序匹配

处理单词顺序不同的情况：

/**
 * Compares two strings while ignoring word order: each string is split on
 * whitespace, its tokens are sorted, and the re-joined strings are scored
 * with `similarityScore`. The comparison is case-sensitive.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} The similarity score of the two token-sorted strings.
 */
function tokenSortRatio(str1, str2) {
  // Trim first: leading or trailing whitespace would otherwise produce empty
  // tokens that sort to the front and leave a stray space in the joined
  // string, lowering the score of otherwise identical inputs.
  const sortTokens = (text) => text.trim().split(/\s+/).sort().join(' ');

  return similarityScore(sortTokens(str1), sortTokens(str2));
}

实现令牌集合匹配

更高级的匹配方式，基于两个字符串的单词集合进行比较，忽略重复单词和单词顺序：

/**
 * Token-set similarity in the style of FuzzyWuzzy's `token_set_ratio`.
 *
 * Both strings are reduced to sets of whitespace-separated tokens, so word
 * order and repeated words are ignored. Three strings are then built:
 *   - the sorted shared tokens,
 *   - the shared tokens followed by the sorted tokens unique to `str1`,
 *   - the shared tokens followed by the sorted tokens unique to `str2`,
 * and the best pairwise `similarityScore` among them is returned. As a
 * result, a string whose tokens are a subset of the other's scores 100.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} The best pairwise score; 0 if either string has no tokens.
 */
function tokenSetRatio(str1, str2) {
  // filter(Boolean) drops the empty token that split() yields for empty or
  // whitespace-only input.
  const tokenize = (text) => new Set(text.trim().split(/\s+/).filter(Boolean));
  const set1 = tokenize(str1);
  const set2 = tokenize(str2);

  // Nothing to compare against.
  if (set1.size === 0 || set2.size === 0) {
    return 0;
  }

  const shared = [...set1].filter((token) => set2.has(token)).sort();
  const onlyIn1 = [...set1].filter((token) => !set2.has(token)).sort();
  const onlyIn2 = [...set2].filter((token) => !set1.has(token)).sort();

  const sortedIntersection = shared.join(' ');
  const combined1 = [...shared, ...onlyIn1].join(' ');
  const combined2 = [...shared, ...onlyIn2].join(' ');

  return Math.max(
    similarityScore(sortedIntersection, combined1),
    similarityScore(sortedIntersection, combined2),
    similarityScore(combined1, combined2)
  );
}

这些方法提供了从基础到高级的字符串模糊匹配功能，可以根据具体需求选择使用现成库或自定义实现。对于大多数应用场景，fuzzball.js 或 string-similarity 库已经足够，且性能更好。