aboutsummaryrefslogtreecommitdiff
path: root/src/utils/fstrcmp.c
blob: 3b0cc233c1485e57ea85aafe2545a04b2b2c8b50 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Functions to make fuzzy comparisons between strings.
 *
 * Derived from PHP 5 similar_text() function
 *
 * The basic algorithm is described in:
 * Oliver [1993] and the complexity is O(N**3) with N == length of longest string

   +----------------------------------------------------------------------+
   | PHP Version 5                                                        |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997-2010 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Rasmus Lerdorf <rasmus@php.net>                             |
   |          Stig Sther Bakken <ssb@php.net>                             |
   |          Zeev Suraski <zeev@zend.com>                                |
   +----------------------------------------------------------------------+
 */

#ifdef __cplusplus
extern "C"
{
#endif

#include <string.h>

static int similar_text(const char *str1, const char *str2, int len1, int len2)
{
  int sum;
  int pos1 = 0, pos2 = 0;
  int max = 0;

  char *p, *q;
  char *end1 = (char *)str1 + len1;
  char *end2 = (char *)str2 + len2;
  int l;

  for (p = (char *)str1; p < end1; p++)
  {
    for (q = (char *)str2; q < end2; q++)
    {
      for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++)
        ;
      if (l > max)
      {
        max = l;
        pos1 = p - str1;
        pos2 = q - str2;
      }
    }
  }
  if ((sum = max))
  {
    if (pos1 && pos2)
      sum += similar_text(str1, str2, pos1, pos2);

    if ((pos1 + max < len1) && (pos2 + max < len2))
      sum += similar_text(str1 + pos1 + max, str2 + pos2 + max,
                          len1 - pos1 - max, len2 - pos2 - max);
  }

  return sum;
}

/* NAME
 fstrcmp - fuzzy string compare

   SYNOPSIS
 double fstrcmp(const char *, const char *, double);

   DESCRIPTION
 The fstrcmp function may be used to compare two string for
 similarity.  It is very useful in reducing "cascade" or
 "secondary" errors in compilers or other situations where
 symbol tables occur.

   RETURNS
 double; 0 if the strings are entirly dissimilar, 1 if the
 strings are identical, and a number in between if they are
 similar.  */

double
fstrcmp (const char *string1, const char *string2, double minimum)
{
  int len1, len2, score;

  len1 = (int)strlen(string1);
  len2 = (int)strlen(string2);

  /* short-circuit obvious comparisons */
  if (len1 == 0 && len2 == 0)
    return 1.0;
  if (len1 == 0 || len2 == 0)
    return 0.0;

  score = similar_text(string1, string2, len1, len2);
  /* The result is
  ((number of chars in common) / (average length of the strings)).
     This is admittedly biased towards finding that the strings are
     similar, however it does produce meaningful results.  */
  return ((double)score * 2.0 / (len1 + len2));
}

#ifdef __cplusplus
} // extern "C"
#endif