// kbabel/common/stringdistance.h
/* ****************************************************************************
    Copyright (C) 2003-2004 Eva Brucherseifer <[email protected]>
		  2005	    Stanislav Visnovsky <[email protected]>

    This file is part of the KDE project

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

  In addition, as a special exception, the copyright holders give
  permission to link the code of this program with any edition of
  the TQt library by Trolltech AS, Norway (or with modified versions
  of TQt that use the same license as TQt), and distribute linked
  combinations including the two.  You must obey the GNU General
  Public License in all respects for all of the code used other than
  TQt. If you modify this file, you may extend this exception to
  your version of the file, but you are not obligated to do so.  If
  you do not wish to do so, delete this exception statement from
  your version.

**************************************************************************** */

#ifndef STRINGDISTANCE_H
#define STRINGDISTANCE_H

#include <tqstring.h>

//#include <boost/numeric/ublas/matrix.hpp>


/**
  * Base class that makes every class derived from it non-copyable.
  *
  * The copy constructor and the copy assignment operator are declared
  * private and are given no definition in this header, so an attempt to
  * copy a derived object from outside this class is rejected at compile time.
  *
  * Taken from the Boost library (class noncopyable).
  * Contributed by Dave Abrahams
  *
  * If anyone needs a namespace here, please tell me at [email protected]
  */
class NonCopyable
{
protected:
        // Protected: only derived classes can construct or destroy a
        // NonCopyable, i.e. it is usable as a base class only.
        NonCopyable(){}
        virtual ~NonCopyable(){}
private:  // emphasize the following members are private
        // Declared but not defined here: these exist solely to block copying.
        NonCopyable( const NonCopyable& );
        const NonCopyable& operator=( const NonCopyable& );
};


/**
  * @class Distance
  * @author Eva Brucherseifer
  *
  * The class Distance calculates the distance
  * between two entities (left & right), passed in as strings.
  * It is the abstract parent of the other distance classes: each concrete
  * measure derives from it and implements calculate().
  * Instances cannot be copied (see the NonCopyable base class).
  */
class Distance : public NonCopyable
{
public:
	virtual ~Distance(){}
	/**
	  * Public entry point: returns the distance between @p left and @p right.
	  * Only declared here; presumably it delegates to calculate() --
	  * TODO confirm against the implementation file.
	  */
	double operator()(const TQString& left, const TQString& right);

	/**
	  * @return the base replacement cost, editCost_replace_base.
	  *
	  * NOTE: this function is not virtual. HammingDistance and
	  * LevenshteinDistance declare their own editCostReplace(), which hides
	  * this one instead of overriding it, so a call made through a Distance
	  * pointer or reference always returns the base value.
	  */
	int editCostReplace() { return editCost_replace_base; }
	/** Class-wide, publicly writable value; presumably a debug switch or
	  * verbosity level -- TODO confirm where it is read. */
	static int debug;
	
protected:
	/**
	  * The actual distance computation; pure virtual, so every concrete
	  * distance class has to provide it.
	  */
	virtual double calculate(const TQString& left_string, const TQString& right_string) = 0;
	/** Distance between two single letters, each passed as a string.
	  * Only declared here; the cost model lives in the implementation file. */
	int nodeDistance(const TQString& left_letter, const TQString& right_letter);
	/** Value returned by Distance::editCostReplace(); defined outside this header. */
	static const int editCost_replace_base;
	/** NOTE(review): presumably holds the most recently computed distance --
	  * confirm against the implementation file. */
	double m_distance;
};


/**
  * Free helper that takes an already computed @p distance together with the
  * two strings it was computed for. Only declared here; presumably it scales
  * the distance relative to the string lengths -- TODO confirm against the
  * implementation file.
  *
  * NOTE(review): @p right_string is passed by (const) value while
  * @p left_string is passed by const reference. This looks unintentional, but
  * the prototype has to stay in sync with the out-of-line definition.
  */
double relativeDistance(double distance, const TQString& left_string, const TQString right_string);


/**
  * @class HammingDistance
  * @author Eva Brucherseifer
  *
  * The class HammingDistance is based on an algorithm
  * by Hamming. It increases the distance whenever the nodes from
  * the tree are not the same. Also called edit-distance.
  *
  * NOTE(review): "edit distance" more commonly refers to the Levenshtein
  * distance -- confirm which meaning is intended here.
  */
class HammingDistance : public Distance
{
protected:
	/** Implements the pure virtual Distance::calculate(). */
	virtual double calculate(const TQString& left_string, const TQString& right_string);
	/**
	  * @return this class's replacement cost, editCost.
	  *
	  * Hides (does not override) the public, non-virtual
	  * Distance::editCostReplace(); being protected, it is not callable on a
	  * HammingDistance object from outside the class hierarchy.
	  */
	int editCostReplace() { return editCost; }
	/** Replacement cost of this measure; defined outside this header. */
	static const int editCost;
};

/**
  * @class LevenshteinDistance
  * @author Eva Brucherseifer
  *
  * The class LevenshteinDistance is based on an algorithm
  * by Levenshtein. It searches for the minimum distance between
  * two trees. The distance between a gap & a node and the distance
  * between two different nodes are specified separately
  * (see the editCost_* constants below).
  */
class LevenshteinDistance : public Distance
{
protected:
	/** Implements the pure virtual Distance::calculate(). */
	virtual double calculate(const TQString& left_string, const TQString& right_string);
	/**
	  * @return this class's replacement cost, editCost_replace.
	  *
	  * Hides (does not override) the public, non-virtual
	  * Distance::editCostReplace(); being protected, it is not callable on a
	  * LevenshteinDistance object from outside the class hierarchy.
	  */
	int editCostReplace() { return editCost_replace; }
	/** Cost of replacing one node by a different one; defined outside this header. */
	static const int editCost_replace;
	/** Cost of an insertion (presumably the gap-to-node case) -- defined outside this header. */
	static const int editCost_insert;
	/** Cost of a deletion (presumably the node-to-gap case) -- defined outside this header. */
	static const int editCost_delete;
};


/**
  * Wrapper function replacing fstrcmp from gettext.
  *
  * Applies a throwaway LevenshteinDistance to the two strings and returns
  * whatever its call operator yields.
  */
inline double fstrcmp(const TQString& left, const TQString& right)
{
	// Keep the value-initialised temporary: LevenshteinDistance cannot be
	// copied, and a plain named local would not be value-initialised.
	const double result = LevenshteinDistance()(left, right);
	return result;
}


#endif