1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
/***************************************************************************
* Copyright (C) 2004-2009 by Thomas Fischer *
* [email protected] *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#ifndef KBIBTEXFINDDUPLICATES_H
#define KBIBTEXFINDDUPLICATES_H
#include <qobject.h>
// Forward declarations: this header only refers to these types through pointers.
class KProgressDialog;
class QWidget;
namespace KBibTeX
{
/**
 * Duplicate finder for the elements of a BibTeX file.
 *
 * Only declarations live in this header; notes about behaviour that cannot be
 * read off a signature are marked as assumptions to be checked against the
 * implementation file.
 *
 * @author Thomas Fischer <[email protected]>
 */
class FindDuplicates : public QObject
{
    Q_OBJECT

public:
    /** List of element pointers; going by the name, one group of elements regarded as duplicates of each other. */
    typedef QValueList<BibTeX::Element*> DuplicateClique;

    /** List of cliques; this is the type of the constructor's @p result argument. */
    typedef QValueList<DuplicateClique> DuplicateCliqueList;

    /**
     * Maximum sensitivity. Declared here only; the definition (and value) is
     * outside this class declaration.
     */
    static const unsigned int maxDistance;

    /**
     * Find duplicates in a given BibTeX file.
     *
     * @param result      list of duplicate cliques; taken by non-const reference,
     *                    so presumably filled with the findings (confirm in the
     *                    implementation)
     * @param sensitivity controls the distance between two elements at which
     *                    both elements are considered to be duplicates
     * @param file        the BibTeX file to examine
     * @param parent      used as the parent of a progress dialog
     */
    FindDuplicates( DuplicateCliqueList &result,
                    unsigned int sensitivity,
                    BibTeX::File *file,
                    QWidget *parent );

    ~FindDuplicates();

protected:
    /*
     * Field extraction helpers. All are static, i.e. they do not touch the
     * state of a FindDuplicates instance. What exactly is extracted is only
     * suggested by the names; verify against the implementation.
     */
    static QString extractTitle( BibTeX::Entry *entry );
    static QStringList authorsLastName( BibTeX::Entry *entry );
    static int extractYear( BibTeX::Entry *entry );
    static QString extractMacroKey( BibTeX::Macro *macro );
    static QString extractMacroValue( BibTeX::Macro *macro );

    /*
     * Pairwise distance between two elements of the same kind, returned as an
     * unsigned integer. NOTE(review): presumably on the same scale as the
     * constructor's sensitivity and maxDistance -- confirm.
     */
    unsigned int entryDistance( BibTeX::Entry *entryA, BibTeX::Entry *entryB );
    unsigned int macroDistance( BibTeX::Macro *macroA, BibTeX::Macro *macroB );
    unsigned int preambleDistance( BibTeX::Preamble *preambleA, BibTeX::Preamble *preambleB );

    /*
     * The two processing steps share a raw distance vector and a map from
     * element pointer to integer index. NOTE(review): judging by the names, the
     * first step fills distVector (with progress shown through progDlg) and the
     * second turns it into cliques written to result -- confirm.
     */
    void determineDistances( BibTeX::File *file,
                             unsigned int *distVector,
                             QMap<BibTeX::Element*, int> &mapElementToIndex,
                             KProgressDialog *progDlg );
    void buildClique( DuplicateCliqueList &result,
                      BibTeX::File *file,
                      unsigned int *distVector,
                      QMap<BibTeX::Element*, int> &mapElementToIndex,
                      unsigned int sensitivity );

private:
    /*
     * Levenshtein distance overloads for string lists, strings and (by name)
     * single words; each returns a double.
     */
    double levenshteinDistance( const QStringList &s, const QStringList &t );
    double levenshteinDistance( const QString &s, const QString &t );
    double levenshteinDistanceWord( const QString &s, const QString &t );

    /* NOTE(review): presumably maps a pair of element indices to a position in the flat distance vector -- confirm. */
    int arrayOffset( int a, int b );

    /* Sorts an unsigned int array of the given length; ordering is defined by the implementation. */
    void sort( unsigned int *array, int len );

    /* Cancel flag. NOTE(review): presumably set by slotCancel() -- confirm. */
    bool m_doCancel;

private slots:
    /* Slot for cancel requests. NOTE(review): presumably connected to the progress dialog -- confirm. */
    void slotCancel();
};
}
#endif
|