diff options
author | Slávek Banko <[email protected]> | 2013-06-24 02:08:15 +0200 |
---|---|---|
committer | Slávek Banko <[email protected]> | 2013-07-04 02:44:37 +0200 |
commit | 998f21e02a725cd553d7c278819f67cd81295af4 (patch) | |
tree | 4bd158018e9302c31367b00c01cd2b41eb228414 /src/findduplicates.h | |
download | kbibtex-998f21e02a725cd553d7c278819f67cd81295af4.tar.gz kbibtex-998f21e02a725cd553d7c278819f67cd81295af4.zip |
Initial import
Diffstat (limited to 'src/findduplicates.h')
-rw-r--r-- | src/findduplicates.h | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/src/findduplicates.h b/src/findduplicates.h
new file mode 100644
index 0000000..e1a2fce
--- /dev/null
+++ b/src/findduplicates.h
@@ -0,0 +1,85 @@
+/***************************************************************************
+ *   Copyright (C) 2004-2009 by Thomas Fischer                             *
+ *   [email protected]                                                     *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.              *
+ ***************************************************************************/
+#ifndef KBIBTEXFINDDUPLICATES_H
+#define KBIBTEXFINDDUPLICATES_H
+
+#include <qobject.h> /* NOTE(review): the BibTeX:: types used below are neither included nor forward-declared in this header, so the including file must declare them before including it. */
+
+class KProgressDialog;
+class QWidget;
+
+namespace KBibTeX
+{
+
+    /** Finds duplicates in a BibTeX file; the search parameters are documented on the constructor.
+    @author Thomas Fischer <[email protected]>
+    */
+    class FindDuplicates : public QObject
+    {
+        Q_OBJECT
+    public:
+        typedef QValueList<BibTeX::Element*> DuplicateClique;
+        typedef QValueList<DuplicateClique> DuplicateCliqueList;
+
+        /**
+         * Find duplicates in a given BibTeX file. The sensitivity parameter controls the distance between two elements where both elements are considered to be duplicates. The parent object is used as a progress dialog's parent.
+         * @param result list of duplicate cliques, taken by non-const reference; presumably filled with the duplicates found (TODO confirm in findduplicates.cpp)
+         * @param sensitivity distance setting used to decide whether two elements are duplicates (see maxDistance)
+         * @param file BibTeX file in which duplicates are searched
+         * @param parent widget used as the parent of the progress dialog
+         */
+        FindDuplicates( DuplicateCliqueList &result, unsigned int sensitivity, BibTeX::File *file, QWidget *parent );
+
+        ~FindDuplicates();
+
+        /**
+         * Maximum sensitivity. Only declared here; the value is defined outside this header.
+         */
+        static const unsigned int maxDistance;
+
+    protected:
+        void determineDistances( BibTeX::File *file, unsigned int *distVector, QMap<BibTeX::Element*, int> &mapElementToIndex, KProgressDialog *progDlg );
+        void buildClique( DuplicateCliqueList &result, BibTeX::File *file, unsigned int *distVector, QMap<BibTeX::Element*, int> &mapElementToIndex, unsigned int sensitivity );
+        unsigned int entryDistance( BibTeX::Entry *entryA, BibTeX::Entry *entryB );
+        unsigned int macroDistance( BibTeX::Macro *macroA, BibTeX::Macro *macroB );
+        unsigned int preambleDistance( BibTeX::Preamble *preambleA, BibTeX::Preamble *preambleB );
+
+        static QString extractTitle( BibTeX::Entry *entry );
+        static QStringList authorsLastName( BibTeX::Entry *entry );
+        static int extractYear( BibTeX::Entry *entry );
+        static QString extractMacroKey( BibTeX::Macro *macro );
+        static QString extractMacroValue( BibTeX::Macro *macro );
+
+    private:
+        bool m_doCancel; /* NOTE(review): presumably set by slotCancel() to abort a running search; confirm in findduplicates.cpp */
+
+        double levenshteinDistance( const QStringList &s, const QStringList &t );
+        double levenshteinDistance( const QString &s, const QString &t );
+        double levenshteinDistanceWord( const QString &s, const QString &t );
+        int arrayOffset( int a, int b );
+        void sort( unsigned int *array, int len );
+
+    private slots:
+        void slotCancel();
+    };
+
+}
+
+#endif |