/*************************************************************************** * Copyright (C) 2004-2009 by Thomas Fischer * * fischer@unix-ag.uni-kl.de * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #ifndef KBIBTEXFINDDUPLICATES_H #define KBIBTEXFINDDUPLICATES_H #include <ntqobject.h> class KProgressDialog; class TQWidget; namespace KBibTeX { /** @author Thomas Fischer <fischer@unix-ag.uni-kl.de> */ class FindDuplicates : public TQObject { Q_OBJECT public: typedef TQValueList<BibTeX::Element*> DuplicateClique; typedef TQValueList<DuplicateClique> DuplicateCliqueList; /** * Find duplicates in a given BibTeX file. The sensitivity parameter controls the distance between two elements where both elements are considered to be duplicates. The parent object is used as a progress dialog's parent. * @param file * @param sensitivity * @param parent * @return */ FindDuplicates( DuplicateCliqueList &result, unsigned int sensitivity, BibTeX::File *file, TQWidget *parent ); ~FindDuplicates(); /** * Maximum sensitivity */ static const unsigned int maxDistance; protected: void determineDistances( BibTeX::File *file, unsigned int *distVector, TQMap<BibTeX::Element*, int> &mapElementToIndex, KProgressDialog *progDlg ); void buildClique( DuplicateCliqueList &result, BibTeX::File *file, unsigned int *distVector, TQMap<BibTeX::Element*, int> &mapElementToIndex, unsigned int sensitivity ); unsigned int entryDistance( BibTeX::Entry *entryA, BibTeX::Entry *entryB ); unsigned int macroDistance( BibTeX::Macro *macroA, BibTeX::Macro *macroB ); unsigned int preambleDistance( BibTeX::Preamble *preambleA, BibTeX::Preamble *preambleB ); static TQString extractTitle( BibTeX::Entry *entry ); static TQStringList authorsLastName( BibTeX::Entry *entry ); static int extractYear( BibTeX::Entry *entry ); static TQString extractMacroKey( BibTeX::Macro *macro ); static TQString extractMacroValue( BibTeX::Macro *macro ); private: bool m_doCancel; double levenshteinDistance( const TQStringList &s, const TQStringList &t ); double levenshteinDistance( const TQString &s, const TQString &t ); double levenshteinDistanceWord( const TQString &s, const TQString &t ); int arrayOffset( int a, int b ); void sort( unsigned int *array, int len ); private slots: void slotCancel(); }; } #endif