diff options
Diffstat (limited to 'debian/uncrustify-trinity/uncrustify-trinity-0.78.1/src/sorting.cpp')
-rw-r--r-- | debian/uncrustify-trinity/uncrustify-trinity-0.78.1/src/sorting.cpp | 709 |
1 files changed, 709 insertions, 0 deletions
diff --git a/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/src/sorting.cpp b/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/src/sorting.cpp new file mode 100644 index 00000000..807e0739 --- /dev/null +++ b/debian/uncrustify-trinity/uncrustify-trinity-0.78.1/src/sorting.cpp @@ -0,0 +1,709 @@ +/** + * @file sorting.cpp + * Sorts chunks and imports + * + * @author Ben Gardner + * @license GPL v2+ + */ + +#include "sorting.h" + +#include "newlines.h" +#include "prototypes.h" + +#include <regex> +#include <unordered_map> + +constexpr static auto LCURRENT = LSORT; + +using namespace uncrustify; + +Option<std::string> *include_category_options[] = +{ + &options::include_category_0, + &options::include_category_1, + &options::include_category_2, +}; +constexpr static int kIncludeCategoriesCount = 3; + + +struct include_category +{ + include_category(const std::string &pattern) + : regex(pattern) + { + } + std::regex regex; +}; + + +include_category *include_categories[kIncludeCategoriesCount]; +unordered_map<Chunk *, int> chunk_priority_cache; +unordered_map<std::string, bool> filename_without_ext_cache; + + +/** + * Compare two series of chunks, starting with the given ones. + * @param pc1 first instance to compare + * @param pc2 second instance to compare + * @param tcare take care of case (lower case/ upper case) Issue #2091 + * + * @retval == 0 both text elements are equal + * @retval > 0 + * @retval < 0 + */ +static int compare_chunks(Chunk *pc1, Chunk *pc2, bool tcare); + + +/** + * Sort all of the chunks in O(n log n) time with a maximum of O(n) swaps + */ +static void do_the_sort(Chunk **chunks, size_t num_chunks); + + +#define MARK_CHANGE() mark_change(__func__, __LINE__) + + +static void mark_change(const char *func, size_t line) +{ + LOG_FUNC_ENTRY(); + cpd.changes++; + + if (cpd.pass_count == 0) + { + LOG_FMT(LCHANGE, "%s(%d): change %d on %s:%zu\n", + __func__, __LINE__, cpd.changes, func, line); + } +} + + +static void prepare_categories() +{ + for (int i = 0; i < kIncludeCategoriesCount; ++i) + { + const auto &cat_pattern = (*include_category_options[i])(); + + if (!cat_pattern.empty()) + { + include_categories[i] = new include_category(cat_pattern); + } + else + { + include_categories[i] = nullptr; + } + } +} + + +static void cleanup_categories() +{ + chunk_priority_cache.clear(); + filename_without_ext_cache.clear(); + + for (auto &include_category : include_categories) + { + if (include_category == nullptr) + { + continue; + } + delete include_category; + include_category = NULL; + } +} + + +static int get_chunk_priority(Chunk *pc) +{ + if (chunk_priority_cache.count(pc) > 0) + { + return(chunk_priority_cache[pc]); + } + int category = kIncludeCategoriesCount; + + for (int i = 0; i < kIncludeCategoriesCount; i++) + { + if (include_categories[i] != nullptr) + { + if (std::regex_match(pc->Text(), include_categories[i]->regex)) + { + category = i; + break; + } + } + } + + chunk_priority_cache[pc] = category; + return(category); +} + + +/** + * Returns true if the text contains filename without extension. + */ +static bool text_contains_filename_without_ext(const char *text) +{ + if (filename_without_ext_cache.count(text) > 0) + { + return(filename_without_ext_cache[text]); + } + std::string filepath = cpd.filename; + size_t slash_idx = filepath.find_last_of("/\\"); + std::string filename_without_ext = filepath; + + if ( slash_idx != std::string::npos + && slash_idx < (filepath.size() - 1)) + { + std::string filename = filepath.substr(slash_idx + 1); + size_t dot_idx = filename.find_last_of('.'); + filename_without_ext = filename.substr(0, dot_idx); + } + const std::regex special_chars = std::regex(R"([-[\]{}()*+?.,\^$|#\s])"); + const std::string sanitized_filename = std::regex_replace(filename_without_ext, special_chars, R"(\$&)"); + const std::regex filename_pattern = std::regex("\\S?" + sanitized_filename + "\\b.*"); + + filename_without_ext_cache[text] = std::regex_match(text, filename_pattern); + return(filename_without_ext_cache[text]); +} + + +/** + * Get chunk text without the extension. + */ +static UncText get_text_without_ext(const UncText &chunk_text) +{ + UncText result = chunk_text; + int idx = result.rfind(".", result.size() - 1); + + if (idx == -1) + { + return(result); + } + result.erase(idx, result.size() - idx); + return(result); +} + + +/** + * Returns true if UncText has "." which implies extension. + */ +static bool has_dot(const UncText &chunk_text) +{ + int idx = chunk_text.rfind(".", chunk_text.size() - 1); + + return(idx != -1); +} + + +/** + * Returns chunk string required for sorting. + */ +static UncText chunk_sort_str(Chunk *pc) +{ + if (pc->GetParentType() == CT_PP_INCLUDE) + { + return(UncText{ pc->GetStr(), 0, pc->Len() - 1 }); + } + return(pc->GetStr()); +} + + +//! Compare two chunks +static int compare_chunks(Chunk *pc1, Chunk *pc2, bool tcare) +{ + LOG_FUNC_ENTRY(); + LOG_FMT(LSORT, "%s(%d): @begin pc1->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol()); + LOG_FMT(LSORT, "%s(%d): @begin pc2->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol()); + + if (pc1 == pc2) // same chunk is always identical thus return 0 differences + { + return(0); + } + + while ( pc1->IsNotNullChunk() + && pc2->IsNotNullChunk()) + { + auto const &s1_ext = chunk_sort_str(pc1); + auto const &s2_ext = chunk_sort_str(pc2); + + log_rule_B("mod_sort_incl_import_ignore_extension"); + auto const &s1 = (options::mod_sort_incl_import_ignore_extension()) ? get_text_without_ext(s1_ext) : s1_ext; + auto const &s2 = (options::mod_sort_incl_import_ignore_extension()) ? get_text_without_ext(s2_ext) : s2_ext; + log_rule_B("mod_sort_incl_import_prioritize_filename"); + + if (options::mod_sort_incl_import_prioritize_filename()) + { + bool s1_contains_filename = text_contains_filename_without_ext(s1.c_str()); + bool s2_contains_filename = text_contains_filename_without_ext(s2.c_str()); + + if ( s1_contains_filename + && !s2_contains_filename) + { + return(-1); + } + else if ( !s1_contains_filename + && s2_contains_filename) + { + return(1); + } + } + + if (options::mod_sort_incl_import_prioritize_extensionless()) + { + log_rule_B("mod_sort_incl_import_prioritize_extensionless"); + const bool s1_has_dot = has_dot(s1_ext); + const bool s2_has_dot = has_dot(s2_ext); + + if ( s1_has_dot + && !s2_has_dot) + { + return(1); + } + else if ( !s1_has_dot + && s2_has_dot) + { + return(-1); + } + } + + if (options::mod_sort_incl_import_prioritize_angle_over_quotes()) + { + log_rule_B("mod_sort_incl_import_prioritize_angle_over_quotes"); + + if ( s1.startswith("<") + && s2.startswith("\"")) + { + return(-1); + } + else if ( s1.startswith("\"") + && s2.startswith("<")) + { + return(1); + } + } + int ppc1 = get_chunk_priority(pc1); + int ppc2 = get_chunk_priority(pc2); + + if (ppc1 != ppc2) + { + return(ppc1 - ppc2); + } + LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol()); + LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol()); + + int ret_val = UncText::compare(s1, s2, std::min(s1.size(), s2.size()), tcare); + LOG_FMT(LSORT, "%s(%d): ret_val is %d\n", + __func__, __LINE__, ret_val); + + if (ret_val != 0) + { + return(ret_val); + } + + if (pc1->Len() != pc2->Len()) + { + return(pc1->Len() - pc2->Len()); + } + // Same word, same length. Step to the next chunk. + pc1 = pc1->GetNext(); + LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol()); + + if (pc1->Is(CT_MEMBER)) + { + pc1 = pc1->GetNext(); + LOG_FMT(LSORT, "%s(%d): text is %s, pc1->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol()); + } + pc2 = pc2->GetNext(); + LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol()); + + if (pc2->Is(CT_MEMBER)) + { + pc2 = pc2->GetNext(); + LOG_FMT(LSORT, "%s(%d): text is %s, pc2->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol()); + } + LOG_FMT(LSORT, "%s(%d): >>>text is %s, pc1->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc1->Text(), pc1->Len(), pc1->GetOrigLine(), pc1->GetOrigCol()); + LOG_FMT(LSORT, "%s(%d): >>>text is %s, pc2->len is %zu, line is %zu, column is %zu\n", + __func__, __LINE__, pc2->Text(), pc2->Len(), pc2->GetOrigLine(), pc2->GetOrigCol()); + + // If we hit a newline or null chuck, we are done + if ( pc1->IsNullChunk() + || pc1->IsNewline() + || pc2->IsNullChunk() + || pc2->IsNewline()) + { + break; + } + } + + if ( pc1->IsNullChunk() + || !pc2->IsNewline()) + { + return(-1); + } + + if (!pc1->IsNewline()) + { + return(1); + } + return(0); +} // compare_chunks + + +/** + * Sort all of the chunks in O(n log n) time with a maximum of O(n) swaps + */ +static void do_the_sort(Chunk **chunks, size_t num_chunks) +{ + LOG_FUNC_ENTRY(); + + LOG_FMT(LSORT, "%s(%d): %zu chunks:", + __func__, __LINE__, num_chunks); + + for (size_t idx = 0; idx < num_chunks; idx++) + { + LOG_FMT(LSORT, " [%s]", chunks[idx]->Text()); + } + + LOG_FMT(LSORT, "\n"); + + log_rule_B("mod_sort_case_sensitive"); + bool take_care = options::mod_sort_case_sensitive(); // Issue #2091 + + // Sort an array of the chunk positions in order to minimize the number of swaps + std::vector<size_t> chunk_positions(num_chunks); + + for (size_t idx = 0; idx < num_chunks; idx++) + { + chunk_positions[idx] = idx; + } + + sort(chunk_positions.begin(), chunk_positions.end(), [chunks, take_care](const size_t &l, const size_t &r) { + return(compare_chunks(chunks[l], chunks[r], take_care) < 0); + }); + + // Swap the chunk positions + for (size_t idx = 0; idx < num_chunks; idx++) + { + if (chunk_positions[idx] != idx) + { + log_rule_B("mod_sort_incl_import_grouping_enabled"); + + const size_t from = chunk_positions[idx]; + const size_t to = chunk_positions[from]; + chunks[from]->SwapLines(chunks[to]); + + Chunk *pc = chunks[from]; + chunks[from] = chunks[to]; + chunks[to] = pc; + + chunk_positions[from] = from; + chunk_positions[idx] = to; + idx--; + } + } +} // do_the_sort + + +/** + * Remove blank lines between chunks. + */ +static void remove_blank_lines_between_imports(Chunk **chunks, size_t num_chunks) +{ + LOG_FUNC_ENTRY(); + + if (num_chunks < 2) + { + return; + } + + for (size_t idx = 0; idx < (num_chunks - 1); idx++) + { + Chunk *chunk1 = chunks[idx]->GetNextNl(); + chunk1->SetNlCount(1); + MARK_CHANGE(); + } +} + + +/** + * Delete chunks on line having chunk. + */ +static void delete_chunks_on_line_having_chunk(Chunk *chunk) +{ + LOG_FUNC_ENTRY(); + + Chunk *pc = chunk->GetFirstChunkOnLine(); + + while ( pc->IsNotNullChunk() + && !pc->IsComment()) + { + Chunk *next_pc = pc->GetNext(); + LOG_FMT(LCHUNK, "%s(%d): Removed '%s' on orig line %zu\n", + __func__, __LINE__, pc->Text(), pc->GetOrigLine()); + + if (pc->IsNewline()) + { + Chunk::Delete(pc); + break; + } + else + { + Chunk::Delete(pc); + } + pc = next_pc; + } +} + + +/** + * Dedupe import/include directives. + */ +static void dedupe_imports(Chunk **chunks, size_t num_chunks) +{ + LOG_FUNC_ENTRY(); + log_rule_B("mod_sort_case_sensitive"); + + for (size_t idx = 1; idx < num_chunks; idx++) + { + auto const &s1 = chunk_sort_str(chunks[idx - 1]); + auto const &s2 = chunk_sort_str(chunks[idx]); + + if (s1.size() != s2.size()) + { + continue; + } + int ret_val = UncText::compare(s1, s2, std::min(s1.size(), s2.size()), options::mod_sort_case_sensitive()); + + if (ret_val == 0) + { + delete_chunks_on_line_having_chunk(chunks[idx - 1]); + } + } +} + + +/** + * Add blank line before the chunk. + */ +static void blankline_add_before(Chunk *pc) +{ + Chunk *newline = newline_add_before(pc->GetFirstChunkOnLine()); + + if (newline->GetNlCount() < 2) + { + double_newline(newline); + } +} + + +/** + * Group imports. + */ +static void group_imports_by_adding_newlines(Chunk **chunks, size_t num_chunks) +{ + LOG_FUNC_ENTRY(); + + // Group imports based on first character, typically quote or angle. + int c_idx = -1; + int c_idx_last = -1; + + for (size_t idx = 0; idx < num_chunks; idx++) + { + if (chunks[idx]->GetStr().size() > 0) + { + c_idx = chunks[idx]->GetStr().at(0); + } + else + { + c_idx = -1; + } + + if ( c_idx_last != c_idx + && idx > 0) + { + blankline_add_before(chunks[idx]); + } + c_idx_last = c_idx; + } + + // Group imports based on having extension. + bool chunk_has_dot = false; + bool chunk_last_has_dot = false; + + for (size_t idx = 0; idx < num_chunks; idx++) + { + chunk_has_dot = has_dot(chunks[idx]->GetStr()); + + if ( chunk_last_has_dot != chunk_has_dot + && idx > 0) + { + blankline_add_before(chunks[idx]); + } + chunk_last_has_dot = chunk_has_dot; + } + + // Group imports based on priority defined by config. + int chunk_pri = -1; + int chunk_pri_last = -1; + + for (size_t idx = 0; idx < num_chunks; idx++) + { + chunk_pri = get_chunk_priority(chunks[idx]); + + if ( chunk_pri_last != chunk_pri + && idx > 0) + { + blankline_add_before(chunks[idx]); + } + chunk_pri_last = chunk_pri; + } + + // Group imports that contain filename pattern. + bool chunk_has_filename = false; + bool last_chunk_has_filename = false; + + for (size_t idx = 0; idx < num_chunks; idx++) + { + auto const &chunk_text = chunk_sort_str(chunks[idx]); + chunk_has_filename = text_contains_filename_without_ext(chunk_text.c_str()); + + if ( !chunk_has_filename + && last_chunk_has_filename) + { + blankline_add_before(chunks[idx]); + } + last_chunk_has_filename = chunk_has_filename; + } +} // group_imports_by_adding_newlines + + +void sort_imports() +{ + LOG_FUNC_ENTRY(); + const int max_number_to_sort = 1024; + const int max_lines_to_check_for_sort_after_include = 128; + const int max_gap_threshold_between_include_to_sort = 32; + + Chunk *chunks[max_number_to_sort]; + size_t num_chunks = 0; + Chunk *p_last = Chunk::NullChunkPtr; + Chunk *p_imp = Chunk::NullChunkPtr; + Chunk *p_imp_last = Chunk::NullChunkPtr; + + prepare_categories(); + + Chunk *pc = Chunk::GetHead(); + + log_rule_B("mod_sort_incl_import_grouping_enabled"); + + while (pc->IsNotNullChunk()) + { + // Simple optimization to limit the sorting. Any MAX_LINES_TO_CHECK_AFTER_INCLUDE lines after last + // import is seen are ignore from sorting. + if ( options::mod_sort_incl_import_grouping_enabled() + && p_imp_last->IsNotNullChunk() + && (pc->GetOrigLine() - p_imp_last->GetOrigLine()) > max_lines_to_check_for_sort_after_include) + { + break; + } + Chunk *next = pc->GetNext(); + + if (pc->IsNewline()) + { + bool did_import = false; + + if ( p_imp->IsNotNullChunk() + && ( p_last->Is(CT_SEMICOLON) + || p_imp->TestFlags(PCF_IN_PREPROC))) + { + if (num_chunks < max_number_to_sort) + { + LOG_FMT(LSORT, "%s(%d): p_imp is %s\n", + __func__, __LINE__, p_imp->Text()); + chunks[num_chunks++] = p_imp; + } + else + { + fprintf(stderr, "Number of 'import' to be sorted is too big for the current value %d.\n", max_number_to_sort); + fprintf(stderr, "Please make a report.\n"); + log_flush(true); + exit(EX_SOFTWARE); + } + did_import = true; + } + log_rule_B("mod_sort_incl_import_grouping_enabled"); + + if ( !did_import + || ( !options::mod_sort_incl_import_grouping_enabled() + && pc->GetNlCount() > 1) + || ( options::mod_sort_incl_import_grouping_enabled() + && p_imp_last->IsNotNullChunk() + && (pc->GetOrigLine() - p_imp_last->GetOrigLine()) > max_gap_threshold_between_include_to_sort) + || next->IsNullChunk()) + { + if (num_chunks > 1) + { + log_rule_B("mod_sort_incl_import_grouping_enabled"); + + if (options::mod_sort_incl_import_grouping_enabled()) + { + remove_blank_lines_between_imports(chunks, num_chunks); + do_the_sort(chunks, num_chunks); + group_imports_by_adding_newlines(chunks, num_chunks); + dedupe_imports(chunks, num_chunks); + } + else + { + do_the_sort(chunks, num_chunks); + } + } + num_chunks = 0; + } + p_imp_last = p_imp; + p_imp = Chunk::NullChunkPtr; + p_last = Chunk::NullChunkPtr; + } + else if (pc->Is(CT_IMPORT)) + { + log_rule_B("mod_sort_import"); + + if (options::mod_sort_import()) + { + p_imp = pc->GetNext(); + } + } + else if (pc->Is(CT_USING)) + { + log_rule_B("mod_sort_using"); + + if (options::mod_sort_using()) + { + p_imp = pc->GetNext(); + } + } + else if (pc->Is(CT_PP_INCLUDE)) + { + log_rule_B("mod_sort_include"); + + if (options::mod_sort_include()) + { + p_imp = pc->GetNext(); + p_last = pc; + } + } + else if (!pc->IsComment()) + { + p_last = pc; + } + pc = next; + } + cleanup_categories(); +} // sort_imports |