/*
* Software Index, Copyright 2010, Software Index Project Team
* Link: http://swi.sourceforge.net
*
* This file is part of Software Index Tool.
*
* Software Index is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* Software Index is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Software Index. If not, see .
*/
#include
#include
#include
static const char SWI_EOF = '\x7'; // ASCII code 7 is used as EOF marker
static const int SWI_MAX_TEXT_LENGTH = 10000;
/**
* Container for sortable stuff
* Strings are not moved, just bookmarks are sorted
*/
struct SwiBookmark
{
int m_possition;
const char* m_text;
// sort algorithm requires to define this operator
bool operator<(const SwiBookmark& another) const
{
return strcmp(another.m_text, m_text) < 0;
}
};
/**
* Container for the file/function record
*/
struct SwiRecord
{
const char* functionName;
const char* fileName;
int end;
};
/**
* Returns line number in intial record
* from the begging till the symbol by offset
*/
static int swiLineNumberGet(const char* text,
int offset,
SwiRecord files[],
int fileIndex)
{
const int start = (fileIndex == 0) ? 0 : files[fileIndex - 1].end;
int line = 1;
for (int i = start; i < offset; ++i)
{
if (text[i] == '\n')
{
++line;
}
}
return line;
}
/**
* Adds a character to the processed string,
* initializes new bookmark, and returns it
*/
static SwiBookmark swiProcessedCharacterAdd(char c, int originalPossition, char* processedText)
{
static int processedPossition;
SwiBookmark bookmark;
bookmark.m_possition = originalPossition;
bookmark.m_text = processedText + processedPossition;
processedText[processedPossition++] = c;
return bookmark;
}
/**
* Prints to stdout the found items
* The format of stdout is strictly defined by the caller: SWI/PROCESSOR
* Change it synchroniously with SWI/PROCESSOR code
*/
static void swiResultPrint(SwiBookmark bookmark,
SwiRecord files[],
const char* originalText,
int numOfDuplicatedSymbols)
{
int recordIndex = 0;
while (files[recordIndex].end <= bookmark.m_possition)
{
++recordIndex;
}
printf("duplication: file: '%s' function: '%s' possition: '%d' size: '%d' \n",
files[recordIndex].fileName,
files[recordIndex].functionName,
swiLineNumberGet(originalText, bookmark.m_possition, files, recordIndex),
numOfDuplicatedSymbols);
}
/**
* Returns number of symbols which are equal from the beggining
*/
static int swiStringsCompare(SwiBookmark bookmarkFirst,
SwiBookmark bookmarkSecond)
{
int pos = 0;
for (; bookmarkFirst.m_text[pos] == bookmarkSecond.m_text[pos]; ++pos)
{
if (bookmarkFirst.m_text[pos] == SWI_EOF)
{
break;
}
}
return pos;
}
/**
* Checks if two strings are equal in the defined range of symbol's indexes:
* from the first symbol till upperLimit
*/
static bool swiStringsEqualCheck(SwiBookmark bookmarkFirst,
SwiBookmark bookmarkSecond,
int upperLimit,
const char* endPos)
{
if (bookmarkFirst.m_text + upperLimit >= endPos ||
bookmarkSecond.m_text + upperLimit >= endPos)
{
return false;
}
for (int i = upperLimit; i >= 0; --i)
{
if (bookmarkFirst.m_text[i] != bookmarkSecond.m_text[i])
{
return false;
}
}
return true;
}
/**
* Scans the original text into a processed text, which is returned.
* The processed string does not include the ignorable symbols
* The list of ignorable symbols is configured (usualy spaces)
* Bookmarks are installed for every newline
* if it begins from the 'regular' symbol
* The list of non-regular symbols is configred
*/
static const char* swiOriginalStringProcess(std::string originalText,
std::vector& bookmarks,
const char * ignorableSymbols,
const char * nonregularSymbols)
{
char* const processedText = new char[originalText.length()];
bool isNewlineDetected = true;
for (unsigned int i = 0; i < originalText.length(); ++i)
{
const char c = originalText[i];
bool isIgnorable = false;
bool isRegular = true;
// Processed text should not include the end-null symbol
if (c == '\0')
{
continue;
}
if (c == SWI_EOF)
{
swiProcessedCharacterAdd(c, i, processedText);
continue;
}
for (int j = 0; ignorableSymbols[j] != '\0'; ++j)
{
if (ignorableSymbols[j] == c)
{
isIgnorable = true;
}
}
for (int j = 0; nonregularSymbols[j] != '\0'; ++j)
{
if (nonregularSymbols[j] == c)
{
isRegular = false;
}
}
if (!isIgnorable)
{
if (isNewlineDetected && isRegular)
{
bookmarks.push_back(swiProcessedCharacterAdd(c, i, processedText));
}
else
{
swiProcessedCharacterAdd(c, i, processedText);
}
isNewlineDetected = false;
}
if (c == '\n')
{
isNewlineDetected = true;
}
}
swiProcessedCharacterAdd('\0', originalText.length(), processedText);
return processedText;
}
/**
* Removes bookmarks which are in reported area already
* It helps to prevent the unexpected reporting of the duplicatedSymbolsCount markers more than once
*/
static void swiBookmarksShift(std::vector& bookmarks)
{
unsigned int indexFrom = 0;
for (unsigned int indexTo = 0; indexTo < bookmarks.size() - 1; ++indexTo)
{
if (bookmarks[indexTo].m_text == 0)
{
if (indexFrom <= indexTo)
{
indexFrom = indexTo + 1;
}
while (indexFrom < bookmarks.size() &&
bookmarks[indexFrom].m_text == 0)
{
++indexFrom;
}
if (indexFrom == bookmarks.size())
{
break;
}
bookmarks[indexTo]= bookmarks[indexFrom];
bookmarks[indexFrom].m_text = 0;
}
}
}
/**
* Programm entry point
*/
int main(int argc, char* argv[])
{
std::vector files(0);
std::string commonString = "";
int minLength = 100;
int proximityFactor = 100;
char * ignorabaleSymbols = new char[SWI_MAX_TEXT_LENGTH];
char * nonRegularSymbols = new char[SWI_MAX_TEXT_LENGTH];
strcpy(ignorabaleSymbols, " \t\n");
strcpy(nonRegularSymbols, "}");
while (1)
{
char * command = new char[SWI_MAX_TEXT_LENGTH];
scanf("%s", command);
if (strcmp(command, "start") == 0)
{
// Continue search algorithm
break;
}
if (strcmp(command, "exit") == 0)
{
// Breaking the search
exit(EXIT_SUCCESS);
}
if (strcmp(command, "init_length") == 0)
{
scanf("%d", &minLength);
}
if (strcmp(command, "init_proximity") == 0)
{
scanf("%d", &proximityFactor);
if (proximityFactor < 1 || proximityFactor > 100)
{
fprintf(stderr, "error: Proximity factor must be between 1 and 100 (inclusive)\n");
exit(EXIT_FAILURE);
}
}
if (strcmp(command, "init_ignorable") == 0)
{
// TODO: no ways to configure newline symbol
scanf("%s", ignorabaleSymbols);
}
if (strcmp(command, "init_nonregular") == 0)
{
// TODO: no ways to configure hewline symbol
scanf("%s", nonRegularSymbols);
}
if (strcmp(command, "init_file") == 0)
{
unsigned int fileLength = 0;
unsigned int functionLength = 0;
unsigned int textLength = 0;
scanf("%d", &fileLength);
scanf("%d", &functionLength);
scanf("%d", &textLength);
char * file = new char[fileLength + 1];
char * function = new char[functionLength + 1];
char * text = new char[textLength + 1 + 1]; // +1 for null pointer, +1 for EOF symbol
file[fileLength] = '\0';
function[functionLength] = '\0';
text[textLength] = SWI_EOF;
text[textLength + 1] = '\0';
fread(file, 1, 1, stdin); // Consume one empty symbol
fread(file, fileLength, 1, stdin);
fread(function, functionLength, 1, stdin);
fread(text, textLength, 1, stdin);
commonString += std::string(text);
SwiRecord record;
record.end = commonString.length();
record.functionName = function;
record.fileName = file;
files.push_back(record);
delete text;
}
delete command;
}
if (commonString.length() == 0)
{
fprintf(stderr, "error: No files defined or they are empty\n");
exit(EXIT_FAILURE);
}
std::vector bookmarks;
const char* processedText =
swiOriginalStringProcess(commonString, bookmarks, ignorabaleSymbols, nonRegularSymbols);
const char* processedEnd = processedText + strlen(processedText);
std::sort(bookmarks.begin(), bookmarks.end());
while(1)
{
int longestDuplication = 0;
int firstInstanceIndex = 0;
// Find the two bookmarks that have the longest common substring.
for (unsigned int bookmarkIndex = 0; bookmarkIndex < bookmarks.size() - 1; ++bookmarkIndex)
{
if (bookmarks[bookmarkIndex + 1].m_text == 0)
{
break;
}
if (swiStringsEqualCheck(bookmarks[bookmarkIndex], bookmarks[bookmarkIndex + 1],
longestDuplication, processedEnd))
{
const int duplicatedSymbolsCount = swiStringsCompare(bookmarks[bookmarkIndex],
bookmarks[bookmarkIndex + 1]);
if (duplicatedSymbolsCount > longestDuplication)
{
firstInstanceIndex = bookmarkIndex;
longestDuplication = duplicatedSymbolsCount;
}
}
}
if (longestDuplication < minLength)
{
// Do not process too short strings
// This is the exit from the while loop
break;
}
int numberOfInstances = 2;
int almostLongestDuplication = (longestDuplication * proximityFactor) / 100;
int initialIndexForScan = firstInstanceIndex;
// // Search for duplicated strings before the current pair.
for (int i = initialIndexForScan - 1; i >= 0; --i)
{
const int duplicatedSymbolsCount =
swiStringsCompare(bookmarks[initialIndexForScan], bookmarks[i]);
if (duplicatedSymbolsCount < almostLongestDuplication)
{
break;
}
numberOfInstances++;
if (longestDuplication > duplicatedSymbolsCount)
{
longestDuplication = duplicatedSymbolsCount;
}
firstInstanceIndex = i;
}
// Search for duplicated strings after the current pair.
for (unsigned int i = initialIndexForScan + 2; i < bookmarks.size(); ++i)
{
if (bookmarks[i].m_text == 0)
{
break;
}
const int duplicatedSymbolsCount = swiStringsCompare(bookmarks[initialIndexForScan],
bookmarks[i]);
if (duplicatedSymbolsCount < almostLongestDuplication)
{
break;
}
numberOfInstances++;
if (longestDuplication > duplicatedSymbolsCount)
{
longestDuplication = duplicatedSymbolsCount;
}
}
printf("info: group_start\n");
for (int i = 0; i < numberOfInstances; ++i)
{
swiResultPrint(bookmarks[firstInstanceIndex + i], &files[0],
commonString.c_str(), longestDuplication);
// Clear bookmarks that point out to the reported area.
const char* reportStart =
bookmarks[firstInstanceIndex + i].m_text;
for (unsigned int j = 0; j < bookmarks.size() - 1; ++j)
{
if (bookmarks[j].m_text >= reportStart &&
bookmarks[j].m_text < reportStart + longestDuplication)
{
bookmarks[j].m_text = 0;
}
}
}
printf("\n");
swiBookmarksShift(bookmarks);
}
delete [] processedText;
return 0;
}