From 75f2759801ae2f3d2324c1ac67a40ae9b95d3579 Mon Sep 17 00:00:00 2001 From: Matthew Haugen Date: Thu, 30 May 2019 11:12:56 -0700 Subject: [PATCH 1/4] Add new --append option to IndexBuilder to support adding new items to an existing index --- AnnService/inc/IndexBuilder/Options.h | 2 ++ AnnService/src/IndexBuilder/Options.cpp | 3 ++- AnnService/src/IndexBuilder/main.cpp | 26 ++++++++++++++++++------- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/AnnService/inc/IndexBuilder/Options.h b/AnnService/inc/IndexBuilder/Options.h index 7c939efae..c724419f0 100644 --- a/AnnService/inc/IndexBuilder/Options.h +++ b/AnnService/inc/IndexBuilder/Options.h @@ -29,6 +29,8 @@ class BuilderOptions : public Helper::ArgumentsParser std::string m_vectorDelimiter; + bool m_append; + SPTAG::VectorValueType m_inputValueType; std::string m_inputFiles; diff --git a/AnnService/src/IndexBuilder/Options.cpp b/AnnService/src/IndexBuilder/Options.cpp index d0fcd0fd8..288438dc2 100644 --- a/AnnService/src/IndexBuilder/Options.cpp +++ b/AnnService/src/IndexBuilder/Options.cpp @@ -22,7 +22,8 @@ BuilderOptions::BuilderOptions() AddRequiredOption(m_inputFiles, "-i", "--input", "Input raw data."); AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder."); AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type."); - AddOptionalOption(m_builderConfigFile, "-c", "--config", "Config file for builder."); + AddOptionalOption(m_builderConfigFile, "-c", "--config", "Config file for builder."); + AddOptionalOption(m_append, "-p", "--append", "Append to existing index."); } diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index ba4de4612..3e23e4758 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -55,7 +55,9 @@ int main(int argc, char* argv[]) indexBuilder->SetParameter(iter.first.c_str(), iter.second.c_str()); } - ErrorCode code; + std::shared_ptr p_vectorSet = nullptr; + 
std::shared_ptr p_metaSet = nullptr; + if (options->m_inputFiles.find("BIN:") == 0) { std::vector files = SPTAG::Helper::StrUtils::SplitString(options->m_inputFiles.substr(4), ","); std::ifstream inputStream(files[0], std::ifstream::binary); @@ -71,14 +73,12 @@ int main(int argc, char* argv[]) char* vecBuf = reinterpret_cast(vectorSet.Data()); inputStream.read(vecBuf, totalRecordVectorBytes); inputStream.close(); - std::shared_ptr p_vectorSet(new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); + + p_vectorSet = std::make_shared(*new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); - std::shared_ptr p_metaSet = nullptr; if (files.size() >= 3) { p_metaSet.reset(new FileMetadataSet(files[1], files[2])); } - code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); - indexBuilder->SaveIndex(options->m_outputFolder); } else { auto vectorReader = IndexBuilder::VectorSetReader::CreateInstance(options); @@ -87,10 +87,22 @@ int main(int argc, char* argv[]) fprintf(stderr, "Failed to read input file.\n"); exit(1); } - code = indexBuilder->BuildIndex(vectorReader->GetVectorSet(), vectorReader->GetMetadataSet()); - indexBuilder->SaveIndex(options->m_outputFolder); + + p_vectorSet = vectorReader->GetVectorSet(); + p_metaSet = vectorReader->GetMetadataSet(); } + ErrorCode code; + std::shared_ptr vecIndex; + if (options->m_append && ErrorCode::Success == indexBuilder->LoadIndex(options->m_outputFolder, vecIndex) && nullptr != vecIndex) { + code = vecIndex->AddIndex(p_vectorSet, p_metaSet); + indexBuilder = vecIndex; + } + else { + code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); + } + indexBuilder->SaveIndex(options->m_outputFolder); + if (ErrorCode::Success != code) { fprintf(stderr, "Failed to build index.\n"); From 39e53e77be336fafa0a04cb81630ade0404e90d2 Mon Sep 17 00:00:00 2001 From: Matthew Haugen Date: Thu, 30 May 2019 11:16:10 -0700 Subject: [PATCH 2/4] Correct tabs to spaces --- AnnService/inc/IndexBuilder/Options.h | 2 +- 
AnnService/src/IndexBuilder/Options.cpp | 4 ++-- AnnService/src/IndexBuilder/main.cpp | 28 ++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/AnnService/inc/IndexBuilder/Options.h b/AnnService/inc/IndexBuilder/Options.h index c724419f0..f35d6099e 100644 --- a/AnnService/inc/IndexBuilder/Options.h +++ b/AnnService/inc/IndexBuilder/Options.h @@ -29,7 +29,7 @@ class BuilderOptions : public Helper::ArgumentsParser std::string m_vectorDelimiter; - bool m_append; + bool m_append; SPTAG::VectorValueType m_inputValueType; diff --git a/AnnService/src/IndexBuilder/Options.cpp b/AnnService/src/IndexBuilder/Options.cpp index 288438dc2..45c1a5f52 100644 --- a/AnnService/src/IndexBuilder/Options.cpp +++ b/AnnService/src/IndexBuilder/Options.cpp @@ -22,8 +22,8 @@ BuilderOptions::BuilderOptions() AddRequiredOption(m_inputFiles, "-i", "--input", "Input raw data."); AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder."); AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type."); - AddOptionalOption(m_builderConfigFile, "-c", "--config", "Config file for builder."); - AddOptionalOption(m_append, "-p", "--append", "Append to existing index."); + AddOptionalOption(m_builderConfigFile, "-c", "--config", "Config file for builder."); + AddOptionalOption(m_append, "-p", "--append", "Append to existing index."); } diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index 3e23e4758..7fbd4f07c 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -55,8 +55,8 @@ int main(int argc, char* argv[]) indexBuilder->SetParameter(iter.first.c_str(), iter.second.c_str()); } - std::shared_ptr p_vectorSet = nullptr; - std::shared_ptr p_metaSet = nullptr; + std::shared_ptr p_vectorSet = nullptr; + std::shared_ptr p_metaSet = nullptr; if (options->m_inputFiles.find("BIN:") == 0) { std::vector files = 
SPTAG::Helper::StrUtils::SplitString(options->m_inputFiles.substr(4), ","); @@ -88,20 +88,20 @@ int main(int argc, char* argv[]) exit(1); } - p_vectorSet = vectorReader->GetVectorSet(); - p_metaSet = vectorReader->GetMetadataSet(); + p_vectorSet = vectorReader->GetVectorSet(); + p_metaSet = vectorReader->GetMetadataSet(); } - ErrorCode code; - std::shared_ptr vecIndex; - if (options->m_append && ErrorCode::Success == indexBuilder->LoadIndex(options->m_outputFolder, vecIndex) && nullptr != vecIndex) { - code = vecIndex->AddIndex(p_vectorSet, p_metaSet); - indexBuilder = vecIndex; - } - else { - code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); - } - indexBuilder->SaveIndex(options->m_outputFolder); + ErrorCode code; + std::shared_ptr vecIndex; + if (options->m_append && ErrorCode::Success == indexBuilder->LoadIndex(options->m_outputFolder, vecIndex) && nullptr != vecIndex) { + code = vecIndex->AddIndex(p_vectorSet, p_metaSet); + indexBuilder = vecIndex; + } + else { + code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); + } + indexBuilder->SaveIndex(options->m_outputFolder); if (ErrorCode::Success != code) { From 10ec6104c7125a35e6f3e94f7c333aae5d6f88d4 Mon Sep 17 00:00:00 2001 From: Matthew Haugen Date: Thu, 30 May 2019 11:25:16 -0700 Subject: [PATCH 3/4] Update GettingStart.md --- docs/GettingStart.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/GettingStart.md b/docs/GettingStart.md index 756aa70ed..1d2dae300 100644 --- a/docs/GettingStart.md +++ b/docs/GettingStart.md @@ -11,6 +11,7 @@ -o, --outputfolder Output folder, required. -a, --algo Index Algorithm type (e.g. BKT, KDT), required. + -p, --append Indicate whether the data should be appended to an existing index, if one exists. -t, --thread Thread Number, default is 32. --delimiter Vector delimiter, default is |. Index.<ArgName>=<ArgValue> Set the algorithm parameter ArgName with value ArgValue.
From a5610ac3f978c00f516c099fc98d050172cfb8c9 Mon Sep 17 00:00:00 2001 From: Matthew Haugen Date: Thu, 30 May 2019 11:32:31 -0700 Subject: [PATCH 4/4] Fix shared pointer assignment --- AnnService/src/IndexBuilder/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index 7fbd4f07c..3e6a54edd 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) inputStream.read(vecBuf, totalRecordVectorBytes); inputStream.close(); - p_vectorSet = std::make_shared(*new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); + p_vectorSet.reset(new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); if (files.size() >= 3) { p_metaSet.reset(new FileMetadataSet(files[1], files[2]));