diff --git a/AnnService/inc/IndexBuilder/Options.h b/AnnService/inc/IndexBuilder/Options.h index 7c939efae..f35d6099e 100644 --- a/AnnService/inc/IndexBuilder/Options.h +++ b/AnnService/inc/IndexBuilder/Options.h @@ -29,6 +29,8 @@ class BuilderOptions : public Helper::ArgumentsParser std::string m_vectorDelimiter; + bool m_append; + SPTAG::VectorValueType m_inputValueType; std::string m_inputFiles; diff --git a/AnnService/src/IndexBuilder/Options.cpp b/AnnService/src/IndexBuilder/Options.cpp index d0fcd0fd8..45c1a5f52 100644 --- a/AnnService/src/IndexBuilder/Options.cpp +++ b/AnnService/src/IndexBuilder/Options.cpp @@ -23,6 +23,7 @@ BuilderOptions::BuilderOptions() AddRequiredOption(m_outputFolder, "-o", "--outputfolder", "Output folder."); AddRequiredOption(m_indexAlgoType, "-a", "--algo", "Index Algorithm type."); AddOptionalOption(m_builderConfigFile, "-c", "--config", "Config file for builder."); + AddOptionalOption(m_append, "-p", "--append", "Append to existing index."); } diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index ba4de4612..3e6a54edd 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -55,7 +55,9 @@ int main(int argc, char* argv[]) indexBuilder->SetParameter(iter.first.c_str(), iter.second.c_str()); } - ErrorCode code; + std::shared_ptr p_vectorSet = nullptr; + std::shared_ptr p_metaSet = nullptr; + if (options->m_inputFiles.find("BIN:") == 0) { std::vector files = SPTAG::Helper::StrUtils::SplitString(options->m_inputFiles.substr(4), ","); std::ifstream inputStream(files[0], std::ifstream::binary); @@ -71,14 +73,12 @@ int main(int argc, char* argv[]) char* vecBuf = reinterpret_cast(vectorSet.Data()); inputStream.read(vecBuf, totalRecordVectorBytes); inputStream.close(); - std::shared_ptr p_vectorSet(new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); + + p_vectorSet.reset(new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); - std::shared_ptr p_metaSet = nullptr; if (files.size() >= 3) { p_metaSet.reset(new FileMetadataSet(files[1], files[2])); } - code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); - indexBuilder->SaveIndex(options->m_outputFolder); } else { auto vectorReader = IndexBuilder::VectorSetReader::CreateInstance(options); @@ -87,9 +87,21 @@ int main(int argc, char* argv[]) fprintf(stderr, "Failed to read input file.\n"); exit(1); } - code = indexBuilder->BuildIndex(vectorReader->GetVectorSet(), vectorReader->GetMetadataSet()); - indexBuilder->SaveIndex(options->m_outputFolder); + + p_vectorSet = vectorReader->GetVectorSet(); + p_metaSet = vectorReader->GetMetadataSet(); + } + + ErrorCode code; + std::shared_ptr vecIndex; + if (options->m_append && ErrorCode::Success == indexBuilder->LoadIndex(options->m_outputFolder, vecIndex) && nullptr != vecIndex) { + code = vecIndex->AddIndex(p_vectorSet, p_metaSet); + indexBuilder = vecIndex; + } + else { + code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet); } + indexBuilder->SaveIndex(options->m_outputFolder); if (ErrorCode::Success != code) { diff --git a/docs/GettingStart.md b/docs/GettingStart.md index 756aa70ed..1d2dae300 100644 --- a/docs/GettingStart.md +++ b/docs/GettingStart.md @@ -11,6 +11,7 @@ -o, --outputfolder Output folder, required. -a, --algo Index Algorithm type (e.g. BKT, KDT), required. + -p, --append Indicate whether the data should be appended to an existing index, if one exists. -t, --thread Thread Number, default is 32. --delimiter Vector delimiter, default is |. Index.= Set the algorithm parameter ArgName with value ArgValue.