Skip to content

Commit f9a42c7

Browse files
committed
src: Boyer-Moore array bounds safety
1 parent ab9ef37 commit f9a42c7

File tree

1 file changed

+30
-22
lines changed

1 file changed

+30
-22
lines changed

deps/nbytes/include/nbytes.h

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,11 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
548548
size_t start = start_;
549549

550550
int *bad_char_occurrence = bad_char_shift_table_;
551-
int *good_suffix_shift = good_suffix_shift_table_ - start_;
551+
552+
auto good_suffix_get = [&](size_t idx) -> int {
553+
if (idx < start || idx - start > kBMMaxShift) return 0;
554+
return good_suffix_shift_table_[idx - start];
555+
};
552556

553557
Char last_char = pattern_[pattern_length - 1];
554558
size_t index = start_index;
@@ -575,7 +579,7 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
575579
index +=
576580
pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char);
577581
} else {
578-
int gs_shift = good_suffix_shift[j + 1];
582+
int gs_shift = good_suffix_get(j + 1);
579583
int bc_occ = CharOccurrence(bad_char_occurrence, c);
580584
int shift = j - bc_occ;
581585
if (gs_shift > shift) {
@@ -591,22 +595,25 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
591595
template <typename Char>
592596
void StringSearch<Char>::PopulateBoyerMooreTable() {
593597
const size_t pattern_length = pattern_.length();
594-
// Only look at the last kBMMaxShift characters of pattern (from start_
595-
// to pattern_length).
596598
const size_t start = start_;
597599
const size_t length = pattern_length - start;
598600

599-
// Biased tables so that we can use pattern indices as table indices,
600-
// even if we only cover the part of the pattern from offset start.
601-
int *shift_table = good_suffix_shift_table_ - start_;
602-
int *suffix_table = suffix_table_ - start_;
601+
auto shift_get = [&](size_t idx) -> int& {
602+
if (idx < start) abort();
603+
return good_suffix_shift_table_[idx - start];
604+
};
605+
606+
auto suffix_get = [&](size_t idx) -> int& {
607+
if (idx < start) abort();
608+
return suffix_table_[idx - start];
609+
};
603610

604611
// Initialize table.
605612
for (size_t i = start; i < pattern_length; i++) {
606-
shift_table[i] = length;
613+
shift_get(i) = length;
607614
}
608-
shift_table[pattern_length] = 1;
609-
suffix_table[pattern_length] = pattern_length + 1;
615+
shift_get(pattern_length) = 1;
616+
suffix_get(pattern_length) = pattern_length + 1;
610617

611618
if (pattern_length <= start) {
612619
return;
@@ -620,34 +627,35 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
620627
while (i > start) {
621628
Char c = pattern_[i - 1];
622629
while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
623-
if (static_cast<size_t>(shift_table[suffix]) == length) {
624-
shift_table[suffix] = suffix - i;
630+
if (static_cast<size_t>(shift_get(suffix)) == length) {
631+
shift_get(suffix) = suffix - i;
625632
}
626-
suffix = suffix_table[suffix];
633+
suffix = suffix_get(suffix);
627634
}
628-
suffix_table[--i] = --suffix;
635+
suffix_get(--i) = --suffix;
629636
if (suffix == pattern_length) {
630637
// No suffix to extend, so we check against last_char only.
631638
while ((i > start) && (pattern_[i - 1] != last_char)) {
632-
if (static_cast<size_t>(shift_table[pattern_length]) == length) {
633-
shift_table[pattern_length] = pattern_length - i;
639+
if (static_cast<size_t>(shift_get(pattern_length)) == length) {
640+
shift_get(pattern_length) = pattern_length - i;
634641
}
635-
suffix_table[--i] = pattern_length;
642+
suffix_get(--i) = pattern_length;
636643
}
637644
if (i > start) {
638-
suffix_table[--i] = --suffix;
645+
suffix_get(--i) = --suffix;
639646
}
640647
}
641648
}
642649
}
650+
643651
// Build shift table using suffixes.
644652
if (suffix < pattern_length) {
645653
for (size_t i = start; i <= pattern_length; i++) {
646-
if (static_cast<size_t>(shift_table[i]) == length) {
647-
shift_table[i] = suffix - start;
654+
if (static_cast<size_t>(shift_get(i)) == length) {
655+
shift_get(i) = suffix - start;
648656
}
649657
if (i == suffix) {
650-
suffix = suffix_table[suffix];
658+
suffix = suffix_get(suffix);
651659
}
652660
}
653661
}

0 commit comments

Comments
 (0)