[FFmpeg-devel] [PATCH v3 2/2] bloom: optimize multiple pathspec items in revision traversal
Lidong Yan
yldhome2d2 at gmail.com
Sat Jun 28 07:19:21 EEST 2025
To enable optimize multiple pathspec items in revision traversal,
return 0 if all pathspec item is literal in forbid_bloom_filters().
Add code to initialize and check each pathspec item's bloom_keyvec.
Add new function release_revisions_bloom_keyvecs() to free all bloom
keyvec owned by rev_info.
Add new test cases in t/t4216-log-bloom.sh to ensure
- consistent results between the optimization for multiple pathspec
items using bloom filter and the case without bloom filter
optimization.
- does not use bloom filter if any pathspec item is not literal.
Signed-off-by: Lidong Yan <502024330056 at smail.nju.edu.cn>
---
revision.c | 126 ++++++++++++++++++++++++-------------------
t/t4216-log-bloom.sh | 23 ++++----
2 files changed, 85 insertions(+), 64 deletions(-)
diff --git a/revision.c b/revision.c
index 3aa544c137..8d73395f26 100644
--- a/revision.c
+++ b/revision.c
@@ -675,16 +675,17 @@ static int forbid_bloom_filters(struct pathspec *spec)
{
if (spec->has_wildcard)
return 1;
- if (spec->nr > 1)
- return 1;
if (spec->magic & ~PATHSPEC_LITERAL)
return 1;
- if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
- return 1;
+ for (size_t nr = 0; nr < spec->nr; nr++)
+ if (spec->items[nr].magic & ~PATHSPEC_LITERAL)
+ return 1;
return 0;
}
+static void release_revisions_bloom_keyvecs(struct rev_info *revs);
+
static void prepare_to_use_bloom_filter(struct rev_info *revs)
{
struct pathspec_item *pi;
@@ -692,7 +693,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
char *path_alloc = NULL;
const char *path, *p;
size_t len;
- int path_component_nr = 1;
+ int path_component_nr;
if (!revs->commits)
return;
@@ -709,50 +710,53 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
if (!revs->pruning.pathspec.nr)
return;
- pi = &revs->pruning.pathspec.items[0];
-
- /* remove single trailing slash from path, if needed */
- if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
- path_alloc = xmemdupz(pi->match, pi->len - 1);
- path = path_alloc;
- } else
- path = pi->match;
-
- len = strlen(path);
- if (!len) {
- revs->bloom_filter_settings = NULL;
- free(path_alloc);
- return;
- }
-
- p = path;
- while (*p) {
- /*
- * At this point, the path is normalized to use Unix-style
- * path separators. This is required due to how the
- * changed-path Bloom filters store the paths.
- */
- if (*p == '/')
- path_component_nr++;
- p++;
- }
-
- revs->bloom_keyvecs_nr = 1;
- CALLOC_ARRAY(revs->bloom_keyvecs, 1);
- bloom_keyvec = create_bloom_keyvec(path_component_nr);
- revs->bloom_keyvecs[0] = bloom_keyvec;
+ revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
+ CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);
+ for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
+ pi = &revs->pruning.pathspec.items[i];
+ path_component_nr = 1;
+
+ /* remove single trailing slash from path, if needed */
+ if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
+ path_alloc = xmemdupz(pi->match, pi->len - 1);
+ path = path_alloc;
+ } else
+ path = pi->match;
+
+ len = strlen(path);
+ if (!len)
+ goto fail;
+
+ p = path;
+ while (*p) {
+ /*
+ * At this point, the path is normalized to use
+ * Unix-style path separators. This is required due to
+ * how the changed-path Bloom filters store the paths.
+ */
+ if (*p == '/')
+ path_component_nr++;
+ p++;
+ }
- fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
- revs->bloom_filter_settings);
- path_component_nr = 1;
+ bloom_keyvec = create_bloom_keyvec(path_component_nr);
+ revs->bloom_keyvecs[i] = bloom_keyvec;
+
+ fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
+ revs->bloom_filter_settings);
+ path_component_nr = 1;
+
+ p = path + len - 1;
+ while (p > path) {
+ if (*p == '/')
+ fill_bloom_keyvec_key(path, p - path,
+ bloom_keyvec,
+ path_component_nr++,
+ revs->bloom_filter_settings);
+ p--;
+ }
- p = path + len - 1;
- while (p > path) {
- if (*p == '/')
- fill_bloom_keyvec_key(path, p - path, bloom_keyvec,
- path_component_nr++,
- revs->bloom_filter_settings);
- p--;
+ FREE_AND_NULL(path_alloc);
}
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
@@ -760,14 +764,19 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
bloom_filter_atexit_registered = 1;
}
+ return;
+
+fail:
+ revs->bloom_filter_settings = NULL;
free(path_alloc);
+ release_revisions_bloom_keyvecs(revs);
}
static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
struct commit *commit)
{
struct bloom_filter *filter;
- int result = 1, j;
+ int result = 0;
if (!revs->repo->objects->commit_graph)
return -1;
@@ -782,8 +791,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
return -1;
}
- result = bloom_filter_contains_vec(filter, revs->bloom_keyvecs[0],
- revs->bloom_filter_settings);
+ for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
+ result = bloom_filter_contains_vec(filter,
+ revs->bloom_keyvecs[nr],
+ revs->bloom_filter_settings);
+ }
if (result)
count_bloom_filter_maybe++;
@@ -3201,6 +3213,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
static void release_revisions_topo_walk_info(struct topo_walk_info *info);
+static void release_revisions_bloom_keyvecs(struct rev_info *revs)
+{
+ for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
+ destroy_bloom_keyvec(revs->bloom_keyvecs[nr]);
+ FREE_AND_NULL(revs->bloom_keyvecs);
+ revs->bloom_keyvecs_nr = 0;
+}
+
static void free_void_commit_list(void *list)
{
free_commit_list(list);
@@ -3229,11 +3249,7 @@ void release_revisions(struct rev_info *revs)
clear_decoration(&revs->treesame, free);
line_log_free(revs);
oidset_clear(&revs->missing_commits);
-
- for (int i = 0; i < revs->bloom_keyvecs_nr; i++)
- destroy_bloom_keyvec(revs->bloom_keyvecs[i]);
- FREE_AND_NULL(revs->bloom_keyvecs);
- revs->bloom_keyvecs_nr = 0;
+ release_revisions_bloom_keyvecs(revs);
}
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 8910d53cac..639868ac56 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -66,8 +66,9 @@ sane_unset GIT_TRACE2_CONFIG_PARAMS
setup () {
rm -f "$TRASH_DIRECTORY/trace.perf" &&
- git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
- GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
+ eval git -c core.commitGraph=false log --pretty="format:%s" "$1" >log_wo_bloom &&
+ eval "GIT_TRACE2_PERF=\"$TRASH_DIRECTORY/trace.perf\"" \
+ git -c core.commitGraph=true log --pretty="format:%s" "$1" >log_w_bloom
}
test_bloom_filters_used () {
@@ -138,10 +139,6 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
test_bloom_filters_not_used "--walk-reflogs -- A"
'
-test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
- test_bloom_filters_not_used "-- file4 A/file1"
-'
-
test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' '
test_bloom_filters_not_used "-- ."
'
@@ -151,9 +148,17 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B
test_bloom_filters_used "-- *renamed"
'
-test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
- test_bloom_filters_not_used "-- *" &&
- test_bloom_filters_not_used "-- file*"
+test_expect_success 'git log with multiple literal paths uses Bloom filter' '
+ test_bloom_filters_used "-- file4 A/file1" &&
+ test_bloom_filters_used "-- *" &&
+ test_bloom_filters_used "-- file*"
+'
+
+test_expect_success 'git log with path contains a wildcard does not use Bloom filter' '
+ test_bloom_filters_not_used "-- file\*" &&
+ test_bloom_filters_not_used "-- A/\* file4" &&
+ test_bloom_filters_not_used "-- file4 A/\*" &&
+ test_bloom_filters_not_used "-- * A/\*"
'
test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '
--
2.50.0.108.g6ae0c543ae
More information about the ffmpeg-devel
mailing list