From c59c7c879ef60b2b0d0b050982712c53ec4d0680 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:25 +0200 Subject: [PATCH 001/710] t0410: remove pipes after git commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's not run a git command, especially one with "verify" in its name, upstream of a pipe, because the pipe will hide the git command's exit code. While at it, let's also avoid a useless `cat` command piping into `sed`. Helped-by: SZEDER Gábor Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- t/t0410-partial-clone.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 5bd892f2f7a90a..3559313bd03097 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -166,8 +166,9 @@ test_expect_success 'fetching of missing objects' ' # associated packfile contains the object ls repo/.git/objects/pack/pack-*.promisor >promisorlist && test_line_count = 1 promisorlist && - IDX=$(cat promisorlist | sed "s/promisor$/idx/") && - git verify-pack --verbose "$IDX" | grep "$HASH" + IDX=$(sed "s/promisor$/idx/" promisorlist) && + git verify-pack --verbose "$IDX" >out && + grep "$HASH" out ' test_expect_success 'fetching of missing objects works with ref-in-want enabled' ' @@ -514,8 +515,9 @@ test_expect_success 'fetching of missing objects from an HTTP server' ' # associated packfile contains the object ls repo/.git/objects/pack/pack-*.promisor >promisorlist && test_line_count = 1 promisorlist && - IDX=$(cat promisorlist | sed "s/promisor$/idx/") && - git verify-pack --verbose "$IDX" | grep "$HASH" + IDX=$(sed "s/promisor$/idx/" promisorlist) && + git verify-pack --verbose "$IDX" >out && + grep "$HASH" out ' test_done From 2e860675b6572cf476e99888134a5b307fd7eb62 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:26 +0200 Subject: [PATCH 002/710] fetch-object: make 
functions return an error code The callers of the fetch_object() and fetch_objects() might be interested in knowing if these functions succeeded or not. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- fetch-object.c | 13 ++++++++----- fetch-object.h | 4 ++-- sha1-file.c | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fetch-object.c b/fetch-object.c index 42665488008bc0..eac4d448ef2ce9 100644 --- a/fetch-object.c +++ b/fetch-object.c @@ -5,11 +5,12 @@ #include "transport.h" #include "fetch-object.h" -static void fetch_refs(const char *remote_name, struct ref *ref) +static int fetch_refs(const char *remote_name, struct ref *ref) { struct remote *remote; struct transport *transport; int original_fetch_if_missing = fetch_if_missing; + int res; fetch_if_missing = 0; remote = remote_get(remote_name); @@ -19,12 +20,14 @@ static void fetch_refs(const char *remote_name, struct ref *ref) transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); - transport_fetch_refs(transport, ref); + res = transport_fetch_refs(transport, ref); fetch_if_missing = original_fetch_if_missing; + + return res; } -void fetch_objects(const char *remote_name, const struct object_id *oids, - int oid_nr) +int fetch_objects(const char *remote_name, const struct object_id *oids, + int oid_nr) { struct ref *ref = NULL; int i; @@ -36,5 +39,5 @@ void fetch_objects(const char *remote_name, const struct object_id *oids, new_ref->next = ref; ref = new_ref; } - fetch_refs(remote_name, ref); + return fetch_refs(remote_name, ref); } diff --git a/fetch-object.h b/fetch-object.h index d6444caa5ac11b..7bcc7cadb0c4ac 100644 --- a/fetch-object.h +++ b/fetch-object.h @@ -3,7 +3,7 @@ struct object_id; -void fetch_objects(const char *remote_name, const struct object_id *oids, - int oid_nr); +int fetch_objects(const char *remote_name, const struct object_id *oids, + int oid_nr); #endif diff --git a/sha1-file.c 
b/sha1-file.c index 888b6024d5de05..819d32cdb8dbe6 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1381,8 +1381,8 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, !already_retried && r == the_repository && !(flags & OBJECT_INFO_SKIP_FETCH_OBJECT)) { /* - * TODO Investigate having fetch_object() return - * TODO error/success and stopping the music here. + * TODO Investigate checking fetch_object() return + * TODO value and stopping on error here. * TODO Pass a repository struct through fetch_object, * such that arbitrary repositories work. */ From 48de315817281e49a5e9000d40550b5257b437c6 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:27 +0200 Subject: [PATCH 003/710] Add initial support for many promisor remotes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The promisor-remote.{c,h} files will contain functions to manage many promisor remotes. We expect that there will not be a lot of promisor remotes, so it is ok to use a simple linked list to manage them. 
Helped-by: Jeff King Helped-by: SZEDER Gábor Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Makefile | 1 + promisor-remote.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ promisor-remote.h | 16 +++++++++ 3 files changed, 109 insertions(+) create mode 100644 promisor-remote.c create mode 100644 promisor-remote.h diff --git a/Makefile b/Makefile index f58bf14c7bf3d9..049bc8cfd4f7fd 100644 --- a/Makefile +++ b/Makefile @@ -944,6 +944,7 @@ LIB_OBJS += preload-index.o LIB_OBJS += pretty.o LIB_OBJS += prio-queue.o LIB_OBJS += progress.o +LIB_OBJS += promisor-remote.o LIB_OBJS += prompt.o LIB_OBJS += protocol.o LIB_OBJS += quote.o diff --git a/promisor-remote.c b/promisor-remote.c new file mode 100644 index 00000000000000..c249b80e02022c --- /dev/null +++ b/promisor-remote.c @@ -0,0 +1,92 @@ +#include "cache.h" +#include "promisor-remote.h" +#include "config.h" + +static struct promisor_remote *promisors; +static struct promisor_remote **promisors_tail = &promisors; + +static struct promisor_remote *promisor_remote_new(const char *remote_name) +{ + struct promisor_remote *r; + + if (*remote_name == '/') { + warning(_("promisor remote name cannot begin with '/': %s"), + remote_name); + return NULL; + } + + FLEX_ALLOC_STR(r, name, remote_name); + + *promisors_tail = r; + promisors_tail = &r->next; + + return r; +} + +static struct promisor_remote *promisor_remote_lookup(const char *remote_name, + struct promisor_remote **previous) +{ + struct promisor_remote *r, *p; + + for (p = NULL, r = promisors; r; p = r, r = r->next) + if (!strcmp(r->name, remote_name)) { + if (previous) + *previous = p; + return r; + } + + return NULL; +} + +static int promisor_remote_config(const char *var, const char *value, void *data) +{ + const char *name; + int namelen; + const char *subkey; + + if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0) + return 0; + + if (!strcmp(subkey, "promisor")) { + char *remote_name; + + if (!git_config_bool(var, value)) 
+ return 0; + + remote_name = xmemdupz(name, namelen); + + if (!promisor_remote_lookup(remote_name, NULL)) + promisor_remote_new(remote_name); + + free(remote_name); + return 0; + } + + return 0; +} + +static void promisor_remote_init(void) +{ + static int initialized; + + if (initialized) + return; + initialized = 1; + + git_config(promisor_remote_config, NULL); +} + +struct promisor_remote *promisor_remote_find(const char *remote_name) +{ + promisor_remote_init(); + + if (!remote_name) + return promisors; + + return promisor_remote_lookup(remote_name, NULL); +} + +int has_promisor_remote(void) +{ + return !!promisor_remote_find(NULL); +} diff --git a/promisor-remote.h b/promisor-remote.h new file mode 100644 index 00000000000000..01dcdf4dc72c20 --- /dev/null +++ b/promisor-remote.h @@ -0,0 +1,16 @@ +#ifndef PROMISOR_REMOTE_H +#define PROMISOR_REMOTE_H + +/* + * Promisor remote linked list + * Its information come from remote.XXX config entries. + */ +struct promisor_remote { + struct promisor_remote *next; + const char name[FLEX_ARRAY]; +}; + +extern struct promisor_remote *promisor_remote_find(const char *remote_name); +extern int has_promisor_remote(void); + +#endif /* PROMISOR_REMOTE_H */ From 9e27beaa2344dc6dd422d7711a666c082785118f Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:28 +0200 Subject: [PATCH 004/710] promisor-remote: implement promisor_remote_get_direct() This is implemented for now by calling fetch_objects(). It fetches from all the promisor remotes. 
Helped-by: Ramsay Jones Helped-by: Derrick Stolee Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ promisor-remote.h | 5 ++++ 2 files changed, 72 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index c249b80e02022c..b79a84ce3a5c0f 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -1,6 +1,8 @@ #include "cache.h" +#include "object-store.h" #include "promisor-remote.h" #include "config.h" +#include "fetch-object.h" static struct promisor_remote *promisors; static struct promisor_remote **promisors_tail = &promisors; @@ -90,3 +92,68 @@ int has_promisor_remote(void) { return !!promisor_remote_find(NULL); } + +static int remove_fetched_oids(struct repository *repo, + struct object_id **oids, + int oid_nr, int to_free) +{ + int i, remaining_nr = 0; + int *remaining = xcalloc(oid_nr, sizeof(*remaining)); + struct object_id *old_oids = *oids; + struct object_id *new_oids; + + for (i = 0; i < oid_nr; i++) + if (oid_object_info_extended(repo, &old_oids[i], NULL, + OBJECT_INFO_SKIP_FETCH_OBJECT)) { + remaining[i] = 1; + remaining_nr++; + } + + if (remaining_nr) { + int j = 0; + new_oids = xcalloc(remaining_nr, sizeof(*new_oids)); + for (i = 0; i < oid_nr; i++) + if (remaining[i]) + oidcpy(&new_oids[j++], &old_oids[i]); + *oids = new_oids; + if (to_free) + free(old_oids); + } + + free(remaining); + + return remaining_nr; +} + +int promisor_remote_get_direct(struct repository *repo, + const struct object_id *oids, + int oid_nr) +{ + struct promisor_remote *r; + struct object_id *remaining_oids = (struct object_id *)oids; + int remaining_nr = oid_nr; + int to_free = 0; + int res = -1; + + promisor_remote_init(); + + for (r = promisors; r; r = r->next) { + if (fetch_objects(r->name, remaining_oids, remaining_nr) < 0) { + if (remaining_nr == 1) + continue; + remaining_nr = remove_fetched_oids(repo, &remaining_oids, + remaining_nr, to_free); + if (remaining_nr) { + 
to_free = 1; + continue; + } + } + res = 0; + break; + } + + if (to_free) + free(remaining_oids); + + return res; +} diff --git a/promisor-remote.h b/promisor-remote.h index 01dcdf4dc72c20..ed4ecead36f5a3 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -1,6 +1,8 @@ #ifndef PROMISOR_REMOTE_H #define PROMISOR_REMOTE_H +struct object_id; + /* * Promisor remote linked list * Its information come from remote.XXX config entries. @@ -12,5 +14,8 @@ struct promisor_remote { extern struct promisor_remote *promisor_remote_find(const char *remote_name); extern int has_promisor_remote(void); +extern int promisor_remote_get_direct(struct repository *repo, + const struct object_id *oids, + int oid_nr); #endif /* PROMISOR_REMOTE_H */ From 9cfebc1f3b2b60290b6321b95e5038b6e5b758ab Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:29 +0200 Subject: [PATCH 005/710] promisor-remote: add promisor_remote_reinit() We will need to reinitialize the promisor remote configuration as we will make some changes to it in a later commit. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 22 ++++++++++++++++++++-- promisor-remote.h | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index b79a84ce3a5c0f..763d98aedd85d6 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -67,10 +67,10 @@ static int promisor_remote_config(const char *var, const char *value, void *data return 0; } +static int initialized; + static void promisor_remote_init(void) { - static int initialized; - if (initialized) return; initialized = 1; @@ -78,6 +78,24 @@ static void promisor_remote_init(void) git_config(promisor_remote_config, NULL); } +static void promisor_remote_clear(void) +{ + while (promisors) { + struct promisor_remote *r = promisors; + promisors = promisors->next; + free(r); + } + + promisors_tail = &promisors; +} + +void promisor_remote_reinit(void) +{ + initialized = 0; + promisor_remote_clear(); + promisor_remote_init(); +} + struct promisor_remote *promisor_remote_find(const char *remote_name) { promisor_remote_init(); diff --git a/promisor-remote.h b/promisor-remote.h index ed4ecead36f5a3..dddd4048e0cc01 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -12,6 +12,7 @@ struct promisor_remote { const char name[FLEX_ARRAY]; }; +extern void promisor_remote_reinit(void); extern struct promisor_remote *promisor_remote_find(const char *remote_name); extern int has_promisor_remote(void); extern int promisor_remote_get_direct(struct repository *repo, From faf2abf496bb8e5a5fbf3818f3e78077b2f3e143 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:30 +0200 Subject: [PATCH 006/710] promisor-remote: use repository_format_partial_clone A remote specified using the extensions.partialClone config option should be considered a promisor remote too. For simplicity and to make things predictable, this promisor remote should be either always the last one we try to get objects from, or the first one. 
So it should always be either at the end of the promisor remote list, or at its start. We decided to make it the last one we try, because it is likely that someone using many promisor remotes is doing so because the other promisor remotes are better for some reason (maybe they are closer or faster for some kind of objects) than the origin, and the origin is likely to be the remote specified by extensions.partialClone. This justification is not very strong, but one choice had to be made, and anyway the long term plan should be to make the order somehow fully configurable. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index 763d98aedd85d6..6a8856f475502d 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -40,6 +40,18 @@ static struct promisor_remote *promisor_remote_lookup(const char *remote_name, return NULL; } +static void promisor_remote_move_to_tail(struct promisor_remote *r, + struct promisor_remote *previous) +{ + if (previous) + previous->next = r->next; + else + promisors = r->next ? 
r->next : r; + r->next = NULL; + *promisors_tail = r; + promisors_tail = &r->next; +} + static int promisor_remote_config(const char *var, const char *value, void *data) { const char *name; @@ -76,6 +88,17 @@ static void promisor_remote_init(void) initialized = 1; git_config(promisor_remote_config, NULL); + + if (repository_format_partial_clone) { + struct promisor_remote *o, *previous; + + o = promisor_remote_lookup(repository_format_partial_clone, + &previous); + if (o) + promisor_remote_move_to_tail(o, previous); + else + promisor_remote_new(repository_format_partial_clone); + } } static void promisor_remote_clear(void) From b14ed5adaf87c5943433fd6b1d2cbe8c060f9264 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:31 +0200 Subject: [PATCH 007/710] Use promisor_remote_get_direct() and has_promisor_remote() Instead of using the repository_format_partial_clone global and fetch_objects() directly, let's use has_promisor_remote() and promisor_remote_get_direct(). This way all the configured promisor remotes will be taken into account, not only the one specified by extensions.partialClone. Also when cloning or fetching using a partial clone filter, remote.origin.promisor will be set to "true" instead of setting extensions.partialClone to "origin". This makes it possible to use many promisor remotes just by fetching from them. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 5 +++-- builtin/fetch.c | 11 ++++++----- builtin/gc.c | 3 ++- builtin/index-pack.c | 8 ++++---- builtin/repack.c | 3 ++- cache-tree.c | 3 ++- connected.c | 3 ++- diff.c | 9 ++++----- list-objects-filter-options.c | 28 +++++++++++++++------------- packfile.c | 3 ++- sha1-file.c | 15 ++++++++------- t/t5601-clone.sh | 2 +- t/t5616-partial-clone.sh | 2 +- unpack-trees.c | 8 ++++---- 14 files changed, 56 insertions(+), 47 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 0f092382e175cf..85ae10bf0b81bf 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -15,6 +15,7 @@ #include "sha1-array.h" #include "packfile.h" #include "object-store.h" +#include "promisor-remote.h" struct batch_options { int enabled; @@ -523,8 +524,8 @@ static int batch_objects(struct batch_options *opt) if (opt->all_objects) { struct object_cb_data cb; - if (repository_format_partial_clone) - warning("This repository has extensions.partialClone set. Some objects may not be loaded."); + if (has_promisor_remote()) + warning("This repository uses promisor remotes. Some objects may not be loaded."); cb.opt = opt; cb.expand = &data; diff --git a/builtin/fetch.c b/builtin/fetch.c index 4ba63d5ac64284..f74bd78144b1fb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -23,6 +23,7 @@ #include "packfile.h" #include "list-objects-filter-options.h" #include "commit-reach.h" +#include "promisor-remote.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), @@ -1460,7 +1461,7 @@ static inline void fetch_one_setup_partial(struct remote *remote) * If no prior partial clone/fetch and the current fetch DID NOT * request a partial-fetch, do a normal fetch. 
*/ - if (!repository_format_partial_clone && !filter_options.choice) + if (!has_promisor_remote() && !filter_options.choice) return; /* @@ -1468,7 +1469,7 @@ static inline void fetch_one_setup_partial(struct remote *remote) * on this repo and remember the given filter-spec as the default * for subsequent fetches to this remote. */ - if (!repository_format_partial_clone && filter_options.choice) { + if (!has_promisor_remote() && filter_options.choice) { partial_clone_register(remote->name, &filter_options); return; } @@ -1477,7 +1478,7 @@ static inline void fetch_one_setup_partial(struct remote *remote) * We are currently limited to only ONE promisor remote and only * allow partial-fetches from the promisor remote. */ - if (strcmp(remote->name, repository_format_partial_clone)) { + if (!promisor_remote_find(remote->name)) { if (filter_options.choice) die(_("--filter can only be used with the remote " "configured in extensions.partialClone")); @@ -1611,7 +1612,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) if (depth || deepen_since || deepen_not.nr) deepen = 1; - if (filter_options.choice && !repository_format_partial_clone) + if (filter_options.choice && !has_promisor_remote()) die("--filter can only be used when extensions.partialClone is set"); if (all) { @@ -1645,7 +1646,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } if (remote) { - if (filter_options.choice || repository_format_partial_clone) + if (filter_options.choice || has_promisor_remote()) fetch_one_setup_partial(remote); result = fetch_one(remote, argc, argv, prune_tags_ok); } else { diff --git a/builtin/gc.c b/builtin/gc.c index 8943bcc300d4a2..824a8832b5293c 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -27,6 +27,7 @@ #include "pack-objects.h" #include "blob.h" #include "tree.h" +#include "promisor-remote.h" #define FAILED_RUN "failed to run %s" @@ -661,7 +662,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) argv_array_push(&prune, 
prune_expire); if (quiet) argv_array_push(&prune, "--no-progress"); - if (repository_format_partial_clone) + if (has_promisor_remote()) argv_array_push(&prune, "--exclude-promisor-objects"); if (run_command_v_opt(prune.argv, RUN_GIT_CMD)) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 0d55f73b0b443b..a23454da6ef9b3 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -14,7 +14,7 @@ #include "thread-utils.h" #include "packfile.h" #include "object-store.h" -#include "fetch-object.h" +#include "promisor-remote.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; @@ -1352,7 +1352,7 @@ static void fix_unresolved_deltas(struct hashfile *f) sorted_by_pos[i] = &ref_deltas[i]; QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare); - if (repository_format_partial_clone) { + if (has_promisor_remote()) { /* * Prefetch the delta bases. */ @@ -1366,8 +1366,8 @@ static void fix_unresolved_deltas(struct hashfile *f) oid_array_append(&to_fetch, &d->oid); } if (to_fetch.nr) - fetch_objects(repository_format_partial_clone, - to_fetch.oid, to_fetch.nr); + promisor_remote_get_direct(the_repository, + to_fetch.oid, to_fetch.nr); oid_array_clear(&to_fetch); } diff --git a/builtin/repack.c b/builtin/repack.c index caca11392713eb..df9a32c9060e46 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -11,6 +11,7 @@ #include "midx.h" #include "packfile.h" #include "object-store.h" +#include "promisor-remote.h" static int delta_base_offset = 1; static int pack_kept_objects = -1; @@ -369,7 +370,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) argv_array_push(&cmd.args, "--all"); argv_array_push(&cmd.args, "--reflog"); argv_array_push(&cmd.args, "--indexed-objects"); - if (repository_format_partial_clone) + if (has_promisor_remote()) argv_array_push(&cmd.args, "--exclude-promisor-objects"); if (write_bitmaps) argv_array_push(&cmd.args, "--write-bitmap-index"); 
diff --git a/cache-tree.c b/cache-tree.c index b13bfaf71e9e15..64c285a746a886 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -5,6 +5,7 @@ #include "cache-tree.h" #include "object-store.h" #include "replace-object.h" +#include "promisor-remote.h" #ifndef DEBUG #define DEBUG 0 @@ -357,7 +358,7 @@ static int update_one(struct cache_tree *it, } ce_missing_ok = mode == S_IFGITLINK || missing_ok || - (repository_format_partial_clone && + (has_promisor_remote() && ce_skip_worktree(ce)); if (is_null_oid(oid) || (!ce_missing_ok && !has_object_file(oid))) { diff --git a/connected.c b/connected.c index 1ab481fed69b33..b0e4968fbdfb68 100644 --- a/connected.c +++ b/connected.c @@ -5,6 +5,7 @@ #include "connected.h" #include "transport.h" #include "packfile.h" +#include "promisor-remote.h" /* * If we feed all the commits we want to verify to this command @@ -73,7 +74,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data, argv_array_push(&rev_list.args,"rev-list"); argv_array_push(&rev_list.args, "--objects"); argv_array_push(&rev_list.args, "--stdin"); - if (repository_format_partial_clone) + if (has_promisor_remote()) argv_array_push(&rev_list.args, "--exclude-promisor-objects"); if (!opt->is_deepening_fetch) { argv_array_push(&rev_list.args, "--not"); diff --git a/diff.c b/diff.c index 1ee04e321b1b5c..249cc6eaceba66 100644 --- a/diff.c +++ b/diff.c @@ -25,7 +25,7 @@ #include "packfile.h" #include "parse-options.h" #include "help.h" -#include "fetch-object.h" +#include "promisor-remote.h" #ifdef NO_FAST_WORKING_DIRECTORY #define FAST_WORKING_DIRECTORY 0 @@ -6514,8 +6514,7 @@ static void add_if_missing(struct repository *r, void diffcore_std(struct diff_options *options) { - if (options->repo == the_repository && - repository_format_partial_clone) { + if (options->repo == the_repository && has_promisor_remote()) { /* * Prefetch the diff pairs that are about to be flushed. 
*/ @@ -6532,8 +6531,8 @@ void diffcore_std(struct diff_options *options) /* * NEEDSWORK: Consider deduplicating the OIDs sent. */ - fetch_objects(repository_format_partial_clone, - to_fetch.oid, to_fetch.nr); + promisor_remote_get_direct(options->repo, + to_fetch.oid, to_fetch.nr); oid_array_clear(&to_fetch); } diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 1cb20c659c82b1..b0de7d3c176ab7 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -6,6 +6,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "promisor-remote.h" /* * Parse value of the argument to the "filter" keyword. @@ -146,30 +147,31 @@ void partial_clone_register( const char *remote, const struct list_objects_filter_options *filter_options) { - /* - * Record the name of the partial clone remote in the - * config and in the global variable -- the latter is - * used throughout to indicate that partial clone is - * enabled and to expect missing objects. - */ - if (repository_format_partial_clone && - *repository_format_partial_clone && - strcmp(remote, repository_format_partial_clone)) - die(_("cannot change partial clone promisor remote")); + char *cfg_name; - git_config_set("core.repositoryformatversion", "1"); - git_config_set("extensions.partialclone", remote); + /* Check if it is already registered */ + if (!promisor_remote_find(remote)) { + git_config_set("core.repositoryformatversion", "1"); - repository_format_partial_clone = xstrdup(remote); + /* Add promisor config for the remote */ + cfg_name = xstrfmt("remote.%s.promisor", remote); + git_config_set(cfg_name, "true"); + free(cfg_name); + } /* * Record the initial filter-spec in the config as * the default for subsequent fetches from this remote. 
+ * + * TODO: record it into remote..partialclonefilter */ core_partial_clone_filter_default = xstrdup(filter_options->filter_spec); git_config_set("core.partialclonefilter", core_partial_clone_filter_default); + + /* Make sure the config info are reset */ + promisor_remote_reinit(); } void partial_clone_get_default_filter_spec( diff --git a/packfile.c b/packfile.c index d786ec731202e5..50aaf93bc36cc4 100644 --- a/packfile.c +++ b/packfile.c @@ -16,6 +16,7 @@ #include "tree.h" #include "object-store.h" #include "midx.h" +#include "promisor-remote.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -2119,7 +2120,7 @@ int is_promisor_object(const struct object_id *oid) static int promisor_objects_prepared; if (!promisor_objects_prepared) { - if (repository_format_partial_clone) { + if (has_promisor_remote()) { for_each_packed_object(add_promisor_object, &promisor_objects, FOR_EACH_OBJECT_PROMISOR_ONLY); diff --git a/sha1-file.c b/sha1-file.c index 819d32cdb8dbe6..fe250c4b6eb704 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -30,8 +30,8 @@ #include "mergesort.h" #include "quote.h" #include "packfile.h" -#include "fetch-object.h" #include "object-store.h" +#include "promisor-remote.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -1377,16 +1377,17 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, } /* Check if it is a missing object */ - if (fetch_if_missing && repository_format_partial_clone && + if (fetch_if_missing && has_promisor_remote() && !already_retried && r == the_repository && !(flags & OBJECT_INFO_SKIP_FETCH_OBJECT)) { /* - * TODO Investigate checking fetch_object() return - * TODO value and stopping on error here. - * TODO Pass a repository struct through fetch_object, - * such that arbitrary repositories work. + * TODO Investigate checking promisor_remote_get_direct() + * TODO return value and stopping on error here. 
+ * TODO Pass a repository struct through + * promisor_remote_get_direct(), such that arbitrary + * repositories work. */ - fetch_objects(repository_format_partial_clone, real, 1); + promisor_remote_get_direct(r, real, 1); already_retried = 1; continue; } diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 37d76808d4a74b..534d03a4d787ae 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -654,7 +654,7 @@ partial_clone () { git -C client fsck && # Ensure that unneeded blobs are not inadvertently fetched. - test_config -C client extensions.partialclone "not a remote" && + test_config -C client remote.origin.promisor "false" && test_must_fail git -C client cat-file -e "$HASH1" && # But this blob was fetched, because clone performs an initial checkout diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index b91ef548f86b0e..8f9a62aac0b9a5 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -42,7 +42,7 @@ test_expect_success 'do partial clone 1' ' test_cmp expect_1.oids observed.oids && test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && - test "$(git -C pc1 config --local extensions.partialclone)" = "origin" && + test "$(git -C pc1 config --local remote.origin.promisor)" = "true" && test "$(git -C pc1 config --local core.partialclonefilter)" = "blob:none" ' diff --git a/unpack-trees.c b/unpack-trees.c index 50189909b86d6a..aebd865ef6bd27 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -16,7 +16,7 @@ #include "submodule-config.h" #include "fsmonitor.h" #include "object-store.h" -#include "fetch-object.h" +#include "promisor-remote.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -400,7 +400,7 @@ static int check_updates(struct unpack_trees_options *o) load_gitmodules_file(index, &state); enable_delayed_checkout(&state); - if (repository_format_partial_clone && o->update && !o->dry_run) { + if (has_promisor_remote() && o->update && !o->dry_run) { /* * Prefetch the objects that 
are to be checked out in the loop * below. @@ -419,8 +419,8 @@ static int check_updates(struct unpack_trees_options *o) oid_array_append(&to_fetch, &ce->oid); } if (to_fetch.nr) - fetch_objects(repository_format_partial_clone, - to_fetch.oid, to_fetch.nr); + promisor_remote_get_direct(the_repository, + to_fetch.oid, to_fetch.nr); oid_array_clear(&to_fetch); } for (i = 0; i < index->cache_nr; i++) { From fa3d1b63e866d6b893934ab69da10b4516150cdc Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:32 +0200 Subject: [PATCH 008/710] promisor-remote: parse remote.*.partialclonefilter This makes it possible to specify a different partial clone filter for each promisor remote. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fetch.c | 2 +- list-objects-filter-options.c | 27 +++++++++++++++------------ list-objects-filter-options.h | 3 ++- promisor-remote.c | 15 +++++++++++++++ promisor-remote.h | 5 ++++- t/t0410-partial-clone.sh | 2 +- t/t5601-clone.sh | 1 + t/t5616-partial-clone.sh | 2 +- 8 files changed, 40 insertions(+), 17 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index f74bd78144b1fb..13d813313089c7 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1491,7 +1491,7 @@ static inline void fetch_one_setup_partial(struct remote *remote) * the config. 
*/ if (!filter_options.choice) - partial_clone_get_default_filter_spec(&filter_options); + partial_clone_get_default_filter_spec(&filter_options, remote->name); return; } diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index b0de7d3c176ab7..28c571f922e46c 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -30,6 +30,9 @@ static int gently_parse_list_objects_filter( { const char *v0; + if (!arg) + return 0; + if (filter_options->choice) { if (errbuf) { strbuf_addstr( @@ -148,6 +151,7 @@ void partial_clone_register( const struct list_objects_filter_options *filter_options) { char *cfg_name; + char *filter_name; /* Check if it is already registered */ if (!promisor_remote_find(remote)) { @@ -162,27 +166,26 @@ void partial_clone_register( /* * Record the initial filter-spec in the config as * the default for subsequent fetches from this remote. - * - * TODO: record it into remote..partialclonefilter */ - core_partial_clone_filter_default = - xstrdup(filter_options->filter_spec); - git_config_set("core.partialclonefilter", - core_partial_clone_filter_default); + filter_name = xstrfmt("remote.%s.partialclonefilter", remote); + git_config_set(filter_name, filter_options->filter_spec); + free(filter_name); /* Make sure the config info are reset */ promisor_remote_reinit(); } void partial_clone_get_default_filter_spec( - struct list_objects_filter_options *filter_options) + struct list_objects_filter_options *filter_options, + const char *remote) { + struct promisor_remote *promisor = promisor_remote_find(remote); + /* * Parse default value, but silently ignore it if it is invalid. 
*/ - if (!core_partial_clone_filter_default) - return; - gently_parse_list_objects_filter(filter_options, - core_partial_clone_filter_default, - NULL); + if (promisor) + gently_parse_list_objects_filter(filter_options, + promisor->partial_clone_filter, + NULL); } diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index c54f0000fbade5..8deaa287b57cbb 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -87,6 +87,7 @@ void partial_clone_register( const char *remote, const struct list_objects_filter_options *filter_options); void partial_clone_get_default_filter_spec( - struct list_objects_filter_options *filter_options); + struct list_objects_filter_options *filter_options, + const char *remote); #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ diff --git a/promisor-remote.c b/promisor-remote.c index 6a8856f475502d..826890f7b805f8 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -75,6 +75,21 @@ static int promisor_remote_config(const char *var, const char *value, void *data free(remote_name); return 0; } + if (!strcmp(subkey, "partialclonefilter")) { + struct promisor_remote *r; + char *remote_name = xmemdupz(name, namelen); + + r = promisor_remote_lookup(remote_name, NULL); + if (!r) + r = promisor_remote_new(remote_name); + + free(remote_name); + + if (!r) + return 0; + + return git_config_string(&r->partial_clone_filter, var, value); + } return 0; } diff --git a/promisor-remote.h b/promisor-remote.h index dddd4048e0cc01..838cb092f3c8bb 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -5,10 +5,13 @@ struct object_id; /* * Promisor remote linked list - * Its information come from remote.XXX config entries. + * + * Information in its fields come from remote.XXX config entries or + * from extensions.partialclone or core.partialclonefilter. 
*/ struct promisor_remote { struct promisor_remote *next; + const char *partial_clone_filter; const char name[FLEX_ARRAY]; }; diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 3559313bd03097..3082eff2bf1a37 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -26,7 +26,7 @@ promise_and_delete () { test_expect_success 'extensions.partialclone without filter' ' test_create_repo server && git clone --filter="blob:none" "file://$(pwd)/server" client && - git -C client config --unset core.partialclonefilter && + git -C client config --unset remote.origin.partialclonefilter && git -C client fetch origin ' diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 534d03a4d787ae..078cf48dd610c5 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -655,6 +655,7 @@ partial_clone () { # Ensure that unneeded blobs are not inadvertently fetched. test_config -C client remote.origin.promisor "false" && + git -C client config --unset remote.origin.partialclonefilter && test_must_fail git -C client cat-file -e "$HASH1" && # But this blob was fetched, because clone performs an initial checkout diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 8f9a62aac0b9a5..8ae7ba9c950f54 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -43,7 +43,7 @@ test_expect_success 'do partial clone 1' ' test_cmp expect_1.oids observed.oids && test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && test "$(git -C pc1 config --local remote.origin.promisor)" = "true" && - test "$(git -C pc1 config --local core.partialclonefilter)" = "blob:none" + test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none" ' # checkout master to force dynamic object fetch of blobs at HEAD. 
From 5e46139376a4b8d31f02a521bdf801f3b8913e57 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:33 +0200 Subject: [PATCH 009/710] builtin/fetch: remove unique promisor remote limitation As the infrastructure for more than one promisor remote has been introduced in previous patches, we can remove code that forbids the registration of more than one promisor remote. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fetch.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 13d813313089c7..5657d054ec55f7 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1465,26 +1465,16 @@ static inline void fetch_one_setup_partial(struct remote *remote) return; /* - * If this is the FIRST partial-fetch request, we enable partial - * on this repo and remember the given filter-spec as the default - * for subsequent fetches to this remote. + * If this is a partial-fetch request, we enable partial on + * this repo if not already enabled and remember the given + * filter-spec as the default for subsequent fetches to this + * remote. */ - if (!has_promisor_remote() && filter_options.choice) { + if (filter_options.choice) { partial_clone_register(remote->name, &filter_options); return; } - /* - * We are currently limited to only ONE promisor remote and only - * allow partial-fetches from the promisor remote. 
- */ - if (!promisor_remote_find(remote->name)) { - if (filter_options.choice) - die(_("--filter can only be used with the remote " - "configured in extensions.partialClone")); - return; - } - /* * Do a partial-fetch from the promisor remote using either the * explicitly given filter-spec or inherit the filter-spec from From 9a4c50788641887510876ea1b40c77daf9b7f7cf Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:34 +0200 Subject: [PATCH 010/710] t0410: test fetching from many promisor remotes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This shows that it is now possible to fetch objects from more than one promisor remote, and that fetching from a new promisor remote can configure it as one. Helped-by: SZEDER Gábor Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- t/t0410-partial-clone.sh | 49 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 3082eff2bf1a37..2498e72a3412da 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -183,8 +183,55 @@ test_expect_success 'fetching of missing objects works with ref-in-want enabled' grep "git< fetch=.*ref-in-want" trace ' +test_expect_success 'fetching of missing objects from another promisor remote' ' + git clone "file://$(pwd)/server" server2 && + test_commit -C server2 bar && + git -C server2 repack -a -d --write-bitmap-index && + HASH2=$(git -C server2 rev-parse bar) && + + git -C repo remote add server2 "file://$(pwd)/server2" && + git -C repo config remote.server2.promisor true && + git -C repo cat-file -p "$HASH2" && + + git -C repo fetch server2 && + rm -rf repo/.git/objects/* && + git -C repo cat-file -p "$HASH2" && + + # Ensure that the .promisor file is written, and check that its + # associated packfile contains the object + ls repo/.git/objects/pack/pack-*.promisor >promisorlist && + 
test_line_count = 1 promisorlist && + IDX=$(sed "s/promisor$/idx/" promisorlist) && + git verify-pack --verbose "$IDX" >out && + grep "$HASH2" out +' + +test_expect_success 'fetching of missing objects configures a promisor remote' ' + git clone "file://$(pwd)/server" server3 && + test_commit -C server3 baz && + git -C server3 repack -a -d --write-bitmap-index && + HASH3=$(git -C server3 rev-parse baz) && + git -C server3 config uploadpack.allowfilter 1 && + + rm repo/.git/objects/pack/pack-*.promisor && + + git -C repo remote add server3 "file://$(pwd)/server3" && + git -C repo fetch --filter="blob:none" server3 $HASH3 && + + test_cmp_config -C repo true remote.server3.promisor && + + # Ensure that the .promisor file is written, and check that its + # associated packfile contains the object + ls repo/.git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 1 promisorlist && + IDX=$(sed "s/promisor$/idx/" promisorlist) && + git verify-pack --verbose "$IDX" >out && + grep "$HASH3" out +' + test_expect_success 'fetching of missing blobs works' ' - rm -rf server repo && + rm -rf server server2 repo && + rm -rf server server3 repo && test_create_repo server && test_commit -C server foo && git -C server repack -a -d --write-bitmap-index && From 7e154badc007db7c99f0fb507a3893d7e333b7b2 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:35 +0200 Subject: [PATCH 011/710] partial-clone: add multiple remotes in the doc While at it, let's remove a reference to ODB effort as the ODB effort has been replaced by directly enhancing partial clone and promisor remote features. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/technical/partial-clone.txt | 117 ++++++++++++++++------ 1 file changed, 84 insertions(+), 33 deletions(-) diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index 896c7b3878869d..210373e258890d 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -30,12 +30,20 @@ advance* during clone and fetch operations and thereby reduce download times and disk usage. Missing objects can later be "demand fetched" if/when needed. +A remote that can later provide the missing objects is called a +promisor remote, as it promises to send the objects when +requested. Initially Git supported only one promisor remote, the origin +remote from which the user cloned and that was configured in the +"extensions.partialClone" config option. Later support for more than +one promisor remote has been implemented. + Use of partial clone requires that the user be online and the origin -remote be available for on-demand fetching of missing objects. This may -or may not be problematic for the user. For example, if the user can -stay within the pre-selected subset of the source tree, they may not -encounter any missing objects. Alternatively, the user could try to -pre-fetch various objects if they know that they are going offline. +remote or other promisor remotes be available for on-demand fetching +of missing objects. This may or may not be problematic for the user. +For example, if the user can stay within the pre-selected subset of +the source tree, they may not encounter any missing objects. +Alternatively, the user could try to pre-fetch various objects if they +know that they are going offline. Non-Goals @@ -100,18 +108,18 @@ or commits that reference missing trees.
Handling Missing Objects ------------------------ -- An object may be missing due to a partial clone or fetch, or missing due - to repository corruption. To differentiate these cases, the local - repository specially indicates such filtered packfiles obtained from the - promisor remote as "promisor packfiles". +- An object may be missing due to a partial clone or fetch, or missing + due to repository corruption. To differentiate these cases, the + local repository specially indicates such filtered packfiles + obtained from promisor remotes as "promisor packfiles". + These promisor packfiles consist of a ".promisor" file with arbitrary contents (like the ".keep" files), in addition to their ".pack" and ".idx" files. - The local repository considers a "promisor object" to be an object that - it knows (to the best of its ability) that the promisor remote has promised - that it has, either because the local repository has that object in one of + it knows (to the best of its ability) that promisor remotes have promised + that they have, either because the local repository has that object in one of its promisor packfiles, or because another promisor object refers to it. + When Git encounters a missing object, Git can see if it is a promisor object @@ -123,12 +131,12 @@ expensive-to-modify list of missing objects.[a] - Since almost all Git code currently expects any referenced object to be present locally and because we do not want to force every command to do a dry-run first, a fallback mechanism is added to allow Git to attempt - to dynamically fetch missing objects from the promisor remote. + to dynamically fetch missing objects from promisor remotes. + When the normal object lookup fails to find an object, Git invokes -fetch-object to try to get the object from the server and then retry -the object lookup. This allows objects to be "faulted in" without -complicated prediction algorithms. 
+promisor_remote_get_direct() to try to get the object from a promisor +remote and then retry the object lookup. This allows objects to be +"faulted in" without complicated prediction algorithms. + For efficiency reasons, no check as to whether the missing object is actually a promisor object is performed. @@ -157,8 +165,7 @@ and prefetch those objects in bulk. + We are not happy with this global variable and would like to remove it, but that requires significant refactoring of the object code to pass an -additional flag. We hope that concurrent efforts to add an ODB API can -encompass this. +additional flag. Fetching Missing Objects @@ -182,21 +189,63 @@ has been updated to not use any object flags when the corresponding argument though they are not necessary. +Using many promisor remotes +--------------------------- + +Many promisor remotes can be configured and used. + +This allows for example a user to have multiple geographically-close +cache servers for fetching missing blobs while continuing to do +filtered `git-fetch` commands from the central server. + +When fetching objects, promisor remotes are tried one after the other +until all the objects have been fetched. + +Remotes that are considered "promisor" remotes are those specified by +the following configuration variables: + +- `extensions.partialClone = <name>` + +- `remote.<name>.promisor = true` + +- `remote.<name>.partialCloneFilter = ...` + +Only one promisor remote can be configured using the +`extensions.partialClone` config variable. This promisor remote will +be the last one tried when fetching objects. + +We decided to make it the last one we try, because it is likely that +someone using many promisor remotes is doing so because the other +promisor remotes are better for some reason (maybe they are closer or +faster for some kind of objects) than the origin, and the origin is +likely to be the remote specified by extensions.partialClone.
+ +This justification is not very strong, but one choice had to be made, +and anyway the long term plan should be to make the order somehow +fully configurable. + +For now though the other promisor remotes will be tried in the order +they appear in the config file. + Current Limitations ------------------- -- The remote used for a partial clone (or the first partial fetch - following a regular clone) is marked as the "promisor remote". +- It is not possible to specify the order in which the promisor + remotes are tried in other ways than the order in which they appear + in the config file. + -We are currently limited to a single promisor remote and only that -remote may be used for subsequent partial fetches. +It is also not possible to specify an order to be used when fetching +from one remote and a different order when fetching from another +remote. + +- It is not possible to push only specific objects to a promisor + remote. + -We accept this limitation because we believe initial users of this -feature will be using it on repositories with a strong single central -server. +It is not possible to push at the same time to multiple promisor +remotes in a specific order. -- Dynamic object fetching will only ask the promisor remote for missing - objects. We assume that the promisor remote has a complete view of the +- Dynamic object fetching will only ask promisor remotes for missing + objects. We assume that promisor remotes have a complete view of the repository and can satisfy all such requests. - Repack essentially treats promisor and non-promisor packfiles as 2 @@ -218,15 +267,17 @@ server. Future Work ----------- -- Allow more than one promisor remote and define a strategy for fetching - missing objects from specific promisor remotes or of iterating over the - set of promisor remotes until a missing object is found. +- Improve the way to specify the order in which promisor remotes are + tried.
+ -A user might want to have multiple geographically-close cache servers -for fetching missing blobs while continuing to do filtered `git-fetch` -commands from the central server, for example. +For example this could allow to specify explicitly something like: +"When fetching from this remote, I want to use these promisor remotes +in this order, though, when pushing or fetching to that remote, I want +to use those promisor remotes in that order." + +- Allow pushing to promisor remotes. + -Or the user might want to work in a triangular work flow with multiple +The user might want to work in a triangular work flow with multiple promisor remotes that each have an incomplete view of the repository. - Allow repack to work on promisor packfiles (while keeping them distinct From 75de0852119bc73c60ace232ba799c3cada2608a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:36 +0200 Subject: [PATCH 012/710] remote: add promisor and partial clone config to the doc Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/config/remote.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 6c4cad83a2c9f4..a8e6437a903592 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -76,3 +76,11 @@ remote.<name>.pruneTags:: + See also `remote.<name>.prune` and the PRUNING section of linkgit:git-fetch[1]. + +remote.<name>.promisor:: + When set to true, this remote will be used to fetch promisor + objects. + +remote.<name>.partialclonefilter:: + The filter that will be applied when fetching from this + promisor remote.
From db27dca5cfe41ffa62f3a34cff2f7fafb2547307 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:37 +0200 Subject: [PATCH 013/710] Remove fetch-object.{c,h} in favor of promisor-remote.{c,h} As fetch_objects() is now used only in promisor-remote.c and shouldn't be used outside it, let's move it into promisor-remote.c, make it static there, and remove fetch-object.{c,h}. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Makefile | 1 - fetch-object.c | 43 ------------------------------------------- fetch-object.h | 9 --------- promisor-remote.c | 40 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 54 deletions(-) delete mode 100644 fetch-object.c delete mode 100644 fetch-object.h diff --git a/Makefile b/Makefile index 049bc8cfd4f7fd..9b0baa72390e94 100644 --- a/Makefile +++ b/Makefile @@ -880,7 +880,6 @@ LIB_OBJS += ewah/ewah_io.o LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec-cmd.o LIB_OBJS += fetch-negotiator.o -LIB_OBJS += fetch-object.o LIB_OBJS += fetch-pack.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o diff --git a/fetch-object.c b/fetch-object.c deleted file mode 100644 index eac4d448ef2ce9..00000000000000 --- a/fetch-object.c +++ /dev/null @@ -1,43 +0,0 @@ -#include "cache.h" -#include "packfile.h" -#include "pkt-line.h" -#include "strbuf.h" -#include "transport.h" -#include "fetch-object.h" - -static int fetch_refs(const char *remote_name, struct ref *ref) -{ - struct remote *remote; - struct transport *transport; - int original_fetch_if_missing = fetch_if_missing; - int res; - - fetch_if_missing = 0; - remote = remote_get(remote_name); - if (!remote->url[0]) - die(_("Remote with no URL")); - transport = transport_get(remote, remote->url[0]); - - transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); - transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); - res = transport_fetch_refs(transport, ref); - fetch_if_missing = original_fetch_if_missing; - - return res; -} - -int
fetch_objects(const char *remote_name, const struct object_id *oids, - int oid_nr) -{ - struct ref *ref = NULL; - int i; - - for (i = 0; i < oid_nr; i++) { - struct ref *new_ref = alloc_ref(oid_to_hex(&oids[i])); - oidcpy(&new_ref->old_oid, &oids[i]); - new_ref->exact_oid = 1; - new_ref->next = ref; - ref = new_ref; - } - return fetch_refs(remote_name, ref); -} diff --git a/fetch-object.h b/fetch-object.h deleted file mode 100644 index 7bcc7cadb0c4ac..00000000000000 --- a/fetch-object.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef FETCH_OBJECT_H -#define FETCH_OBJECT_H - -struct object_id; - -int fetch_objects(const char *remote_name, const struct object_id *oids, - int oid_nr); - -#endif diff --git a/promisor-remote.c b/promisor-remote.c index 826890f7b805f8..92c4c12c1cadbe 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -2,7 +2,45 @@ #include "object-store.h" #include "promisor-remote.h" #include "config.h" -#include "fetch-object.h" +#include "transport.h" + +static int fetch_refs(const char *remote_name, struct ref *ref) +{ + struct remote *remote; + struct transport *transport; + int original_fetch_if_missing = fetch_if_missing; + int res; + + fetch_if_missing = 0; + remote = remote_get(remote_name); + if (!remote->url[0]) + die(_("Remote with no URL")); + transport = transport_get(remote, remote->url[0]); + + transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); + res = transport_fetch_refs(transport, ref); + fetch_if_missing = original_fetch_if_missing; + + return res; +} + +static int fetch_objects(const char *remote_name, + const struct object_id *oids, + int oid_nr) +{ + struct ref *ref = NULL; + int i; + + for (i = 0; i < oid_nr; i++) { + struct ref *new_ref = alloc_ref(oid_to_hex(&oids[i])); + oidcpy(&new_ref->old_oid, &oids[i]); + new_ref->exact_oid = 1; + new_ref->next = ref; + ref = new_ref; + } + return fetch_refs(remote_name, ref); +} static struct promisor_remote 
*promisors; static struct promisor_remote **promisors_tail = &promisors; From 60b7a92d846a6cc542f96fa0045735cc0b9265fd Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:38 +0200 Subject: [PATCH 014/710] Move repository_format_partial_clone to promisor-remote.c Now that we have has_promisor_remote() and can use many promisor remotes, let's hide repository_format_partial_clone as a static in promisor-remote.c to avoid it being used for anything other than managing backward compatibility. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- cache.h | 1 - environment.c | 1 - promisor-remote.c | 7 +++++++ promisor-remote.h | 6 ++++++ setup.c | 3 ++- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cache.h b/cache.h index bf20337ef43523..e34b9e66d2a57d 100644 --- a/cache.h +++ b/cache.h @@ -961,7 +961,6 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION 0 #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; -extern char *repository_format_partial_clone; extern const char *core_partial_clone_filter_default; extern int repository_format_worktree_config; diff --git a/environment.c b/environment.c index 89af47cb850490..8855d2fc1101b5 100644 --- a/environment.c +++ b/environment.c @@ -31,7 +31,6 @@ int warn_ambiguous_refs = 1; int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; -char *repository_format_partial_clone; const char *core_partial_clone_filter_default; int repository_format_worktree_config; const char *git_commit_encoding; diff --git a/promisor-remote.c b/promisor-remote.c index 92c4c12c1cadbe..31d51bb50e0aea 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -4,6 +4,13 @@ #include "config.h" #include "transport.h" +static char *repository_format_partial_clone; + +void set_repository_format_partial_clone(char *partial_clone) +{ + repository_format_partial_clone = xstrdup_or_null(partial_clone); +} + static int
fetch_refs(const char *remote_name, struct ref *ref) { struct remote *remote; diff --git a/promisor-remote.h b/promisor-remote.h index 838cb092f3c8bb..8200dfc9408e29 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -22,4 +22,10 @@ extern int promisor_remote_get_direct(struct repository *repo, const struct object_id *oids, int oid_nr); +/* + * This should be used only once from setup.c to set the value we got + * from the extensions.partialclone config option. + */ +extern void set_repository_format_partial_clone(char *partial_clone); + #endif /* PROMISOR_REMOTE_H */ diff --git a/setup.c b/setup.c index 8dcb4631f7d330..25a3038277cdaa 100644 --- a/setup.c +++ b/setup.c @@ -4,6 +4,7 @@ #include "dir.h" #include "string-list.h" #include "chdir-notify.h" +#include "promisor-remote.h" static int inside_git_dir = -1; static int inside_work_tree = -1; @@ -478,7 +479,7 @@ static int check_repository_format_gently(const char *gitdir, struct repository_ } repository_format_precious_objects = candidate->precious_objects; - repository_format_partial_clone = xstrdup_or_null(candidate->partial_clone); + set_repository_format_partial_clone(candidate->partial_clone); repository_format_worktree_config = candidate->worktree_config; string_list_clear(&candidate->unknown_extensions, 0); From 4ca9474efa4fd431c07a470513e684c5b2eec34c Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 25 Jun 2019 15:40:39 +0200 Subject: [PATCH 015/710] Move core_partial_clone_filter_default to promisor-remote.c Now that we can have a different default partial clone filter for each promisor remote, let's hide core_partial_clone_filter_default as a static in promisor-remote.c to avoid it being used for anything other than managing backward compatibility.
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- cache.h | 1 - config.c | 5 ----- environment.c | 1 - promisor-remote.c | 5 +++++ 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cache.h b/cache.h index e34b9e66d2a57d..a4d7f84eebcab2 100644 --- a/cache.h +++ b/cache.h @@ -961,7 +961,6 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION 0 #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; -extern const char *core_partial_clone_filter_default; extern int repository_format_worktree_config; /* diff --git a/config.c b/config.c index 296a6d9cc4110b..317b226bc88845 100644 --- a/config.c +++ b/config.c @@ -1344,11 +1344,6 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } - if (!strcmp(var, "core.partialclonefilter")) { - return git_config_string(&core_partial_clone_filter_default, - var, value); - } - if (!strcmp(var, "core.usereplacerefs")) { read_replace_refs = git_config_bool(var, value); return 0; diff --git a/environment.c b/environment.c index 8855d2fc1101b5..efa072680a2bca 100644 --- a/environment.c +++ b/environment.c @@ -31,7 +31,6 @@ int warn_ambiguous_refs = 1; int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; -const char *core_partial_clone_filter_default; int repository_format_worktree_config; const char *git_commit_encoding; const char *git_log_output_encoding; diff --git a/promisor-remote.c b/promisor-remote.c index 31d51bb50e0aea..9bc296cdde2acc 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -5,6 +5,7 @@ #include "transport.h" static char *repository_format_partial_clone; +static const char *core_partial_clone_filter_default; void set_repository_format_partial_clone(char *partial_clone) { @@ -103,6 +104,10 @@ static int promisor_remote_config(const char *var, const char *value, void *data int namelen; const char *subkey; + if (!strcmp(var, "core.partialclonefilter")) + return 
git_config_string(&core_partial_clone_filter_default, + var, value); + if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0) return 0; From 9430147ca0aab0189d7e52df97b95a0985fc0c8a Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:05 -0700 Subject: [PATCH 016/710] list-objects-filter: encapsulate filter components Encapsulate filter_fn, filter_free_fn, and filter_data into their own opaque struct. Due to opaqueness, filter_fn and filter_free_fn can no longer be accessed directly by users. Currently, all usages of filter_fn are guarded by a necessary check: (obj->flags & NOT_USER_GIVEN) && filter_fn Take the opportunity to include this check into the new function list_objects_filter__filter_object(), so that we no longer need to write this check at every caller of the filter function. Also, the init functions in list-objects-filter.c no longer need to confusingly return the filter constituents in various places (filter_fn and filter_free_fn as out parameters, and filter_data as the function's return value); they can just initialize the "struct filter" passed in. 
Helped-by: Jeff Hostetler Helped-by: Jonathan Tan Helped-by: Junio C Hamano Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter.c | 112 ++++++++++++++++++++++++++++-------------- list-objects-filter.h | 35 ++++++------- list-objects.c | 55 +++++++++------------ 3 files changed, 113 insertions(+), 89 deletions(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index 36e1f774bcfc50..e06b82def05cf5 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -26,6 +26,20 @@ */ #define FILTER_SHOWN_BUT_REVISIT (1<<21) +struct filter { + enum list_objects_filter_result (*filter_object_fn)( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data); + + void (*free_fn)(void *filter_data); + + void *filter_data; +}; + /* * A filter for list-objects to omit ALL blobs from the traversal. * And to OPTIONALLY collect a list of the omitted OIDs. 
@@ -67,18 +81,17 @@ static enum list_objects_filter_result filter_blobs_none( } } -static void *filter_blobs_none__init( +static void filter_blobs_none__init( struct oidset *omitted, struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d)); d->omits = omitted; - *filter_fn = filter_blobs_none; - *filter_free_fn = free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_blobs_none; + filter->free_fn = free; } /* @@ -201,11 +214,10 @@ static void filter_trees_free(void *filter_data) { free(d); } -static void *filter_trees_depth__init( +static void filter_trees_depth__init( struct oidset *omitted, struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d)); d->omits = omitted; @@ -213,9 +225,9 @@ static void *filter_trees_depth__init( d->exclude_depth = filter_options->tree_exclude_depth; d->current_depth = 0; - *filter_fn = filter_trees_depth; - *filter_free_fn = filter_trees_free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_trees_depth; + filter->free_fn = filter_trees_free; } /* @@ -281,19 +293,18 @@ static enum list_objects_filter_result filter_blobs_limit( return LOFR_MARK_SEEN | LOFR_DO_SHOW; } -static void *filter_blobs_limit__init( +static void filter_blobs_limit__init( struct oidset *omitted, struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d)); d->omits = omitted; d->max_bytes = filter_options->blob_limit_value; - *filter_fn = filter_blobs_limit; - *filter_free_fn = free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_blobs_limit; + 
filter->free_fn = free; } /* @@ -456,11 +467,10 @@ static void filter_sparse_free(void *filter_data) free(d); } -static void *filter_sparse_oid__init( +static void filter_sparse_oid__init( struct oidset *omitted, struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct filter *filter) { struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); d->omits = omitted; @@ -473,16 +483,15 @@ static void *filter_sparse_oid__init( d->array_frame[d->nr].child_prov_omit = 0; d->nr++; - *filter_fn = filter_sparse; - *filter_free_fn = filter_sparse_free; - return d; + filter->filter_data = d; + filter->filter_object_fn = filter_sparse; + filter->free_fn = filter_sparse_free; } -typedef void *(*filter_init_fn)( +typedef void (*filter_init_fn)( struct oidset *omitted, struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn); + struct filter *filter); /* * Must match "enum list_objects_filter_choice". 
@@ -495,12 +504,11 @@ static filter_init_fn s_filters[] = { filter_sparse_oid__init, }; -void *list_objects_filter__init( +struct filter *list_objects_filter__init( struct oidset *omitted, - struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn) + struct list_objects_filter_options *filter_options) { + struct filter *filter; filter_init_fn init_fn; assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); @@ -510,10 +518,40 @@ void *list_objects_filter__init( filter_options->choice); init_fn = s_filters[filter_options->choice]; - if (init_fn) - return init_fn(omitted, filter_options, - filter_fn, filter_free_fn); - *filter_fn = NULL; - *filter_free_fn = NULL; - return NULL; + if (!init_fn) + return NULL; + + filter = xcalloc(1, sizeof(*filter)); + init_fn(omitted, filter_options, filter); + return filter; +} + +enum list_objects_filter_result list_objects_filter__filter_object( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct filter *filter) +{ + if (filter && (obj->flags & NOT_USER_GIVEN)) + return filter->filter_object_fn(r, filter_situation, obj, + pathname, filename, + filter->filter_data); + /* + * No filter is active or user gave object explicitly. In this case, + * always show the object (except when LOFS_END_TREE, since this tree + * had already been shown when LOFS_BEGIN_TREE). 
+ */ + if (filter_situation == LOFS_END_TREE) + return 0; + return LOFR_MARK_SEEN | LOFR_DO_SHOW; +} + +void list_objects_filter__free(struct filter *filter) +{ + if (!filter) + return; + filter->free_fn(filter->filter_data); + free(filter); } diff --git a/list-objects-filter.h b/list-objects-filter.h index 1d45a4ad5786c9..69089542668803 100644 --- a/list-objects-filter.h +++ b/list-objects-filter.h @@ -60,30 +60,27 @@ enum list_objects_filter_situation { LOFS_BLOB }; -typedef enum list_objects_filter_result (*filter_object_fn)( +struct filter; + +/* Constructor for the set of defined list-objects filters. */ +struct filter *list_objects_filter__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options); + +/* + * Lets `filter` decide how to handle the `obj`. If `filter` is NULL, this + * function behaves as expected if no filter is configured: all objects are + * included. + */ +enum list_objects_filter_result list_objects_filter__filter_object( struct repository *r, enum list_objects_filter_situation filter_situation, struct object *obj, const char *pathname, const char *filename, - void *filter_data); - -typedef void (*filter_free_fn)(void *filter_data); + struct filter *filter); -/* - * Constructor for the set of defined list-objects filters. - * Returns a generic "void *filter_data". - * - * The returned "filter_fn" will be used by traverse_commit_list() - * to filter the results. - * - * The returned "filter_free_fn" is a destructor for the - * filter_data. - */ -void *list_objects_filter__init( - struct oidset *omitted, - struct list_objects_filter_options *filter_options, - filter_object_fn *filter_fn, - filter_free_fn *filter_free_fn); +/* Destroys `filter`. Does nothing if `filter` is null. 
*/ +void list_objects_filter__free(struct filter *filter); #endif /* LIST_OBJECTS_FILTER_H */ diff --git a/list-objects.c b/list-objects.c index b5651ddd5bfdd6..9307d91fb3fc8b 100644 --- a/list-objects.c +++ b/list-objects.c @@ -18,8 +18,7 @@ struct traversal_context { show_object_fn show_object; show_commit_fn show_commit; void *show_data; - filter_object_fn filter_fn; - void *filter_data; + struct filter *filter; }; static void process_blob(struct traversal_context *ctx, @@ -29,7 +28,7 @@ static void process_blob(struct traversal_context *ctx, { struct object *obj = &blob->object; size_t pathlen; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + enum list_objects_filter_result r; if (!ctx->revs->blob_objects) return; @@ -54,11 +53,10 @@ static void process_blob(struct traversal_context *ctx, pathlen = path->len; strbuf_addstr(path, name); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) - r = ctx->filter_fn(ctx->revs->repo, - LOFS_BLOB, obj, - path->buf, &path->buf[pathlen], - ctx->filter_data); + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_BLOB, obj, + path->buf, &path->buf[pathlen], + ctx->filter); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) @@ -157,7 +155,7 @@ static void process_tree(struct traversal_context *ctx, struct object *obj = &tree->object; struct rev_info *revs = ctx->revs; int baselen = base->len; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + enum list_objects_filter_result r; int failed_parse; if (!revs->tree_objects) @@ -186,11 +184,10 @@ static void process_tree(struct traversal_context *ctx, } strbuf_addstr(base, name); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) - r = ctx->filter_fn(ctx->revs->repo, - LOFS_BEGIN_TREE, obj, - base->buf, &base->buf[baselen], - ctx->filter_data); + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_BEGIN_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter); if (r & LOFR_MARK_SEEN) obj->flags 
|= SEEN; if (r & LOFR_DO_SHOW) @@ -203,16 +200,14 @@ static void process_tree(struct traversal_context *ctx, else if (!failed_parse) process_tree_contents(ctx, tree, base); - if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { - r = ctx->filter_fn(ctx->revs->repo, - LOFS_END_TREE, obj, - base->buf, &base->buf[baselen], - ctx->filter_data); - if (r & LOFR_MARK_SEEN) - obj->flags |= SEEN; - if (r & LOFR_DO_SHOW) - ctx->show_object(obj, base->buf, ctx->show_data); - } + r = list_objects_filter__filter_object(ctx->revs->repo, + LOFS_END_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_DO_SHOW) + ctx->show_object(obj, base->buf, ctx->show_data); strbuf_setlen(base, baselen); free_tree_buffer(tree); @@ -402,8 +397,7 @@ void traverse_commit_list(struct rev_info *revs, ctx.show_commit = show_commit; ctx.show_object = show_object; ctx.show_data = show_data; - ctx.filter_fn = NULL; - ctx.filter_data = NULL; + ctx.filter = NULL; do_traverse(&ctx); } @@ -416,17 +410,12 @@ void traverse_commit_list_filtered( struct oidset *omitted) { struct traversal_context ctx; - filter_free_fn filter_free_fn = NULL; ctx.revs = revs; ctx.show_object = show_object; ctx.show_commit = show_commit; ctx.show_data = show_data; - ctx.filter_fn = NULL; - - ctx.filter_data = list_objects_filter__init(omitted, filter_options, - &ctx.filter_fn, &filter_free_fn); + ctx.filter = list_objects_filter__init(omitted, filter_options); do_traverse(&ctx); - if (ctx.filter_data && filter_free_fn) - filter_free_fn(ctx.filter_data); + list_objects_filter__free(ctx.filter); } From 7a7c7f4a6d22477e3548021eb3571384651c00be Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:06 -0700 Subject: [PATCH 017/710] list-objects-filter: put omits set in filter struct The oidset *omits pointer must be accessed by the combine filter in a type-agnostic way once the graph traversal is over. 
Store that pointer in the general `filter` struct. This will be used in a follow-up patch to implement the combine filter. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter.c | 68 +++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 42 deletions(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index e06b82def05cf5..3b4b6764ca4774 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -33,18 +33,14 @@ struct filter { struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data); void (*free_fn)(void *filter_data); void *filter_data; -}; -/* - * A filter for list-objects to omit ALL blobs from the traversal. - * And to OPTIONALLY collect a list of the omitted OIDs. - */ -struct filter_blobs_none_data { + /* If non-NULL, the filter collects a list of the omitted OIDs here. */ struct oidset *omits; }; @@ -54,10 +50,9 @@ static enum list_objects_filter_result filter_blobs_none( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { - struct filter_blobs_none_data *filter_data = filter_data_; - switch (filter_situation) { default: BUG("unknown filter_situation: %d", filter_situation); @@ -75,21 +70,16 @@ static enum list_objects_filter_result filter_blobs_none( assert(obj->type == OBJ_BLOB); assert((obj->flags & SEEN) == 0); - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ } } static void filter_blobs_none__init( - struct oidset *omitted, struct list_objects_filter_options *filter_options, struct filter *filter) { - struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; - - filter->filter_data = d; filter->filter_object_fn = filter_blobs_none; filter->free_fn = free; } @@ -99,8 +89,6 @@ static void filter_blobs_none__init( * Can 
OPTIONALLY collect a list of the omitted OIDs. */ struct filter_trees_depth_data { - struct oidset *omits; - /* * Maps trees to the minimum depth at which they were seen. It is not * necessary to re-traverse a tree at deeper or equal depths than it has @@ -123,16 +111,16 @@ struct seen_map_entry { /* Returns 1 if the oid was in the omits set before it was invoked. */ static int filter_trees_update_omits( struct object *obj, - struct filter_trees_depth_data *filter_data, + struct oidset *omits, int include_it) { - if (!filter_data->omits) + if (!omits) return 0; if (include_it) - return oidset_remove(filter_data->omits, &obj->oid); + return oidset_remove(omits, &obj->oid); else - return oidset_insert(filter_data->omits, &obj->oid); + return oidset_insert(omits, &obj->oid); } static enum list_objects_filter_result filter_trees_depth( @@ -141,6 +129,7 @@ static enum list_objects_filter_result filter_trees_depth( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct filter_trees_depth_data *filter_data = filter_data_; @@ -165,7 +154,7 @@ static enum list_objects_filter_result filter_trees_depth( return LOFR_ZERO; case LOFS_BLOB: - filter_trees_update_omits(obj, filter_data, include_it); + filter_trees_update_omits(obj, omits, include_it); return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; case LOFS_BEGIN_TREE: @@ -186,12 +175,12 @@ static enum list_objects_filter_result filter_trees_depth( filter_res = LOFR_SKIP_TREE; } else { int been_omitted = filter_trees_update_omits( - obj, filter_data, include_it); + obj, omits, include_it); seen_info->depth = filter_data->current_depth; if (include_it) filter_res = LOFR_DO_SHOW; - else if (filter_data->omits && !been_omitted) + else if (omits && !been_omitted) /* * Must update omit information of children * recursively; they have not been omitted yet. 
@@ -215,12 +204,10 @@ static void filter_trees_free(void *filter_data) { } static void filter_trees_depth__init( - struct oidset *omitted, struct list_objects_filter_options *filter_options, struct filter *filter) { struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; oidmap_init(&d->seen_at_depth, 0); d->exclude_depth = filter_options->tree_exclude_depth; d->current_depth = 0; @@ -235,7 +222,6 @@ static void filter_trees_depth__init( * And to OPTIONALLY collect a list of the omitted OIDs. */ struct filter_blobs_limit_data { - struct oidset *omits; unsigned long max_bytes; }; @@ -245,6 +231,7 @@ static enum list_objects_filter_result filter_blobs_limit( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct filter_blobs_limit_data *filter_data = filter_data_; @@ -282,24 +269,22 @@ static enum list_objects_filter_result filter_blobs_limit( if (object_length < filter_data->max_bytes) goto include_it; - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ } include_it: - if (filter_data->omits) - oidset_remove(filter_data->omits, &obj->oid); + if (omits) + oidset_remove(omits, &obj->oid); return LOFR_MARK_SEEN | LOFR_DO_SHOW; } static void filter_blobs_limit__init( - struct oidset *omitted, struct list_objects_filter_options *filter_options, struct filter *filter) { struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; d->max_bytes = filter_options->blob_limit_value; filter->filter_data = d; @@ -337,7 +322,6 @@ struct frame { }; struct filter_sparse_data { - struct oidset *omits; struct exclude_list el; size_t nr, alloc; @@ -350,6 +334,7 @@ static enum list_objects_filter_result filter_sparse( struct object *obj, const char *pathname, const char *filename, + struct oidset *omits, void *filter_data_) { struct 
filter_sparse_data *filter_data = filter_data_; @@ -431,8 +416,8 @@ static enum list_objects_filter_result filter_sparse( if (val < 0) val = frame->defval; if (val > 0) { - if (filter_data->omits) - oidset_remove(filter_data->omits, &obj->oid); + if (omits) + oidset_remove(omits, &obj->oid); return LOFR_MARK_SEEN | LOFR_DO_SHOW; } @@ -446,8 +431,8 @@ static enum list_objects_filter_result filter_sparse( * Leave the LOFR_ bits unset so that if the blob appears * again in the traversal, we will be asked again. */ - if (filter_data->omits) - oidset_insert(filter_data->omits, &obj->oid); + if (omits) + oidset_insert(omits, &obj->oid); /* * Remember that at least 1 blob in this tree was @@ -468,12 +453,10 @@ static void filter_sparse_free(void *filter_data) } static void filter_sparse_oid__init( - struct oidset *omitted, struct list_objects_filter_options *filter_options, struct filter *filter) { struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); - d->omits = omitted; if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value, NULL, 0, &d->el) < 0) die("could not load filter specification"); @@ -489,7 +472,6 @@ static void filter_sparse_oid__init( } typedef void (*filter_init_fn)( - struct oidset *omitted, struct list_objects_filter_options *filter_options, struct filter *filter); @@ -522,7 +504,8 @@ struct filter *list_objects_filter__init( return NULL; filter = xcalloc(1, sizeof(*filter)); - init_fn(omitted, filter_options, filter); + filter->omits = omitted; + init_fn(filter_options, filter); return filter; } @@ -537,6 +520,7 @@ enum list_objects_filter_result list_objects_filter__filter_object( if (filter && (obj->flags & NOT_USER_GIVEN)) return filter->filter_object_fn(r, filter_situation, obj, pathname, filename, + filter->omits, filter->filter_data); /* * No filter is active or user gave object explicitly. 
In this case, From 842b00516aebee06fc99c51a663b6587f642d36d Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:07 -0700 Subject: [PATCH 018/710] list-objects-filter-options: always supply *errbuf Making errbuf an optional argument complicates error reporting. Fix this by making all callers supply an errbuf, even if they may ignore it. This will be important in follow-up patches where the filter-spec parsing has more pitfalls and possible errors. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 1cb20c659c82b1..7c3e397d299724 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -30,11 +30,8 @@ static int gently_parse_list_objects_filter( const char *v0; if (filter_options->choice) { - if (errbuf) { - strbuf_addstr( - errbuf, - _("multiple filter-specs cannot be combined")); - } + strbuf_addstr( + errbuf, _("multiple filter-specs cannot be combined")); return 1; } @@ -52,11 +49,7 @@ static int gently_parse_list_objects_filter( } else if (skip_prefix(arg, "tree:", &v0)) { if (!git_parse_ulong(v0, &filter_options->tree_exclude_depth)) { - if (errbuf) { - strbuf_addstr( - errbuf, - _("expected 'tree:'")); - } + strbuf_addstr(errbuf, _("expected 'tree:'")); return 1; } filter_options->choice = LOFC_TREE_DEPTH; @@ -90,8 +83,7 @@ static int gently_parse_list_objects_filter( * add new filters */ - if (errbuf) - strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); + strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); memset(filter_options, 0, sizeof(*filter_options)); return 1; @@ -175,6 +167,8 @@ void partial_clone_register( void partial_clone_get_default_filter_spec( struct list_objects_filter_options *filter_options) { + struct strbuf errbuf = STRBUF_INIT; + /* * Parse default value, but silently ignore it if it 
is invalid. */ @@ -182,5 +176,6 @@ void partial_clone_get_default_filter_spec( return; gently_parse_list_objects_filter(filter_options, core_partial_clone_filter_default, - NULL); + &errbuf); + strbuf_release(&errbuf); } From e987df5fe62b8b29be4cdcdeb3704681ada2b29e Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:08 -0700 Subject: [PATCH 019/710] list-objects-filter: implement composite filters Allow combining filters such that only objects accepted by all filters are shown. The motivation for this is to allow getting directory listings without also fetching blobs. This can be done by combining blob:none with tree:<depth>. There are massive repositories that have larger-than-expected trees - even if you include only a single commit. A combined filter supports any number of subfilters, and is written in the following form: combine:<filter 1>+<filter 2>+<filter 3> Certain non-alphanumeric characters in each filter must be URL-encoded. For now, combined filters must be specified in this form. In a subsequent commit, rev-list will support multiple --filter arguments which will have the same effect as specifying one filter argument starting with "combine:". The documentation will be updated in that commit, as the URL-encoding scheme is in general not meant to be used directly by the user, and it is better to describe the URL-encoding feature in terms of the repeated flag. 
Helped-by: Emily Shaffer Helped-by: Jeff Hostetler Helped-by: Johannes Schindelin Helped-by: Jonathan Tan Helped-by: Junio C Hamano Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 106 +++++++++++++++++- list-objects-filter-options.h | 17 ++- list-objects-filter.c | 161 ++++++++++++++++++++++++++++ list-objects-filter.h | 13 ++- t/t6112-rev-list-filters-objects.sh | 151 +++++++++++++++++++++++++- url.c | 6 ++ url.h | 8 ++ 7 files changed, 454 insertions(+), 8 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 7c3e397d299724..75d0236ee2da9f 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -6,6 +6,12 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "url.h" + +static int parse_combine_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf); /* * Parse value of the argument to the "filter" keyword. 
@@ -35,8 +41,6 @@ static int gently_parse_list_objects_filter( return 1; } - filter_options->filter_spec = strdup(arg); - if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; return 0; @@ -77,6 +81,10 @@ static int gently_parse_list_objects_filter( _("sparse:path filters support has been dropped")); } return 1; + + } else if (skip_prefix(arg, "combine:", &v0)) { + return parse_combine_filter(filter_options, v0, errbuf); + } /* * Please update _git_fetch() in git-completion.bash when you @@ -89,10 +97,95 @@ static int gently_parse_list_objects_filter( return 1; } +static const char *RESERVED_NON_WS = "~`!@#$^&*()[]{}\\;'\",<>?"; + +static int has_reserved_character( + struct strbuf *sub_spec, struct strbuf *errbuf) +{ + const char *c = sub_spec->buf; + while (*c) { + if (*c <= ' ' || strchr(RESERVED_NON_WS, *c)) { + strbuf_addf( + errbuf, + _("must escape char in sub-filter-spec: '%c'"), + *c); + return 1; + } + c++; + } + + return 0; +} + +static int parse_combine_subfilter( + struct list_objects_filter_options *filter_options, + struct strbuf *subspec, + struct strbuf *errbuf) +{ + size_t new_index = filter_options->sub_nr++; + char *decoded; + int result; + + ALLOC_GROW(filter_options->sub, filter_options->sub_nr, + filter_options->sub_alloc); + memset(&filter_options->sub[new_index], 0, + sizeof(*filter_options->sub)); + + decoded = url_percent_decode(subspec->buf); + + result = has_reserved_character(subspec, errbuf) || + gently_parse_list_objects_filter( + &filter_options->sub[new_index], decoded, errbuf); + + free(decoded); + return result; +} + +static int parse_combine_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf) +{ + struct strbuf **subspecs = strbuf_split_str(arg, '+', 0); + size_t sub; + int result = 0; + + if (!subspecs[0]) { + strbuf_addstr(errbuf, _("expected something after combine:")); + result = 1; + goto cleanup; + } + + for (sub = 0; subspecs[sub] && !result; sub++) 
{ + if (subspecs[sub + 1]) { + /* + * This is not the last subspec. Remove trailing "+" so + * we can parse it. + */ + size_t last = subspecs[sub]->len - 1; + assert(subspecs[sub]->buf[last] == '+'); + strbuf_remove(subspecs[sub], last, 1); + } + result = parse_combine_subfilter( + filter_options, subspecs[sub], errbuf); + } + + filter_options->choice = LOFC_COMBINE; + +cleanup: + strbuf_list_free(subspecs); + if (result) { + list_objects_filter_release(filter_options); + memset(filter_options, 0, sizeof(*filter_options)); + } + return result; +} + int parse_list_objects_filter(struct list_objects_filter_options *filter_options, const char *arg) { struct strbuf buf = STRBUF_INIT; + filter_options->filter_spec = strdup(arg); if (gently_parse_list_objects_filter(filter_options, arg, &buf)) die("%s", buf.buf); return 0; @@ -129,8 +222,15 @@ void expand_list_objects_filter_spec( void list_objects_filter_release( struct list_objects_filter_options *filter_options) { + size_t sub; + + if (!filter_options) + return; free(filter_options->filter_spec); free(filter_options->sparse_oid_value); + for (sub = 0; sub < filter_options->sub_nr; sub++) + list_objects_filter_release(&filter_options->sub[sub]); + free(filter_options->sub); memset(filter_options, 0, sizeof(*filter_options)); } @@ -174,6 +274,8 @@ void partial_clone_get_default_filter_spec( */ if (!core_partial_clone_filter_default) return; + + filter_options->filter_spec = strdup(core_partial_clone_filter_default); gently_parse_list_objects_filter(filter_options, core_partial_clone_filter_default, &errbuf); diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index c54f0000fbade5..789faef1e500ba 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -13,6 +13,7 @@ enum list_objects_filter_choice { LOFC_BLOB_LIMIT, LOFC_TREE_DEPTH, LOFC_SPARSE_OID, + LOFC_COMBINE, LOFC__COUNT /* must be last */ }; @@ -38,13 +39,23 @@ struct list_objects_filter_options { unsigned int 
no_filter : 1; /* - * Parsed values (fields) from within the filter-spec. These are - * choice-specific; not all values will be defined for any given - * choice. + * BEGIN choice-specific parsed values from within the filter-spec. Only + * some values will be defined for any given choice. */ + struct object_id *sparse_oid_value; unsigned long blob_limit_value; unsigned long tree_exclude_depth; + + /* LOFC_COMBINE values */ + + /* This array contains all the subfilters which this filter combines. */ + size_t sub_nr, sub_alloc; + struct list_objects_filter_options *sub; + + /* + * END choice-specific parsed values. + */ }; /* Normalized command line arguments */ diff --git a/list-objects-filter.c b/list-objects-filter.c index 3b4b6764ca4774..d664264d65947f 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -26,6 +26,14 @@ */ #define FILTER_SHOWN_BUT_REVISIT (1<<21) +struct subfilter { + struct filter *filter; + struct oidset seen; + struct oidset omits; + struct object_id skip_tree; + unsigned is_skipping_tree : 1; +}; + struct filter { enum list_objects_filter_result (*filter_object_fn)( struct repository *r, @@ -36,6 +44,23 @@ struct filter { struct oidset *omits, void *filter_data); + /* + * Optional. If this function is supplied and the filter needs + * to collect omits, then this function is called once before + * free_fn is called. + * + * This is required because the following two conditions hold: + * + * a. A tree filter can add and remove objects as an object + * graph is traversed. + * b. A combine filter's omit set is the union of all its + * subfilters, which may include tree: filters. + * + * As such, the omits sets must be separate sets, and can only + * be unioned after the traversal is completed. 
+ */ + void (*finalize_omits_fn)(struct oidset *omits, void *filter_data); + void (*free_fn)(void *filter_data); void *filter_data; @@ -471,6 +496,139 @@ static void filter_sparse_oid__init( filter->free_fn = filter_sparse_free; } +/* A filter which only shows objects shown by all sub-filters. */ +struct combine_filter_data { + struct subfilter *sub; + size_t nr; +}; + +static enum list_objects_filter_result process_subfilter( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct subfilter *sub) +{ + enum list_objects_filter_result result; + + /* + * Check and update is_skipping_tree before oidset_contains so + * that is_skipping_tree gets unset even when the object is + * marked as seen. As of this writing, no filter uses + * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the + * ordering is only theoretically important. Be cautious if you + * change the order of the below checks and more filters have + * been added! 
+ */ + if (sub->is_skipping_tree) { + if (filter_situation == LOFS_END_TREE && + oideq(&obj->oid, &sub->skip_tree)) + sub->is_skipping_tree = 0; + else + return LOFR_ZERO; + } + if (oidset_contains(&sub->seen, &obj->oid)) + return LOFR_ZERO; + + result = list_objects_filter__filter_object( + r, filter_situation, obj, pathname, filename, sub->filter); + + if (result & LOFR_MARK_SEEN) + oidset_insert(&sub->seen, &obj->oid); + + if (result & LOFR_SKIP_TREE) { + sub->is_skipping_tree = 1; + sub->skip_tree = obj->oid; + } + + return result; +} + +static enum list_objects_filter_result filter_combine( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + enum list_objects_filter_result combined_result = + LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + enum list_objects_filter_result sub_result = process_subfilter( + r, filter_situation, obj, pathname, filename, + &d->sub[sub]); + if (!(sub_result & LOFR_DO_SHOW)) + combined_result &= ~LOFR_DO_SHOW; + if (!(sub_result & LOFR_MARK_SEEN)) + combined_result &= ~LOFR_MARK_SEEN; + if (!d->sub[sub].is_skipping_tree) + combined_result &= ~LOFR_SKIP_TREE; + } + + return combined_result; +} + +static void filter_combine__free(void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + for (sub = 0; sub < d->nr; sub++) { + list_objects_filter__free(d->sub[sub].filter); + oidset_clear(&d->sub[sub].seen); + if (d->sub[sub].omits.set.size) + BUG("expected oidset to be cleared already"); + } + free(d->sub); +} + +static void add_all(struct oidset *dest, struct oidset *src) { + struct oidset_iter iter; + struct object_id *src_oid; + + oidset_iter_init(src, &iter); + while ((src_oid = oidset_iter_next(&iter)) != NULL) + oidset_insert(dest, src_oid); +} + +static void 
filter_combine__finalize_omits( + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + add_all(omits, &d->sub[sub].omits); + oidset_clear(&d->sub[sub].omits); + } +} + +static void filter_combine__init( + struct list_objects_filter_options *filter_options, + struct filter* filter) +{ + struct combine_filter_data *d = xcalloc(1, sizeof(*d)); + size_t sub; + + d->nr = filter_options->sub_nr; + d->sub = xcalloc(d->nr, sizeof(*d->sub)); + for (sub = 0; sub < d->nr; sub++) + d->sub[sub].filter = list_objects_filter__init( + filter->omits ? &d->sub[sub].omits : NULL, + &filter_options->sub[sub]); + + filter->filter_data = d; + filter->filter_object_fn = filter_combine; + filter->free_fn = filter_combine__free; + filter->finalize_omits_fn = filter_combine__finalize_omits; +} + typedef void (*filter_init_fn)( struct list_objects_filter_options *filter_options, struct filter *filter); @@ -484,6 +642,7 @@ static filter_init_fn s_filters[] = { filter_blobs_limit__init, filter_trees_depth__init, filter_sparse_oid__init, + filter_combine__init, }; struct filter *list_objects_filter__init( @@ -536,6 +695,8 @@ void list_objects_filter__free(struct filter *filter) { if (!filter) return; + if (filter->finalize_omits_fn && filter->omits) + filter->finalize_omits_fn(filter->omits, filter->filter_data); filter->free_fn(filter->filter_data); free(filter); } diff --git a/list-objects-filter.h b/list-objects-filter.h index 69089542668803..cfd784e203f30f 100644 --- a/list-objects-filter.h +++ b/list-objects-filter.h @@ -62,7 +62,13 @@ enum list_objects_filter_situation { struct filter; -/* Constructor for the set of defined list-objects filters. */ +/* + * Constructor for the set of defined list-objects filters. + * The `omitted` set is optional. It is populated with objects that the + * filter excludes. 
This set should not be considered finalized until + * after list_objects_filter__free is called on the returned `struct + * filter *`. + */ struct filter *list_objects_filter__init( struct oidset *omitted, struct list_objects_filter_options *filter_options); @@ -80,7 +86,10 @@ enum list_objects_filter_result list_objects_filter__filter_object( const char *filename, struct filter *filter); -/* Destroys `filter`. Does nothing if `filter` is null. */ +/* + * Destroys `filter` and finalizes the `omitted` set, if present. Does + * nothing if `filter` is null. + */ void list_objects_filter__free(struct filter *filter); #endif /* LIST_OBJECTS_FILTER_H */ diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index acd7f5ab80d9c8..05d4f2e9c2e335 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -278,7 +278,19 @@ test_expect_success 'verify skipping tree iteration when not collecting omits' ' test_line_count = 2 actual && # Make sure no other trees were considered besides the root. - ! grep "Skipping contents of tree [^.]" filter_trace + ! grep "Skipping contents of tree [^.]" filter_trace && + + # Try this again with "combine:". If both sub-filters are skipping + # trees, the composite filter should also skip trees. This is not + # important unless the user does combine:tree:X+tree:Y or another filter + # besides "tree:" is implemented in the future which can skip trees. + GIT_TRACE=1 git -C r3 rev-list \ + --objects --filter=combine:tree:1+tree:3 HEAD 2>filter_trace && + + # Only skip the dir1/ tree, which is shared between the two commits. + grep "Skipping contents of tree " filter_trace >actual && + test_write_lines "Skipping contents of tree dir1/..." >expected && + test_cmp expected actual ' # Test tree:# filters. @@ -330,6 +342,112 @@ test_expect_success 'verify tree:3 includes everything expected' ' test_line_count = 10 actual ' +test_expect_success 'combine:... 
for a simple combination' ' + git -C r3 rev-list --objects --filter=combine:tree:2+blob:none HEAD \ + >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + + # There are also 2 commit objects + test_line_count = 5 actual +' + +test_expect_success 'combine:... with URL encoding' ' + git -C r3 rev-list --objects \ + --filter=combine:tree%3a2+blob:%6Eon%65 HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD dir1 && + + # There are also 2 commit objects + test_line_count = 5 actual +' + +expect_invalid_filter_spec () { + spec="$1" && + err="$2" && + + test_must_fail git -C r3 rev-list --objects --filter="$spec" HEAD \ + >actual 2>actual_stderr && + test_must_be_empty actual && + test_i18ngrep "$err" actual_stderr +} + +test_expect_success 'combine:... while URL-encoding things that should not be' ' + expect_invalid_filter_spec combine%3Atree:2+blob:none \ + "invalid filter-spec" +' + +test_expect_success 'combine: with nothing after the :' ' + expect_invalid_filter_spec combine: "expected something after combine:" +' + +test_expect_success 'parse error in first sub-filter in combine:' ' + expect_invalid_filter_spec combine:tree:asdf+blob:none \ + "expected .tree:." +' + +test_expect_success 'combine:... with non-encoded reserved chars' ' + expect_invalid_filter_spec combine:tree:2+sparse:@xyz \ + "must escape char in sub-filter-spec: .@." && + expect_invalid_filter_spec combine:tree:2+sparse:\` \ + "must escape char in sub-filter-spec: .\`." && + expect_invalid_filter_spec combine:tree:2+sparse:~abc \ + "must escape char in sub-filter-spec: .\~." +' + +test_expect_success 'validate err msg for "combine:+"' ' + expect_invalid_filter_spec combine:tree:2+ "expected .tree:." +' + +test_expect_success 'combine:... 
with edge-case hex digits: Ff Aa 0 9' ' + git -C r3 rev-list --objects --filter="combine:tree:2+bl%6Fb:n%6fne" \ + HEAD >actual && + test_line_count = 5 actual && + git -C r3 rev-list --objects --filter="combine:tree%3A2+blob%3anone" \ + HEAD >actual && + test_line_count = 5 actual && + git -C r3 rev-list --objects --filter="combine:tree:%30" HEAD >actual && + test_line_count = 2 actual && + git -C r3 rev-list --objects --filter="combine:tree:%39+blob:none" \ + HEAD >actual && + test_line_count = 5 actual +' + +test_expect_success 'add a sparse pattern blob whose path has reserved chars' ' + cp r3/pattern r3/pattern1+renamed% && + git -C r3 add pattern1+renamed% && + git -C r3 commit -m "add sparse pattern file with reserved chars" +' + +test_expect_success 'combine:... with more than two sub-filters' ' + git -C r3 rev-list --objects \ + --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern \ + HEAD >actual && + + expect_has HEAD "" && + expect_has HEAD~1 "" && + expect_has HEAD~2 "" && + expect_has HEAD dir1 && + expect_has HEAD dir1/sparse1 && + expect_has HEAD dir1/sparse2 && + + # Should also have 3 commits + test_line_count = 9 actual && + + # Try again, this time making sure the last sub-filter is only + # URL-decoded once. + cp actual expect && + + git -C r3 rev-list --objects \ + --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \ + HEAD >actual && + test_cmp expect actual +' + # Test provisional omit collection logic with a repo that has objects appearing # at multiple depths - first deeper than the filter's threshold, then shallow. 
@@ -373,6 +491,37 @@ test_expect_success 'verify skipping tree iteration when collecting omits' ' test_cmp expect actual ' +test_expect_success 'setup r5' ' + git init r5 && + mkdir -p r5/subdir && + + echo 1 >r5/short-root && + echo 12345 >r5/long-root && + echo a >r5/subdir/short-subdir && + echo abcde >r5/subdir/long-subdir && + + git -C r5 add short-root long-root subdir && + git -C r5 commit -m "commit msg" +' + +test_expect_success 'verify collecting omits in combined: filter' ' + # Note that this test guards against the naive implementation of simply + # giving both filters the same "omits" set and expecting it to + # automatically merge them. + git -C r5 rev-list --objects --quiet --filter-print-omitted \ + --filter=combine:tree:2+blob:limit=3 HEAD >actual && + + # Expect 0 trees/commits, 3 blobs omitted (all blobs except short-root) + omitted_1=$(echo 12345 | git hash-object --stdin) && + omitted_2=$(echo a | git hash-object --stdin) && + omitted_3=$(echo abcde | git hash-object --stdin) && + + grep ~$omitted_1 actual && + grep ~$omitted_2 actual && + grep ~$omitted_3 actual && + test_line_count = 3 actual +' + # Test tree: where a tree is iterated to twice - once where a subentry is # too deep to be included, and again where the blob inside it is shallow enough # to be included. 
This makes sure we don't use LOFR_MARK_SEEN incorrectly (we diff --git a/url.c b/url.c index 1b8ef78ceab037..e34e5e751737ae 100644 --- a/url.c +++ b/url.c @@ -86,6 +86,12 @@ char *url_decode_mem(const char *url, int len) return url_decode_internal(&url, len, NULL, &out, 0); } +char *url_percent_decode(const char *encoded) +{ + struct strbuf out = STRBUF_INIT; + return url_decode_internal(&encoded, strlen(encoded), NULL, &out, 0); +} + char *url_decode_parameter_name(const char **query) { struct strbuf out = STRBUF_INIT; diff --git a/url.h b/url.h index 00b7d58c33e38b..2a27c3427763b2 100644 --- a/url.h +++ b/url.h @@ -7,6 +7,14 @@ int is_url(const char *url); int is_urlschemechar(int first_flag, int ch); char *url_decode(const char *url); char *url_decode_mem(const char *url, int len); + +/* + * Similar to the url_decode_{,mem} methods above, but doesn't assume there + * is a scheme followed by a : at the start of the string. Instead, %-sequences + * before any : are also parsed. + */ +char *url_percent_decode(const char *encoded); + char *url_decode_parameter_name(const char **query); char *url_decode_parameter_value(const char **query); From f56f764279be2433ecf3cb3a484210bcaffea70c Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:09 -0700 Subject: [PATCH 020/710] list-objects-filter-options: move error check up Move the check that filter_options->choice is set to higher in the call stack. This can only be set when the gentle parse function is called from one of the two call sites. This is important because in an upcoming patch this may or may not be an error, and whether it is an error is only known to the parse_list_objects_filter function. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 75d0236ee2da9f..5fe2814841433c 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -35,11 +35,8 @@ static int gently_parse_list_objects_filter( { const char *v0; - if (filter_options->choice) { - strbuf_addstr( - errbuf, _("multiple filter-specs cannot be combined")); - return 1; - } + if (filter_options->choice) + BUG("filter_options already populated"); if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; @@ -185,6 +182,8 @@ int parse_list_objects_filter(struct list_objects_filter_options *filter_options const char *arg) { struct strbuf buf = STRBUF_INIT; + if (filter_options->choice) + die(_("multiple filter-specs cannot be combined")); filter_options->filter_spec = strdup(arg); if (gently_parse_list_objects_filter(filter_options, arg, &buf)) die("%s", buf.buf); From cf9ceb5a12cad9c9153d227a0f497d1b522ce085 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:10 -0700 Subject: [PATCH 021/710] list-objects-filter-options: make filter_spec a string_list Make the filter_spec string a string_list rather than a raw C string. The list of strings must be concatted together to make a complete filter_spec. A future patch will use this capability to build "combine:" filter specs gradually. A strbuf would seem to be a more natural choice for this object, but it unfortunately requires initialization besides just zero'ing out the memory. This results in all container structs, and all containers of those structs, etc., to also require initialization. Initializing them all would be more cumbersome than simply using a string_list, which behaves properly when its contents are zero'd.
For the purposes of code simplification, change behavior in how filter specs are conveyed over the protocol: do not normalize the tree: filter specs since there should be no server in existence that supports tree:# but not tree:#k etc. Helped-by: Junio C Hamano Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- builtin/clone.c | 8 ++--- builtin/fetch.c | 9 ++---- builtin/rev-list.c | 6 ++-- fetch-pack.c | 20 ++++-------- list-objects-filter-options.c | 50 ++++++++++++++++++++--------- list-objects-filter-options.h | 27 +++++++++++----- t/t6112-rev-list-filters-objects.sh | 7 ---- transport-helper.c | 10 ++---- upload-pack.c | 11 +++---- 9 files changed, 78 insertions(+), 70 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 5b9ebe994761bd..a693e6ca44c8fd 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1149,13 +1149,11 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport->server_options = &server_options; if (filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); + const char *spec = + expand_list_objects_filter_spec(&filter_options); transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, - expanded_filter_spec.buf); + spec); transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); - strbuf_release(&expanded_filter_spec); } if (transport->smart_options && !deepen && !filter_options.choice) diff --git a/builtin/fetch.c b/builtin/fetch.c index 4ba63d5ac64284..dee89e1a19290c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1188,13 +1188,10 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); if (filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); - set_option(transport, 
TRANS_OPT_LIST_OBJECTS_FILTER, - expanded_filter_spec.buf); + const char *spec = + expand_list_objects_filter_spec(&filter_options); + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec); set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); - strbuf_release(&expanded_filter_spec); } if (negotiation_tip.nr) { if (transport->smart_options) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 660172b01486f5..68acbe8fd263ce 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -466,8 +466,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) die(_("object filtering requires --objects")); if (filter_options.choice == LOFC_SPARSE_OID && !filter_options.sparse_oid_value) - die(_("invalid sparse value '%s'"), - filter_options.filter_spec); + die( + _("invalid sparse value '%s'"), + list_objects_filter_spec( + &filter_options)); continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { diff --git a/fetch-pack.c b/fetch-pack.c index 1c10f54e788ca5..72e13b0a1da516 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -339,12 +339,9 @@ static int find_common(struct fetch_negotiator *negotiator, } } if (server_supports_filtering && args->filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&args->filter_options, - &expanded_filter_spec); - packet_buf_write(&req_buf, "filter %s", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + const char *spec = + expand_list_objects_filter_spec(&args->filter_options); + packet_buf_write(&req_buf, "filter %s", spec); } packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -1099,7 +1096,7 @@ static int add_haves(struct fetch_negotiator *negotiator, } static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, - const struct fetch_pack_args *args, + struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, int *haves_to_send, int *in_vain, int sideband_all) @@ -1140,13 +1137,10 @@ static int 
send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, /* Add filter */ if (server_supports_feature("fetch", "filter", 0) && args->filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; + const char *spec = + expand_list_objects_filter_spec(&args->filter_options); print_verbose(args, _("Server supports filter")); - expand_list_objects_filter_spec(&args->filter_options, - &expanded_filter_spec); - packet_buf_write(&req_buf, "filter %s", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + packet_buf_write(&req_buf, "filter %s", spec); } else if (args->filter_options.choice) { warning("filtering not recognized by server, ignoring"); } diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 5fe2814841433c..01c0f133464d24 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -184,7 +184,7 @@ int parse_list_objects_filter(struct list_objects_filter_options *filter_options struct strbuf buf = STRBUF_INIT; if (filter_options->choice) die(_("multiple filter-specs cannot be combined")); - filter_options->filter_spec = strdup(arg); + string_list_append(&filter_options->filter_spec, xstrdup(arg)); if (gently_parse_list_objects_filter(filter_options, arg, &buf)) die("%s", buf.buf); return 0; @@ -203,19 +203,36 @@ int opt_parse_list_objects_filter(const struct option *opt, return parse_list_objects_filter(filter_options, arg); } -void expand_list_objects_filter_spec( - const struct list_objects_filter_options *filter, - struct strbuf *expanded_spec) +const char *list_objects_filter_spec(struct list_objects_filter_options *filter) { - strbuf_init(expanded_spec, strlen(filter->filter_spec)); - if (filter->choice == LOFC_BLOB_LIMIT) - strbuf_addf(expanded_spec, "blob:limit=%lu", + if (!filter->filter_spec.nr) + BUG("no filter_spec available for this filter"); + if (filter->filter_spec.nr != 1) { + struct strbuf concatted = STRBUF_INIT; + strbuf_add_separated_string_list( + 
&concatted, "", &filter->filter_spec); + string_list_clear(&filter->filter_spec, /*free_util=*/0); + string_list_append( + &filter->filter_spec, strbuf_detach(&concatted, NULL)); + } + + return filter->filter_spec.items[0].string; +} + +const char *expand_list_objects_filter_spec( + struct list_objects_filter_options *filter) +{ + if (filter->choice == LOFC_BLOB_LIMIT) { + struct strbuf expanded_spec = STRBUF_INIT; + strbuf_addf(&expanded_spec, "blob:limit=%lu", filter->blob_limit_value); - else if (filter->choice == LOFC_TREE_DEPTH) - strbuf_addf(expanded_spec, "tree:%lu", - filter->tree_exclude_depth); - else - strbuf_addstr(expanded_spec, filter->filter_spec); + string_list_clear(&filter->filter_spec, /*free_util=*/0); + string_list_append( + &filter->filter_spec, + strbuf_detach(&expanded_spec, NULL)); + } + + return list_objects_filter_spec(filter); } void list_objects_filter_release( @@ -225,7 +242,7 @@ void list_objects_filter_release( if (!filter_options) return; - free(filter_options->filter_spec); + string_list_clear(&filter_options->filter_spec, /*free_util=*/0); free(filter_options->sparse_oid_value); for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); @@ -235,7 +252,7 @@ void list_objects_filter_release( void partial_clone_register( const char *remote, - const struct list_objects_filter_options *filter_options) + struct list_objects_filter_options *filter_options) { /* * Record the name of the partial clone remote in the @@ -258,7 +275,7 @@ void partial_clone_register( * the default for subsequent fetches from this remote. 
*/ core_partial_clone_filter_default = - xstrdup(filter_options->filter_spec); + xstrdup(expand_list_objects_filter_spec(filter_options)); git_config_set("core.partialclonefilter", core_partial_clone_filter_default); } @@ -274,7 +291,8 @@ void partial_clone_get_default_filter_spec( if (!core_partial_clone_filter_default) return; - filter_options->filter_spec = strdup(core_partial_clone_filter_default); + string_list_append(&filter_options->filter_spec, + core_partial_clone_filter_default); gently_parse_list_objects_filter(filter_options, core_partial_clone_filter_default, &errbuf); diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 789faef1e500ba..bb33303f9b7084 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -2,7 +2,7 @@ #define LIST_OBJECTS_FILTER_OPTIONS_H #include "parse-options.h" -#include "strbuf.h" +#include "string-list.h" /* * The list of defined filters for list-objects. @@ -24,8 +24,10 @@ struct list_objects_filter_options { * commands that launch filtering sub-processes, or for communication * over the network, don't use this value; use the result of * expand_list_objects_filter_spec() instead. + * To get the raw filter spec given by the user, use the result of + * list_objects_filter_spec(). */ - char *filter_spec; + struct string_list filter_spec; /* * 'choice' is determined by parsing the filter-spec. This indicates @@ -76,13 +78,22 @@ int opt_parse_list_objects_filter(const struct option *opt, /* * Translates abbreviated numbers in the filter's filter_spec into their * fully-expanded forms (e.g., "limit:blob=1k" becomes "limit:blob=1024"). + * Returns a string owned by the list_objects_filter_options object. * - * This form should be used instead of the raw filter_spec field when - * communicating with a remote process or subprocess. + * This form should be used instead of the raw list_objects_filter_spec() + * value when communicating with a remote process or subprocess. 
*/ -void expand_list_objects_filter_spec( - const struct list_objects_filter_options *filter, - struct strbuf *expanded_spec); +const char *expand_list_objects_filter_spec( + struct list_objects_filter_options *filter); + +/* + * Returns the filter spec string more or less in the form as the user + * entered it. This form of the filter_spec can be used in user-facing + * messages. Returns a string owned by the list_objects_filter_options + * object. + */ +const char *list_objects_filter_spec( + struct list_objects_filter_options *filter); void list_objects_filter_release( struct list_objects_filter_options *filter_options); @@ -96,7 +107,7 @@ static inline void list_objects_filter_set_no_filter( void partial_clone_register( const char *remote, - const struct list_objects_filter_options *filter_options); + struct list_objects_filter_options *filter_options); void partial_clone_get_default_filter_spec( struct list_objects_filter_options *filter_options); diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 05d4f2e9c2e335..27ba15719a7d12 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -590,11 +590,4 @@ test_expect_success 'expand blob limit in protocol' ' grep "blob:limit=1024" trace ' -test_expect_success 'expand tree depth limit in protocol' ' - GIT_TRACE_PACKET="$(pwd)/tree_trace" git -c protocol.version=2 clone \ - --filter=tree:0k "file://$(pwd)/r2" tree && - ! 
grep "tree:0k" tree_trace && - grep "tree:0" tree_trace -' - test_done diff --git a/transport-helper.c b/transport-helper.c index c7e17ec9cb61e6..0a34544df06b9a 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -682,13 +682,9 @@ static int fetch(struct transport *transport, set_helper_option(transport, "update-shallow", "true"); if (data->transport_options.filter_options.choice) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec( - &data->transport_options.filter_options, - &expanded_filter_spec); - set_helper_option(transport, "filter", - expanded_filter_spec.buf); - strbuf_release(&expanded_filter_spec); + const char *spec = expand_list_objects_filter_spec( + &data->transport_options.filter_options); + set_helper_option(transport, "filter", spec); } if (data->transport_options.negotiation_tips) diff --git a/upload-pack.c b/upload-pack.c index 4d2129e7fc134c..d404d88941cb4f 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -140,18 +140,17 @@ static void create_pack_file(const struct object_array *have_obj, argv_array_push(&pack_objects.args, "--delta-base-offset"); if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); - if (filter_options.filter_spec) { - struct strbuf expanded_filter_spec = STRBUF_INIT; - expand_list_objects_filter_spec(&filter_options, - &expanded_filter_spec); + if (filter_options.choice) { + const char *spec = + expand_list_objects_filter_spec(&filter_options); if (pack_objects.use_shell) { struct strbuf buf = STRBUF_INIT; - sq_quote_buf(&buf, expanded_filter_spec.buf); + sq_quote_buf(&buf, spec); argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); strbuf_release(&buf); } else { argv_array_pushf(&pack_objects.args, "--filter=%s", - expanded_filter_spec.buf); + spec); } } From c2694952e33764818983fa247dcee72113c6ac6a Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:11 -0700 Subject: [PATCH 022/710] strbuf: give URL-encoding API a char 
predicate fn Allow callers to specify exactly what characters need to be URL-encoded and which do not. This new API will be taken advantage of in a patch later in this set. Helped-by: Jeff King Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- credential-store.c | 9 +++++---- http.c | 6 ++++-- strbuf.c | 15 ++++++++------- strbuf.h | 7 ++++++- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/credential-store.c b/credential-store.c index ac295420dd0d03..c010497cb21db3 100644 --- a/credential-store.c +++ b/credential-store.c @@ -72,15 +72,16 @@ static void store_credential_file(const char *fn, struct credential *c) struct strbuf buf = STRBUF_INIT; strbuf_addf(&buf, "%s://", c->protocol); - strbuf_addstr_urlencode(&buf, c->username, 1); + strbuf_addstr_urlencode(&buf, c->username, is_rfc3986_unreserved); strbuf_addch(&buf, ':'); - strbuf_addstr_urlencode(&buf, c->password, 1); + strbuf_addstr_urlencode(&buf, c->password, is_rfc3986_unreserved); strbuf_addch(&buf, '@'); if (c->host) - strbuf_addstr_urlencode(&buf, c->host, 1); + strbuf_addstr_urlencode(&buf, c->host, is_rfc3986_unreserved); if (c->path) { strbuf_addch(&buf, '/'); - strbuf_addstr_urlencode(&buf, c->path, 0); + strbuf_addstr_urlencode(&buf, c->path, + is_rfc3986_reserved_or_unreserved); } rewrite_credential_file(fn, c, &buf); diff --git a/http.c b/http.c index 27aa0a3192988c..938b9e55af435c 100644 --- a/http.c +++ b/http.c @@ -513,9 +513,11 @@ static void set_proxyauth_name_password(CURL *result) #else struct strbuf s = STRBUF_INIT; - strbuf_addstr_urlencode(&s, proxy_auth.username, 1); + strbuf_addstr_urlencode(&s, proxy_auth.username, + is_rfc3986_unreserved); strbuf_addch(&s, ':'); - strbuf_addstr_urlencode(&s, proxy_auth.password, 1); + strbuf_addstr_urlencode(&s, proxy_auth.password, + is_rfc3986_unreserved); curl_proxyuserpwd = strbuf_detach(&s, NULL); curl_easy_setopt(result, CURLOPT_PROXYUSERPWD, curl_proxyuserpwd); #endif diff --git a/strbuf.c b/strbuf.c index 
0e18b259ce51fc..60ab5144f2ff1f 100644 --- a/strbuf.c +++ b/strbuf.c @@ -774,8 +774,10 @@ void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s) } } -static int is_rfc3986_reserved(char ch) +int is_rfc3986_reserved_or_unreserved(char ch) { + if (is_rfc3986_unreserved(ch)) + return 1; switch (ch) { case '!': case '*': case '\'': case '(': case ')': case ';': case ':': case '@': case '&': case '=': case '+': case '$': @@ -785,20 +787,19 @@ static int is_rfc3986_reserved(char ch) return 0; } -static int is_rfc3986_unreserved(char ch) +int is_rfc3986_unreserved(char ch) { return isalnum(ch) || ch == '-' || ch == '_' || ch == '.' || ch == '~'; } static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len, - int reserved) + char_predicate allow_unencoded_fn) { strbuf_grow(sb, len); while (len--) { char ch = *s++; - if (is_rfc3986_unreserved(ch) || - (!reserved && is_rfc3986_reserved(ch))) + if (allow_unencoded_fn(ch)) strbuf_addch(sb, ch); else strbuf_addf(sb, "%%%02x", (unsigned char)ch); @@ -806,9 +807,9 @@ static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len, } void strbuf_addstr_urlencode(struct strbuf *sb, const char *s, - int reserved) + char_predicate allow_unencoded_fn) { - strbuf_add_urlencode(sb, s, strlen(s), reserved); + strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn); } void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes) diff --git a/strbuf.h b/strbuf.h index c8d98dfb95b8d8..346d72249291e2 100644 --- a/strbuf.h +++ b/strbuf.h @@ -666,8 +666,13 @@ void strbuf_branchname(struct strbuf *sb, const char *name, */ int strbuf_check_branch_ref(struct strbuf *sb, const char *name); +typedef int (*char_predicate)(char ch); + +int is_rfc3986_unreserved(char ch); +int is_rfc3986_reserved_or_unreserved(char ch); + void strbuf_addstr_urlencode(struct strbuf *sb, const char *name, - int reserved); + char_predicate allow_unencoded_fn); __attribute__((format (printf,1,2))) int printf_ln(const char *fmt, 
...); From 489fc9ee718b7c8594f17b55f090ac5292c655e1 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:12 -0700 Subject: [PATCH 023/710] list-objects-filter-options: allow mult. --filter Allow combining of multiple filters by simply repeating the --filter flag. Before this patch, the user had to combine them in a single flag somewhat awkwardly (e.g. --filter=combine:FOO+BAR), including URL-encoding the individual filters. To make this work, in the --filter flag parsing callback, rather than error out when we detect that the filter_options struct is already populated, we modify it in-place to contain the added sub-filter. The existing sub-filter becomes the lhs of the combined filter, and the next sub-filter becomes the rhs. We also have to URL-encode the LHS and RHS sub-filters. We can simplify the operation if the LHS is already a combine: filter. In that case, we just append the URL-encoded RHS sub-filter to the LHS spec to get the new spec. Helped-by: Emily Shaffer Helped-by: Jeff Hostetler Helped-by: Jeff King Helped-by: Junio C Hamano Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 16 ++++++ list-objects-filter-options.c | 88 +++++++++++++++++++++++++++-- list-objects-filter-options.h | 11 ++++ t/t5616-partial-clone.sh | 19 +++++++ t/t6112-rev-list-filters-objects.sh | 46 +++++++++++++-- transport.c | 1 + upload-pack.c | 2 + 7 files changed, 173 insertions(+), 10 deletions(-) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 71a1fcc0939f79..d1f080bf6d54f0 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -738,6 +738,22 @@ explicitly-given commit or tree. Note that the form '--filter=sparse:path=<path>' that wants to read from an arbitrary path on the filesystem has been dropped for security reasons. ++ +Multiple '--filter=' flags can be specified to combine filters.
Only +objects which are accepted by every filter are included. ++ +The form '--filter=combine:<filter1>+<filter2>+...<filterN>' can also be +used to combine several filters, but this is harder than just repeating +the '--filter' flag and is usually not necessary. Filters are joined by +'{plus}' and individual filters are %-encoded (i.e. URL-encoded). +Besides the '{plus}' and '%' characters, the following characters are +reserved and also must be encoded: `~!@#$^&*()[]{}\;",<>?`+'`+ +as well as all characters with ASCII code <= `0x20`, which includes +space and newline. ++ +Other arbitrary characters can also be encoded. For instance, +'combine:tree:3+blob:none' and 'combine:tree%3A3+blob%3Anone' are +equivalent. --no-filter:: Turn off any previous `--filter=` argument. diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 01c0f133464d24..2506dc8327482f 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -6,6 +6,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "trace.h" #include "url.h" static int parse_combine_filter( @@ -178,15 +179,92 @@ static int parse_combine_filter( return result; } -int parse_list_objects_filter(struct list_objects_filter_options *filter_options, - const char *arg) +static int allow_unencoded(char ch) +{ + if (ch <= ' ' || ch == '%' || ch == '+') + return 0; + return !strchr(RESERVED_NON_WS, ch); +} + +static void filter_spec_append_urlencode( + struct list_objects_filter_options *filter, const char *raw) { struct strbuf buf = STRBUF_INIT; + strbuf_addstr_urlencode(&buf, raw, allow_unencoded); + trace_printf("Add to combine filter-spec: %s\n", buf.buf); + string_list_append(&filter->filter_spec, strbuf_detach(&buf, NULL)); +} + +/* + * Changes filter_options into an equivalent LOFC_COMBINE filter options + * instance. Does not do anything if filter_options is already LOFC_COMBINE.
+ */ +static void transform_to_combine_type( + struct list_objects_filter_options *filter_options) +{ + assert(filter_options->choice); + if (filter_options->choice == LOFC_COMBINE) + return; + { + const int initial_sub_alloc = 2; + struct list_objects_filter_options *sub_array = + xcalloc(initial_sub_alloc, sizeof(*sub_array)); + sub_array[0] = *filter_options; + memset(filter_options, 0, sizeof(*filter_options)); + filter_options->sub = sub_array; + filter_options->sub_alloc = initial_sub_alloc; + } + filter_options->sub_nr = 1; + filter_options->choice = LOFC_COMBINE; + string_list_append(&filter_options->filter_spec, xstrdup("combine:")); + filter_spec_append_urlencode( + filter_options, + list_objects_filter_spec(&filter_options->sub[0])); + /* + * We don't need the filter_spec strings for subfilter specs, only the + * top level. + */ + string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0); +} + +void list_objects_filter_die_if_populated( + struct list_objects_filter_options *filter_options) +{ if (filter_options->choice) die(_("multiple filter-specs cannot be combined")); - string_list_append(&filter_options->filter_spec, xstrdup(arg)); - if (gently_parse_list_objects_filter(filter_options, arg, &buf)) - die("%s", buf.buf); +} + +int parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct strbuf errbuf = STRBUF_INIT; + int parse_error; + + if (!filter_options->choice) { + string_list_append(&filter_options->filter_spec, xstrdup(arg)); + + parse_error = gently_parse_list_objects_filter( + filter_options, arg, &errbuf); + } else { + /* + * Make filter_options an LOFC_COMBINE spec so we can trivially + * add subspecs to it. 
+ */ + transform_to_combine_type(filter_options); + + string_list_append(&filter_options->filter_spec, xstrdup("+")); + filter_spec_append_urlencode(filter_options, arg); + ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1, + filter_options->sub_alloc); + filter_options = &filter_options->sub[filter_options->sub_nr++]; + memset(filter_options, 0, sizeof(*filter_options)); + + parse_error = gently_parse_list_objects_filter( + filter_options, arg, &errbuf); + } + if (parse_error) + die("%s", errbuf.buf); return 0; } diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index bb33303f9b7084..d8bc7e946e59e9 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -63,6 +63,17 @@ struct list_objects_filter_options { /* Normalized command line arguments */ #define CL_ARG__FILTER "filter" +void list_objects_filter_die_if_populated( + struct list_objects_filter_options *filter_options); + +/* + * Parses the filter spec string given by arg and either (1) simply places the + * result in filter_options if it is not yet populated or (2) combines it with + * the filter already in filter_options if it is already populated. In the case + * of (2), the filter specs are combined as if specified with 'combine:'. + * + * Dies and prints a user-facing message if an error occurs. 
+ */ int parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index b91ef548f86b0e..32b7d72f3ca8e4 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -208,6 +208,25 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr test_cmp unique_types.expected unique_types.observed ' +test_expect_success 'implicitly construct combine: filter with repeated flags' ' + GIT_TRACE=$(pwd)/trace git clone --bare \ + --filter=blob:none --filter=tree:1 \ + "file://$(pwd)/srv.bare" pc2 && + grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \ + trace && + git -C pc2 rev-list --objects --missing=allow-any HEAD >objects && + + # We should have gotten some root trees. + grep " $" objects && + # Should not have gotten any non-root trees or blobs. + ! grep " ." objects && + + xargs -n 1 git -C pc2 cat-file -t types && + sort -u types >unique_types.actual && + test_write_lines commit tree >unique_types.expected && + test_cmp unique_types.expected unique_types.actual +' + test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' ' rm -rf src dst && git init src && diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 27ba15719a7d12..de0e5a5d3646cd 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -351,7 +351,16 @@ test_expect_success 'combine:... for a simple combination' ' expect_has HEAD dir1 && # There are also 2 commit objects - test_line_count = 5 actual + test_line_count = 5 actual && + + cp actual expected && + + # Try again using repeated --filter - this is equivalent to a manual + # combine with "combine:...+..." + git -C r3 rev-list --objects --filter=combine:tree:2 \ + --filter=blob:none HEAD >actual && + + test_cmp expected actual ' test_expect_success 'combine:... 
with URL encoding' ' @@ -417,10 +426,12 @@ test_expect_success 'combine:... with edge-case hex digits: Ff Aa 0 9' ' test_line_count = 5 actual ' -test_expect_success 'add a sparse pattern blob whose path has reserved chars' ' +test_expect_success 'add sparse pattern blobs whose paths have reserved chars' ' cp r3/pattern r3/pattern1+renamed% && - git -C r3 add pattern1+renamed% && - git -C r3 commit -m "add sparse pattern file with reserved chars" + cp r3/pattern "r3/p;at%ter+n" && + cp r3/pattern r3/^~pattern && + git -C r3 add pattern1+renamed% "p;at%ter+n" ^~pattern && + git -C r3 commit -m "add sparse pattern files with reserved chars" ' test_expect_success 'combine:... with more than two sub-filters' ' @@ -445,7 +456,32 @@ test_expect_success 'combine:... with more than two sub-filters' ' git -C r3 rev-list --objects \ --filter=combine:tree:3+blob:limit=40+sparse:oid=master:pattern1%2brenamed%25 \ HEAD >actual && - test_cmp expect actual + test_cmp expect actual && + + # Use the same composite filter again, but with a pattern file name that + # requires encoding multiple characters, and use implicit filter + # combining. + test_when_finished "rm -f trace1" && + GIT_TRACE=$(pwd)/trace1 git -C r3 rev-list --objects \ + --filter=tree:3 --filter=blob:limit=40 \ + --filter=sparse:oid="master:p;at%ter+n" \ + HEAD >actual && + + test_cmp expect actual && + grep "Add to combine filter-spec: sparse:oid=master:p%3bat%25ter%2bn" \ + trace1 && + + # Repeat the above test, but this time, the characters to encode are in + # the LHS of the combined filter. 
+ test_when_finished "rm -f trace2" && + GIT_TRACE=$(pwd)/trace2 git -C r3 rev-list --objects \ + --filter=sparse:oid=master:^~pattern \ + --filter=tree:3 --filter=blob:limit=40 \ + HEAD >actual && + + test_cmp expect actual && + grep "Add to combine filter-spec: sparse:oid=master:%5e%7epattern" \ + trace2 ' # Test provisional omit collection logic with a repo that has objects appearing diff --git a/transport.c b/transport.c index f1fcd2c4b006dc..ee7dd1c062fc90 100644 --- a/transport.c +++ b/transport.c @@ -224,6 +224,7 @@ static int set_git_option(struct git_transport_options *opts, opts->no_dependents = !!value; return 0; } else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + list_objects_filter_die_if_populated(&opts->filter_options); parse_list_objects_filter(&opts->filter_options, value); return 0; } diff --git a/upload-pack.c b/upload-pack.c index d404d88941cb4f..f8a76ebda32f24 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -883,6 +883,7 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan if (skip_prefix(reader->line, "filter ", &arg)) { if (!filter_capability_requested) die("git upload-pack: filtering capability not negotiated"); + list_objects_filter_die_if_populated(&filter_options); parse_list_objects_filter(&filter_options, arg); continue; } @@ -1304,6 +1305,7 @@ static void process_args(struct packet_reader *request, } if (allow_filter && skip_prefix(arg, "filter ", &p)) { + list_objects_filter_die_if_populated(&filter_options); parse_list_objects_filter(&filter_options, p); continue; } From 5a133e8a7f7c28c57f7a0a85e57692b8b781d896 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:13 -0700 Subject: [PATCH 024/710] list-objects-filter-options: clean up use of ALLOC_GROW Introduce a new macro ALLOC_GROW_BY which automatically zeros the added array elements and takes care of updating the nr value. Use the macro in code introduced earlier in this patchset. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- cache.h | 22 ++++++++++++++++++++++ list-objects-filter-options.c | 17 +++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cache.h b/cache.h index bf20337ef43523..cf5d70c19622c6 100644 --- a/cache.h +++ b/cache.h @@ -660,6 +660,9 @@ int daemonize(void); * at least 'nr' entries; the number of entries currently allocated * is 'alloc', using the standard growing factor alloc_nr() macro. * + * Consider using ALLOC_GROW_BY instead of ALLOC_GROW as it has some + * added niceties. + * * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'. */ #define ALLOC_GROW(x, nr, alloc) \ @@ -673,6 +676,25 @@ int daemonize(void); } \ } while (0) +/* + * Similar to ALLOC_GROW but handles updating of the nr value and + * zeroing the bytes of the newly-grown array elements. + * + * DO NOT USE any expression with side-effect for any of the + * arguments. + */ +#define ALLOC_GROW_BY(x, nr, increase, alloc) \ + do { \ + if (increase) { \ + size_t new_nr = nr + (increase); \ + if (new_nr < nr) \ + BUG("negative growth in ALLOC_GROW_BY"); \ + ALLOC_GROW(x, new_nr, alloc); \ + memset((x) + nr, 0, sizeof(*(x)) * (increase)); \ + nr = new_nr; \ + } \ + } while (0) + /* Initialize and use the cache information */ struct lock_file; void preload_index(struct index_state *index, diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 2506dc8327482f..44bc1153d121b5 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -120,14 +120,12 @@ static int parse_combine_subfilter( struct strbuf *subspec, struct strbuf *errbuf) { - size_t new_index = filter_options->sub_nr++; + size_t new_index = filter_options->sub_nr; char *decoded; int result; - ALLOC_GROW(filter_options->sub, filter_options->sub_nr, - filter_options->sub_alloc); - memset(&filter_options->sub[new_index], 0, - sizeof(*filter_options->sub)); + ALLOC_GROW_BY(filter_options->sub, 
filter_options->sub_nr, 1, + filter_options->sub_alloc); decoded = url_percent_decode(subspec->buf); @@ -255,13 +253,12 @@ int parse_list_objects_filter( string_list_append(&filter_options->filter_spec, xstrdup("+")); filter_spec_append_urlencode(filter_options, arg); - ALLOC_GROW(filter_options->sub, filter_options->sub_nr + 1, - filter_options->sub_alloc); - filter_options = &filter_options->sub[filter_options->sub_nr++]; - memset(filter_options, 0, sizeof(*filter_options)); + ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1, + filter_options->sub_alloc); parse_error = gently_parse_list_objects_filter( - filter_options, arg, &errbuf); + &filter_options->sub[filter_options->sub_nr - 1], arg, + &errbuf); } if (parse_error) die("%s", errbuf.buf); From 90d21f9ebf6906f0ebb4fb1b20ec9536072e2916 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Thu, 27 Jun 2019 15:54:14 -0700 Subject: [PATCH 025/710] list-objects-filter-options: make parser void This function always returns 0, so make it return void instead. 
Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 12 +++++------- list-objects-filter-options.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 44bc1153d121b5..ba1425cb4a071f 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -232,7 +232,7 @@ void list_objects_filter_die_if_populated( die(_("multiple filter-specs cannot be combined")); } -int parse_list_objects_filter( +void parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg) { @@ -262,7 +262,6 @@ int parse_list_objects_filter( } if (parse_error) die("%s", errbuf.buf); - return 0; } int opt_parse_list_objects_filter(const struct option *opt, @@ -270,12 +269,11 @@ int opt_parse_list_objects_filter(const struct option *opt, { struct list_objects_filter_options *filter_options = opt->value; - if (unset || !arg) { + if (unset || !arg) list_objects_filter_set_no_filter(filter_options); - return 0; - } - - return parse_list_objects_filter(filter_options, arg); + else + parse_list_objects_filter(filter_options, arg); + return 0; } const char *list_objects_filter_spec(struct list_objects_filter_options *filter) diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index d8bc7e946e59e9..db37dfb34a1740 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -74,7 +74,7 @@ void list_objects_filter_die_if_populated( * * Dies and prints a user-facing message if an error occurs. 
*/ -int parse_list_objects_filter( +void parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg); From 4e2443b1813dded87c9cc1138f22af73748022b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 28 Jun 2019 01:39:04 +0200 Subject: [PATCH 026/710] log tests: test regex backends in "--encode=" tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the tests added in 04deccda11 ("log: re-encode commit messages before grepping", 2013-02-11) to test the regex backends. Those tests never worked as advertised, due to the is_fixed() optimization in grep.c (which was in place at the time), and the needle in the tests being a fixed string. We'd thus always use the "fixed" backend during the tests, which would use the kwset() backend. This backend liberally accepts any garbage input, so invalid encodings would be silently accepted. In a follow-up commit we'll fix this bug, this test just demonstrates the existing issue. In practice this issue happened on Windows, see [1], but due to the structure of the existing tests & how liberal the kwset code is about garbage we missed this. Cover this blind spot by testing all our regex engines. The PCRE backend will spot these invalid encodings. It's possible that this test breaks the "basic" and "extended" backends on some systems that are more anal than glibc about the encoding of locale issues with POSIX functions that I can remember, but PCRE is more careful about the validation. 1. 
https://public-inbox.org/git/nycvar.QRO.7.76.6.1906271113090.44@tvgsbejvaqbjf.bet/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t4210-log-i18n.sh | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh index 7c519436ef3d2e..86d22c1d4cf6a1 100755 --- a/t/t4210-log-i18n.sh +++ b/t/t4210-log-i18n.sh @@ -1,12 +1,15 @@ #!/bin/sh test_description='test log with i18n features' -. ./test-lib.sh +. ./lib-gettext.sh # two forms of é utf8_e=$(printf '\303\251') latin1_e=$(printf '\351') +# invalid UTF-8 +invalid_e=$(printf '\303\50)') # ")" at end to close opening "(" + test_expect_success 'create commits in different encodings' ' test_tick && cat >msg <<-EOF && @@ -53,4 +56,40 @@ test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' test_must_be_empty actual ' +for engine in fixed basic extended perl +do + prereq= + result=success + if test $engine = "perl" + then + result=failure + prereq="PCRE" + else + prereq="" + fi + force_regex= + if test $engine != "fixed" + then + force_regex=.* + fi + test_expect_$result GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + cat >expect <<-\EOF && + latin1 + utf8 + EOF + LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$latin1_e\" >actual && + test_cmp expect actual + " + + test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual && + test_must_be_empty actual + " + + test_expect_$result GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " + 
LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && + test_must_be_empty actual + " +done + test_done From 44570188a0e324048decf06b845d34c45b08a4fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 28 Jun 2019 01:39:05 +0200 Subject: [PATCH 027/710] grep: don't use PCRE2?_UTF8 with "log --encoding=" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug introduced in 18547aacf5 ("grep/pcre: support utf-8", 2016-06-25) that was missed due to a blindspot in our tests, as discussed in the previous commit. I then blindly copied the same bug in 94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) when adding the PCRE v2 code. We should not tell PCRE that we're processing UTF-8 just because we're dealing with non-ASCII. In the case of e.g. "log --encoding=<...>" under is_utf8_locale() the haystack might be in ISO-8859-1, and the needle might be in a non-UTF-8 encoding. Maybe we should be more strict here and die earlier? Should we also be converting the needle to the encoding in question, and failing if it's not a string that's valid in that encoding? Maybe. But for now matching this as non-UTF8 at least has some hope of producing sensible results, since we know that our default heuristic of assuming the text to be matched is in the user locale encoding isn't true when we've explicitly encoded it to be in a different encoding. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 8 ++++---- grep.h | 1 + revision.c | 3 +++ t/t4210-log-i18n.sh | 6 ++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/grep.c b/grep.c index f7c3a5803e8ea0..1de4ab49c0c255 100644 --- a/grep.c +++ b/grep.c @@ -388,11 +388,11 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) int options = PCRE_MULTILINE; if (opt->ignore_case) { - if (has_non_ascii(p->pattern)) + if (!opt->ignore_locale && has_non_ascii(p->pattern)) p->pcre1_tables = pcre_maketables(); options |= PCRE_CASELESS; } - if (is_utf8_locale() && has_non_ascii(p->pattern)) + if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern)) options |= PCRE_UTF8; p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset, @@ -498,14 +498,14 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt p->pcre2_compile_context = NULL; if (opt->ignore_case) { - if (has_non_ascii(p->pattern)) { + if (!opt->ignore_locale && has_non_ascii(p->pattern)) { character_tables = pcre2_maketables(NULL); p->pcre2_compile_context = pcre2_compile_context_create(NULL); pcre2_set_character_tables(p->pcre2_compile_context, character_tables); } options |= PCRE2_CASELESS; } - if (is_utf8_locale() && has_non_ascii(p->pattern)) + if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern)) options |= PCRE2_UTF; p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, diff --git a/grep.h b/grep.h index 1875880f37990f..4bb8a79d93187b 100644 --- a/grep.h +++ b/grep.h @@ -173,6 +173,7 @@ struct grep_opt { int funcbody; int extended_regexp_option; int pattern_type_option; + int ignore_locale; char colors[NR_GREP_COLORS][COLOR_MAXLEN]; unsigned pre_context; unsigned post_context; diff --git a/revision.c b/revision.c index 621feb9df71640..a842fb158af309 100644 --- a/revision.c +++ b/revision.c @@ -28,6 +28,7 @@ #include "commit-graph.h" #include 
"prio-queue.h" #include "hashmap.h" +#include "utf8.h" volatile show_early_output_fn_t show_early_output; @@ -2655,6 +2656,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED, &revs->grep_filter); + if (!is_encoding_utf8(get_log_output_encoding())) + revs->grep_filter.ignore_locale = 1; compile_grep_patterns(&revs->grep_filter); if (revs->reverse && revs->reflog_info) diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh index 86d22c1d4cf6a1..515bcb7ce12685 100755 --- a/t/t4210-log-i18n.sh +++ b/t/t4210-log-i18n.sh @@ -59,10 +59,8 @@ test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' for engine in fixed basic extended perl do prereq= - result=success if test $engine = "perl" then - result=failure prereq="PCRE" else prereq="" @@ -72,7 +70,7 @@ do then force_regex=.* fi - test_expect_$result GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " cat >expect <<-\EOF && latin1 utf8 @@ -86,7 +84,7 @@ do test_must_be_empty actual " - test_expect_$result GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " + test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && test_must_be_empty actual " From b14cf112e2c3d86de931276c2c778004a168db65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:53 +0200 Subject: [PATCH 028/710] t4210: skip more command-line encoding tests on MinGW 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 5212f91deb ("t4210: skip command-line encoding tests on mingw", 2014-07-17) the positive tests in this file were skipped. That left the negative tests that don't produce a match. An upcoming change to migrate the "fixed" backend of grep to PCRE v2 will cause these "log" commands to produce an error instead on MinGW. This is because the command-line on that platform implicitly has its encoding changed before being passed to git. See [1]. 1. https://public-inbox.org/git/nycvar.QRO.7.76.6.1907011515150.44@tvgsbejvaqbjf.bet/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t4210-log-i18n.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh index 515bcb7ce12685..6e61f57f09fbcd 100755 --- a/t/t4210-log-i18n.sh +++ b/t/t4210-log-i18n.sh @@ -51,7 +51,7 @@ test_expect_success !MINGW 'log --grep does not find non-reencoded values (utf8) test_must_be_empty actual ' -test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' +test_expect_success !MINGW 'log --grep does not find non-reencoded values (latin1)' ' git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual && test_must_be_empty actual ' @@ -70,7 +70,7 @@ do then force_regex=.* fi - test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " cat >expect <<-\EOF && latin1 utf8 @@ -79,12 +79,12 @@ do test_cmp expect actual " - test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values (latin1 + locale)" " + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not find non-reencoded values 
(latin1 + locale)" " LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$utf8_e\" >actual && test_must_be_empty actual " - test_expect_success GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " + test_expect_success !MINGW,GETTEXT_LOCALE,$prereq "-c grep.patternType=$engine log --grep does not die on invalid UTF-8 value (latin1 + locale + invalid needle)" " LC_ALL=\"$is_IS_locale\" git -c grep.patternType=$engine log --encoding=ISO-8859-1 --format=%s --grep=\"$force_regex$invalid_e\" >actual && test_must_be_empty actual " From f463beb805638830e2a6b16359f94d8afca289ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:54 +0200 Subject: [PATCH 029/710] grep: inline the return value of a function call used only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since e944d9d932 ("grep: rewrite an if/else condition to avoid duplicate expression", 2016-06-25) the "ascii_only" variable has only been used once in compile_regexp(), let's just inline it there. This makes the code easier to read, and might make it marginally faster depending on compiler optimizations. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/grep.c b/grep.c index 1de4ab49c0c255..4e8d0645a83288 100644 --- a/grep.c +++ b/grep.c @@ -650,13 +650,11 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int ascii_only; int err; int regflags = REG_NEWLINE; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - ascii_only = !has_non_ascii(p->pattern); /* * Even when -F (fixed) asks us to do a non-regexp search, we @@ -673,7 +671,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (opt->fixed || has_null(p->pattern, p->patternlen) || is_fixed(p->pattern, p->patternlen)) - p->fixed = !p->ignore_case || ascii_only; + p->fixed = !p->ignore_case || !has_non_ascii(p->pattern); if (p->fixed) { p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL); From 471dac5d2ceec4ccf7ad149402dfcb66ac066ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:55 +0200 Subject: [PATCH 030/710] grep tests: move "grep binary" alongside the rest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the "grep binary" test case added in aca20dd558 ("grep: add test script for binary file handling", 2010-05-22) so that it lives alongside the rest of the "grep" tests in t781*. This would have left a gap in the t/700* namespace, so move a "filter-branch" test down, leaving the "t7010-setup.sh" test as the next one after that. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- ...ilter-branch-null-sha1.sh => t7008-filter-branch-null-sha1.sh} | 0 t/{t7008-grep-binary.sh => t7815-grep-binary.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename t/{t7009-filter-branch-null-sha1.sh => t7008-filter-branch-null-sha1.sh} (100%) rename t/{t7008-grep-binary.sh => t7815-grep-binary.sh} (100%) diff --git a/t/t7009-filter-branch-null-sha1.sh b/t/t7008-filter-branch-null-sha1.sh similarity index 100% rename from t/t7009-filter-branch-null-sha1.sh rename to t/t7008-filter-branch-null-sha1.sh diff --git a/t/t7008-grep-binary.sh b/t/t7815-grep-binary.sh similarity index 100% rename from t/t7008-grep-binary.sh rename to t/t7815-grep-binary.sh From d316af059d67273f381048cdf538d2e0667b6485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:56 +0200 Subject: [PATCH 031/710] grep tests: move binary pattern tests into their own file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the tests for "-f <file>" where "<file>" contains a NUL byte pattern into their own file. I added most of these tests in 966be95549 ("grep: add tests to fix blind spots with \0 patterns", 2017-05-20). Whether a regex engine supports matching binary content is very different from whether it matches binary patterns. Since 2f8952250a ("regex: add regexec_buf() that can work on a non NUL-terminated string", 2016-09-21) we've required REG_STARTEND of our regex engines so we can match binary content, but only the PCRE v2 engine can sensibly match binary patterns. Since 9eceddeec6 ("Use kwset in grep", 2011-08-21) we've been punting patterns containing NUL-byte and considering them fixed, except in cases where "--ignore-case" is provided and they're non-ASCII, see 5c1ebcca4d ("grep/icase: avoid kwsset on literal non-ascii strings", 2016-06-25). Subsequent commits will change this behavior.
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t7815-grep-binary.sh | 101 ----------------------------- t/t7816-grep-binary-pattern.sh | 114 +++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 101 deletions(-) create mode 100755 t/t7816-grep-binary-pattern.sh diff --git a/t/t7815-grep-binary.sh b/t/t7815-grep-binary.sh index 2d87c49b753935..90ebb64f46ebfa 100755 --- a/t/t7815-grep-binary.sh +++ b/t/t7815-grep-binary.sh @@ -4,41 +4,6 @@ test_description='git grep in binary files' . ./test-lib.sh -nul_match () { - matches=$1 - flags=$2 - pattern=$3 - pattern_human=$(echo "$pattern" | sed 's/Q//g') - - if test "$matches" = 1 - then - test_expect_success "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - git grep -f f $flags a - " - elif test "$matches" = 0 - then - test_expect_success "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a - " - elif test "$matches" = T1 - then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - git grep -f f $flags a - " - elif test "$matches" = T0 - then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a - " - else - test_expect_success "PANIC: Test framework error. 
Unknown matches value $matches" 'false' - fi -} - test_expect_success 'setup' " echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && git add a && @@ -102,72 +67,6 @@ test_expect_failure 'git grep .fi a' ' git grep .fi a ' -nul_match 1 '-F' 'yQf' -nul_match 0 '-F' 'yQx' -nul_match 1 '-Fi' 'YQf' -nul_match 0 '-Fi' 'YQx' -nul_match 1 '' 'yQf' -nul_match 0 '' 'yQx' -nul_match 1 '' 'æQð' -nul_match 1 '-F' 'eQm[*]c' -nul_match 1 '-Fi' 'EQM[*]C' - -# Regex patterns that would match but shouldn't with -F -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-F' '[y]Qf' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '-Fi' '[Y]QF' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-F' '[æ]Qð' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '-Fi' '[Æ]QÐ' - -# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0 -# patterns case-insensitively. -nul_match T1 '-i' 'ÆQÐ' - -# \0 implicitly disables regexes. This is an undocumented internal -# limitation. -nul_match T1 '' 'yQ[f]' -nul_match T1 '' '[y]Qf' -nul_match T1 '-i' 'YQ[F]' -nul_match T1 '-i' '[Y]Qf' -nul_match T1 '' 'æQ[ð]' -nul_match T1 '' '[æ]Qð' -nul_match T1 '-i' 'ÆQ[Ð]' - -# ... because of \0 implicitly disabling regexes regexes that -# should/shouldn't match don't do the right thing. -nul_match T1 '' 'eQm.*cQ' -nul_match T1 '-i' 'EQM.*cQ' -nul_match T0 '' 'eQm[*]c' -nul_match T0 '-i' 'EQM[*]C' - -# Due to the REG_STARTEND extension when kwset() is disabled on -i & -# non-ASCII the string will be matched in its entirety, but the -# pattern will be cut off at the first \0. 
-nul_match 0 '-i' 'NOMATCHQð' -nul_match T0 '-i' '[Æ]QNOMATCH' -nul_match T0 '-i' '[æ]QNOMATCH' -# Matches, but for the wrong reasons, just stops at [æ] -nul_match 1 '-i' '[Æ]Qð' -nul_match 1 '-i' '[æ]Qð' - -# Ensure that the matcher doesn't regress to something that stops at -# \0 -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '' 'yQNOMATCH' -nul_match 0 '' 'QNOMATCH' -nul_match 0 '-i' 'YQNOMATCH' -nul_match 0 '-i' 'QNOMATCH' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '' 'yQNÓMATCH' -nul_match 0 '' 'QNÓMATCH' -nul_match 0 '-i' 'YQNÓMATCH' -nul_match 0 '-i' 'QNÓMATCH' - test_expect_success 'grep respects binary diff attribute' ' echo text >t && git add t && diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh new file mode 100755 index 00000000000000..4060dbd67965e0 --- /dev/null +++ b/t/t7816-grep-binary-pattern.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +test_description='git grep with a binary pattern files' + +. ./test-lib.sh + +nul_match () { + matches=$1 + flags=$2 + pattern=$3 + pattern_human=$(echo "$pattern" | sed 's/Q//g') + + if test "$matches" = 1 + then + test_expect_success "git grep -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + git grep -f f $flags a + " + elif test "$matches" = 0 + then + test_expect_success "git grep -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + test_must_fail git grep -f f $flags a + " + elif test "$matches" = T1 + then + test_expect_failure "git grep -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + git grep -f f $flags a + " + elif test "$matches" = T0 + then + test_expect_failure "git grep -f f $flags '$pattern_human' a" " + printf '$pattern' | q_to_nul >f && + test_must_fail git grep -f f $flags a + " + else + test_expect_success "PANIC: Test framework error. 
Unknown matches value $matches" 'false' + fi +} + +test_expect_success 'setup' " + echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && + git add a && + git commit -m. +" + +nul_match 1 '-F' 'yQf' +nul_match 0 '-F' 'yQx' +nul_match 1 '-Fi' 'YQf' +nul_match 0 '-Fi' 'YQx' +nul_match 1 '' 'yQf' +nul_match 0 '' 'yQx' +nul_match 1 '' 'æQð' +nul_match 1 '-F' 'eQm[*]c' +nul_match 1 '-Fi' 'EQM[*]C' + +# Regex patterns that would match but shouldn't with -F +nul_match 0 '-F' 'yQ[f]' +nul_match 0 '-F' '[y]Qf' +nul_match 0 '-Fi' 'YQ[F]' +nul_match 0 '-Fi' '[Y]QF' +nul_match 0 '-F' 'æQ[ð]' +nul_match 0 '-F' '[æ]Qð' +nul_match 0 '-Fi' 'ÆQ[Ð]' +nul_match 0 '-Fi' '[Æ]QÐ' + +# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0 +# patterns case-insensitively. +nul_match T1 '-i' 'ÆQÐ' + +# \0 implicitly disables regexes. This is an undocumented internal +# limitation. +nul_match T1 '' 'yQ[f]' +nul_match T1 '' '[y]Qf' +nul_match T1 '-i' 'YQ[F]' +nul_match T1 '-i' '[Y]Qf' +nul_match T1 '' 'æQ[ð]' +nul_match T1 '' '[æ]Qð' +nul_match T1 '-i' 'ÆQ[Ð]' + +# ... because of \0 implicitly disabling regexes regexes that +# should/shouldn't match don't do the right thing. +nul_match T1 '' 'eQm.*cQ' +nul_match T1 '-i' 'EQM.*cQ' +nul_match T0 '' 'eQm[*]c' +nul_match T0 '-i' 'EQM[*]C' + +# Due to the REG_STARTEND extension when kwset() is disabled on -i & +# non-ASCII the string will be matched in its entirety, but the +# pattern will be cut off at the first \0. 
+nul_match 0 '-i' 'NOMATCHQð' +nul_match T0 '-i' '[Æ]QNOMATCH' +nul_match T0 '-i' '[æ]QNOMATCH' +# Matches, but for the wrong reasons, just stops at [æ] +nul_match 1 '-i' '[Æ]Qð' +nul_match 1 '-i' '[æ]Qð' + +# Ensure that the matcher doesn't regress to something that stops at +# \0 +nul_match 0 '-F' 'yQ[f]' +nul_match 0 '-Fi' 'YQ[F]' +nul_match 0 '' 'yQNOMATCH' +nul_match 0 '' 'QNOMATCH' +nul_match 0 '-i' 'YQNOMATCH' +nul_match 0 '-i' 'QNOMATCH' +nul_match 0 '-F' 'æQ[ð]' +nul_match 0 '-Fi' 'ÆQ[Ð]' +nul_match 0 '' 'yQNÓMATCH' +nul_match 0 '' 'QNÓMATCH' +nul_match 0 '-i' 'YQNÓMATCH' +nul_match 0 '-i' 'QNÓMATCH' + +test_done From 25754125cef278c7e9492fbd6dc4a28319b01f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:57 +0200 Subject: [PATCH 032/710] grep: make the behavior for NUL-byte in patterns sane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The behavior of "grep" when patterns contained a NUL-byte has always been haphazard, and has served the vagaries of the implementation more than anything else. A pattern containing a NUL-byte can only be provided via "-f <file>". Since pickaxe (log search) has no such flag the NUL-byte in patterns has only ever been supported by "grep" (and not "log --grep"). Since 9eceddeec6 ("Use kwset in grep", 2011-08-21) patterns containing "\0" were considered fixed. In 966be95549 ("grep: add tests to fix blind spots with \0 patterns", 2017-05-20) I added tests for this behavior. Change the behavior to do the obvious thing, i.e. don't silently discard a regex pattern and make it implicitly fixed just because they contain a NUL-byte. Instead die if the backend in question can't handle them, e.g. --basic-regexp is combined with such a pattern. This is desired because from a user's point of view it's the obvious thing to do. Whether we support BRE/ERE/Perl syntax is different from whether our implementation is limited by C-strings.
These patterns are obscure enough that I think this behavior change is OK, especially since we never documented the old behavior. Doing this also makes it easier to replace the kwset backend with something else, since we'll no longer strictly need it for anything we can't easily use another fixed-string backend for. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- Documentation/git-grep.txt | 17 ++++ grep.c | 23 ++--- t/t7816-grep-binary-pattern.sh | 159 ++++++++++++++++++--------------- 3 files changed, 110 insertions(+), 89 deletions(-) diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 2d27969057fd24..c89fb569e35855 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -271,6 +271,23 @@ providing this option will cause it to die. -f <file>:: Read patterns from <file>, one per line. ++ +Passing the pattern via <file> allows for providing a search pattern +containing a \0. ++ +Not all pattern types support patterns containing \0. Git will error +out if a given pattern type can't support such a pattern. The +`--perl-regexp` pattern type when compiled against the PCRE v2 backend +has the widest support for these types of patterns. ++ +In versions of Git before 2.23.0 patterns containing \0 would be +silently considered fixed. This was never documented, there were also +odd and undocumented interactions between e.g. non-ASCII patterns +containing \0 and `--ignore-case`. ++ +In future versions we may learn to support patterns containing \0 for +more search backends, until then we'll die when the pattern type in +question doesn't support them. -e:: The next parameter is the pattern.
This option has to be diff --git a/grep.c b/grep.c index 4e8d0645a83288..d6603bc950ff0d 100644 --- a/grep.c +++ b/grep.c @@ -368,18 +368,6 @@ static int is_fixed(const char *s, size_t len) return 1; } -static int has_null(const char *s, size_t len) -{ - /* - * regcomp cannot accept patterns with NULs so when using it - * we consider any pattern containing a NUL fixed. - */ - if (memchr(s, 0, len)) - return 1; - - return 0; -} - #ifdef USE_LIBPCRE1 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { @@ -668,9 +656,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) * simple string match using kws. p->fixed tells us if we * want to use kws. */ - if (opt->fixed || - has_null(p->pattern, p->patternlen) || - is_fixed(p->pattern, p->patternlen)) + if (opt->fixed || is_fixed(p->pattern, p->patternlen)) p->fixed = !p->ignore_case || !has_non_ascii(p->pattern); if (p->fixed) { @@ -678,7 +664,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; - } else if (opt->fixed) { + } + + if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) + die(_("given pattern contains NULL byte (via -f ). This is only supported with -P under PCRE v2")); + + if (opt->fixed) { /* * We come here when the pattern has the non-ascii * characters we cannot case-fold, and asked to diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh index 4060dbd67965e0..9e09bd5d6ace87 100755 --- a/t/t7816-grep-binary-pattern.sh +++ b/t/t7816-grep-binary-pattern.sh @@ -2,113 +2,126 @@ test_description='git grep with a binary pattern files' -. ./test-lib.sh +. 
./lib-gettext.sh -nul_match () { +nul_match_internal () { matches=$1 - flags=$2 - pattern=$3 + prereqs=$2 + lc_all=$3 + extra_flags=$4 + flags=$5 + pattern=$6 pattern_human=$(echo "$pattern" | sed 's/Q//g') if test "$matches" = 1 then - test_expect_success "git grep -f f $flags '$pattern_human' a" " + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " printf '$pattern' | q_to_nul >f && - git grep -f f $flags a + LC_ALL='$lc_all' git grep $extra_flags -f f $flags a " elif test "$matches" = 0 then - test_expect_success "git grep -f f $flags '$pattern_human' a" " + test_expect_success $prereqs "LC_ALL='$lc_all' git grep $extra_flags -f f $flags '$pattern_human' a" " + >stderr && printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a + test_must_fail env LC_ALL=\"$lc_all\" git grep $extra_flags -f f $flags a 2>stderr && + test_i18ngrep ! 'This is only supported with -P under PCRE v2' stderr " - elif test "$matches" = T1 + elif test "$matches" = P then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " + test_expect_success $prereqs "error, PCRE v2 only: LC_ALL='$lc_all' git grep -f f $flags '$pattern_human' a" " + >stderr && printf '$pattern' | q_to_nul >f && - git grep -f f $flags a - " - elif test "$matches" = T0 - then - test_expect_failure "git grep -f f $flags '$pattern_human' a" " - printf '$pattern' | q_to_nul >f && - test_must_fail git grep -f f $flags a + test_must_fail env LC_ALL=\"$lc_all\" git grep -f f $flags a 2>stderr && + test_i18ngrep 'This is only supported with -P under PCRE v2' stderr " else test_expect_success "PANIC: Test framework error. 
Unknown matches value $matches" 'false' fi } +nul_match () { + matches=$1 + matches_pcre2=$2 + matches_pcre2_locale=$3 + flags=$4 + pattern=$5 + pattern_human=$(echo "$pattern" | sed 's/Q//g') + + nul_match_internal "$matches" "" "C" "" "$flags" "$pattern" + nul_match_internal "$matches_pcre2" "LIBPCRE2" "C" "-P" "$flags" "$pattern" + nul_match_internal "$matches_pcre2_locale" "LIBPCRE2,GETTEXT_LOCALE" "$is_IS_locale" "-P" "$flags" "$pattern" +} + test_expect_success 'setup' " echo 'binaryQfileQm[*]cQ*æQð' | q_to_nul >a && git add a && git commit -m. " -nul_match 1 '-F' 'yQf' -nul_match 0 '-F' 'yQx' -nul_match 1 '-Fi' 'YQf' -nul_match 0 '-Fi' 'YQx' -nul_match 1 '' 'yQf' -nul_match 0 '' 'yQx' -nul_match 1 '' 'æQð' -nul_match 1 '-F' 'eQm[*]c' -nul_match 1 '-Fi' 'EQM[*]C' +# Simple fixed-string matching that can use kwset (no -i && non-ASCII) +nul_match 1 1 1 '-F' 'yQf' +nul_match 0 0 0 '-F' 'yQx' +nul_match 1 1 1 '-Fi' 'YQf' +nul_match 0 0 0 '-Fi' 'YQx' +nul_match 1 1 1 '' 'yQf' +nul_match 0 0 0 '' 'yQx' +nul_match 1 1 1 '' 'æQð' +nul_match 1 1 1 '-F' 'eQm[*]c' +nul_match 1 1 1 '-Fi' 'EQM[*]C' # Regex patterns that would match but shouldn't with -F -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-F' '[y]Qf' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '-Fi' '[Y]QF' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-F' '[æ]Qð' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '-Fi' '[Æ]QÐ' +nul_match 0 0 0 '-F' 'yQ[f]' +nul_match 0 0 0 '-F' '[y]Qf' +nul_match 0 0 0 '-Fi' 'YQ[F]' +nul_match 0 0 0 '-Fi' '[Y]QF' +nul_match 0 0 0 '-F' 'æQ[ð]' +nul_match 0 0 0 '-F' '[æ]Qð' -# kwset is disabled on -i & non-ASCII. No way to match non-ASCII \0 -# patterns case-insensitively. -nul_match T1 '-i' 'ÆQÐ' +# The -F kwset codepath can't handle -i && non-ASCII... +nul_match P 1 1 '-i' '[æ]Qð' -# \0 implicitly disables regexes. This is an undocumented internal -# limitation. 
-nul_match T1 '' 'yQ[f]' -nul_match T1 '' '[y]Qf' -nul_match T1 '-i' 'YQ[F]' -nul_match T1 '-i' '[Y]Qf' -nul_match T1 '' 'æQ[ð]' -nul_match T1 '' '[æ]Qð' -nul_match T1 '-i' 'ÆQ[Ð]' +# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8 +# semantics +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P 0 1 '-i' 'ÆQ[Ð]' +nul_match P 0 1 '-i' '[Æ]QÐ' +nul_match P 0 1 '-i' '[Æ]Qð' +nul_match P 0 1 '-i' 'ÆQÐ' -# ... because of \0 implicitly disabling regexes regexes that -# should/shouldn't match don't do the right thing. -nul_match T1 '' 'eQm.*cQ' -nul_match T1 '-i' 'EQM.*cQ' -nul_match T0 '' 'eQm[*]c' -nul_match T0 '-i' 'EQM[*]C' +# \0 in regexes can only work with -P & PCRE v2 +nul_match P 1 1 '' 'yQ[f]' +nul_match P 1 1 '' '[y]Qf' +nul_match P 1 1 '-i' 'YQ[F]' +nul_match P 1 1 '-i' '[Y]Qf' +nul_match P 1 1 '' 'æQ[ð]' +nul_match P 1 1 '' '[æ]Qð' +nul_match P 0 1 '-i' 'ÆQ[Ð]' +nul_match P 1 1 '' 'eQm.*cQ' +nul_match P 1 1 '-i' 'EQM.*cQ' +nul_match P 0 0 '' 'eQm[*]c' +nul_match P 0 0 '-i' 'EQM[*]C' -# Due to the REG_STARTEND extension when kwset() is disabled on -i & -# non-ASCII the string will be matched in its entirety, but the -# pattern will be cut off at the first \0. -nul_match 0 '-i' 'NOMATCHQð' -nul_match T0 '-i' '[Æ]QNOMATCH' -nul_match T0 '-i' '[æ]QNOMATCH' -# Matches, but for the wrong reasons, just stops at [æ] -nul_match 1 '-i' '[Æ]Qð' -nul_match 1 '-i' '[æ]Qð' +# Assert that we're using REG_STARTEND and the pattern doesn't match +# just because it's cut off at the first \0. 
+nul_match 0 0 0 '-i' 'NOMATCHQð' +nul_match P 0 0 '-i' '[Æ]QNOMATCH' +nul_match P 0 0 '-i' '[æ]QNOMATCH' # Ensure that the matcher doesn't regress to something that stops at # \0 -nul_match 0 '-F' 'yQ[f]' -nul_match 0 '-Fi' 'YQ[F]' -nul_match 0 '' 'yQNOMATCH' -nul_match 0 '' 'QNOMATCH' -nul_match 0 '-i' 'YQNOMATCH' -nul_match 0 '-i' 'QNOMATCH' -nul_match 0 '-F' 'æQ[ð]' -nul_match 0 '-Fi' 'ÆQ[Ð]' -nul_match 0 '' 'yQNÓMATCH' -nul_match 0 '' 'QNÓMATCH' -nul_match 0 '-i' 'YQNÓMATCH' -nul_match 0 '-i' 'QNÓMATCH' +nul_match 0 0 0 '-F' 'yQ[f]' +nul_match 0 0 0 '-Fi' 'YQ[F]' +nul_match 0 0 0 '' 'yQNOMATCH' +nul_match 0 0 0 '' 'QNOMATCH' +nul_match 0 0 0 '-i' 'YQNOMATCH' +nul_match 0 0 0 '-i' 'QNOMATCH' +nul_match 0 0 0 '-F' 'æQ[ð]' +nul_match P P P '-Fi' 'ÆQ[Ð]' +nul_match P 0 1 '-i' 'ÆQ[Ð]' +nul_match 0 0 0 '' 'yQNÓMATCH' +nul_match 0 0 0 '' 'QNÓMATCH' +nul_match 0 0 0 '-i' 'YQNÓMATCH' +nul_match 0 0 0 '-i' 'QNÓMATCH' test_done From 45d1f37ccc16ad53303910e150f7fbe36213aad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:58 +0200 Subject: [PATCH 033/710] grep: drop support for \0 in --fixed-strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change "-f " to not support patterns with a NUL-byte in them under --fixed-strings. We'll now only support these under "--perl-regexp" with PCRE v2. A previous change to grep's documentation changed the description of "-f " to be vague enough as to not promise that this would work. By dropping support for this we make it a whole lot easier to move away from the kwset backend, which we'll do in a subsequent change. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 6 +-- t/t7816-grep-binary-pattern.sh | 82 +++++++++++++++++----------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/grep.c b/grep.c index d6603bc950ff0d..8d0fff316c7c2d 100644 --- a/grep.c +++ b/grep.c @@ -644,6 +644,9 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) + die(_("given pattern contains NULL byte (via -f ). This is only supported with -P under PCRE v2")); + /* * Even when -F (fixed) asks us to do a non-regexp search, we * may not be able to correctly case-fold when -i @@ -666,9 +669,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) - die(_("given pattern contains NULL byte (via -f ). This is only supported with -P under PCRE v2")); - if (opt->fixed) { /* * We come here when the pattern has the non-ascii diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh index 9e09bd5d6ace87..60bab291e49c61 100755 --- a/t/t7816-grep-binary-pattern.sh +++ b/t/t7816-grep-binary-pattern.sh @@ -60,23 +60,23 @@ test_expect_success 'setup' " " # Simple fixed-string matching that can use kwset (no -i && non-ASCII) -nul_match 1 1 1 '-F' 'yQf' -nul_match 0 0 0 '-F' 'yQx' -nul_match 1 1 1 '-Fi' 'YQf' -nul_match 0 0 0 '-Fi' 'YQx' -nul_match 1 1 1 '' 'yQf' -nul_match 0 0 0 '' 'yQx' -nul_match 1 1 1 '' 'æQð' -nul_match 1 1 1 '-F' 'eQm[*]c' -nul_match 1 1 1 '-Fi' 'EQM[*]C' +nul_match P P P '-F' 'yQf' +nul_match P P P '-F' 'yQx' +nul_match P P P '-Fi' 'YQf' +nul_match P P P '-Fi' 'YQx' +nul_match P P 1 '' 'yQf' +nul_match P P 0 '' 'yQx' +nul_match P P 1 '' 'æQð' +nul_match P P P '-F' 'eQm[*]c' +nul_match P P P '-Fi' 'EQM[*]C' # Regex patterns that would match but shouldn't with -F -nul_match 0 0 0 '-F' 'yQ[f]' 
-nul_match 0 0 0 '-F' '[y]Qf' -nul_match 0 0 0 '-Fi' 'YQ[F]' -nul_match 0 0 0 '-Fi' '[Y]QF' -nul_match 0 0 0 '-F' 'æQ[ð]' -nul_match 0 0 0 '-F' '[æ]Qð' +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-F' '[y]Qf' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P P '-Fi' '[Y]QF' +nul_match P P P '-F' 'æQ[ð]' +nul_match P P P '-F' '[æ]Qð' # The -F kwset codepath can't handle -i && non-ASCII... nul_match P 1 1 '-i' '[æ]Qð' @@ -90,38 +90,38 @@ nul_match P 0 1 '-i' '[Æ]Qð' nul_match P 0 1 '-i' 'ÆQÐ' # \0 in regexes can only work with -P & PCRE v2 -nul_match P 1 1 '' 'yQ[f]' -nul_match P 1 1 '' '[y]Qf' -nul_match P 1 1 '-i' 'YQ[F]' -nul_match P 1 1 '-i' '[Y]Qf' -nul_match P 1 1 '' 'æQ[ð]' -nul_match P 1 1 '' '[æ]Qð' -nul_match P 0 1 '-i' 'ÆQ[Ð]' -nul_match P 1 1 '' 'eQm.*cQ' -nul_match P 1 1 '-i' 'EQM.*cQ' -nul_match P 0 0 '' 'eQm[*]c' -nul_match P 0 0 '-i' 'EQM[*]C' +nul_match P P 1 '' 'yQ[f]' +nul_match P P 1 '' '[y]Qf' +nul_match P P 1 '-i' 'YQ[F]' +nul_match P P 1 '-i' '[Y]Qf' +nul_match P P 1 '' 'æQ[ð]' +nul_match P P 1 '' '[æ]Qð' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 1 '' 'eQm.*cQ' +nul_match P P 1 '-i' 'EQM.*cQ' +nul_match P P 0 '' 'eQm[*]c' +nul_match P P 0 '-i' 'EQM[*]C' # Assert that we're using REG_STARTEND and the pattern doesn't match # just because it's cut off at the first \0. 
-nul_match 0 0 0 '-i' 'NOMATCHQð' -nul_match P 0 0 '-i' '[Æ]QNOMATCH' -nul_match P 0 0 '-i' '[æ]QNOMATCH' +nul_match P P 0 '-i' 'NOMATCHQð' +nul_match P P 0 '-i' '[Æ]QNOMATCH' +nul_match P P 0 '-i' '[æ]QNOMATCH' # Ensure that the matcher doesn't regress to something that stops at # \0 -nul_match 0 0 0 '-F' 'yQ[f]' -nul_match 0 0 0 '-Fi' 'YQ[F]' -nul_match 0 0 0 '' 'yQNOMATCH' -nul_match 0 0 0 '' 'QNOMATCH' -nul_match 0 0 0 '-i' 'YQNOMATCH' -nul_match 0 0 0 '-i' 'QNOMATCH' -nul_match 0 0 0 '-F' 'æQ[ð]' +nul_match P P P '-F' 'yQ[f]' +nul_match P P P '-Fi' 'YQ[F]' +nul_match P P 0 '' 'yQNOMATCH' +nul_match P P 0 '' 'QNOMATCH' +nul_match P P 0 '-i' 'YQNOMATCH' +nul_match P P 0 '-i' 'QNOMATCH' +nul_match P P P '-F' 'æQ[ð]' nul_match P P P '-Fi' 'ÆQ[Ð]' -nul_match P 0 1 '-i' 'ÆQ[Ð]' -nul_match 0 0 0 '' 'yQNÓMATCH' -nul_match 0 0 0 '' 'QNÓMATCH' -nul_match 0 0 0 '-i' 'YQNÓMATCH' -nul_match 0 0 0 '-i' 'QNÓMATCH' +nul_match P P 1 '-i' 'ÆQ[Ð]' +nul_match P P 0 '' 'yQNÓMATCH' +nul_match P P 0 '' 'QNÓMATCH' +nul_match P P 0 '-i' 'YQNÓMATCH' +nul_match P P 0 '-i' 'QNÓMATCH' test_done From 48de2a768cfdb5bde3af40e0518c96f7df90c0b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:20:59 +0200 Subject: [PATCH 034/710] grep: remove the kwset optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A later change will replace this optimization with optimistic use of PCRE v2. I'm completely removing it as an intermediate step, as opposed to replacing it with PCRE v2, to demonstrate that no grep semantics depend on this (or any other) optimization for the fixed backend anymore. 
For now this is mostly (but not entirely) a performance regression, as shown by this hacky one-liner: for opt in '' ' -i' do GIT_PERF_7821_GREP_OPTS=$opt GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 CFLAGS=-O3 USE_LIBPCRE=YesPlease' ./run origin/master HEAD -- p7821-grep-engines-fixed.sh done && for opt in '' ' -i' do GIT_PERF_4221_LOG_OPTS=$opt GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 CFLAGS=-O3 USE_LIBPCRE=YesPlease' ./run origin/master HEAD -- p4221-log-grep-engines-fixed.sh done Which produces: plain grep: Test origin/master HEAD ------------------------------------------------------------------------- 7821.1: fixed grep int 0.55(1.60+0.63) 0.82(3.11+0.51) +49.1% 7821.2: basic grep int 0.62(1.68+0.49) 0.85(3.02+0.52) +37.1% 7821.3: extended grep int 0.61(1.63+0.53) 0.91(3.09+0.44) +49.2% 7821.4: perl grep int 0.55(1.60+0.57) 0.41(0.93+0.57) -25.5% 7821.6: fixed grep uncommon 0.20(0.50+0.44) 0.35(1.27+0.42) +75.0% 7821.7: basic grep uncommon 0.20(0.49+0.45) 0.35(1.29+0.41) +75.0% 7821.8: extended grep uncommon 0.20(0.45+0.48) 0.35(1.25+0.44) +75.0% 7821.9: perl grep uncommon 0.20(0.53+0.41) 0.16(0.24+0.49) -20.0% 7821.11: fixed grep æ 0.35(1.27+0.40) 0.25(0.82+0.39) -28.6% 7821.12: basic grep æ 0.35(1.28+0.38) 0.25(0.75+0.44) -28.6% 7821.13: extended grep æ 0.36(1.21+0.46) 0.25(0.86+0.35) -30.6% 7821.14: perl grep æ 0.35(1.33+0.34) 0.16(0.26+0.47) -54.3% grep with -i: Test origin/master HEAD ----------------------------------------------------------------------------- 7821.1: fixed grep -i int 0.61(1.84+0.64) 1.11(4.12+0.64) +82.0% 7821.2: basic grep -i int 0.72(1.86+0.57) 1.15(4.48+0.49) +59.7% 7821.3: extended grep -i int 0.94(1.83+0.60) 1.53(4.12+0.58) +62.8% 7821.4: perl grep -i int 0.66(1.82+0.59) 0.55(1.08+0.58) -16.7% 7821.6: fixed grep -i uncommon 0.21(0.51+0.44) 0.44(1.74+0.34) +109.5% 7821.7: basic grep -i uncommon 0.21(0.55+0.41) 0.44(1.72+0.40) +109.5% 7821.8: extended grep -i 
uncommon 0.21(0.57+0.39) 0.42(1.64+0.45) +100.0% 7821.9: perl grep -i uncommon 0.21(0.48+0.48) 0.17(0.30+0.45) -19.0% 7821.11: fixed grep -i æ 0.25(0.73+0.45) 0.25(0.75+0.45) +0.0% 7821.12: basic grep -i æ 0.25(0.71+0.49) 0.26(0.77+0.44) +4.0% 7821.13: extended grep -i æ 0.25(0.75+0.44) 0.25(0.74+0.46) +0.0% 7821.14: perl grep -i æ 0.17(0.26+0.48) 0.16(0.20+0.52) -5.9% plain log: Test origin/master HEAD --------------------------------------------------------------------------------- 4221.1: fixed log --grep='int' 7.31(7.06+0.21) 8.11(7.85+0.20) +10.9% 4221.2: basic log --grep='int' 7.30(6.94+0.27) 8.16(7.89+0.19) +11.8% 4221.3: extended log --grep='int' 7.34(7.05+0.21) 8.08(7.76+0.25) +10.1% 4221.4: perl log --grep='int' 7.27(6.94+0.24) 7.05(6.76+0.25) -3.0% 4221.6: fixed log --grep='uncommon' 6.97(6.62+0.32) 7.86(7.51+0.30) +12.8% 4221.7: basic log --grep='uncommon' 7.05(6.69+0.29) 7.89(7.60+0.28) +11.9% 4221.8: extended log --grep='uncommon' 6.89(6.56+0.32) 7.99(7.66+0.24) +16.0% 4221.9: perl log --grep='uncommon' 7.02(6.66+0.33) 6.97(6.54+0.36) -0.7% 4221.11: fixed log --grep='æ' 7.37(7.03+0.33) 7.67(7.30+0.31) +4.1% 4221.12: basic log --grep='æ' 7.41(7.00+0.31) 7.60(7.28+0.26) +2.6% 4221.13: extended log --grep='æ' 7.35(6.96+0.38) 7.73(7.31+0.34) +5.2% 4221.14: perl log --grep='æ' 7.43(7.10+0.32) 6.95(6.61+0.27) -6.5% log with -i: Test origin/master HEAD ------------------------------------------------------------------------------------ 4221.1: fixed log -i --grep='int' 7.40(7.05+0.23) 8.66(8.38+0.20) +17.0% 4221.2: basic log -i --grep='int' 7.39(7.09+0.23) 8.67(8.39+0.20) +17.3% 4221.3: extended log -i --grep='int' 7.29(6.99+0.26) 8.69(8.31+0.26) +19.2% 4221.4: perl log -i --grep='int' 7.42(7.16+0.21) 7.14(6.80+0.24) -3.8% 4221.6: fixed log -i --grep='uncommon' 6.94(6.58+0.35) 8.43(8.04+0.30) +21.5% 4221.7: basic log -i --grep='uncommon' 6.95(6.62+0.31) 8.34(7.93+0.32) +20.0% 4221.8: extended log -i --grep='uncommon' 7.06(6.75+0.25) 8.32(7.98+0.31) +17.8% 
4221.9: perl log -i --grep='uncommon' 6.96(6.69+0.26) 7.04(6.64+0.32) +1.1% 4221.11: fixed log -i --grep='æ' 7.92(7.55+0.33) 7.86(7.44+0.34) -0.8% 4221.12: basic log -i --grep='æ' 7.88(7.49+0.32) 7.84(7.46+0.34) -0.5% 4221.13: extended log -i --grep='æ' 7.91(7.51+0.32) 7.87(7.48+0.32) -0.5% 4221.14: perl log -i --grep='æ' 7.01(6.59+0.35) 6.99(6.64+0.28) -0.3% Some of those, as noted in [1] are because PCRE is faster at finding fixed strings. This looks bad for some engines, but in the next change we'll optimistically use PCRE v2 for all of these, so it'll look better. 1. https://public-inbox.org/git/87v9x793qi.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 63 +++------------------------------------------------------- grep.h | 2 -- 2 files changed, 3 insertions(+), 62 deletions(-) diff --git a/grep.c b/grep.c index 8d0fff316c7c2d..4468519d5ce955 100644 --- a/grep.c +++ b/grep.c @@ -356,18 +356,6 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, p->pattern, error); } -static int is_fixed(const char *s, size_t len) -{ - size_t i; - - for (i = 0; i < len; i++) { - if (is_regex_special(s[i])) - return 0; - } - - return 1; -} - #ifdef USE_LIBPCRE1 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { @@ -643,38 +631,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + p->fixed = opt->fixed; if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) die(_("given pattern contains NULL byte (via -f ). This is only supported with -P under PCRE v2")); - /* - * Even when -F (fixed) asks us to do a non-regexp search, we - * may not be able to correctly case-fold when -i - * (ignore-case) is asked (in which case, we'll synthesize a - * regexp to match the pattern that matches regexp special - * characters literally, while ignoring case differences). 
On - * the other hand, even without -F, if the pattern does not - * have any regexp special characters and there is no need for - * case-folding search, we can internally turn it into a - * simple string match using kws. p->fixed tells us if we - * want to use kws. - */ - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = !p->ignore_case || !has_non_ascii(p->pattern); - - if (p->fixed) { - p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL); - kwsincr(p->kws, p->pattern, p->patternlen); - kwsprep(p->kws); - return; - } - if (opt->fixed) { - /* - * We come here when the pattern has the non-ascii - * characters we cannot case-fold, and asked to - * ignore-case. - */ compile_fixed_regexp(p, opt); return; } @@ -1042,9 +1004,7 @@ void free_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - if (p->kws) - kwsfree(p->kws); - else if (p->pcre1_regexp) + if (p->pcre1_regexp) free_pcre1_regexp(p); else if (p->pcre2_pattern) free_pcre2_pattern(p); @@ -1104,29 +1064,12 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? 
"\0" : "\n", 1); } -static int fixmatch(struct grep_pat *p, char *line, char *eol, - regmatch_t *match) -{ - struct kwsmatch kwsm; - size_t offset = kwsexec(p->kws, line, eol - line, &kwsm); - if (offset == -1) { - match->rm_so = match->rm_eo = -1; - return REG_NOMATCH; - } else { - match->rm_so = offset; - match->rm_eo = match->rm_so + kwsm.size[0]; - return 0; - } -} - static int patmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match, int eflags) { int hit; - if (p->fixed) - hit = !fixmatch(p, line, eol, match); - else if (p->pcre1_regexp) + if (p->pcre1_regexp) hit = !pcre1match(p, line, eol, match, eflags); else if (p->pcre2_pattern) hit = !pcre2match(p, line, eol, match, eflags); diff --git a/grep.h b/grep.h index 4bb8a79d93187b..d35a137fcbcb23 100644 --- a/grep.h +++ b/grep.h @@ -32,7 +32,6 @@ typedef int pcre2_compile_context; typedef int pcre2_match_context; typedef int pcre2_jit_stack; #endif -#include "kwset.h" #include "thread-utils.h" #include "userdiff.h" @@ -97,7 +96,6 @@ struct grep_pat { pcre2_match_context *pcre2_match_context; pcre2_jit_stack *pcre2_jit_stack; uint32_t pcre2_jit_on; - kwset_t kws; unsigned fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; From b65abcafc7abd93ed634125bcec98b1460e75d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 1 Jul 2019 23:21:00 +0200 Subject: [PATCH 035/710] grep: use PCRE v2 for optimized fixed-string search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bring back optimized fixed-string search for "grep", this time with PCRE v2 as an optional backend. As noted in [1] with kwset we were slower than PCRE v1 and v2 JIT with the kwset backend, so that optimization was counterproductive. This brings back the optimization for "--fixed-strings", without changing the semantics of having a NUL-byte in patterns. 
As seen in previous commits in this series we could support it now, but I'd rather just leave that edge-case aside so we don't have one behavior or the other depending on what "--fixed-strings" backend we're using. It makes the behavior harder to understand and document, and makes tests for the different backends more painful. This does change the behavior under non-C locales when "log"'s "--encoding" option is used and the haystack/needle in the content/command-line doesn't have a matching encoding. See the recent change in "t4210: skip more command-line encoding tests on MinGW" in this series. I think that's OK. We did nothing sensible before then (just compared raw bytes that had no hope of matching). At least now the user will get some idea why their grep/log never matches in that edge case. I could also support the PCRE v1 backend here, but that would make the code more complex. I'd rather aim for simplicity here and in future changes to the diffcore. We're not going to have someone who absolutely must have faster search, but for whom building PCRE v2 isn't acceptable.
The difference between this series of commits and the current "master" is, using the same t/perf commands shown in the last commit: plain grep: Test origin/master HEAD ------------------------------------------------------------------------- 7821.1: fixed grep int 0.55(1.67+0.56) 0.41(0.98+0.60) -25.5% 7821.2: basic grep int 0.58(1.65+0.52) 0.41(0.96+0.57) -29.3% 7821.3: extended grep int 0.57(1.66+0.49) 0.42(0.93+0.60) -26.3% 7821.4: perl grep int 0.54(1.67+0.50) 0.43(0.88+0.65) -20.4% 7821.6: fixed grep uncommon 0.21(0.52+0.42) 0.16(0.24+0.51) -23.8% 7821.7: basic grep uncommon 0.20(0.49+0.45) 0.17(0.28+0.47) -15.0% 7821.8: extended grep uncommon 0.20(0.54+0.39) 0.16(0.25+0.50) -20.0% 7821.9: perl grep uncommon 0.20(0.58+0.36) 0.16(0.23+0.50) -20.0% 7821.11: fixed grep æ 0.35(1.24+0.43) 0.16(0.23+0.50) -54.3% 7821.12: basic grep æ 0.36(1.29+0.38) 0.16(0.20+0.54) -55.6% 7821.13: extended grep æ 0.35(1.23+0.44) 0.16(0.24+0.50) -54.3% 7821.14: perl grep æ 0.35(1.33+0.34) 0.16(0.28+0.46) -54.3% grep with -i: Test origin/master HEAD ---------------------------------------------------------------------------- 7821.1: fixed grep -i int 0.62(1.81+0.70) 0.47(1.11+0.64) -24.2% 7821.2: basic grep -i int 0.67(1.90+0.53) 0.46(1.07+0.62) -31.3% 7821.3: extended grep -i int 0.62(1.92+0.53) 0.53(1.12+0.58) -14.5% 7821.4: perl grep -i int 0.66(1.85+0.58) 0.45(1.10+0.59) -31.8% 7821.6: fixed grep -i uncommon 0.21(0.54+0.43) 0.17(0.20+0.55) -19.0% 7821.7: basic grep -i uncommon 0.20(0.52+0.45) 0.17(0.29+0.48) -15.0% 7821.8: extended grep -i uncommon 0.21(0.52+0.44) 0.17(0.26+0.50) -19.0% 7821.9: perl grep -i uncommon 0.21(0.53+0.44) 0.17(0.20+0.56) -19.0% 7821.11: fixed grep -i æ 0.26(0.79+0.44) 0.16(0.29+0.46) -38.5% 7821.12: basic grep -i æ 0.26(0.79+0.42) 0.16(0.20+0.54) -38.5% 7821.13: extended grep -i æ 0.26(0.84+0.39) 0.16(0.24+0.50) -38.5% 7821.14: perl grep -i æ 0.16(0.24+0.49) 0.17(0.25+0.51) +6.3% plain log: Test origin/master HEAD 
-------------------------------------------------------------------------------- 4221.1: fixed log --grep='int' 7.24(6.95+0.28) 7.20(6.95+0.18) -0.6% 4221.2: basic log --grep='int' 7.31(6.97+0.22) 7.20(6.93+0.21) -1.5% 4221.3: extended log --grep='int' 7.37(7.04+0.24) 7.22(6.91+0.25) -2.0% 4221.4: perl log --grep='int' 7.31(7.04+0.21) 7.19(6.89+0.21) -1.6% 4221.6: fixed log --grep='uncommon' 6.93(6.59+0.32) 7.04(6.66+0.37) +1.6% 4221.7: basic log --grep='uncommon' 6.92(6.58+0.29) 7.08(6.75+0.29) +2.3% 4221.8: extended log --grep='uncommon' 6.92(6.55+0.31) 7.00(6.68+0.31) +1.2% 4221.9: perl log --grep='uncommon' 7.03(6.59+0.33) 7.12(6.73+0.34) +1.3% 4221.11: fixed log --grep='æ' 7.41(7.08+0.28) 7.05(6.76+0.29) -4.9% 4221.12: basic log --grep='æ' 7.39(6.99+0.33) 7.00(6.68+0.25) -5.3% 4221.13: extended log --grep='æ' 7.34(7.00+0.25) 7.15(6.81+0.31) -2.6% 4221.14: perl log --grep='æ' 7.43(7.13+0.26) 7.01(6.60+0.36) -5.7% log with -i: Test origin/master HEAD ------------------------------------------------------------------------------------ 4221.1: fixed log -i --grep='int' 7.31(7.07+0.24) 7.23(7.00+0.22) -1.1% 4221.2: basic log -i --grep='int' 7.40(7.08+0.28) 7.19(6.92+0.20) -2.8% 4221.3: extended log -i --grep='int' 7.43(7.13+0.25) 7.27(6.99+0.21) -2.2% 4221.4: perl log -i --grep='int' 7.34(7.10+0.24) 7.10(6.90+0.19) -3.3% 4221.6: fixed log -i --grep='uncommon' 7.07(6.71+0.32) 7.11(6.77+0.28) +0.6% 4221.7: basic log -i --grep='uncommon' 6.99(6.64+0.28) 7.12(6.69+0.38) +1.9% 4221.8: extended log -i --grep='uncommon' 7.11(6.74+0.32) 7.10(6.77+0.27) -0.1% 4221.9: perl log -i --grep='uncommon' 6.98(6.60+0.29) 7.05(6.64+0.34) +1.0% 4221.11: fixed log -i --grep='æ' 7.85(7.45+0.34) 7.03(6.68+0.32) -10.4% 4221.12: basic log -i --grep='æ' 7.87(7.49+0.29) 7.06(6.69+0.31) -10.3% 4221.13: extended log -i --grep='æ' 7.87(7.54+0.31) 7.09(6.69+0.31) -9.9% 4221.14: perl log -i --grep='æ' 7.06(6.77+0.28) 6.91(6.57+0.31) -2.1% So as with e05b027627 ("grep: use PCRE v2 for optimized 
fixed-string search", 2019-06-26) there's a huge improvement in performance for "grep", but in "log" most of our time is spent elsewhere, so we don't notice it that much. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/grep.c b/grep.c index 4468519d5ce955..fc0ed73ef34781 100644 --- a/grep.c +++ b/grep.c @@ -356,6 +356,18 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p, die("%s'%s': %s", where, p->pattern, error); } +static int is_fixed(const char *s, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (is_regex_special(s[i])) + return 0; + } + + return 1; +} + #ifdef USE_LIBPCRE1 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) { @@ -602,7 +614,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol, static void free_pcre2_pattern(struct grep_pat *p) { } -#endif /* !USE_LIBPCRE2 */ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { @@ -623,11 +634,13 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) compile_regexp_failed(p, errbuf); } } +#endif /* !USE_LIBPCRE2 */ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; int regflags = REG_NEWLINE; + int pat_is_fixed; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; @@ -636,8 +649,42 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) die(_("given pattern contains NULL byte (via -f ). This is only supported with -P under PCRE v2")); - if (opt->fixed) { + pat_is_fixed = is_fixed(p->pattern, p->patternlen); + if (opt->fixed || pat_is_fixed) { +#ifdef USE_LIBPCRE2 + opt->pcre2 = 1; + if (pat_is_fixed) { + compile_pcre2_pattern(p, opt); + } else { + /* + * E.g. 
t7811-grep-open.sh relies on the + * pattern being restored. + */ + char *old_pattern = p->pattern; + size_t old_patternlen = p->patternlen; + struct strbuf sb = STRBUF_INIT; + + /* + * There is the PCRE2_LITERAL flag, but it's + * only in PCRE v2 10.30 and later. Needing to + * ifdef our way around that and dealing with + * it + PCRE2_MULTILINE being an error is more + * complex than just quoting this ourselves. + */ + strbuf_add(&sb, "\\Q", 2); + strbuf_add(&sb, p->pattern, p->patternlen); + strbuf_add(&sb, "\\E", 2); + + p->pattern = sb.buf; + p->patternlen = sb.len; + compile_pcre2_pattern(p, opt); + p->pattern = old_pattern; + p->patternlen = old_patternlen; + strbuf_release(&sb); + } +#else /* !USE_LIBPCRE2 */ compile_fixed_regexp(p, opt); +#endif /* !USE_LIBPCRE2 */ return; } From 04bef50c0175bba195443ea0edbf2991175cdd91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:11 +0200 Subject: [PATCH 036/710] grep: remove overly paranoid BUG(...) code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove code that would trigger if pcre_config() or pcre2_config() was so broken that "do we have JIT?" wouldn't return a boolean. I added this code back in fbaceaac47 ("grep: add support for the PCRE v1 JIT API", 2017-05-25) and then as noted in f002532784 ("grep: print the pcre2_jit_on value", 2019-07-22) incorrectly copy/pasted some of it in 94da9193a6 ("grep: add support for PCRE v2", 2017-06-01). Let's just remove this code. Being this paranoid about the pcre2?_config() function itself being broken is crossing the line into unreasonable paranoia. 
Reported-by: Beat Bolli Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/grep.c b/grep.c index fc0ed73ef34781..95af88cb74db54 100644 --- a/grep.c +++ b/grep.c @@ -394,14 +394,11 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) #ifdef GIT_PCRE1_USE_JIT pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); - if (p->pcre1_jit_on == 1) { + if (p->pcre1_jit_on) { p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); if (!p->pcre1_jit_stack) die("Couldn't allocate PCRE JIT stack"); pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); - } else if (p->pcre1_jit_on != 0) { - BUG("The pcre1_jit_on variable should be 0 or 1, not %d", - p->pcre1_jit_on); } #endif } @@ -510,7 +507,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt } pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); - if (p->pcre2_jit_on == 1) { + if (p->pcre2_jit_on) { jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); if (jitret) die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); @@ -545,9 +542,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt if (!p->pcre2_match_context) die("Couldn't allocate PCRE2 match context"); pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); - } else if (p->pcre2_jit_on != 0) { - BUG("The pcre2_jit_on variable should be 0 or 1, not %d", - p->pcre1_jit_on); } } From 34489239d0f920ddc3bfff1c4cfe2c13ad02b2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:12 +0200 Subject: [PATCH 037/710] grep: stop "using" a custom JIT stack with PCRE v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported in [1] the code I added in 94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) to use a 
custom JIT stack has never worked. It was incorrectly copy/pasted from code I added in fbaceaac47 ("grep: add support for the PCRE v1 JIT API", 2017-05-25), which did work. Thus our intention of starting with 1 byte of stack at a maximum of 1 MB didn't happen, we'd always use the 32 KB stack provided by PCRE v2's jit_machine_stack_exec()[2]. The reason I allocated a custom stack at all was this advice in pcrejit(3) (same in pcre2jit(3)): "By default, it uses 32KiB on the machine stack. However, some large or complicated patterns need more than this" Since we've haven't had any reports of users running into PCRE2_ERROR_JIT_STACKLIMIT in the wild I think we can safely assume that we can just use the library defaults instead and drop this code. This won't change with the wider use of PCRE v2 in ed0479ce3d ("Merge branch 'ab/no-kwset' into next", 2019-07-15), a fixed string search is not a "large or complicated pattern". For good measure I ran the performance test noted in 94da9193a6, although the command is simpler now due to my 0f50c8e32c ("Makefile: remove the NO_R_TO_GCC_LINKER flag", 2019-05-17): GIT_PERF_REPEAT_COUNT=30 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 USE_LIBPCRE2=Y CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre2/inst' ./run HEAD~ HEAD p7820-grep-engines.sh Just the /perl/ results are: Test HEAD~ HEAD --------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.17(0.27+0.65) 0.17(0.24+0.68) +0.0% 7820.7: perl grep '^how to' 0.16(0.23+0.66) 0.16(0.23+0.67) +0.0% 7820.11: perl grep '[how] to' 0.18(0.35+0.62) 0.18(0.33+0.65) +0.0% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 0.17(0.45+0.54) 0.17(0.49+0.50) +0.0% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.16(0.33+0.58) 0.16(0.29+0.62) +0.0% So, as expected there's no change, and running with valgrind reveals that we have fewer allocations now. 
As noted in [3] there are known regexes that will fail with the lower stack limit, the way GNU grep fixed it is interesting, although I believe the implementation is overly verbose, they could make PCRE v2 handle that gradual re-allocation, that's what min/max memory is for. So we might end up bringing this back, I'm more inclined to just kick such cases upstairs to PCRE maintainers as a bug, perhaps they'll add some overall "just allocate more then" flag to make this easier. In any case there's no functional change here, we didn't have a custom stack, so let's apply this first, we can always revert it later. 1. https://public-inbox.org/git/20190721194052.15440-1-carenas@gmail.com/ 2. I didn't really intend to start with 1 byte, looking at the PCRE v2 code again what happened is that I cargo-culted some of PCRE v2's own test code which was meant to test re-allocations. It's more sane to start with say 32 KB with a max of 1 MB, as pcre2grep.c does. 3. https://public-inbox.org/git/CAPUEspjj+fG8QDmf=bZXktfpLgkgiu34HTjKLhm-cmEE04FE-A@mail.gmail.com/ Reported-by: Carlo Marcelo Arenas Belón Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 10 ---------- grep.h | 4 ---- 2 files changed, 14 deletions(-) diff --git a/grep.c b/grep.c index 95af88cb74db54..4b1e917ac55231 100644 --- a/grep.c +++ b/grep.c @@ -534,14 +534,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt p->pcre2_jit_on = 0; return; } - - p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); - if (!p->pcre2_jit_stack) - die("Couldn't allocate PCRE2 JIT stack"); - p->pcre2_match_context = pcre2_match_context_create(NULL); - if (!p->pcre2_match_context) - die("Couldn't allocate PCRE2 match context"); - pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); } } @@ -585,8 +577,6 @@ static void free_pcre2_pattern(struct grep_pat *p) pcre2_compile_context_free(p->pcre2_compile_context); 
pcre2_code_free(p->pcre2_pattern); pcre2_match_data_free(p->pcre2_match_data); - pcre2_jit_stack_free(p->pcre2_jit_stack); - pcre2_match_context_free(p->pcre2_match_context); } #else /* !USE_LIBPCRE2 */ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) diff --git a/grep.h b/grep.h index d35a137fcbcb23..4d8e300175bf69 100644 --- a/grep.h +++ b/grep.h @@ -29,8 +29,6 @@ typedef int pcre_jit_stack; typedef int pcre2_code; typedef int pcre2_match_data; typedef int pcre2_compile_context; -typedef int pcre2_match_context; -typedef int pcre2_jit_stack; #endif #include "thread-utils.h" #include "userdiff.h" @@ -93,8 +91,6 @@ struct grep_pat { pcre2_code *pcre2_pattern; pcre2_match_data *pcre2_match_data; pcre2_compile_context *pcre2_compile_context; - pcre2_match_context *pcre2_match_context; - pcre2_jit_stack *pcre2_jit_stack; uint32_t pcre2_jit_on; unsigned fixed:1; unsigned ignore_case:1; From 685668faaae6daf5990068b198525491591aff87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:13 +0200 Subject: [PATCH 038/710] grep: stop using a custom JIT stack with PCRE v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the PCRE v1 code for the same reasons as for the PCRE v2 code in the last commit. Unlike with v2 we actually used the custom stack in v1, but let's use PCRE's built-in 32 KB one instead, since experience with v2 shows that's enough. Most distros are already using v2 as a default, and the underlying sljit code is the same. Unfortunately we can't just pass a NULL to pcre_jit_exec() as with pcre2_jit_match(). Unlike the v2 function it doesn't support that. Instead we need to use the fatter pcre_exec() if we'd like the same behavior. This will make things slightly slower than on the fast-path function, but it's OK since we care less about v1 performance these days since we have and recommend v2. 
Running a similar performance test as what I ran in fbaceaac47 ("grep: add support for the PCRE v1 JIT API", 2017-05-25) via: GIT_PERF_REPEAT_COUNT=30 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8 USE_LIBPCRE1=Y CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre/inst' ./run HEAD~ HEAD p7820-grep-engines.sh Gives us this, just the /perl/ results: Test HEAD~ HEAD --------------------------------------------------------------------------------------- 7820.3: perl grep 'how.to' 0.19(0.67+0.52) 0.19(0.65+0.52) +0.0% 7820.7: perl grep '^how to' 0.19(0.78+0.44) 0.19(0.72+0.49) +0.0% 7820.11: perl grep '[how] to' 0.39(2.13+0.43) 0.40(2.10+0.46) +2.6% 7820.15: perl grep '(e.t[^ ]*|v.ry) rare' 0.44(2.55+0.37) 0.45(2.47+0.41) +2.3% 7820.19: perl grep 'm(ú|u)lt.b(æ|y)te' 0.23(1.06+0.42) 0.22(1.03+0.43) -4.3% It will also implicitly re-enable UTF-8 validation for PCRE v1. As noted in [1] we now have cases as a result where PCRE v1 is more eager to error out. Subsequent patches will fix that for v2, and I think it's fair to tell v1 users "just upgrade" and not worry about that edge case for v1. 1. 
https://public-inbox.org/git/CAPUEsphZJ_Uv9o1-yDpjNLA_q-f7gWXz9g1gCY2pYAYN8ri40g@mail.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 28 +++++----------------------- grep.h | 5 ----- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/grep.c b/grep.c index 4b1e917ac55231..9c2b25977103e7 100644 --- a/grep.c +++ b/grep.c @@ -394,12 +394,6 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) #ifdef GIT_PCRE1_USE_JIT pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); - if (p->pcre1_jit_on) { - p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); - if (!p->pcre1_jit_stack) - die("Couldn't allocate PCRE JIT stack"); - pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); - } #endif } @@ -411,18 +405,9 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol, if (eflags & REG_NOTBOL) flags |= PCRE_NOTBOL; -#ifdef GIT_PCRE1_USE_JIT - if (p->pcre1_jit_on) { - ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, - eol - line, 0, flags, ovector, - ARRAY_SIZE(ovector), p->pcre1_jit_stack); - } else -#endif - { - ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, - eol - line, 0, flags, ovector, - ARRAY_SIZE(ovector)); - } + ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, + eol - line, 0, flags, ovector, + ARRAY_SIZE(ovector)); if (ret < 0 && ret != PCRE_ERROR_NOMATCH) die("pcre_exec failed with error code %d", ret); @@ -439,14 +424,11 @@ static void free_pcre1_regexp(struct grep_pat *p) { pcre_free(p->pcre1_regexp); #ifdef GIT_PCRE1_USE_JIT - if (p->pcre1_jit_on) { + if (p->pcre1_jit_on) pcre_free_study(p->pcre1_extra_info); - pcre_jit_stack_free(p->pcre1_jit_stack); - } else + else #endif - { pcre_free(p->pcre1_extra_info); - } pcre_free((void *)p->pcre1_tables); } #else /* !USE_LIBPCRE1 */ diff --git a/grep.h b/grep.h index 4d8e300175bf69..ce2d72571f1c65 100644 --- a/grep.h +++ b/grep.h @@ -14,13 +14,9 @@ #ifndef 
GIT_PCRE_STUDY_JIT_COMPILE #define GIT_PCRE_STUDY_JIT_COMPILE 0 #endif -#if PCRE_MAJOR <= 8 && PCRE_MINOR < 20 -typedef int pcre_jit_stack; -#endif #else typedef int pcre; typedef int pcre_extra; -typedef int pcre_jit_stack; #endif #ifdef USE_LIBPCRE2 #define PCRE2_CODE_UNIT_WIDTH 8 @@ -85,7 +81,6 @@ struct grep_pat { regex_t regexp; pcre *pcre1_regexp; pcre_extra *pcre1_extra_info; - pcre_jit_stack *pcre1_jit_stack; const unsigned char *pcre1_tables; int pcre1_jit_on; pcre2_code *pcre2_pattern; From 8a35b540a99d909ee4680e773c1d3befb6bff782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:14 +0200 Subject: [PATCH 039/710] grep: consistently use "p->fixed" in compile_regexp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the start of this function we do: p->fixed = opt->fixed; It's less confusing to use that variable consistently that switch back & forth between the two. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grep.c b/grep.c index 9c2b25977103e7..b94e998680c3c2 100644 --- a/grep.c +++ b/grep.c @@ -616,7 +616,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) die(_("given pattern contains NULL byte (via -f ). 
This is only supported with -P under PCRE v2")); pat_is_fixed = is_fixed(p->pattern, p->patternlen); - if (opt->fixed || pat_is_fixed) { + if (p->fixed || pat_is_fixed) { #ifdef USE_LIBPCRE2 opt->pcre2 = 1; if (pat_is_fixed) { From 09872f6418f6b6fc1b823d3b324907c02e9bc75b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:15 +0200 Subject: [PATCH 040/710] grep: create a "is_fixed" member in "grep_pat" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change paves the way for later using this value the regex compile functions themselves. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 7 +++---- grep.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/grep.c b/grep.c index b94e998680c3c2..6d60e2e557055c 100644 --- a/grep.c +++ b/grep.c @@ -606,7 +606,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; int regflags = REG_NEWLINE; - int pat_is_fixed; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; @@ -615,11 +614,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2) die(_("given pattern contains NULL byte (via -f ). 
This is only supported with -P under PCRE v2")); - pat_is_fixed = is_fixed(p->pattern, p->patternlen); - if (p->fixed || pat_is_fixed) { + p->is_fixed = is_fixed(p->pattern, p->patternlen); + if (p->fixed || p->is_fixed) { #ifdef USE_LIBPCRE2 opt->pcre2 = 1; - if (pat_is_fixed) { + if (p->is_fixed) { compile_pcre2_pattern(p, opt); } else { /* diff --git a/grep.h b/grep.h index ce2d72571f1c65..c0c71eb4a9b255 100644 --- a/grep.h +++ b/grep.h @@ -88,6 +88,7 @@ struct grep_pat { pcre2_compile_context *pcre2_compile_context; uint32_t pcre2_jit_on; unsigned fixed:1; + unsigned is_fixed:1; unsigned ignore_case:1; unsigned word_regexp:1; }; From 8a5999838e3cc24652f09670b6fe9461a789721b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:16 +0200 Subject: [PATCH 041/710] grep: stess test PCRE v2 on invalid UTF-8 data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since my b65abcafc7 ("grep: use PCRE v2 for optimized fixed-string search", 2019-07-01) we've been dying on invalid UTF-8 data when grepping for fixed strings if the following are all true: * The subject string is non-ASCII (e.g. "ævar") * We're under a is_utf8_locale(), e.g. "en_US.UTF-8", not "C" * We compiled with PCRE v2 * That PCRE v2 did not have JIT support The last of those is why this wasn't caught earlier, per pcre2jit(3): "unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the interests of speed, these checks do not happen on the JIT fast path, and if invalid data is passed, the result is undefined." I.e. the subject being matched against our pattern was invalid, but we were lucky and getting away with it on the JIT path, but the non-JIT one is stricter. 
This patch does nothing to fix that, instead we sneak in support for fixed patterns starting with "(*NO_JIT)", this disables the PCRE v2 jit with implicit fixed-string matching for testing, see pcre2syntax(3) for the syntax.
This is only supported with -P under PCRE v2")); p->is_fixed = is_fixed(p->pattern, p->patternlen); +#ifdef USE_LIBPCRE2 + if (!p->fixed && !p->is_fixed) { + const char *no_jit = "(*NO_JIT)"; + const int no_jit_len = strlen(no_jit); + if (starts_with(p->pattern, no_jit) && + is_fixed(p->pattern + no_jit_len, + p->patternlen - no_jit_len)) + p->is_fixed = 1; + } +#endif if (p->fixed || p->is_fixed) { #ifdef USE_LIBPCRE2 opt->pcre2 = 1; diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 0c685d35986eeb..96c35720569962 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -53,4 +53,32 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' ' test_cmp expected actual ' +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' ' + printf "\\200\\n" >invalid-0x80 && + echo "ævar" >expected && + cat expected >>invalid-0x80 && + git add invalid-0x80 +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep ASCII from invalid UTF-8 data' ' + git grep -h "var" invalid-0x80 >actual && + test_cmp expected actual && + git grep -h "(*NO_JIT)var" invalid-0x80 >actual && + test_cmp expected actual +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data' ' + test_might_fail git grep -h "æ" invalid-0x80 >actual && + test_cmp expected actual && + test_must_fail git grep -h "(*NO_JIT)æ" invalid-0x80 && + test_cmp expected actual +' + +test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data with -i' ' + test_might_fail git grep -hi "Æ" invalid-0x80 >actual && + test_cmp expected actual && + test_must_fail git grep -hi "(*NO_JIT)Æ" invalid-0x80 && + test_cmp expected actual +' + test_done From 870eea81669bfff4333b37b11fedd870cd05fd90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 26 Jul 2019 17:08:17 +0200 Subject: [PATCH 042/710] grep: do not enter PCRE2_UTF 
mode on fixed matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discussed in the last commit partially fix a bug introduced in b65abcafc7 ("grep: use PCRE v2 for optimized fixed-string search", 2019-07-01). Because PCRE v2, unlike kwset, validates its UTF-8 input we'd die on e.g.: fatal: pcre2_match failed with error code -22: UTF-8 error: isolated byte with 0x80 bit set When grepping a non-ASCII fixed string. This is a more general problem that's hard to fix, but we can at least fix the most common case of grepping for a fixed string without "-i". I can't think of a reason for why we'd turn on PCRE2_UTF when matching byte-for-byte like that. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- grep.c | 3 ++- t/t7812-grep-icase-non-ascii.sh | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index 5bc0f4f32abc53..c7c06ae08dd178 100644 --- a/grep.c +++ b/grep.c @@ -472,7 +472,8 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt } options |= PCRE2_CASELESS; } - if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern)) + if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) && + !(!opt->ignore_case && (p->fixed || p->is_fixed))) options |= PCRE2_UTF; p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh index 96c35720569962..531eb59d5785f2 100755 --- a/t/t7812-grep-icase-non-ascii.sh +++ b/t/t7812-grep-icase-non-ascii.sh @@ -68,9 +68,9 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep ASCII from invalid UT ' test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data' ' - test_might_fail git grep -h "æ" invalid-0x80 >actual && + git grep -h "æ" invalid-0x80 >actual && test_cmp expected actual && - test_must_fail git grep -h "(*NO_JIT)æ" invalid-0x80 && + git grep -h 
"(*NO_JIT)æ" invalid-0x80 && test_cmp expected actual ' From 6a289d45c0e5d155f536d7c1f73c3b33690e92c6 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 30 Jul 2019 13:53:27 -0300 Subject: [PATCH 043/710] grep: fix worktree case in submodules Running git-grep with --recurse-submodules results in a cached grep for the submodules even when --cached is not used. This makes all modifications in submodules' tracked files be always ignored when grepping. Solve that making git-grep respect the cached option when invoking grep_cache() inside grep_submodule(). Also, add tests to ensure that the desired behavior is performed. Reported-by: Daniel Zaoui Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- builtin/grep.c | 10 ++++++---- t/t7814-grep-recurse-submodules.sh | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 580fd38f41704b..467eb4542b72f1 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -403,7 +403,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, static int grep_submodule(struct grep_opt *opt, const struct pathspec *pathspec, const struct object_id *oid, - const char *filename, const char *path) + const char *filename, const char *path, int cached) { struct repository subrepo; struct repository *superproject = opt->repo; @@ -474,7 +474,7 @@ static int grep_submodule(struct grep_opt *opt, strbuf_release(&base); free(data); } else { - hit = grep_cache(&subopt, pathspec, 1); + hit = grep_cache(&subopt, pathspec, cached); } repo_clear(&subrepo); @@ -522,7 +522,8 @@ static int grep_cache(struct grep_opt *opt, } } else if (recurse_submodules && S_ISGITLINK(ce->ce_mode) && submodule_path_match(repo->index, pathspec, name.buf, NULL)) { - hit |= grep_submodule(opt, pathspec, NULL, ce->name, ce->name); + hit |= grep_submodule(opt, pathspec, NULL, ce->name, + ce->name, cached); } else { continue; } @@ -597,7 +598,8 @@ static int grep_tree(struct 
grep_opt *opt, const struct pathspec *pathspec, free(data); } else if (recurse_submodules && S_ISGITLINK(entry.mode)) { hit |= grep_submodule(opt, pathspec, &entry.oid, - base->buf, base->buf + tn_len); + base->buf, base->buf + tn_len, + 1); /* ignored */ } strbuf_setlen(base, old_baselen); diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index 134a694516c924..48f2b13e7d7905 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -392,4 +392,25 @@ test_expect_success 'grep --recurse-submodules with submodules without .gitmodul test_cmp expect actual ' +reset_and_clean () { + git reset --hard && + git clean -fd && + git submodule foreach --recursive 'git reset --hard' && + git submodule foreach --recursive 'git clean -fd' +} + +test_expect_success 'grep --recurse-submodules without --cached considers worktree modifications' ' + reset_and_clean && + echo "A modified line in submodule" >>submodule/a && + echo "submodule/a:A modified line in submodule" >expect && + git grep --recurse-submodules "A modified line in submodule" >actual && + test_cmp expect actual +' + +test_expect_success 'grep --recurse-submodules with --cached ignores worktree modifications' ' + reset_and_clean && + echo "A modified line in submodule" >>submodule/a && + test_must_fail git grep --recurse-submodules --cached "A modified line in submodule" >actual 2>&1 && + test_must_be_empty actual +' test_done From 814291cf3f286ddffc291e4820c62ac729e83171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 30 Jul 2019 23:29:15 +0200 Subject: [PATCH 044/710] t5510-fetch: fix negated 'test_i18ngrep' invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test '--no-show-forced-updates' in 't5510-fetch.sh' added in cdbd70c437 (fetch: add --[no-]show-forced-updates argument, 2019-06-18) runs '! test_i18ngrep ...'. 
This is wrong, because when running the test with GIT_TEST_GETTEXT_POISON=true, then 'test_i18ngrep' is basically a noop and always returns with success, the leading ! turns that into a failure, which then fails the test. Use 'test_i18ngrep ! ...' instead. This went unnoticed by our GETTEXT_POISON CI builds, because those builds don't run this test case: in those builds we don't install Apache, and this test comes after 't5510' sources 'lib-httpd.sh', which, consequently, skips all the remaining tests, including this one. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t5510-fetch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 139f7106f78177..f2481de577bf0a 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -997,7 +997,7 @@ test_expect_success '--no-show-forced-updates' ' ( cd no-forced-update-clone && git fetch --no-show-forced-updates origin 2>output && - ! test_i18ngrep "(forced update)" output + test_i18ngrep ! "(forced update)" output ) ' From d5b581f228fb8b2ec3a9d6225f657dff7d28925d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:38 -0700 Subject: [PATCH 045/710] Drop unused git-rebase--am.sh Since 21853626ea (built-in rebase: call `git am` directly, 2019-01-18), the built-in rebase already uses the built-in `git am` directly. Now that d03ebd411c (rebase: remove the rebase.useBuiltin setting, 2019-03-18) even removed the scripted rebase, there is no longer any user of `git-rebase--am.sh`, so let's just remove it. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- .gitignore | 1 - Makefile | 1 - builtin/rebase.c | 4 --- git-rebase--am.sh | 85 ----------------------------------------------- 4 files changed, 91 deletions(-) delete mode 100644 git-rebase--am.sh diff --git a/.gitignore b/.gitignore index 2374f77a1aae58..875f3fc6e8ac20 100644 --- a/.gitignore +++ b/.gitignore @@ -122,7 +122,6 @@ /git-range-diff /git-read-tree /git-rebase -/git-rebase--am /git-rebase--common /git-rebase--interactive /git-rebase--preserve-merges diff --git a/Makefile b/Makefile index 8a7e2353520ddd..63e1973333272a 100644 --- a/Makefile +++ b/Makefile @@ -624,7 +624,6 @@ SCRIPT_SH += git-web--browse.sh SCRIPT_LIB += git-mergetool--lib SCRIPT_LIB += git-parse-remote -SCRIPT_LIB += git-rebase--am SCRIPT_LIB += git-rebase--common SCRIPT_LIB += git-rebase--preserve-merges SCRIPT_LIB += git-sh-setup diff --git a/builtin/rebase.c b/builtin/rebase.c index 21681a551b1983..4dd76ee5761e24 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1153,10 +1153,6 @@ static int run_specific_rebase(struct rebase_options *opts, enum action action) } switch (opts->type) { - case REBASE_AM: - backend = "git-rebase--am"; - backend_func = "git_rebase__am"; - break; case REBASE_PRESERVE_MERGES: backend = "git-rebase--preserve-merges"; backend_func = "git_rebase__preserve_merges"; diff --git a/git-rebase--am.sh b/git-rebase--am.sh deleted file mode 100644 index 6416716ee620cf..00000000000000 --- a/git-rebase--am.sh +++ /dev/null @@ -1,85 +0,0 @@ -# This shell script fragment is sourced by git-rebase to implement -# its default, fast, patch-based, non-interactive mode. -# -# Copyright (c) 2010 Junio C Hamano. 
-# - -git_rebase__am () { - -case "$action" in -continue) - git am --resolved --resolvemsg="$resolvemsg" \ - ${gpg_sign_opt:+"$gpg_sign_opt"} && - move_to_original_branch - return - ;; -skip) - git am --skip --resolvemsg="$resolvemsg" && - move_to_original_branch - return - ;; -show-current-patch) - exec git am --show-current-patch - ;; -esac - -if test -z "$rebase_root" - # this is now equivalent to ! -z "$upstream" -then - revisions=$upstream...$orig_head -else - revisions=$onto...$orig_head -fi - -ret=0 -rm -f "$GIT_DIR/rebased-patches" - -git format-patch -k --stdout --full-index --cherry-pick --right-only \ - --src-prefix=a/ --dst-prefix=b/ --no-renames --no-cover-letter \ - --pretty=mboxrd --topo-order \ - $git_format_patch_opt \ - "$revisions" ${restrict_revision+^$restrict_revision} \ - >"$GIT_DIR/rebased-patches" -ret=$? - -if test 0 != $ret -then - rm -f "$GIT_DIR/rebased-patches" - case "$head_name" in - refs/heads/*) - git checkout -q "$head_name" - ;; - *) - git checkout -q "$orig_head" - ;; - esac - - cat >&2 <<-EOF - - git encountered an error while preparing the patches to replay - these revisions: - - $revisions - - As a result, git cannot rebase them. - EOF - return $ret -fi - -git am $git_am_opt --rebasing --resolvemsg="$resolvemsg" \ - --patch-format=mboxrd \ - $allow_rerere_autoupdate \ - ${gpg_sign_opt:+"$gpg_sign_opt"} <"$GIT_DIR/rebased-patches" -ret=$? - -rm -f "$GIT_DIR/rebased-patches" - -if test 0 != $ret -then - test -d "$state_dir" && write_basic_state - return $ret -fi - -move_to_original_branch - -} From 6180b202390a04c4c4f63453cdc75142bb2c8fae Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:39 -0700 Subject: [PATCH 046/710] t3400: stop referring to the scripted rebase One test case's title mentioned the then-current implementation detail that the `--am` backend was implemented in `git-rebase--am.sh`. This is no longer the case, so let's update the title to reflect the current reality. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3400-rebase.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh index 42f147858d7c2e..80b23fd3269c78 100755 --- a/t/t3400-rebase.sh +++ b/t/t3400-rebase.sh @@ -285,7 +285,7 @@ EOF test_cmp From_.msg out ' -test_expect_success 'rebase--am.sh and --show-current-patch' ' +test_expect_success 'rebase --am and --show-current-patch' ' test_create_repo conflict-apply && ( cd conflict-apply && From 2e7bbac6be6b905773ad091bdcd7c897286c17d0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:40 -0700 Subject: [PATCH 047/710] .gitignore: there is no longer a built-in `git-rebase--interactive` This went away in 0609b741a4 (rebase -i: combine rebase--interactive.c with rebase.c, 2019-04-17). Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 875f3fc6e8ac20..bcee4fda81df96 100644 --- a/.gitignore +++ b/.gitignore @@ -123,7 +123,6 @@ /git-read-tree /git-rebase /git-rebase--common -/git-rebase--interactive /git-rebase--preserve-merges /git-receive-pack /git-reflog From 68b54f669de27c66e42793977df5b4d5a4a3fc5b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:40 -0700 Subject: [PATCH 048/710] sequencer: the `am` and `rebase--interactive` scripts are gone Update a code comment that referred to those files as if they were still there. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- sequencer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequencer.c b/sequencer.c index f88a97fb10a322..334de145421c1f 100644 --- a/sequencer.c +++ b/sequencer.c @@ -767,7 +767,7 @@ static int parse_key_value_squoted(char *buf, struct string_list *list) * GIT_AUTHOR_DATE='$author_date' * * where $author_name, $author_email and $author_date are quoted. 
We are strict - * with our parsing, as the file was meant to be eval'd in the old + * with our parsing, as the file was meant to be eval'd in the now-removed * git-am.sh/git-rebase--interactive.sh scripts, and thus if the file differs * from what this function expects, it is better to bail out than to do * something that the user does not expect. From 5efed0ecf936596377c241aa0e0bd40839bafbd5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:41 -0700 Subject: [PATCH 049/710] rebase: fold git-rebase--common into the -p backend The only remaining scripted part of `git rebase` is the `--preserve-merges` backend. Meaning: there is little reason to keep the "library of common rebase functions" as a separate file. While moving the functions to `git-rebase--preserve-merges.sh`, we also drop the `move_to_original_branch` function that is no longer used. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- .gitignore | 1 - Makefile | 1 - builtin/rebase.c | 3 +- git-rebase--common.sh | 69 ---------------------------------- git-rebase--preserve-merges.sh | 55 +++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 73 deletions(-) delete mode 100644 git-rebase--common.sh diff --git a/.gitignore b/.gitignore index bcee4fda81df96..4470d7cfc0ae72 100644 --- a/.gitignore +++ b/.gitignore @@ -122,7 +122,6 @@ /git-range-diff /git-read-tree /git-rebase -/git-rebase--common /git-rebase--preserve-merges /git-receive-pack /git-reflog diff --git a/Makefile b/Makefile index 63e1973333272a..6c3bfb1733e2b2 100644 --- a/Makefile +++ b/Makefile @@ -624,7 +624,6 @@ SCRIPT_SH += git-web--browse.sh SCRIPT_LIB += git-mergetool--lib SCRIPT_LIB += git-parse-remote -SCRIPT_LIB += git-rebase--common SCRIPT_LIB += git-rebase--preserve-merges SCRIPT_LIB += git-sh-setup SCRIPT_LIB += git-sh-i18n diff --git a/builtin/rebase.c b/builtin/rebase.c index 4dd76ee5761e24..74a60e8c83f7b2 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1163,8 +1163,7 
@@ static int run_specific_rebase(struct rebase_options *opts, enum action action) } strbuf_addf(&script_snippet, - ". git-sh-setup && . git-rebase--common &&" - " . %s && %s", backend, backend_func); + ". git-sh-setup && . %s && %s", backend, backend_func); argv[0] = script_snippet.buf; status = run_command_v_opt(argv, RUN_USING_SHELL); diff --git a/git-rebase--common.sh b/git-rebase--common.sh deleted file mode 100644 index f00e13e5d04df0..00000000000000 --- a/git-rebase--common.sh +++ /dev/null @@ -1,69 +0,0 @@ - -resolvemsg=" -$(gettext 'Resolve all conflicts manually, mark them as resolved with -"git add/rm ", then run "git rebase --continue". -You can instead skip this commit: run "git rebase --skip". -To abort and get back to the state before "git rebase", run "git rebase --abort".') -" - -write_basic_state () { - echo "$head_name" > "$state_dir"/head-name && - echo "$onto" > "$state_dir"/onto && - echo "$orig_head" > "$state_dir"/orig-head && - test t = "$GIT_QUIET" && : > "$state_dir"/quiet - test t = "$verbose" && : > "$state_dir"/verbose - test -n "$strategy" && echo "$strategy" > "$state_dir"/strategy - test -n "$strategy_opts" && echo "$strategy_opts" > \ - "$state_dir"/strategy_opts - test -n "$allow_rerere_autoupdate" && echo "$allow_rerere_autoupdate" > \ - "$state_dir"/allow_rerere_autoupdate - test -n "$gpg_sign_opt" && echo "$gpg_sign_opt" > "$state_dir"/gpg_sign_opt - test -n "$signoff" && echo "$signoff" >"$state_dir"/signoff - test -n "$reschedule_failed_exec" && : > "$state_dir"/reschedule-failed-exec -} - -apply_autostash () { - if test -f "$state_dir/autostash" - then - stash_sha1=$(cat "$state_dir/autostash") - if git stash apply $stash_sha1 >/dev/null 2>&1 - then - echo "$(gettext 'Applied autostash.')" >&2 - else - git stash store -m "autostash" -q $stash_sha1 || - die "$(eval_gettext "Cannot store \$stash_sha1")" - gettext 'Applying autostash resulted in conflicts. -Your changes are safe in the stash. 
-You can run "git stash pop" or "git stash drop" at any time. -' >&2 - fi - fi -} - -move_to_original_branch () { - case "$head_name" in - refs/*) - message="rebase finished: $head_name onto $onto" - git update-ref -m "$message" \ - $head_name $(git rev-parse HEAD) $orig_head && - git symbolic-ref \ - -m "rebase finished: returning to $head_name" \ - HEAD $head_name || - die "$(eval_gettext "Could not move back to \$head_name")" - ;; - esac -} - -output () { - case "$verbose" in - '') - output=$("$@" 2>&1 ) - status=$? - test $status != 0 && printf "%s\n" "$output" - return $status - ;; - *) - "$@" - ;; - esac -} diff --git a/git-rebase--preserve-merges.sh b/git-rebase--preserve-merges.sh index afbb65765d4610..dec90e9af67785 100644 --- a/git-rebase--preserve-merges.sh +++ b/git-rebase--preserve-merges.sh @@ -77,6 +77,61 @@ rewritten_pending="$state_dir"/rewritten-pending # and leaves CR at the end instead. cr=$(printf "\015") +resolvemsg=" +$(gettext 'Resolve all conflicts manually, mark them as resolved with +"git add/rm ", then run "git rebase --continue". +You can instead skip this commit: run "git rebase --skip". 
+To abort and get back to the state before "git rebase", run "git rebase --abort".') +" + +write_basic_state () { + echo "$head_name" > "$state_dir"/head-name && + echo "$onto" > "$state_dir"/onto && + echo "$orig_head" > "$state_dir"/orig-head && + test t = "$GIT_QUIET" && : > "$state_dir"/quiet + test t = "$verbose" && : > "$state_dir"/verbose + test -n "$strategy" && echo "$strategy" > "$state_dir"/strategy + test -n "$strategy_opts" && echo "$strategy_opts" > \ + "$state_dir"/strategy_opts + test -n "$allow_rerere_autoupdate" && echo "$allow_rerere_autoupdate" > \ + "$state_dir"/allow_rerere_autoupdate + test -n "$gpg_sign_opt" && echo "$gpg_sign_opt" > "$state_dir"/gpg_sign_opt + test -n "$signoff" && echo "$signoff" >"$state_dir"/signoff + test -n "$reschedule_failed_exec" && : > "$state_dir"/reschedule-failed-exec +} + +apply_autostash () { + if test -f "$state_dir/autostash" + then + stash_sha1=$(cat "$state_dir/autostash") + if git stash apply $stash_sha1 >/dev/null 2>&1 + then + echo "$(gettext 'Applied autostash.')" >&2 + else + git stash store -m "autostash" -q $stash_sha1 || + die "$(eval_gettext "Cannot store \$stash_sha1")" + gettext 'Applying autostash resulted in conflicts. +Your changes are safe in the stash. +You can run "git stash pop" or "git stash drop" at any time. +' >&2 + fi + fi +} + +output () { + case "$verbose" in + '') + output=$("$@" 2>&1 ) + status=$? + test $status != 0 && printf "%s\n" "$output" + return $status + ;; + *) + "$@" + ;; + esac +} + strategy_args=${strategy:+--strategy=$strategy} test -n "$strategy_opts" && eval ' From 8c1e24048a0a159329743a8e76251412c0f727d9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:42 -0700 Subject: [PATCH 050/710] t3427: add a clarifying comment The flow of this test script is outright confusing, and to start the endeavor to address that, let's describe what this test is all about, and how it tries to do it. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index d8640522a08696..3a2ae7b55d5da1 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -11,6 +11,34 @@ commit_message() { git log --pretty=format:%s -1 "$1" } +# There are a few bugs in the rebase with regards to the subtree strategy, and +# this test script tries to document them. First, the following commit history +# is generated (the onelines are shown, time flows from left to right): +# +# master1 - master2 - master3 +# \ +# README ---------------------- Add subproject master - master4 - files_subtree/master5 +# +# Where the merge moves the files master[123].t into the subdirectory +# files_subtree/ and master4 as well as files_subtree/master5 add files to that +# directory directly. +# +# Then, in subsequent test cases, `git filter-branch` is used to distill just +# the commits that touch files_subtree/. To give it a final pre-rebase touch, +# an empty commit is added on top. The pre-rebase commit history looks like +# this: +# +# Add subproject master - master4 - files_subtree/master5 - Empty commit +# +# where the root commit adds three files: master1.t, master2.t and master3.t. +# +# This commit history is then rebased onto `master3` with the +# `-Xsubtree=files_subtree` option in three different ways: +# +# 1. using `--preserve-merges` +# 2. using `--preserve-merges` and --keep-empty +# 3. 
without specifying a rebase backend + test_expect_success 'setup' ' test_commit README && mkdir files && From c248d32cdb302812585823231cf29027db33571e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:42 -0700 Subject: [PATCH 051/710] t3427: simplify the `setup` test case significantly It still does the very same thing as before, but expresses it in a much more succinct (and still quite readable) manner. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index 3a2ae7b55d5da1..36c4f92e061748 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -41,27 +41,21 @@ commit_message() { test_expect_success 'setup' ' test_commit README && - mkdir files && - ( - cd files && - git init && - test_commit master1 && - test_commit master2 && - test_commit master3 - ) && - git fetch files master && - git branch files-master FETCH_HEAD && - git read-tree --prefix=files_subtree files-master && - git checkout -- files_subtree && - tree=$(git write-tree) && - head=$(git rev-parse HEAD) && - rev=$(git rev-parse --verify files-master^0) && - commit=$(git commit-tree -p $head -p $rev -m "Add subproject master" $tree) && - git update-ref HEAD $commit && - ( - cd files_subtree && - test_commit master4 - ) && + + git init files && + test_commit -C files master1 && + test_commit -C files master2 && + test_commit -C files master3 && + + : perform subtree merge into files_subtree/ && + git fetch files refs/heads/master:refs/heads/files-master && + git merge -s ours --no-commit --allow-unrelated-histories \ + files-master && + git read-tree --prefix=files_subtree -u files-master && + git commit -m "Add subproject master" && + + : add two extra commits to rebase && + test_commit -C files_subtree master4 && test_commit files_subtree/master5 ' From 
d51b771dc0a2ea71c44f3c4c54230f140588bacd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:43 -0700 Subject: [PATCH 052/710] t3427: move the `filter-branch` invocation into the `setup` case The step to prepare a pre-rebase commit history is _identical_ in _all_ of the test cases (except of course the `setup` case). It should therefore clearly be a part of the `setup` test case instead. As the `git filter-branch` command is quite costly on platforms where Unix shell scripting is simply slow (meaning: on Windows), this shaves off a noticeable part of the runtime: in this developer's setup, the time was reduced from ~1m25s to ~1m. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index 36c4f92e061748..64ba95f3e0dda0 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -56,16 +56,18 @@ test_expect_success 'setup' ' : add two extra commits to rebase && test_commit -C files_subtree master4 && - test_commit files_subtree/master5 + test_commit files_subtree/master5 && + + git checkout -b to-rebase && + git filter-branch --prune-empty -f --subdirectory-filter files_subtree && + git commit -m "Empty commit" --allow-empty ' # FAILURE: Does not preserve master4. 
test_expect_failure REBASE_P \ 'Rebase -Xsubtree --preserve-merges --onto commit 4' ' reset_rebase && - git checkout -b rebase-preserve-merges-4 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-preserve-merges-4 to-rebase && git rebase -Xsubtree=files_subtree --preserve-merges --onto files-master master && verbose test "$(commit_message HEAD~)" = "files_subtree/master4" ' @@ -74,9 +76,7 @@ test_expect_failure REBASE_P \ test_expect_failure REBASE_P \ 'Rebase -Xsubtree --preserve-merges --onto commit 5' ' reset_rebase && - git checkout -b rebase-preserve-merges-5 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-preserve-merges-5 to-rebase && git rebase -Xsubtree=files_subtree --preserve-merges --onto files-master master && verbose test "$(commit_message HEAD)" = "files_subtree/master5" ' @@ -85,9 +85,7 @@ test_expect_failure REBASE_P \ test_expect_failure REBASE_P \ 'Rebase -Xsubtree --keep-empty --preserve-merges --onto commit 4' ' reset_rebase && - git checkout -b rebase-keep-empty-4 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-keep-empty-4 to-rebase && git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" ' @@ -96,9 +94,7 @@ test_expect_failure REBASE_P \ test_expect_failure REBASE_P \ 'Rebase -Xsubtree --keep-empty --preserve-merges --onto commit 5' ' reset_rebase && - git checkout -b rebase-keep-empty-5 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-keep-empty-5 to-rebase && git rebase -Xsubtree=files_subtree 
--keep-empty --preserve-merges --onto files-master master && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" ' @@ -107,9 +103,7 @@ test_expect_failure REBASE_P \ test_expect_failure REBASE_P \ 'Rebase -Xsubtree --keep-empty --preserve-merges --onto empty commit' ' reset_rebase && - git checkout -b rebase-keep-empty-empty master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-keep-empty-empty to-rebase && git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && verbose test "$(commit_message HEAD)" = "Empty commit" ' @@ -117,9 +111,7 @@ test_expect_failure REBASE_P \ # FAILURE: fatal: Could not parse object test_expect_failure 'Rebase -Xsubtree --onto commit 4' ' reset_rebase && - git checkout -b rebase-onto-4 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-onto-4 to-rebase && git rebase -Xsubtree=files_subtree --onto files-master master && verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" ' @@ -127,18 +119,14 @@ test_expect_failure 'Rebase -Xsubtree --onto commit 4' ' # FAILURE: fatal: Could not parse object test_expect_failure 'Rebase -Xsubtree --onto commit 5' ' reset_rebase && - git checkout -b rebase-onto-5 master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" --allow-empty && + git checkout -b rebase-onto-5 to-rebase && git rebase -Xsubtree=files_subtree --onto files-master master && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" ' # FAILURE: fatal: Could not parse object test_expect_failure 'Rebase -Xsubtree --onto empty commit' ' reset_rebase && - git checkout -b rebase-onto-empty master && - git filter-branch --prune-empty -f --subdirectory-filter files_subtree && - git commit -m "Empty commit" 
--allow-empty && + git checkout -b rebase-onto-empty to-rebase && git rebase -Xsubtree=files_subtree --onto files-master master && verbose test "$(commit_message HEAD)" = "Empty commit" ' From b8c6f24255d3224438ee8c591b7b08d665433b6b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:44 -0700 Subject: [PATCH 053/710] t3427: condense the unnecessarily repetitive test cases into three Previously, this test script performed essentially three rebases and verified breakages by testing the post-rebase commits' messages. To do so, the rebases were performed multiple times, though, once per commit message to test. This wastes electricity (and CO2) and time. Let's condense the test cases to the essential number: the number of different rebases to validate. On Windows, where the scripted nature of the `--preserve-merges` backend hurts performance rather badly, this reduces the overall runtime in this developer's setup from ~1m to ~28s while still performing the exact same testing as before. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 61 +++++++-------------------------------- 1 file changed, 11 insertions(+), 50 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index 64ba95f3e0dda0..b21925d279aaad 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -64,70 +64,31 @@ test_expect_success 'setup' ' ' # FAILURE: Does not preserve master4. -test_expect_failure REBASE_P \ - 'Rebase -Xsubtree --preserve-merges --onto commit 4' ' +test_expect_failure REBASE_P 'Rebase -Xsubtree --preserve-merges --onto commit' ' reset_rebase && - git checkout -b rebase-preserve-merges-4 to-rebase && - git rebase -Xsubtree=files_subtree --preserve-merges --onto files-master master && - verbose test "$(commit_message HEAD~)" = "files_subtree/master4" -' - -# FAILURE: Does not preserve master5. 
-test_expect_failure REBASE_P \ - 'Rebase -Xsubtree --preserve-merges --onto commit 5' ' - reset_rebase && - git checkout -b rebase-preserve-merges-5 to-rebase && + git checkout -b rebase-preserve-merges to-rebase && git rebase -Xsubtree=files_subtree --preserve-merges --onto files-master master && + verbose test "$(commit_message HEAD~)" = "files_subtree/master4" && verbose test "$(commit_message HEAD)" = "files_subtree/master5" ' # FAILURE: Does not preserve master4. -test_expect_failure REBASE_P \ - 'Rebase -Xsubtree --keep-empty --preserve-merges --onto commit 4' ' +test_expect_failure REBASE_P 'Rebase -Xsubtree --keep-empty --preserve-merges --onto commit' ' reset_rebase && - git checkout -b rebase-keep-empty-4 to-rebase && - git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && - verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" -' - -# FAILURE: Does not preserve master5. -test_expect_failure REBASE_P \ - 'Rebase -Xsubtree --keep-empty --preserve-merges --onto commit 5' ' - reset_rebase && - git checkout -b rebase-keep-empty-5 to-rebase && - git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && - verbose test "$(commit_message HEAD~)" = "files_subtree/master5" -' - -# FAILURE: Does not preserve Empty. 
-test_expect_failure REBASE_P \ - 'Rebase -Xsubtree --keep-empty --preserve-merges --onto empty commit' ' - reset_rebase && - git checkout -b rebase-keep-empty-empty to-rebase && + git checkout -b rebase-keep-empty to-rebase && git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && + verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" && + verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = "Empty commit" ' # FAILURE: fatal: Could not parse object -test_expect_failure 'Rebase -Xsubtree --onto commit 4' ' - reset_rebase && - git checkout -b rebase-onto-4 to-rebase && - git rebase -Xsubtree=files_subtree --onto files-master master && - verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" -' - -# FAILURE: fatal: Could not parse object -test_expect_failure 'Rebase -Xsubtree --onto commit 5' ' - reset_rebase && - git checkout -b rebase-onto-5 to-rebase && - git rebase -Xsubtree=files_subtree --onto files-master master && - verbose test "$(commit_message HEAD~)" = "files_subtree/master5" -' -# FAILURE: fatal: Could not parse object -test_expect_failure 'Rebase -Xsubtree --onto empty commit' ' +test_expect_failure 'Rebase -Xsubtree --onto commit' ' reset_rebase && - git checkout -b rebase-onto-empty to-rebase && + git checkout -b rebase-onto to-rebase && git rebase -Xsubtree=files_subtree --onto files-master master && + verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" && + verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = "Empty commit" ' From a9c71073dace0d1ddbd12291604d191064b1ae94 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:45 -0700 Subject: [PATCH 054/710] t3427: fix erroneous assumption Apart from the `setup` test case, `t3427-rebase-subtree.sh` is made up exclusively of demonstrations of breakages. 
The tricky thing about such demonstrations is that they are often buggy themselves. In this instance, somewhere over the course of the six iterations of the patch that eventually made it into Git's `master` as 5f35900849e (contrib/subtree: Add a test for subtree rebase that loses commits, 2016-06-28), the commit message "files_subtree/master4" was changed to just "master4", but the test cases still expected the old commit message. Let's fix this, at long last. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index b21925d279aaad..94cc532e108798 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -68,7 +68,7 @@ test_expect_failure REBASE_P 'Rebase -Xsubtree --preserve-merges --onto commit' reset_rebase && git checkout -b rebase-preserve-merges to-rebase && git rebase -Xsubtree=files_subtree --preserve-merges --onto files-master master && - verbose test "$(commit_message HEAD~)" = "files_subtree/master4" && + verbose test "$(commit_message HEAD~)" = "master4" && verbose test "$(commit_message HEAD)" = "files_subtree/master5" ' @@ -77,7 +77,7 @@ test_expect_failure REBASE_P 'Rebase -Xsubtree --keep-empty --preserve-merges -- reset_rebase && git checkout -b rebase-keep-empty to-rebase && git rebase -Xsubtree=files_subtree --keep-empty --preserve-merges --onto files-master master && - verbose test "$(commit_message HEAD~2)" = "files_subtree/master4" && + verbose test "$(commit_message HEAD~2)" = "master4" && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = "Empty commit" ' @@ -87,7 +87,7 @@ test_expect_failure 'Rebase -Xsubtree --onto commit' ' reset_rebase && git checkout -b rebase-onto to-rebase && git rebase -Xsubtree=files_subtree --onto files-master master && - verbose test "$(commit_message HEAD~2)" = 
"files_subtree/master4" && + verbose test "$(commit_message HEAD~2)" = "master4" && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = "Empty commit" ' From f67336dabf8b77f1eee0341790c8fbbe5c6d81dd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:45 -0700 Subject: [PATCH 055/710] t3427: accommodate for the `rebase --merge` backend having been replaced Since 68aa495b590 (rebase: implement --merge via the interactive machinery, 2018-12-11), the job of the old `--merge` backend is now performed by the `--interactive` backend, too. One consequence is that empty commits are no longer rebased by default. Meaning that the test case that calls `git rebase -Xsubtree` (which used to be handled by the `--merge` backend) now needs to ask explicitly for the empty commit to be rebased. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index 94cc532e108798..a734716ea3e377 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -83,10 +83,10 @@ test_expect_failure REBASE_P 'Rebase -Xsubtree --keep-empty --preserve-merges -- ' # FAILURE: fatal: Could not parse object -test_expect_failure 'Rebase -Xsubtree --onto commit' ' +test_expect_failure 'Rebase -Xsubtree --keep-empty --onto commit' ' reset_rebase && git checkout -b rebase-onto to-rebase && - git rebase -Xsubtree=files_subtree --onto files-master master && + git rebase -Xsubtree=files_subtree --keep-empty --onto files-master master && verbose test "$(commit_message HEAD~2)" = "master4" && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = "Empty commit" From 4e6023b13ae1159277a7c3053f8d074c23456812 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:46 -0700 Subject: [PATCH 
056/710] t3427: fix another incorrect assumption The test case that concerns `git rebase -Xsubtree` (with the default rebase backend, not with `--preserve-merges`) starts out with a pre-rebase commit history that begins with a commit that introduces three files: master1.t, master2.t and master3.t. This commit was generated by passing a subtree merge commit through `git filter-branch --subdirectory-filter`, so it looks as if this commit really introduces all those files. The commit history onto which this commit is then rebased, however, introduced those files in individual commits. For that reason, the rebase will fail, it _must_ fail, because the first `pick` results in no changes to be committed. Let's fix the test case to expect exactly this situation. With this change, we can mark the original bug that this test case tried to demonstrate as fixed. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3427-rebase-subtree.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index a734716ea3e377..7a37235768574d 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -82,11 +82,12 @@ test_expect_failure REBASE_P 'Rebase -Xsubtree --keep-empty --preserve-merges -- verbose test "$(commit_message HEAD)" = "Empty commit" ' -# FAILURE: fatal: Could not parse object -test_expect_failure 'Rebase -Xsubtree --keep-empty --onto commit' ' +test_expect_success 'Rebase -Xsubtree --keep-empty --onto commit' ' reset_rebase && git checkout -b rebase-onto to-rebase && - git rebase -Xsubtree=files_subtree --keep-empty --onto files-master master && + test_must_fail git rebase -Xsubtree=files_subtree --keep-empty --onto files-master master && + : first pick results in no changes && + git rebase --continue && verbose test "$(commit_message HEAD~2)" = "master4" && verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && verbose test "$(commit_message HEAD)" = 
"Empty commit" From e145d993478befd0db6999822aa31d422b283a3b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:47 -0700 Subject: [PATCH 057/710] rebase -r: support merge strategies other than `recursive` We already support merge strategies in the sequencer, but only for `pick` commands. With this commit, we now also support them in `merge` commands. The approach is simple: if any merge strategy option is specified, or if any merge strategy other than `recursive` is specified, we simply spawn the `git merge` command. Otherwise, we handle the merge in-process just as before. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/git-rebase.txt | 2 -- builtin/rebase.c | 9 --------- sequencer.c | 14 ++++++++++++-- t/t3422-rebase-incompatible-options.sh | 10 ---------- t/t3430-rebase-merges.sh | 21 +++++++++++++++++++++ 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index 5e4e9276479c94..bc620c44e93916 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -543,8 +543,6 @@ In addition, the following pairs of options are incompatible: * --preserve-merges and --interactive * --preserve-merges and --signoff * --preserve-merges and --rebase-merges - * --rebase-merges and --strategy - * --rebase-merges and --strategy-option BEHAVIORAL DIFFERENCES ----------------------- diff --git a/builtin/rebase.c b/builtin/rebase.c index 74a60e8c83f7b2..625f50c63779af 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -1811,15 +1811,6 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) "'--reschedule-failed-exec'")); } - if (options.rebase_merges) { - if (strategy_options.nr) - die(_("cannot combine '--rebase-merges' with " - "'--strategy-option'")); - if (options.strategy) - die(_("cannot combine '--rebase-merges' with " - "'--strategy'")); - } - if (!options.root) { if (argc < 1) { struct branch *branch; diff 
--git a/sequencer.c b/sequencer.c index 334de145421c1f..d228448cd8c54a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -3256,6 +3256,9 @@ static int do_merge(struct repository *r, struct commit *head_commit, *merge_commit, *i; struct commit_list *bases, *j, *reversed = NULL; struct commit_list *to_merge = NULL, **tail = &to_merge; + const char *strategy = !opts->xopts_nr && + (!opts->strategy || !strcmp(opts->strategy, "recursive")) ? + NULL : opts->strategy; struct merge_options o; int merge_arg_len, oneline_offset, can_fast_forward, ret, k; static struct lock_file lock; @@ -3404,7 +3407,7 @@ static int do_merge(struct repository *r, goto leave_merge; } - if (to_merge->next) { + if (strategy || to_merge->next) { /* Octopus merge */ struct child_process cmd = CHILD_PROCESS_INIT; @@ -3418,7 +3421,14 @@ static int do_merge(struct repository *r, cmd.git_cmd = 1; argv_array_push(&cmd.args, "merge"); argv_array_push(&cmd.args, "-s"); - argv_array_push(&cmd.args, "octopus"); + if (!strategy) + argv_array_push(&cmd.args, "octopus"); + else { + argv_array_push(&cmd.args, strategy); + for (k = 0; k < opts->xopts_nr; k++) + argv_array_pushf(&cmd.args, + "-X%s", opts->xopts[k]); + } argv_array_push(&cmd.args, "--no-edit"); argv_array_push(&cmd.args, "--no-ff"); argv_array_push(&cmd.args, "--no-log"); diff --git a/t/t3422-rebase-incompatible-options.sh b/t/t3422-rebase-incompatible-options.sh index a5868ea152f941..50e7960702da4f 100755 --- a/t/t3422-rebase-incompatible-options.sh +++ b/t/t3422-rebase-incompatible-options.sh @@ -76,14 +76,4 @@ test_expect_success REBASE_P \ test_must_fail git rebase --preserve-merges --rebase-merges A ' -test_expect_success '--rebase-merges incompatible with --strategy' ' - git checkout B^0 && - test_must_fail git rebase --rebase-merges -s resolve A -' - -test_expect_success '--rebase-merges incompatible with --strategy-option' ' - git checkout B^0 && - test_must_fail git rebase --rebase-merges -Xignore-space-change A -' - test_done diff --git 
a/t/t3430-rebase-merges.sh b/t/t3430-rebase-merges.sh index 42ba5b9f0981b2..8ea6ff3548444c 100755 --- a/t/t3430-rebase-merges.sh +++ b/t/t3430-rebase-merges.sh @@ -412,4 +412,25 @@ test_expect_success '--continue after resolving conflicts after a merge' ' test_path_is_missing .git/MERGE_HEAD ' +test_expect_success '--rebase-merges with strategies' ' + git checkout -b with-a-strategy F && + test_tick && + git merge -m "Merge conflicting-G" conflicting-G && + + : first, test with a merge strategy option && + git rebase -ir -Xtheirs G && + echo conflicting-G >expect && + test_cmp expect G.t && + + : now, try with a merge strategy other than recursive && + git reset --hard @{1} && + write_script git-merge-override <<-\EOF && + echo overridden$1 >>G.t + git add G.t + EOF + PATH="$PWD:$PATH" git rebase -ir -s override -Xxopt G && + test_write_lines G overridden--xopt >expect && + test_cmp expect G.t +' + test_done From 5dcdd7409ad3b3e8d5e79871bfb9d5f4215d4a13 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:48 -0700 Subject: [PATCH 058/710] t/lib-rebase: prepare for testing `git rebase --rebase-merges` The format of the todo list is quite a bit different in the `--rebase-merges` mode; Let's prepare the fake editor to handle those todo lists properly, too. The original idea was that we keep the original command unless overridden, and because the original todo lists only had `pick` lines anyway, we could be sloppy and "override" the command by the same command (i.e. use the sed replacement pattern "pick" instead of "&"). This actually would not have worked with `fixup` and `squash` commands, but it would appear that we never tried to use the fake editor with `--autosquash`. However, in the next commit we want to use the fake editor in conjunction with `--rebase-merges`, so let's use the correct sed replacement pattern. 
Technically, it is not necessary to take care of the `fakesha` thing (where we reuse the sed replacement pattern to craft a new todo command), at least for now, as the only user of that thing overrides the `action` anyway. Nevertheless, for completeness' sake, we do take care of it. Helped-by: brian m. carlson Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/lib-rebase.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/t/lib-rebase.sh b/t/lib-rebase.sh index 7ea30e50068be8..6d87961e419e10 100644 --- a/t/lib-rebase.sh +++ b/t/lib-rebase.sh @@ -44,10 +44,10 @@ set_fake_editor () { rm -f "$1" echo 'rebase -i script before editing:' cat "$1".tmp - action=pick + action=\& for line in $FAKE_LINES; do case $line in - pick|p|squash|s|fixup|f|edit|e|reword|r|drop|d) + pick|p|squash|s|fixup|f|edit|e|reword|r|drop|d|label|l|reset|r|merge|m) action="$line";; exec_*|x_*|break|b) echo "$line" | sed 's/_/ /g' >> "$1";; @@ -58,11 +58,12 @@ set_fake_editor () { bad) action="badcmd";; fakesha) + test \& != "$action" || action=pick echo "$action XXXXXXX False commit" >> "$1" action=pick;; *) - sed -n "${line}s/^pick/$action/p" < "$1".tmp >> "$1" - action=pick;; + sed -n "${line}s/^[a-z][a-z]*/$action/p" < "$1".tmp >> "$1" + action=\&;; esac done echo 'rebase -i script after editing:' From a63f990d9205d1a781aec310e44edf35e29063a2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:49 -0700 Subject: [PATCH 059/710] t3418: test `rebase -r` with merge strategies There is a test case in this script that verifies that `git rebase --preserve-merges` works all right with non-default merge strategies or non-default merge strategy options. Now that `git rebase --rebase-merges` learned about merge strategies, let's copy-edit this test case to verify that that works as intended, too. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3418-rebase-continue.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/t/t3418-rebase-continue.sh b/t/t3418-rebase-continue.sh index bdaa511bb0ae10..fbf9addfd137b9 100755 --- a/t/t3418-rebase-continue.sh +++ b/t/t3418-rebase-continue.sh @@ -120,6 +120,20 @@ test_expect_success REBASE_P 'rebase passes merge strategy options correctly' ' git rebase --continue ' +test_expect_success 'rebase -r passes merge strategy options correctly' ' + rm -fr .git/rebase-* && + git reset --hard commit-new-file-F3-on-topic-branch && + test_commit merge-theirs && + git reset --hard HEAD^ && + test_commit some-other-commit && + test_tick && + git merge --no-ff merge-theirs && + FAKE_LINES="1 3 edit 4 5 7 8 9" git rebase -i -f -r -m \ + -s recursive --strategy-option=theirs HEAD~2 && + test_commit force-change-ours && + git rebase --continue +' + test_expect_success '--skip after failed fixup cleans commit message' ' test_when_finished "test_might_fail git rebase --abort" && git checkout -b with-conflicting-fixup && From e1fac531ea9f9dc710f6eeae37ea2b38dd5f9fae Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 31 Jul 2019 08:18:49 -0700 Subject: [PATCH 060/710] rebase -r: do not (re-)generate root commits with `--root` *and* `--onto` When rebasing a complete commit history onto a given commit, it is pretty obvious that the root commits should be rebased on top of said given commit. To test this, let's kill two birds with one stone and add a test case to t3427-rebase-subtree.sh that not only demonstrates that this works, but also that `git rebase -r` works with merge strategies now. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin/rebase.c | 7 +++++-- sequencer.c | 4 +++- sequencer.h | 6 ++++++ t/t3427-rebase-subtree.sh | 11 +++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/builtin/rebase.c b/builtin/rebase.c index 625f50c63779af..ee2bc8b032a2fa 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -62,7 +62,7 @@ struct rebase_options { const char *onto_name; const char *revisions; const char *switch_to; - int root; + int root, root_with_onto; struct object_id *squash_onto; struct commit *restrict_revision; int dont_finish_rebase; @@ -374,6 +374,7 @@ static int run_rebase_interactive(struct rebase_options *opts, flags |= abbreviate_commands ? TODO_LIST_ABBREVIATE_CMDS : 0; flags |= opts->rebase_merges ? TODO_LIST_REBASE_MERGES : 0; flags |= opts->rebase_cousins > 0 ? TODO_LIST_REBASE_COUSINS : 0; + flags |= opts->root_with_onto ? TODO_LIST_ROOT_WITH_ONTO : 0; flags |= command == ACTION_SHORTEN_OIDS ? TODO_LIST_SHORTEN_IDS : 0; switch (command) { @@ -1841,7 +1842,9 @@ int cmd_rebase(int argc, const char **argv, const char *prefix) options.squash_onto = &squash_onto; options.onto_name = squash_onto_name = xstrdup(oid_to_hex(&squash_onto)); - } + } else + options.root_with_onto = 1; + options.upstream_name = NULL; options.upstream = NULL; if (argc > 1) diff --git a/sequencer.c b/sequencer.c index d228448cd8c54a..ca119c84e577c9 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4440,6 +4440,7 @@ static int make_script_with_merges(struct pretty_print_context *pp, { int keep_empty = flags & TODO_LIST_KEEP_EMPTY; int rebase_cousins = flags & TODO_LIST_REBASE_COUSINS; + int root_with_onto = flags & TODO_LIST_ROOT_WITH_ONTO; struct strbuf buf = STRBUF_INIT, oneline = STRBUF_INIT; struct strbuf label = STRBUF_INIT; struct commit_list *commits = NULL, **tail = &commits, *iter; @@ -4606,7 +4607,8 @@ static int make_script_with_merges(struct pretty_print_context *pp, if (!commit) strbuf_addf(out, "%s 
%s\n", cmd_reset, - rebase_cousins ? "onto" : "[new root]"); + rebase_cousins || root_with_onto ? + "onto" : "[new root]"); else { const char *to = NULL; diff --git a/sequencer.h b/sequencer.h index 0c494b83d43e2c..d506081d3c8656 100644 --- a/sequencer.h +++ b/sequencer.h @@ -142,6 +142,12 @@ int sequencer_remove_state(struct replay_opts *opts); */ #define TODO_LIST_REBASE_COUSINS (1U << 4) #define TODO_LIST_APPEND_TODO_HELP (1U << 5) +/* + * When generating a script that rebases merges with `--root` *and* with + * `--onto`, we do not want to re-generate the root commits. + */ +#define TODO_LIST_ROOT_WITH_ONTO (1U << 6) + int sequencer_make_script(struct repository *r, struct strbuf *out, int argc, const char **argv, unsigned flags); diff --git a/t/t3427-rebase-subtree.sh b/t/t3427-rebase-subtree.sh index 7a37235768574d..39e348de163b72 100755 --- a/t/t3427-rebase-subtree.sh +++ b/t/t3427-rebase-subtree.sh @@ -93,4 +93,15 @@ test_expect_success 'Rebase -Xsubtree --keep-empty --onto commit' ' verbose test "$(commit_message HEAD)" = "Empty commit" ' +test_expect_success 'Rebase -Xsubtree --keep-empty --rebase-merges --onto commit' ' + reset_rebase && + git checkout -b rebase-merges-onto to-rebase && + test_must_fail git rebase -Xsubtree=files_subtree --keep-empty --rebase-merges --onto files-master --root && + : first pick results in no changes && + git rebase --continue && + verbose test "$(commit_message HEAD~2)" = "master4" && + verbose test "$(commit_message HEAD~)" = "files_subtree/master5" && + verbose test "$(commit_message HEAD)" = "Empty commit" +' + test_done From 947208b725188eb499625ebc5c6e43d54c97e4fc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:11 -0400 Subject: [PATCH 061/710] setup_traverse_info(): stop copying oid We assume that if setup_traverse_info() is passed a non-empty "base" string, that string is pointing into a tree object and we can read the object oid by skipping past the trailing NUL. 
As it turns out, this is not true for either of the two calls, and we may end up reading garbage bytes: 1. In git-merge-tree, our base string is either empty (in which case we'd never run this code), or it comes from our traverse_path() helper. The latter overallocates a buffer by the_hash_algo->rawsz bytes, but then fills it with only make_traverse_path(), leaving those extra bytes uninitialized (but part of a legitimate heap buffer). 2. In unpack_trees(), we pass o->prefix, which is some arbitrary string from the caller. In "git read-tree --prefix=foo", for instance, it will point to the command-line parameter, and we'll read 20 bytes past the end of the string. Interestingly, tools like ASan do not detect (2) because the process argv is part of a big pre-allocated buffer. So we're reading trash, but it's trash that's probably part of the next argument, or the environment. You can convince it to fail by putting something like this at the beginning of common-main.c's main() function: { int i; for (i = 0; i < argc; i++) argv[i] = xstrdup_or_null(argv[i]); } That puts the arguments into their own heap buffers, so running: make SANITIZE=address test will find problems when "read-tree --prefix" is used (e.g., in t3030). Doubly interesting, even with the hackery above, this does not fail prior to ea82b2a085 (tree-walk: store object_id in a separate member, 2019-01-15). That commit switched setup_traverse_info() to actually copying the hash, rather than simply pointing to it. That pointer was always pointing to garbage memory, but that commit started actually dereferencing the bytes, which is what triggers ASan. That also implies that nobody actually cares about reading these oid bytes anyway (or at least no path covered by our tests). And manual inspection of the code backs that up (I'll follow this patch with some cleanups that show definitively this is the case, but they're quite invasive, so it's worth doing this fix on its own). 
So let's drop the bogus hashcpy(), along with the confusing oversizing in merge-tree. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-tree-walking.txt | 4 +--- builtin/merge-tree.c | 2 +- tree-walk.c | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index bde18622a87404..59d78e03624e77 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -62,9 +62,7 @@ Initializing `setup_traverse_info`:: Initialize a `traverse_info` given the pathname of the tree to start - traversing from. The `base` argument is assumed to be the `path` - member of the `name_entry` being recursed into unless the tree is a - top-level tree in which case the empty string ("") is used. + traversing from. Walking ------- diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 34ca0258b12ae4..8ac62708366f23 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,7 +180,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, n) + the_hash_algo->rawsz); + char *path = xmallocz(traverse_path_len(info, n)); return make_traverse_path(path, info, n); } diff --git a/tree-walk.c b/tree-walk.c index ec32a47b2e7664..ba106152efa55a 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -177,10 +177,8 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->pathlen = pathlen ? 
pathlen + 1 : 0; info->name.path = base; info->name.pathlen = pathlen; - if (pathlen) { - hashcpy(info->name.oid.hash, (const unsigned char *)base + pathlen + 1); + if (pathlen) info->prev = &dummy; - } } char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) From 9055384710dd8963b125f4f87c24d8f67d9fa24f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:15 -0400 Subject: [PATCH 062/710] tree-walk: drop oid from traverse_info As the previous commit shows, the presence of an oid in each level of the traverse_info is confusing and ultimately not necessary. Let's drop it to make it clear that it will not always be set (as well as convince us that it's unused, and let the compiler catch any merges with other branches that do add new uses). Since the oid is part of name_entry, we'll actually stop embedding a name_entry entirely, and instead just separately hold the pathname, its length, and the mode. This makes the resulting code slightly more verbose as we have to pass those elements around individually. But it also makes it more clear what each code path is going to use (and in most of the paths, we really only care about the pathname itself). A few of these conversions are noisier than they need to be, as they also take the opportunity to rename "len" to "namelen" for clarity (especially where we also have "pathlen" or "ce_len" alongside). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 2 +- cache-tree.c | 2 +- tree-walk.c | 23 ++++++++++--------- tree-walk.h | 8 +++++-- unpack-trees.c | 53 ++++++++++++++++++++++++-------------------- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 8ac62708366f23..f0e4cfefaaf5c4 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -181,7 +181,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { char *path = xmallocz(traverse_path_len(info, n)); - return make_traverse_path(path, info, n); + return make_traverse_path(path, info, n->path, n->pathlen); } static void resolve(const struct traverse_info *info, struct name_entry *ours, struct name_entry *result) diff --git a/cache-tree.c b/cache-tree.c index b13bfaf71e9e15..badf5669f1d69c 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -713,7 +713,7 @@ static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root if (!info->prev) return root; our_parent = find_cache_tree_from_traversal(root, info->prev); - return cache_tree_find(our_parent, info->name.path); + return cache_tree_find(our_parent, info->name); } int cache_tree_matches_traversal(struct cache_tree *root, diff --git a/tree-walk.c b/tree-walk.c index ba106152efa55a..4610f77383527d 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -175,27 +175,27 @@ void setup_traverse_info(struct traverse_info *info, const char *base) if (pathlen && base[pathlen-1] == '/') pathlen--; info->pathlen = pathlen ? 
pathlen + 1 : 0; - info->name.path = base; - info->name.pathlen = pathlen; + info->name = base; + info->namelen = pathlen; if (pathlen) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n) +char *make_traverse_path(char *path, const struct traverse_info *info, + const char *name, size_t namelen) { - int len = tree_entry_len(n); int pathlen = info->pathlen; - path[pathlen + len] = 0; + path[pathlen + namelen] = 0; for (;;) { - memcpy(path + pathlen, n->path, len); + memcpy(path + pathlen, name, namelen); if (!pathlen) break; path[--pathlen] = '/'; - n = &info->name; - len = tree_entry_len(n); + name = info->name; + namelen = info->namelen; info = info->prev; - pathlen -= len; + pathlen -= namelen; } return path; } @@ -397,12 +397,13 @@ int traverse_trees(struct index_state *istate, if (info->prev) { strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, &info->name); + make_traverse_path(base.buf, info->prev, info->name, + info->namelen); base.buf[info->pathlen-1] = '/'; strbuf_setlen(&base, info->pathlen); traverse_path = xstrndup(base.buf, info->pathlen); } else { - traverse_path = xstrndup(info->name.path, info->pathlen); + traverse_path = xstrndup(info->name, info->pathlen); } info->traverse_path = traverse_path; for (;;) { diff --git a/tree-walk.h b/tree-walk.h index 161e2400f44346..baa2aa62c76ff3 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -56,7 +56,10 @@ enum get_oid_result get_tree_entry_follow_symlinks(struct object_id *tree_oid, c struct traverse_info { const char *traverse_path; struct traverse_info *prev; - struct name_entry name; + const char *name; + size_t namelen; + unsigned mode; + int pathlen; struct pathspec *pathspec; @@ -67,7 +70,8 @@ struct traverse_info { }; int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); -char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry 
*n); +char *make_traverse_path(char *path, const struct traverse_info *info, + const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); static inline int traverse_path_len(const struct traverse_info *info, const struct name_entry *n) diff --git a/unpack-trees.c b/unpack-trees.c index 50189909b86d6a..26f971f7ffc9ca 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -632,7 +632,7 @@ static int unpack_index_entry(struct cache_entry *ce, return ret; } -static int find_cache_pos(struct traverse_info *, const struct name_entry *); +static int find_cache_pos(struct traverse_info *, const char *p, size_t len); static void restore_cache_bottom(struct traverse_info *info, int bottom) { @@ -651,7 +651,7 @@ static int switch_cache_bottom(struct traverse_info *info) if (o->diff_index_cached) return 0; ret = o->cache_bottom; - pos = find_cache_pos(info->prev, &info->name); + pos = find_cache_pos(info->prev, info->name, info->namelen); if (pos < -1) o->cache_bottom = -2 - pos; @@ -690,7 +690,7 @@ static int index_pos_by_traverse_info(struct name_entry *names, char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; - make_traverse_path(name, info, names); + make_traverse_path(name, info, names->path, names->pathlen); name[len++] = '/'; name[len] = '\0'; pos = index_name_pos(o->src_index, name, len); @@ -811,7 +811,9 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, newinfo = *info; newinfo.prev = info; newinfo.pathspec = info->pathspec; - newinfo.name = *p; + newinfo.name = p->path; + newinfo.namelen = p->pathlen; + newinfo.mode = p->mode; newinfo.pathlen += tree_entry_len(p) + 1; newinfo.df_conflicts |= df_conflicts; @@ -863,14 +865,18 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, * itself - the caller needs to do the final check for the cache * entry having more data at the end! 
*/ -static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n) +static int do_compare_entry_piecewise(const struct cache_entry *ce, + const struct traverse_info *info, + const char *name, size_t namelen, + unsigned mode) { - int len, pathlen, ce_len; + int pathlen, ce_len; const char *ce_name; if (info->prev) { int cmp = do_compare_entry_piecewise(ce, info->prev, - &info->name); + info->name, info->namelen, + info->mode); if (cmp) return cmp; } @@ -884,15 +890,15 @@ static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct ce_len -= pathlen; ce_name = ce->name + pathlen; - len = tree_entry_len(n); - return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode); + return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); } static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, - const struct name_entry *n) + const char *name, size_t namelen, + unsigned mode) { - int len, pathlen, ce_len; + int pathlen, ce_len; const char *ce_name; int cmp; @@ -902,7 +908,7 @@ static int do_compare_entry(const struct cache_entry *ce, * it is quicker to use the precomputed version. 
*/ if (!info->traverse_path) - return do_compare_entry_piecewise(ce, info, n); + return do_compare_entry_piecewise(ce, info, name, namelen, mode); cmp = strncmp(ce->name, info->traverse_path, info->pathlen); if (cmp) @@ -917,13 +923,12 @@ static int do_compare_entry(const struct cache_entry *ce, ce_len -= pathlen; ce_name = ce->name + pathlen; - len = tree_entry_len(n); - return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode); + return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); } static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n) { - int cmp = do_compare_entry(ce, info, n); + int cmp = do_compare_entry(ce, info, n->path, n->pathlen, n->mode); if (cmp) return cmp; @@ -939,7 +944,8 @@ static int ce_in_traverse_path(const struct cache_entry *ce, { if (!info->prev) return 1; - if (do_compare_entry(ce, info->prev, &info->name)) + if (do_compare_entry(ce, info->prev, + info->name, info->namelen, info->mode)) return 0; /* * If ce (blob) is the same name as the path (which is a tree @@ -964,7 +970,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, ce->ce_flags = create_ce_flags(stage); ce->ce_namelen = len; oidcpy(&ce->oid, &n->oid); - make_traverse_path(ce->name, info, n); + make_traverse_path(ce->name, info, n->path, n->pathlen); return ce; } @@ -1057,13 +1063,12 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message) * the directory. 
*/ static int find_cache_pos(struct traverse_info *info, - const struct name_entry *p) + const char *p, size_t p_len) { int pos; struct unpack_trees_options *o = info->data; struct index_state *index = o->src_index; int pfxlen = info->pathlen; - int p_len = tree_entry_len(p); for (pos = o->cache_bottom; pos < index->cache_nr; pos++) { const struct cache_entry *ce = index->cache[pos]; @@ -1099,7 +1104,7 @@ static int find_cache_pos(struct traverse_info *info, ce_len = ce_slash - ce_name; else ce_len = ce_namelen(ce) - pfxlen; - cmp = name_compare(p->path, p_len, ce_name, ce_len); + cmp = name_compare(p, p_len, ce_name, ce_len); /* * Exact match; if we have a directory we need to * delay returning it. @@ -1114,7 +1119,7 @@ static int find_cache_pos(struct traverse_info *info, * E.g. ce_name == "t-i", and p->path == "t"; we may * have "t/a" in the index. */ - if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) && + if (p_len < ce_len && !memcmp(ce_name, p, p_len) && ce_name[p_len] < '/') continue; /* keep looking */ break; @@ -1125,7 +1130,7 @@ static int find_cache_pos(struct traverse_info *info, static struct cache_entry *find_cache_entry(struct traverse_info *info, const struct name_entry *p) { - int pos = find_cache_pos(info, p); + int pos = find_cache_pos(info, p->path, p->pathlen); struct unpack_trees_options *o = info->data; if (0 <= pos) @@ -1138,10 +1143,10 @@ static void debug_path(struct traverse_info *info) { if (info->prev) { debug_path(info->prev); - if (*info->prev->name.path) + if (*info->prev->name) putchar('/'); } - printf("%s", info->name.path); + printf("%s", info->name); } static void debug_name_entry(int i, struct name_entry *n) From 12b1826609d5c309336ffd9f56e42abaf8d96cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Thu, 1 Aug 2019 17:53:07 +0200 Subject: [PATCH 063/710] t5510-fetch: run non-httpd-specific test before sourcing 'lib-httpd.sh' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit 't5510-fetch.sh' sources 'lib-httpd.sh' near the end to run a httpd-specific test, but 'lib-httpd.sh' skips all the rest of the test script if the dependencies for running httpd tests are not fulfilled. Alas, recently cdbd70c437 (fetch: add --[no-]show-forced-updates argument, 2019-06-18) appended a non-httpd-specific test at the end, and this test is then skipped as well when httpd tests can't be run. Move this new test earlier in the test script, before 'lib-httpd.sh' is sourced, so it will be run even when httpd tests aren't. Also add a comment at the end of this test script to warn against adding non-httpd-specific tests at the end, in the hope that it will help prevent similar issues in the future. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t5510-fetch.sh | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index f2481de577bf0a..34b486f1a48b08 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -902,6 +902,29 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' ' test_cmp expect actual ' +test_expect_success '--no-show-forced-updates' ' + mkdir forced-updates && + ( + cd forced-updates && + git init && + test_commit 1 && + test_commit 2 + ) && + git clone forced-updates forced-update-clone && + git clone forced-updates no-forced-update-clone && + git -C forced-updates reset --hard HEAD~1 && + ( + cd forced-update-clone && + git fetch --show-forced-updates origin 2>output && + test_i18ngrep "(forced update)" output + ) && + ( + cd no-forced-update-clone && + git fetch --no-show-forced-updates origin 2>output && + test_i18ngrep ! 
"(forced update)" output + ) +' + setup_negotiation_tip () { SERVER="$1" URL="$2" @@ -978,27 +1001,7 @@ test_expect_success '--negotiation-tip limits "have" lines sent with HTTP protoc check_negotiation_tip ' -test_expect_success '--no-show-forced-updates' ' - mkdir forced-updates && - ( - cd forced-updates && - git init && - test_commit 1 && - test_commit 2 - ) && - git clone forced-updates forced-update-clone && - git clone forced-updates no-forced-update-clone && - git -C forced-updates reset --hard HEAD~1 && - ( - cd forced-update-clone && - git fetch --show-forced-updates origin 2>output && - test_i18ngrep "(forced update)" output - ) && - ( - cd no-forced-update-clone && - git fetch --no-show-forced-updates origin 2>output && - test_i18ngrep ! "(forced update)" output - ) -' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. test_done From 7f005b0f483a507d3c7b353f4753ac3fece88bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Thu, 1 Aug 2019 17:53:08 +0200 Subject: [PATCH 064/710] t5703: run all non-httpd-specific tests before sourcing 'lib-httpd.sh' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 't5703-upload-pack-ref-in-want.sh' sources 'lib-httpd.sh' near the end to run a couple of httpd-specific tests, but 'lib-httpd.sh' skips all the rest of the test script if the dependencies for running httpd tests are not fulfilled. However, the last six tests in 't5703' are not httpd-specific, but they are skipped as well when httpd tests can't be run. Move these six tests earlier in the test script, before 'lib-httpd.sh' is sourced, so they will be run even when httpd tests aren't. Note that this is not merely a pure code movement, because the setup test case for the httpd tests needed an additional 'rm -rf "$LOCAL_PRISTINE"' to clean up a directory left behind by the moved non-httpd-specific tests. 
Also add a comment at the end of this test script to warn against adding non-httpd-specific tests at the end, in the hope that it will help prevent similar issues in the future. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t5703-upload-pack-ref-in-want.sh | 204 +++++++++++++++-------------- 1 file changed, 104 insertions(+), 100 deletions(-) diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index de4b6106ef4a7c..3a2c143c6d6777 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -157,106 +157,6 @@ test_expect_success 'want-ref with ref we already have commit for' ' check_output ' -. "$TEST_DIRECTORY"/lib-httpd.sh -start_httpd - -REPO="$HTTPD_DOCUMENT_ROOT_PATH/repo" -LOCAL_PRISTINE="$(pwd)/local_pristine" - -test_expect_success 'setup repos for change-while-negotiating test' ' - ( - git init "$REPO" && - cd "$REPO" && - >.git/git-daemon-export-ok && - test_commit m1 && - git tag -d m1 && - - # Local repo with many commits (so that negotiation will take - # more than 1 request/response pair) - git clone "http://127.0.0.1:$LIB_HTTPD_PORT/smart/repo" "$LOCAL_PRISTINE" && - cd "$LOCAL_PRISTINE" && - git checkout -b side && - test_commit_bulk --id=s 33 && - - # Add novel commits to upstream - git checkout master && - cd "$REPO" && - test_commit m2 && - test_commit m3 && - git tag -d m2 m3 - ) && - git -C "$LOCAL_PRISTINE" remote set-url origin "http://127.0.0.1:$LIB_HTTPD_PORT/one_time_sed/repo" && - git -C "$LOCAL_PRISTINE" config protocol.version 2 -' - -inconsistency () { - # Simulate that the server initially reports $2 as the ref - # corresponding to $1, and after that, $1 as the ref corresponding to - # $1. This corresponds to the real-life situation where the server's - # repository appears to change during negotiation, for example, when - # different servers in a load-balancing arrangement serve (stateless) - # RPCs during a single negotiation. 
- printf "s/%s/%s/" \ - $(git -C "$REPO" rev-parse $1 | tr -d "\n") \ - $(git -C "$REPO" rev-parse $2 | tr -d "\n") \ - >"$HTTPD_ROOT_PATH/one-time-sed" -} - -test_expect_success 'server is initially ahead - no ref in want' ' - git -C "$REPO" config uploadpack.allowRefInWant false && - rm -rf local && - cp -r "$LOCAL_PRISTINE" local && - inconsistency master 1234567890123456789012345678901234567890 && - test_must_fail git -C local fetch 2>err && - test_i18ngrep "fatal: remote error: upload-pack: not our ref" err -' - -test_expect_success 'server is initially ahead - ref in want' ' - git -C "$REPO" config uploadpack.allowRefInWant true && - rm -rf local && - cp -r "$LOCAL_PRISTINE" local && - inconsistency master 1234567890123456789012345678901234567890 && - git -C local fetch && - - git -C "$REPO" rev-parse --verify master >expected && - git -C local rev-parse --verify refs/remotes/origin/master >actual && - test_cmp expected actual -' - -test_expect_success 'server is initially behind - no ref in want' ' - git -C "$REPO" config uploadpack.allowRefInWant false && - rm -rf local && - cp -r "$LOCAL_PRISTINE" local && - inconsistency master "master^" && - git -C local fetch && - - git -C "$REPO" rev-parse --verify "master^" >expected && - git -C local rev-parse --verify refs/remotes/origin/master >actual && - test_cmp expected actual -' - -test_expect_success 'server is initially behind - ref in want' ' - git -C "$REPO" config uploadpack.allowRefInWant true && - rm -rf local && - cp -r "$LOCAL_PRISTINE" local && - inconsistency master "master^" && - git -C local fetch && - - git -C "$REPO" rev-parse --verify "master" >expected && - git -C local rev-parse --verify refs/remotes/origin/master >actual && - test_cmp expected actual -' - -test_expect_success 'server loses a ref - ref in want' ' - git -C "$REPO" config uploadpack.allowRefInWant true && - rm -rf local && - cp -r "$LOCAL_PRISTINE" local && - echo "s/master/raster/" >"$HTTPD_ROOT_PATH/one-time-sed" && - 
test_must_fail git -C local fetch 2>err && - - test_i18ngrep "fatal: remote error: unknown ref refs/heads/raster" err -' - REPO="$(pwd)/repo" LOCAL_PRISTINE="$(pwd)/local_pristine" @@ -372,4 +272,108 @@ test_expect_success 'fetching with wildcard that matches multiple refs' ' grep "want-ref refs/heads/o/bar" log ' +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +REPO="$HTTPD_DOCUMENT_ROOT_PATH/repo" +LOCAL_PRISTINE="$(pwd)/local_pristine" + +test_expect_success 'setup repos for change-while-negotiating test' ' + ( + git init "$REPO" && + cd "$REPO" && + >.git/git-daemon-export-ok && + test_commit m1 && + git tag -d m1 && + + # Local repo with many commits (so that negotiation will take + # more than 1 request/response pair) + rm -rf "$LOCAL_PRISTINE" && + git clone "http://127.0.0.1:$LIB_HTTPD_PORT/smart/repo" "$LOCAL_PRISTINE" && + cd "$LOCAL_PRISTINE" && + git checkout -b side && + test_commit_bulk --id=s 33 && + + # Add novel commits to upstream + git checkout master && + cd "$REPO" && + test_commit m2 && + test_commit m3 && + git tag -d m2 m3 + ) && + git -C "$LOCAL_PRISTINE" remote set-url origin "http://127.0.0.1:$LIB_HTTPD_PORT/one_time_sed/repo" && + git -C "$LOCAL_PRISTINE" config protocol.version 2 +' + +inconsistency () { + # Simulate that the server initially reports $2 as the ref + # corresponding to $1, and after that, $1 as the ref corresponding to + # $1. This corresponds to the real-life situation where the server's + # repository appears to change during negotiation, for example, when + # different servers in a load-balancing arrangement serve (stateless) + # RPCs during a single negotiation. 
+ printf "s/%s/%s/" \ + $(git -C "$REPO" rev-parse $1 | tr -d "\n") \ + $(git -C "$REPO" rev-parse $2 | tr -d "\n") \ + >"$HTTPD_ROOT_PATH/one-time-sed" +} + +test_expect_success 'server is initially ahead - no ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant false && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master 1234567890123456789012345678901234567890 && + test_must_fail git -C local fetch 2>err && + test_i18ngrep "fatal: remote error: upload-pack: not our ref" err +' + +test_expect_success 'server is initially ahead - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master 1234567890123456789012345678901234567890 && + git -C local fetch && + + git -C "$REPO" rev-parse --verify master >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + +test_expect_success 'server is initially behind - no ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant false && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master "master^" && + git -C local fetch && + + git -C "$REPO" rev-parse --verify "master^" >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + +test_expect_success 'server is initially behind - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master "master^" && + git -C local fetch && + + git -C "$REPO" rev-parse --verify "master" >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + +test_expect_success 'server loses a ref - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + echo "s/master/raster/" >"$HTTPD_ROOT_PATH/one-time-sed" && + 
test_must_fail git -C local fetch 2>err && + + test_i18ngrep "fatal: remote error: unknown ref refs/heads/raster" err +' + +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done From 37806080d7be1ab5b2fa918f6a528652596ea2c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:18 -0400 Subject: [PATCH 065/710] tree-walk: use size_t consistently We store and manipulate the cumulative traverse_info.pathlen as an "int", which can overflow when we are fed ridiculously long pathnames (e.g., ones at the edge of 2GB or 4GB, even if the individual tree entry names are smaller than that). The results can be confusing, though after some prodding I was not able to use this integer overflow to cause an under-allocated buffer. Let's consistently use size_t to generate and store these, and make sure our addition doesn't overflow. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 4 ++-- tree-walk.h | 6 +++--- unpack-trees.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tree-walk.c b/tree-walk.c index 4610f77383527d..70f9eb5f1b449c 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -168,7 +168,7 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry) void setup_traverse_info(struct traverse_info *info, const char *base) { - int pathlen = strlen(base); + size_t pathlen = strlen(base); static struct traverse_info dummy; memset(info, 0, sizeof(*info)); @@ -184,7 +184,7 @@ void setup_traverse_info(struct traverse_info *info, const char *base) char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen) { - int pathlen = info->pathlen; + size_t pathlen = info->pathlen; path[pathlen + namelen] = 0; for (;;) { diff --git a/tree-walk.h b/tree-walk.h index baa2aa62c76ff3..47bf85d28287d2 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -60,7 +60,7 @@ struct traverse_info { 
size_t namelen; unsigned mode; - int pathlen; + size_t pathlen; struct pathspec *pathspec; unsigned long df_conflicts; @@ -74,9 +74,9 @@ char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); -static inline int traverse_path_len(const struct traverse_info *info, const struct name_entry *n) +static inline size_t traverse_path_len(const struct traverse_info *info, const struct name_entry *n) { - return info->pathlen + tree_entry_len(n); + return st_add(info->pathlen, tree_entry_len(n)); } /* in general, positive means "kind of interesting" */ diff --git a/unpack-trees.c b/unpack-trees.c index 26f971f7ffc9ca..8dbfb22770df15 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,7 +686,7 @@ static int index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - int len = traverse_path_len(info, names); + size_t len = traverse_path_len(info, names); char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; @@ -814,7 +814,7 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, newinfo.name = p->path; newinfo.namelen = p->pathlen; newinfo.mode = p->mode; - newinfo.pathlen += tree_entry_len(p) + 1; + newinfo.pathlen = st_add3(newinfo.pathlen, tree_entry_len(p), 1); newinfo.df_conflicts |= df_conflicts; /* @@ -960,7 +960,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, struct index_state *istate, int is_transient) { - int len = traverse_path_len(info, n); + size_t len = traverse_path_len(info, n); struct cache_entry *ce = is_transient ? 
make_empty_transient_cache_entry(len) : From b3b3cbcbf246b1051ad453bc02e24a89573e2911 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:20 -0400 Subject: [PATCH 066/710] tree-walk: accept a raw length for traverse_path_len() We take a "struct name_entry", but only care about the length of the path name. Let's just take that length directly, making it easier to use the function from callers that sometimes do not have a name_entry at all. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/merge-tree.c | 2 +- tree-walk.h | 5 +++-- unpack-trees.c | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index f0e4cfefaaf5c4..0629c87b19d59d 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,7 +180,7 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, n)); + char *path = xmallocz(traverse_path_len(info, tree_entry_len(n))); return make_traverse_path(path, info, n->path, n->pathlen); } diff --git a/tree-walk.h b/tree-walk.h index 47bf85d28287d2..a25c751c1eae6f 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -74,9 +74,10 @@ char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); -static inline size_t traverse_path_len(const struct traverse_info *info, const struct name_entry *n) +static inline size_t traverse_path_len(const struct traverse_info *info, + size_t namelen) { - return st_add(info->pathlen, tree_entry_len(n)); + return st_add(info->pathlen, namelen); } /* in general, positive means "kind of interesting" */ diff --git a/unpack-trees.c b/unpack-trees.c index 8dbfb22770df15..492eff666a7979 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,7 +686,7 @@ static int 
index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - size_t len = traverse_path_len(info, names); + size_t len = traverse_path_len(info, tree_entry_len(names)); char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); int pos; @@ -936,7 +936,7 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf * Even if the beginning compared identically, the ce should * compare as bigger than a directory leading up to it! */ - return ce_namelen(ce) > traverse_path_len(info, n); + return ce_namelen(ce) > traverse_path_len(info, tree_entry_len(n)); } static int ce_in_traverse_path(const struct cache_entry *ce, @@ -960,7 +960,7 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, struct index_state *istate, int is_transient) { - size_t len = traverse_path_len(info, n); + size_t len = traverse_path_len(info, tree_entry_len(n)); struct cache_entry *ce = is_transient ? make_empty_transient_cache_entry(len) : From c43ab062598d0299ea6e0d115a6018189a7793bf Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:23 -0400 Subject: [PATCH 067/710] tree-walk: add a strbuf wrapper for make_traverse_path() All but one of the callers of make_traverse_path() allocate a new heap buffer to store the path. Let's give them an easy way to write to a strbuf, which saves them from computing the length themselves (which is especially tricky when they want to add to the path). It will also make it easier for us to change the make_traverse_path() interface in a future patch to improve its bounds-checking. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-tree-walking.txt | 4 ++++ builtin/merge-tree.c | 5 +++-- tree-walk.c | 21 ++++++++++++++------ tree-walk.h | 3 +++ unpack-trees.c | 16 +++++++-------- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index 59d78e03624e77..7962e3285499aa 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -138,6 +138,10 @@ same in the next callback invocation. This utilizes the memory structure of a tree entry to avoid the overhead of using a generic strlen(). +`strbuf_make_traverse_path`:: + + Convenience wrapper to `make_traverse_path` into a strbuf. + Authors ------- diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index 0629c87b19d59d..87d949cf882d72 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -180,8 +180,9 @@ static struct merge_list *create_entry(unsigned stage, unsigned mode, const stru static char *traverse_path(const struct traverse_info *info, const struct name_entry *n) { - char *path = xmallocz(traverse_path_len(info, tree_entry_len(n))); - return make_traverse_path(path, info, n->path, n->pathlen); + struct strbuf buf = STRBUF_INIT; + strbuf_make_traverse_path(&buf, info, n->path, n->pathlen); + return strbuf_detach(&buf, NULL); } static void resolve(const struct traverse_info *info, struct name_entry *ours, struct name_entry *result) diff --git a/tree-walk.c b/tree-walk.c index 70f9eb5f1b449c..c2952f3793fd2f 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -200,6 +200,17 @@ char *make_traverse_path(char *path, const struct traverse_info *info, return path; } +void strbuf_make_traverse_path(struct strbuf *out, + const struct traverse_info *info, + const char *name, size_t namelen) +{ + size_t len = traverse_path_len(info, namelen); + + strbuf_grow(out, len); + make_traverse_path(out->buf + 
out->len, info, name, namelen); + strbuf_setlen(out, out->len + len); +} + struct tree_desc_skip { struct tree_desc_skip *prev; const void *ptr; @@ -396,12 +407,10 @@ int traverse_trees(struct index_state *istate, tx[i].d = t[i]; if (info->prev) { - strbuf_grow(&base, info->pathlen); - make_traverse_path(base.buf, info->prev, info->name, - info->namelen); - base.buf[info->pathlen-1] = '/'; - strbuf_setlen(&base, info->pathlen); - traverse_path = xstrndup(base.buf, info->pathlen); + strbuf_make_traverse_path(&base, info->prev, + info->name, info->namelen); + strbuf_addch(&base, '/'); + traverse_path = xstrndup(base.buf, base.len); } else { traverse_path = xstrndup(info->name, info->pathlen); } diff --git a/tree-walk.h b/tree-walk.h index a25c751c1eae6f..994c14a49964b3 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -72,6 +72,9 @@ struct traverse_info { int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); char *make_traverse_path(char *path, const struct traverse_info *info, const char *name, size_t namelen); +void strbuf_make_traverse_path(struct strbuf *out, + const struct traverse_info *info, + const char *name, size_t namelen); void setup_traverse_info(struct traverse_info *info, const char *base); static inline size_t traverse_path_len(const struct traverse_info *info, diff --git a/unpack-trees.c b/unpack-trees.c index 492eff666a7979..c3059c2440cec5 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -686,21 +686,19 @@ static int index_pos_by_traverse_info(struct name_entry *names, struct traverse_info *info) { struct unpack_trees_options *o = info->data; - size_t len = traverse_path_len(info, tree_entry_len(names)); - char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); + struct strbuf name = STRBUF_INIT; int pos; - make_traverse_path(name, info, names->path, names->pathlen); - name[len++] = '/'; - name[len] = '\0'; - pos = index_name_pos(o->src_index, name, len); + strbuf_make_traverse_path(&name, info, 
names->path, names->pathlen); + strbuf_addch(&name, '/'); + pos = index_name_pos(o->src_index, name.buf, name.len); if (pos >= 0) BUG("This is a directory and should not exist in index"); pos = -pos - 1; - if (!starts_with(o->src_index->cache[pos]->name, name) || - (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name))) + if (!starts_with(o->src_index->cache[pos]->name, name.buf) || + (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name.buf))) BUG("pos must point at the first entry in this directory"); - free(name); + strbuf_release(&name); return pos; } From 5aa02f98685d78666293149087d3f69b97528cfb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 31 Jul 2019 00:38:25 -0400 Subject: [PATCH 068/710] tree-walk: harden make_traverse_path() length computations The make_traverse_path() function isn't very careful about checking its output buffer boundaries. In fact, it doesn't even _know_ the size of the buffer it's writing to, and just assumes that the caller used traverse_path_len() correctly. And even then we assume that our traverse_info.pathlen components are all correct, and just blindly write into the buffer. Let's improve this situation a bit: - have the caller pass in their allocated buffer length, which we'll check against our own computations - check for integer underflow as we do our backwards-insertion of pathnames into the buffer - check that we do not run out items in our list to traverse before we've filled the expected number of bytes None of these should be triggerable in practice (especially since our switch to size_t everywhere in a previous commit), but it doesn't hurt to check our assumptions. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-walk.c | 28 ++++++++++++++++++++-------- tree-walk.h | 2 +- unpack-trees.c | 3 ++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tree-walk.c b/tree-walk.c index c2952f3793fd2f..4f1e9d79ab9aa2 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -181,21 +181,32 @@ void setup_traverse_info(struct traverse_info *info, const char *base) info->prev = &dummy; } -char *make_traverse_path(char *path, const struct traverse_info *info, +char *make_traverse_path(char *path, size_t pathlen, + const struct traverse_info *info, const char *name, size_t namelen) { - size_t pathlen = info->pathlen; + /* Always points to the end of the name we're about to add */ + size_t pos = st_add(info->pathlen, namelen); - path[pathlen + namelen] = 0; + if (pos >= pathlen) + BUG("too small buffer passed to make_traverse_path"); + + path[pos] = 0; for (;;) { - memcpy(path + pathlen, name, namelen); - if (!pathlen) + if (pos < namelen) + BUG("traverse_info pathlen does not match strings"); + pos -= namelen; + memcpy(path + pos, name, namelen); + + if (!pos) break; - path[--pathlen] = '/'; + path[--pos] = '/'; + + if (!info) + BUG("traverse_info ran out of list items"); name = info->name; namelen = info->namelen; info = info->prev; - pathlen -= namelen; } return path; } @@ -207,7 +218,8 @@ void strbuf_make_traverse_path(struct strbuf *out, size_t len = traverse_path_len(info, namelen); strbuf_grow(out, len); - make_traverse_path(out->buf + out->len, info, name, namelen); + make_traverse_path(out->buf + out->len, out->alloc - out->len, + info, name, namelen); strbuf_setlen(out, out->len + len); } diff --git a/tree-walk.h b/tree-walk.h index 994c14a49964b3..a3ad54e6ce7def 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -70,7 +70,7 @@ struct traverse_info { }; int get_tree_entry(const struct object_id *, const char *, struct object_id *, unsigned short *); -char *make_traverse_path(char *path, const struct traverse_info *info, 
+char *make_traverse_path(char *path, size_t pathlen, const struct traverse_info *info, const char *name, size_t namelen); void strbuf_make_traverse_path(struct strbuf *out, const struct traverse_info *info, diff --git a/unpack-trees.c b/unpack-trees.c index c3059c2440cec5..65c4677578ffaf 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -968,7 +968,8 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, ce->ce_flags = create_ce_flags(stage); ce->ce_namelen = len; oidcpy(&ce->oid, &n->oid); - make_traverse_path(ce->name, info, n->path, n->pathlen); + /* len+1 because the cache_entry allocates space for NUL */ + make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen); return ce; } From 371df1bea994d14347c3bea7a67066056d86b093 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 31 Jul 2019 13:04:28 -0700 Subject: [PATCH 069/710] trace2: cleanup column alignment in perf target format Truncate/elide very long "filename:linenumber" field. Truncate region and data "category" field if necessary. Adjust overall column widths. 
Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2/tr2_tgt_perf.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c index ea0cbbe13ee066..4a9d99218bf1be 100644 --- a/trace2/tr2_tgt_perf.c +++ b/trace2/tr2_tgt_perf.c @@ -21,10 +21,10 @@ static struct tr2_dst tr2dst_perf = { TR2_SYSENV_PERF, 0, 0, 0 }; */ static int tr2env_perf_be_brief; -#define TR2FMT_PERF_FL_WIDTH (50) +#define TR2FMT_PERF_FL_WIDTH (28) #define TR2FMT_PERF_MAX_EVENT_NAME (12) -#define TR2FMT_PERF_REPO_WIDTH (4) -#define TR2FMT_PERF_CATEGORY_WIDTH (10) +#define TR2FMT_PERF_REPO_WIDTH (3) +#define TR2FMT_PERF_CATEGORY_WIDTH (12) #define TR2_DOTS_BUFFER_SIZE (100) #define TR2_INDENT (2) @@ -79,17 +79,36 @@ static void perf_fmt_prepare(const char *event_name, if (!tr2env_perf_be_brief) { struct tr2_tbuf tb_now; + size_t fl_end_col; tr2_tbuf_local_time(&tb_now); strbuf_addstr(buf, tb_now.buf); strbuf_addch(buf, ' '); - if (file && *file) - strbuf_addf(buf, "%s:%d ", file, line); - while (buf->len < TR2FMT_PERF_FL_WIDTH) + fl_end_col = buf->len + TR2FMT_PERF_FL_WIDTH; + + if (file && *file) { + struct strbuf buf_fl = STRBUF_INIT; + + strbuf_addf(&buf_fl, "%s:%d", file, line); + + if (buf_fl.len <= TR2FMT_PERF_FL_WIDTH) + strbuf_addbuf(buf, &buf_fl); + else { + size_t avail = TR2FMT_PERF_FL_WIDTH - 3; + strbuf_addstr(buf, "..."); + strbuf_add(buf, + &buf_fl.buf[buf_fl.len - avail], + avail); + } + + strbuf_release(&buf_fl); + } + + while (buf->len < fl_end_col) strbuf_addch(buf, ' '); - strbuf_addstr(buf, "| "); + strbuf_addstr(buf, " | "); } strbuf_addf(buf, "d%d | ", tr2_sid_depth()); @@ -102,7 +121,7 @@ static void perf_fmt_prepare(const char *event_name, strbuf_addf(buf, "r%d ", repo->trace2_repo_id); while (buf->len < len) strbuf_addch(buf, ' '); - strbuf_addstr(buf, "| "); + strbuf_addstr(buf, " | "); if (p_us_elapsed_absolute) strbuf_addf(buf, "%9.6f | ", @@ -116,8 +135,8 @@ 
static void perf_fmt_prepare(const char *event_name, else strbuf_addf(buf, "%9s | ", " "); - strbuf_addf(buf, "%-*s | ", TR2FMT_PERF_CATEGORY_WIDTH, - (category ? category : "")); + strbuf_addf(buf, "%-*.*s | ", TR2FMT_PERF_CATEGORY_WIDTH, + TR2FMT_PERF_CATEGORY_WIDTH, (category ? category : "")); if (ctx->nr_open_regions > 0) { int len_indent = TR2_INDENT_LENGTH(ctx); From decfe05bb668771fb25a23021b2b33bc7649cd32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Thu, 1 Aug 2019 17:53:09 +0200 Subject: [PATCH 070/710] t: warn against adding non-httpd-specific tests after sourcing 'lib-httpd' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a couple of test scripts that are not completely httpd-specific, but do run a few httpd-specific tests at the end. These test scripts source 'lib-httpd.sh' somewhere mid-script, which then skips all the rest of the test script if the dependencies for running httpd tests are not fulfilled. As the previous two patches in this series show, already on two occasions non-httpd-specific tests were appended at the end of such test scripts, and, consequently, they were skipped as well when httpd tests couldn't be run. Add a comment at the end of these test scripts to warn against adding non-httpd-specific tests at the end, in the hope that they will help prevent similar issues in the future. 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t0410-partial-clone.sh | 3 +++ t/t5500-fetch-pack.sh | 3 +++ t/t5537-fetch-shallow.sh | 3 +++ t/t5545-push-options.sh | 3 +++ t/t5601-clone.sh | 3 +++ t/t5616-partial-clone.sh | 3 +++ t/t5700-protocol-v1.sh | 3 +++ t/t5702-protocol-v2.sh | 3 +++ 8 files changed, 24 insertions(+) diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 5bd892f2f7a90a..6415063980b499 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -518,4 +518,7 @@ test_expect_success 'fetching of missing objects from an HTTP server' ' git verify-pack --verbose "$IDX" | grep "$HASH" ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 1c71c0ec770cd5..8210f63d41edbe 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -920,4 +920,7 @@ test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' ' fetch_filter_blob_limit_zero "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done diff --git a/t/t5537-fetch-shallow.sh b/t/t5537-fetch-shallow.sh index 66f0b64d39273d..97a67728ca9b9a 100755 --- a/t/t5537-fetch-shallow.sh +++ b/t/t5537-fetch-shallow.sh @@ -255,4 +255,7 @@ test_expect_success 'shallow fetches check connectivity before writing shallow f git -C client fsck ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. 
+ test_done diff --git a/t/t5545-push-options.sh b/t/t5545-push-options.sh index 6d1d59c9b1af8c..04b34c4de19f85 100755 --- a/t/t5545-push-options.sh +++ b/t/t5545-push-options.sh @@ -278,4 +278,7 @@ test_expect_success 'push options keep quoted characters intact (http)' ' test_cmp expect "$HTTPD_DOCUMENT_ROOT_PATH"/upstream.git/hooks/pre-receive.push_options ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 37d76808d4a74b..4a3b901f067c2c 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -739,4 +739,7 @@ test_expect_success 'partial clone using HTTP' ' partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index b91ef548f86b0e..565254558f3623 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -417,4 +417,7 @@ test_expect_success 'tolerate server sending REF_DELTA against missing promisor ! test -e "$HTTPD_ROOT_PATH/one-time-sed" ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done diff --git a/t/t5700-protocol-v1.sh b/t/t5700-protocol-v1.sh index 7c9511c593c175..2571eb90b7656c 100755 --- a/t/t5700-protocol-v1.sh +++ b/t/t5700-protocol-v1.sh @@ -292,4 +292,7 @@ test_expect_success 'push with http:// using protocol v1' ' grep "git< version 1" log ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. 
+ test_done diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index 011b81d4fc2780..fbddd0aea93408 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -723,4 +723,7 @@ test_expect_success 'when server does not send "ready", expect FLUSH' ' test_i18ngrep "expected no other sections to be sent after no .ready." err ' +# DO NOT add non-httpd-specific tests here, because the last part of this +# test script is only executed when httpd is available and enabled. + test_done From a613d4f8176593e560628b174c3e10cfac3f6d58 Mon Sep 17 00:00:00 2001 From: Sun Chao <16657101987@163.com> Date: Thu, 1 Aug 2019 02:35:44 +0800 Subject: [PATCH 071/710] pack-refs: always refresh after taking the lock file When a packed ref is deleted, the whole packed-refs file is rewritten to omit the ref that no longer exists. However if another gc command is running and calls `pack-refs --all` simultaneously, there is a chance that a ref that was just updated lose the newly created commits. Through these steps, losing commits on newly updated refs can be demonstrated: # step 1: compile git without `USE_NSEC` option Some kernel releases do enable it by default while some do not. And if we compile git without `USE_NSEC`, it will be easier demonstrated by the following steps. 
# step 2: setup a repository and add the first commit git init repo && (cd repo && git config core.logallrefupdates true && git commit --allow-empty -m foo) # step 3: in one terminal, repack the refs repeatedly cd repo && while true do git pack-refs --all done # step 4: in another terminal, simultaneously update the # master with update-ref, and create and delete an # unrelated ref also with update-ref cd repo && while true do us=$(git commit-tree -m foo -p HEAD HEAD^{tree}) && git update-ref refs/heads/newbranch $us && git update-ref refs/heads/master $us && git update-ref -d refs/heads/newbranch && them=$(git rev-parse master) && if test "$them" != "$us" then echo >&2 "lost commit: $us" exit 1 fi # eye candy printf . done Though we have the packed-refs lock file and loose refs lock files to avoid update conflicts, a ref can lose its new commits if a racy stat-validity check of the `packed-refs` file happens (which is much the same as the racy-git problem described in `Documentation/technical/racy-git.txt`). The following specific set of operations demonstrates the problem: 1. Call `pack-refs --all` to pack all the loose refs to packed-refs, and let's say the modification time of the packed-refs is DATE_M. 2. Call `update-ref` to update master to a new commit while it is already packed. The old value (let us call it OID_A) remains in the packed-refs file, and the new value (let us call it OID_B) is written to $GIT_DIR/refs/heads/master. 3. Call `update-ref -d` within the same DATE_M from the 1st step to delete a different ref newbranch which is packed in the packed-refs file. It checks newbranch's oid from the packed-refs file without locking it. Meanwhile it keeps a snapshot of the packed-refs file in memory and records the file's attributes with the snapshot. The oid of master in the packed-refs's snapshot is OID_A. 4. Call a new `pack-refs --all` to pack the loose refs; the oid of master in the packed-refs file is OID_B, and the loose ref $GIT_DIR/refs/heads/master is removed.
Let's say the `pack-refs --all` finishes very quickly, so the new packed-refs file's modification time is still DATE_M, and it has the same file size and even the same inode. 5. The 3rd step now continues after checking newbranch: it begins to rewrite the packed-refs file. After getting the lock file of the packed-refs file, it compares the file's on-disk attributes with the snapshot, such as the timestamp, the file size and the inode value. If they are all the same, the snapshot is not refreshed. Because the loose ref of master was removed by the 4th step, `update-ref -d` will write a new packed-refs file to disk which contains master with the oid OID_A. So now the new commit OID_B of master is lost. The best path forward is to always refresh after taking the lock file of the `packed-refs` file. Traditionally we avoided that because refreshing it implied parsing the whole file. But these days we mmap it, so it really is just an extra open()/mmap() and a quick read of the header. That doesn't seem like an outrageous cost to pay when we're already taking the lock. Signed-off-by: Sun Chao Signed-off-by: Jeff King Signed-off-by: Sun Chao Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index c01c7f5901a6f3..4458a0f69ccb21 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -1012,14 +1012,23 @@ int packed_refs_lock(struct ref_store *ref_store, int flags, struct strbuf *err) } /* - * Now that we hold the `packed-refs` lock, make sure that our - * snapshot matches the current version of the file. Normally - * `get_snapshot()` does that for us, but that function - * assumes that when the file is locked, any existing snapshot - * is still valid. We've just locked the file, but it might - * have changed the moment *before* we locked it.
+ * There is a stat-validity problem might cause `update-ref -d` + * lost the newly commit of a ref, because a new `packed-refs` + * file might has the same on-disk file attributes such as + * timestamp, file size and inode value, but has a changed + * ref value. + * + * This could happen with a very small chance when + * `update-ref -d` is called and at the same time another + * `pack-refs --all` process is running. + * + * Now that we hold the `packed-refs` lock, it is important + * to make sure we could read the latest version of + * `packed-refs` file no matter we have just mmap it or not. + * So what need to do is clear the snapshot if we hold it + * already. */ - validate_snapshot(refs); + clear_snapshot(refs); /* * Now make sure that the packed-refs file as it exists in the From ecd72042de7d79c05d8f153e288766c7f88f0b10 Mon Sep 17 00:00:00 2001 From: Varun Naik Date: Fri, 2 Aug 2019 09:28:52 -0700 Subject: [PATCH 072/710] checkout.c: unstage empty deleted ita files It is possible to delete a committed file from the index and then add it as intent-to-add. After `git checkout HEAD `, the file should be identical in the index and HEAD. The command already works correctly if the file has contents in HEAD. This patch provides the desired behavior even when the file is empty in HEAD. `git checkout HEAD ` calls tree.c:read_tree_1(), with fn pointing to checkout.c:update_some(). update_some() creates a new cache entry but discards it when its mode and oid match those of the old entry. A cache entry for an ita file and a cache entry for an empty file have the same oid. Therefore, an empty deleted ita file previously passed both of these checks, and the new entry was discarded, so the file remained unchanged in the index. After this fix, if the file is marked as ita in the cache, then we avoid discarding the new entry and add the new entry to the cache instead. This change should not affect newly added ita files. 
For those, inside tree.c:read_tree_1(), tree_entry_interesting() returns entry_not_interesting, so fn is never called. Helped-by: Jeff King Signed-off-by: Varun Naik Signed-off-by: Junio C Hamano --- builtin/checkout.c | 1 + t/t2022-checkout-paths.sh | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index 91f8509f85396c..27daa09c3ce860 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -126,6 +126,7 @@ static int update_some(const struct object_id *oid, struct strbuf *base, if (pos >= 0) { struct cache_entry *old = active_cache[pos]; if (ce->ce_mode == old->ce_mode && + !ce_intent_to_add(old) && oideq(&ce->oid, &old->oid)) { old->ce_flags |= CE_UPDATE; discard_cache_entry(ce); diff --git a/t/t2022-checkout-paths.sh b/t/t2022-checkout-paths.sh index fc3eb43b890977..6844afafc0eb92 100755 --- a/t/t2022-checkout-paths.sh +++ b/t/t2022-checkout-paths.sh @@ -78,4 +78,15 @@ test_expect_success 'do not touch files that are already up-to-date' ' test_cmp expect actual ' +test_expect_success 'checkout HEAD adds deleted intent-to-add file back to index' ' + echo "nonempty" >nonempty && + >empty && + git add nonempty empty && + git commit -m "create files to be deleted" && + git rm --cached nonempty empty && + git add -N nonempty empty && + git checkout HEAD nonempty empty && + git diff --cached --exit-code +' + test_done From 620c09e1b686e06c4ddbd5fb153f7ad898bab412 Mon Sep 17 00:00:00 2001 From: Varun Naik Date: Thu, 1 Aug 2019 09:09:10 -0700 Subject: [PATCH 073/710] restore: add test for deleted ita files `git restore --staged` uses the same machinery as `git checkout HEAD`, so there should be a similar test case for "restore" as the existing test case for "checkout" with deleted ita files. 
Helped-by: Jeff King Signed-off-by: Varun Naik Signed-off-by: Junio C Hamano --- t/t2070-restore.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t2070-restore.sh b/t/t2070-restore.sh index 2650df196670b8..21c3f84459dfe2 100755 --- a/t/t2070-restore.sh +++ b/t/t2070-restore.sh @@ -95,4 +95,15 @@ test_expect_success 'restore --ignore-unmerged ignores unmerged entries' ' ) ' +test_expect_success 'restore --staged adds deleted intent-to-add file back to index' ' + echo "nonempty" >nonempty && + >empty && + git add nonempty empty && + git commit -m "create files to be deleted" && + git rm --cached nonempty empty && + git add -N nonempty empty && + git restore --staged nonempty empty && + git diff --cached --exit-code +' + test_done From 9916073be5dc59997e7cb9f7577bc7c91fd8f44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 5 Aug 2019 10:02:38 +0200 Subject: [PATCH 074/710] t5318-commit-graph: use 'test_expect_code' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 't5318-commit-graph.sh' the test 'close with correct error on bad input' manually verifies the exit code of a 'git commit-graph write' command. Use 'test_expect_code' instead. 
Signed-off-by: SZEDER Gábor Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 22cb9d66430410..4391007f4c1c60 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -26,8 +26,7 @@ test_expect_success 'write graph with no packs' ' test_expect_success 'close with correct error on bad input' ' cd "$TRASH_DIRECTORY/full" && echo doesnotexist >in && - { git commit-graph write --stdin-packs stderr; ret=$?; } && - test "$ret" = 1 && + test_expect_code 1 git commit-graph write --stdin-packs stderr && test_i18ngrep "error adding pack" stderr ' From 39d88318569188c0544c3c0f8207f2e1b1829e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 5 Aug 2019 10:02:39 +0200 Subject: [PATCH 075/710] commit-graph: turn a group of write-related macro flags into an enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SZEDER Gábor Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 6 +++--- builtin/gc.c | 2 +- commit-graph.c | 11 ++++++----- commit-graph.h | 13 ++++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 38027b83d9d832..64eccde314e57e 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -154,7 +154,7 @@ static int graph_write(int argc, const char **argv) struct string_list *commit_hex = NULL; struct string_list lines; int result = 0; - unsigned int flags = COMMIT_GRAPH_PROGRESS; + enum commit_graph_write_flags flags = COMMIT_GRAPH_WRITE_PROGRESS; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -192,9 +192,9 @@ static int graph_write(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); if (opts.append) - flags |= 
COMMIT_GRAPH_APPEND; + flags |= COMMIT_GRAPH_WRITE_APPEND; if (opts.split) - flags |= COMMIT_GRAPH_SPLIT; + flags |= COMMIT_GRAPH_WRITE_SPLIT; read_replace_refs = 0; diff --git a/builtin/gc.c b/builtin/gc.c index c18efadda53e54..305fb0f45af3bd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -687,7 +687,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (gc_write_commit_graph && write_commit_graph_reachable(get_object_directory(), - !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0, + !quiet && !daemonized ? COMMIT_GRAPH_WRITE_PROGRESS : 0, NULL)) return 1; diff --git a/commit-graph.c b/commit-graph.c index b3c4de79b6da45..04324a464894e9 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1133,7 +1133,8 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, +int write_commit_graph_reachable(const char *obj_dir, + enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { struct string_list list = STRING_LIST_INIT_DUP; @@ -1750,7 +1751,7 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags, + enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts) { struct write_commit_graph_context *ctx; @@ -1771,9 +1772,9 @@ int write_commit_graph(const char *obj_dir, if (len && ctx->obj_dir[len - 1] == '/') ctx->obj_dir[len - 1] = 0; - ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; - ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; - ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; + ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; + ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 
1 : 0; ctx->split_opts = split_opts; if (ctx->split) { diff --git a/commit-graph.h b/commit-graph.h index df9a3b20e4abc7..ae4db87fb5e6dc 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -71,9 +71,11 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, */ int generation_numbers_enabled(struct repository *r); -#define COMMIT_GRAPH_APPEND (1 << 0) -#define COMMIT_GRAPH_PROGRESS (1 << 1) -#define COMMIT_GRAPH_SPLIT (1 << 2) +enum commit_graph_write_flags { + COMMIT_GRAPH_WRITE_APPEND = (1 << 0), + COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), + COMMIT_GRAPH_WRITE_SPLIT = (1 << 2) +}; struct split_commit_graph_opts { int size_multiple; @@ -87,12 +89,13 @@ struct split_commit_graph_opts { * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, +int write_commit_graph_reachable(const char *obj_dir, + enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags, + enum commit_graph_write_flags flags, const struct split_commit_graph_opts *split_opts); #define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0) From 7c5c9b9c57d58273d17dfc3fec3ebdb25077a9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 5 Aug 2019 10:02:40 +0200 Subject: [PATCH 076/710] commit-graph: error out on invalid commit oids in 'write --stdin-commits' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While 'git commit-graph write --stdin-commits' expects commit object ids as input, it accepts and silently skips over any invalid commit object ids, and still exits with success: # nonsense $ echo not-a-commit-oid | git commit-graph write --stdin-commits $ echo $? 0 # sometimes I forgot that refs are not good... 
$ echo HEAD | git commit-graph write --stdin-commits $ echo $? 0 # valid tree OID, but not a commit OID $ git rev-parse HEAD^{tree} | git commit-graph write --stdin-commits $ echo $? 0 $ ls -l .git/objects/info/commit-graph ls: cannot access '.git/objects/info/commit-graph': No such file or directory Check that all input records are indeed valid commit object ids and return with error otherwise, the same way '--stdin-packs' handles invalid input; see e103f7276f (commit-graph: return with errors during write, 2019-06-12). Note that it should only return with error when encountering an invalid commit object id coming from standard input. However, '--reachable' uses the same code path to process object ids pointed to by all refs, and that includes tag object ids as well, which should still be skipped over. Therefore add a new flag to 'enum commit_graph_write_flags' and a corresponding field to 'struct write_commit_graph_context', so we can differentiate between those two cases. Signed-off-by: SZEDER Gábor Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 4 +++- commit-graph.c | 29 +++++++++++++++++------------ commit-graph.h | 4 +++- t/t5318-commit-graph.sh | 11 ++++++++++- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 64eccde314e57e..57863619b71dc9 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -213,8 +213,10 @@ static int graph_write(int argc, const char **argv) if (opts.stdin_packs) pack_indexes = &lines; - if (opts.stdin_commits) + if (opts.stdin_commits) { commit_hex = &lines; + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + } UNLEAK(buf); } diff --git a/commit-graph.c b/commit-graph.c index 04324a464894e9..821900675b27f2 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -782,7 +782,8 @@ struct write_commit_graph_context { unsigned append:1, report_progress:1, - split:1; + split:1, + check_oids:1; const struct split_commit_graph_opts 
*split_opts; }; @@ -1193,8 +1194,8 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, return 0; } -static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, - struct string_list *commit_hex) +static int fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, + struct string_list *commit_hex) { uint32_t i; struct strbuf progress_title = STRBUF_INIT; @@ -1215,20 +1216,21 @@ static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, struct commit *result; display_progress(ctx->progress, i + 1); - if (commit_hex->items[i].string && - parse_oid_hex(commit_hex->items[i].string, &oid, &end)) - continue; - - result = lookup_commit_reference_gently(ctx->r, &oid, 1); - - if (result) { + if (!parse_oid_hex(commit_hex->items[i].string, &oid, &end) && + (result = lookup_commit_reference_gently(ctx->r, &oid, 1))) { ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); ctx->oids.nr++; + } else if (ctx->check_oids) { + error(_("invalid commit object id: %s"), + commit_hex->items[i].string); + return -1; } } stop_progress(&ctx->progress); strbuf_release(&progress_title); + + return 0; } static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) @@ -1775,6 +1777,7 @@ int write_commit_graph(const char *obj_dir, ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; + ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 
1 : 0; ctx->split_opts = split_opts; if (ctx->split) { @@ -1829,8 +1832,10 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } - if (commit_hex) - fill_oids_from_commit_hex(ctx, commit_hex); + if (commit_hex) { + if ((res = fill_oids_from_commit_hex(ctx, commit_hex))) + goto cleanup; + } if (!pack_indexes && !commit_hex) fill_oids_from_all_packs(ctx); diff --git a/commit-graph.h b/commit-graph.h index ae4db87fb5e6dc..486e64e591d476 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -74,7 +74,9 @@ int generation_numbers_enabled(struct repository *r); enum commit_graph_write_flags { COMMIT_GRAPH_WRITE_APPEND = (1 << 0), COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), - COMMIT_GRAPH_WRITE_SPLIT = (1 << 2) + COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), + /* Make sure that each OID in the input is a valid commit OID. */ + COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3) }; struct split_commit_graph_opts { diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 4391007f4c1c60..ab3eccf0fafa3f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -23,7 +23,7 @@ test_expect_success 'write graph with no packs' ' test_path_is_missing info/commit-graph ' -test_expect_success 'close with correct error on bad input' ' +test_expect_success 'exit with correct error on bad input to --stdin-packs' ' cd "$TRASH_DIRECTORY/full" && echo doesnotexist >in && test_expect_code 1 git commit-graph write --stdin-packs stderr && @@ -40,6 +40,15 @@ test_expect_success 'create commits and repack' ' git repack ' +test_expect_success 'exit with correct error on bad input to --stdin-commits' ' + cd "$TRASH_DIRECTORY/full" && + echo HEAD | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && + test_i18ngrep "invalid commit object id" stderr && + # valid tree OID, but not a commit OID + git rev-parse HEAD^{tree} | test_expect_code 1 git commit-graph write --stdin-commits 2>stderr && + test_i18ngrep "invalid commit object id" stderr +' + graph_git_two_modes() { git -c 
core.commitGraph=true $1 >output git -c core.commitGraph=false $1 >expect From 96f3ccc2abf81520740de857471ab6b200f0ea3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 5 Aug 2019 23:04:46 +0200 Subject: [PATCH 077/710] t0000-basic: use realistic test script names in the verbose tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our test scripts are named something like 't1234-command.sh', but the script names used in 't0000-basic.sh' don't follow this naming convention. Normally this doesn't matter, because the test scripts themselves don't care how they are called. However, the next patch will start to include the test number in the test's verbose output, so the test script's name will matter in the two tests checking the verbose output. Update the tests 'test --verbose' and 'test --verbose-only' to follow our test script naming convention. Leave the other tests in 't0000' unchanged: changing the names of their test scripts would be only pointless code churn. 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t0000-basic.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index c03054c538a0f9..f9838d88daf1bd 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -276,15 +276,15 @@ test_expect_success 'pretend we have a mix of all possible results' " test_expect_success C_LOCALE_OUTPUT 'test --verbose' ' test_must_fail run_sub_test_lib_test \ - test-verbose "test verbose" --verbose <<-\EOF && + t1234-verbose "test verbose" --verbose <<-\EOF && test_expect_success "passing test" true test_expect_success "test with output" "echo foo" test_expect_success "failing test" false test_done EOF - mv test-verbose/out test-verbose/out+ && - grep -v "^Initialized empty" test-verbose/out+ >test-verbose/out && - check_sub_test_lib_test test-verbose <<-\EOF + mv t1234-verbose/out t1234-verbose/out+ && + grep -v "^Initialized empty" t1234-verbose/out+ >t1234-verbose/out && + check_sub_test_lib_test t1234-verbose <<-\EOF > expecting success: true > ok 1 - passing test > Z @@ -303,14 +303,14 @@ test_expect_success C_LOCALE_OUTPUT 'test --verbose' ' test_expect_success 'test --verbose-only' ' test_must_fail run_sub_test_lib_test \ - test-verbose-only-2 "test verbose-only=2" \ + t2345-verbose-only-2 "test verbose-only=2" \ --verbose-only=2 <<-\EOF && test_expect_success "passing test" true test_expect_success "test with output" "echo foo" test_expect_success "failing test" false test_done EOF - check_sub_test_lib_test test-verbose-only-2 <<-\EOF + check_sub_test_lib_test t2345-verbose-only-2 <<-\EOF > ok 1 - passing test > Z > expecting success: echo foo From ffe1afe67c0fab1786ad2aa9668efabe773f73c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 5 Aug 2019 23:04:47 +0200 Subject: [PATCH 078/710] tests: show the test name and number at the start of verbose output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The verbose output of every test looks something like this: expecting success: echo content >file && git add file && git commit -m "add file" [master (root-commit) d1fbfbd] add file Author: A U Thor 1 file changed, 1 insertion(+) create mode 100644 file ok 1 - commit works i.e. first an "expecting success" (or "checking known breakage") line followed by the commands to be executed, then the output of those commands, and finally an "ok"/"not ok" line containing the test name. Note that the test's name is only shown at the very end. With '-x' tracing enabled and/or in longer tests the verbose output might be several screenfuls long, making it harder than necessary to find where the output of the test with a given name starts (especially when the outputs to different file descriptors are racing, and the "expecting success"/command block arrives earlier than the "ok" line of the previous test). Print the test name at the start of the test's verbose output, i.e. at the end of the "expecting success" and "checking known breakage" lines, to make the start of a particular test a bit easier to recognize. Also print the test script and test case numbers, to help those poor souls who regularly have to scan through the combined verbose output of several test scripts. So the dummy test above would start like this: expecting success of 9999.1 'commit works': echo content >file && [...] 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t0000-basic.sh | 8 ++++---- t/test-lib-functions.sh | 4 ++-- t/test-lib.sh | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index f9838d88daf1bd..8ba34efefca10e 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -285,14 +285,14 @@ test_expect_success C_LOCALE_OUTPUT 'test --verbose' ' mv t1234-verbose/out t1234-verbose/out+ && grep -v "^Initialized empty" t1234-verbose/out+ >t1234-verbose/out && check_sub_test_lib_test t1234-verbose <<-\EOF - > expecting success: true + > expecting success of 1234.1 '\''passing test'\'': true > ok 1 - passing test > Z - > expecting success: echo foo + > expecting success of 1234.2 '\''test with output'\'': echo foo > foo > ok 2 - test with output > Z - > expecting success: false + > expecting success of 1234.3 '\''failing test'\'': false > not ok 3 - failing test > # false > Z @@ -313,7 +313,7 @@ test_expect_success 'test --verbose-only' ' check_sub_test_lib_test t2345-verbose-only-2 <<-\EOF > ok 1 - passing test > Z - > expecting success: echo foo + > expecting success of 2345.2 '\''test with output'\'': echo foo > foo > ok 2 - test with output > Z diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index f233522f43ab39..cb3941beea619d 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -437,7 +437,7 @@ test_expect_failure () { export test_prereq if ! test_skip "$@" then - say >&3 "checking known breakage: $2" + say >&3 "checking known breakage of $TEST_NUMBER.$test_count '$1': $2" if test_run_ "$2" expecting_failure then test_known_broken_ok_ "$1" @@ -457,7 +457,7 @@ test_expect_success () { export test_prereq if ! 
test_skip "$@" then - say >&3 "expecting success: $2" + say >&3 "expecting success of $TEST_NUMBER.$test_count '$1': $2" if test_run_ "$2" then test_ok_ "$1" diff --git a/t/test-lib.sh b/t/test-lib.sh index 599fd70e141c7b..901f57915193e9 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -212,6 +212,8 @@ fi TEST_STRESS_JOB_SFX="${GIT_TEST_STRESS_JOB_NR:+.stress-$GIT_TEST_STRESS_JOB_NR}" TEST_NAME="$(basename "$0" .sh)" +TEST_NUMBER="${TEST_NAME%%-*}" +TEST_NUMBER="${TEST_NUMBER#t}" TEST_RESULTS_DIR="$TEST_OUTPUT_DIRECTORY/test-results" TEST_RESULTS_BASE="$TEST_RESULTS_DIR/$TEST_NAME$TEST_STRESS_JOB_SFX" TRASH_DIRECTORY="trash directory.$TEST_NAME$TEST_STRESS_JOB_SFX" From 19e8789b236dfe33667747d5523d6689bb59b5ef Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 10:39:58 -0400 Subject: [PATCH 079/710] revision: allow --end-of-options to end option parsing There's currently no robust way to tell Git that a particular option is meant to be a revision, and not an option. So if you have a branch "refs/heads/--foo", you cannot just say: git rev-list --foo You can say: git rev-list refs/heads/--foo But that breaks down if you don't know the refname, and in particular if you're a script passing along a value from elsewhere. In most programs, you can use "--" to end option parsing, like this: some-prog -- "$revision" But that doesn't work for the revision parser, because "--" is already meaningful there: it separates revisions from pathspecs. So we need some other marker to separate options from revisions. This patch introduces "--end-of-options", which serves that purpose: git rev-list --oneline --end-of-options "$revision" will work regardless of what's in "$revision" (well, if you say "--" it may fail, but it won't do something dangerous, like triggering an unexpected option). The name is verbose, but that's probably a good thing; this is meant to be used for scripted invocations where readability is more important than terseness. 
One alternative would be to introduce an explicit option to mark a revision, like: git rev-list --oneline --revision="$revision" That's slightly _more_ informative than this commit (because it makes even something silly like "--" unambiguous). But the pattern of using a separator like "--" is well established in git and in other commands, and it makes some scripting tasks simpler like: git rev-list --end-of-options "$@" There's no documentation in this patch, because it will make sense to describe the feature once it is available everywhere (and support will be added in further patches). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 8 +++++++- t/t6000-rev-list-misc.sh | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/revision.c b/revision.c index 07412297f0248a..51690e480d5b62 100644 --- a/revision.c +++ b/revision.c @@ -2523,6 +2523,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s int i, flags, left, seen_dashdash, got_rev_arg = 0, revarg_opt; struct argv_array prune_data = ARGV_ARRAY_INIT; const char *submodule = NULL; + int seen_end_of_options = 0; if (opt) submodule = opt->submodule; @@ -2552,7 +2553,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s revarg_opt |= REVARG_CANNOT_BE_FILENAME; for (left = i = 1; i < argc; i++) { const char *arg = argv[i]; - if (*arg == '-') { + if (!seen_end_of_options && *arg == '-') { int opts; opts = handle_revision_pseudo_opt(submodule, @@ -2574,6 +2575,11 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s continue; } + if (!strcmp(arg, "--end-of-options")) { + seen_end_of_options = 1; + continue; + } + opts = handle_revision_opt(revs, argc - i, argv + i, &left, argv, opt); if (opts > 0) { diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index 52a9e38d66f322..b8cf82349b1d6d 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -140,4 
+140,12 @@ test_expect_success '--header shows a NUL after each commit' ' test_cmp expect actual ' +test_expect_success 'rev-list --end-of-options' ' + git update-ref refs/heads/--output=yikes HEAD && + git rev-list --end-of-options --output=yikes >actual && + test_path_is_missing yikes && + git rev-list HEAD >expect && + test_cmp expect actual +' + test_done From 51b4594b4024d5cb2b4ace36ff932affa31c03f4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 10:40:16 -0400 Subject: [PATCH 080/710] parse-options: allow --end-of-options as a synonym for "--" The revision option parser recently learned about --end-of-options, but that's not quite enough for all callers. Some of them, like git-log, pick out some options using parse_options(), and then feed the remainder to setup_revisions(). For those cases we need to stop parse_options() from finding more options when it sees --end-of-options, and to retain that option in argv so that setup_revisions() can see it as well. Let's handle this the same as we do "--". We can even piggy-back on the handling of PARSE_OPT_KEEP_DASHDASH, because any caller that wants to retain one will want to retain the other. I've included two tests here. The "log" test covers "--source", which is one of the options it handles with parse_options(), and would fail before this patch. There's also a test that uses the parse-options helper directly. That confirms that the option is handled correctly even in cases without KEEP_DASHDASH or setup_revisions(). I.e., it is safe to use --end-of-options in place of "--" in other programs. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- parse-options.c | 3 ++- t/t0040-parse-options.sh | 7 +++++++ t/t4202-log.sh | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/parse-options.c b/parse-options.c index 87b26a1d922e89..b42f54d48b96c0 100644 --- a/parse-options.c +++ b/parse-options.c @@ -780,7 +780,8 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, continue; } - if (!arg[2]) { /* "--" */ + if (!arg[2] /* "--" */ || + !strcmp(arg + 2, "end-of-options")) { if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { ctx->argc--; ctx->argv++; diff --git a/t/t0040-parse-options.sh b/t/t0040-parse-options.sh index cebc77fab0b254..705a136ed92c99 100755 --- a/t/t0040-parse-options.sh +++ b/t/t0040-parse-options.sh @@ -399,4 +399,11 @@ test_expect_success 'GIT_TEST_DISALLOW_ABBREVIATED_OPTIONS works' ' test-tool parse-options --ye ' +test_expect_success '--end-of-options treats remainder as args' ' + test-tool parse-options \ + --expect="verbose: -1" \ + --expect="arg 00: --verbose" \ + --end-of-options --verbose +' + test_done diff --git a/t/t4202-log.sh b/t/t4202-log.sh index c20209324c8e71..e88ccb04a9715b 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -1707,4 +1707,11 @@ test_expect_success '--exclude-promisor-objects does not BUG-crash' ' test_must_fail git log --exclude-promisor-objects source-a ' +test_expect_success 'log --end-of-options' ' + git update-ref refs/heads/--source HEAD && + git log --end-of-options --source >actual && + git log >expect && + test_cmp expect actual +' + test_done From 67feca3b1c45a51b204253039139b46cc07e145f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 10:40:30 -0400 Subject: [PATCH 081/710] gitcli: document --end-of-options Now that --end-of-options is available for any users of setup_revisions() or parse_options(), which should be effectively everywhere, we can guide people to use it for all their disambiguating needs. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/gitcli.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/gitcli.txt b/Documentation/gitcli.txt index 1ed3ca33b7a94a..4b32876b6e912b 100644 --- a/Documentation/gitcli.txt +++ b/Documentation/gitcli.txt @@ -37,6 +37,12 @@ arguments. Here are the rules: file called HEAD in your work tree, `git diff HEAD` is ambiguous, and you have to say either `git diff HEAD --` or `git diff -- HEAD` to disambiguate. + + * Because `--` disambiguates revisions and paths in some commands, it + cannot be used for those commands to separate options and revisions. + You can use `--end-of-options` for this (it also works for commands + that do not distinguish between revisions in paths, in which case it + is simply an alias for `--`). + When writing a script that is expected to handle random user-input, it is a good practice to make it explicit which arguments are which by placing From 58ebccb478b34a1babf2719fd64e3adb3974139c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 08:26:45 -0400 Subject: [PATCH 082/710] t1309: use short branch name in includeIf.onbranch test Commit 85fe0e800c (config: work around bug with includeif:onbranch and early config, 2019-07-31) tests that our early config-reader does not access the file mentioned by includeIf.onbranch:refs/heads/master.path. But it would never do so even if the feature were implemented, since the onbranch matching code uses the short refname "master". The test still serves its purpose, since the bug fixed by 85fe0e800c is actually that we hit a BUG() before even deciding whether to match the ref. But let's use the correct name to avoid confusion (and which we'll eventually want to trigger once we do the "real" fix described in that commit). 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t1309-early-config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1309-early-config.sh b/t/t1309-early-config.sh index 0c37e7180d1cde..eeb60e41437dde 100755 --- a/t/t1309-early-config.sh +++ b/t/t1309-early-config.sh @@ -91,7 +91,7 @@ test_expect_failure 'ignore .git/ with invalid config' ' test_expect_success 'early config and onbranch' ' echo "[broken" >broken && - test_with_config "[includeif \"onbranch:refs/heads/master\"]path=../broken" + test_with_config "[includeif \"onbranch:master\"]path=../broken" ' test_done From 5732f2b1ef3559f2b2a5c67302f8bf21846488bc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 08:27:26 -0400 Subject: [PATCH 083/710] common-main: delay trace2 initialization We initialize the trace2 system in the common main() function so that all programs (even ones that aren't builtins) will enable tracing. But trace2 startup is relatively heavy-weight, as we have to actually read on-disk config to decide whether to trace. This can cause unexpected interactions with other common-main initialization. For instance, we'll end up in the config code before calling initialize_the_repository(), and the usual invariant that the_repository is never NULL will not hold. Let's push the trace2 initialization further down in common-main, to just before we execute cmd_main(). The other parts of the initialization are much more self-contained and less likely to call library code that depends on those kinds of invariants. Originally the trace2 code tried to start as early as possible to get accurate timings. But the timer initialization was split out from the config reading in a089724958 (trace2: refactor setting process starting time, 2019-04-15), so there shouldn't be any impact from this patch. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- common-main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common-main.c b/common-main.c index 582a7b18869fb8..71e21dd20a3b14 100644 --- a/common-main.c +++ b/common-main.c @@ -39,16 +39,16 @@ int main(int argc, const char **argv) git_resolve_executable_dir(argv[0]); - trace2_initialize(); - trace2_cmd_start(argv); - trace2_collect_process_info(TRACE2_PROCESS_INFO_STARTUP); - git_setup_gettext(); initialize_the_repository(); attr_start(); + trace2_initialize(); + trace2_cmd_start(argv); + trace2_collect_process_info(TRACE2_PROCESS_INFO_STARTUP); + result = cmd_main(argc, argv); trace2_cmd_exit(result); From 22932d9169fea7c826c85e2407054feff30ebaf1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 6 Aug 2019 08:27:58 -0400 Subject: [PATCH 084/710] config: stop checking whether the_repository is NULL Since the previous commit, our invariant that the_repository is never NULL is restored, and we can stop being defensive in include_by_branch(). We can confirm the fix by showing that an onbranch config include will not cause a segfault when run outside a git repository. I've put this in t1309-early-config since it's related to the case added by 85fe0e800c (config: work around bug with includeif:onbranch and early config, 2019-07-31), though technically the issue was with read_very_early_config() and not read_early_config(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- config.c | 2 +- t/t1309-early-config.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/config.c b/config.c index 3900e4947be92b..cc637363bbae10 100644 --- a/config.c +++ b/config.c @@ -275,7 +275,7 @@ static int include_by_branch(const char *cond, size_t cond_len) int flags; int ret; struct strbuf pattern = STRBUF_INIT; - const char *refname = !the_repository || !the_repository->gitdir ? + const char *refname = !the_repository->gitdir ? 
NULL : resolve_ref_unsafe("HEAD", 0, NULL, &flags); const char *shortname; diff --git a/t/t1309-early-config.sh b/t/t1309-early-config.sh index eeb60e41437dde..3a0de0ddaa553e 100755 --- a/t/t1309-early-config.sh +++ b/t/t1309-early-config.sh @@ -94,4 +94,9 @@ test_expect_success 'early config and onbranch' ' test_with_config "[includeif \"onbranch:master\"]path=../broken" ' +test_expect_success 'onbranch config outside of git repo' ' + test_config_global includeIf.onbranch:master.path non-existent && + nongit git help +' + test_done From c9b9c09dae175f75bed4363cc6278c3f0cb3b9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Wed, 7 Aug 2019 15:08:51 +0200 Subject: [PATCH 085/710] nedmalloc: do assignments only after the declaration section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the following compiler warning: In file included from compat/nedmalloc/nedmalloc.c:63: compat/nedmalloc/malloc.c.h: In function ‘pthread_release_lock’: compat/nedmalloc/malloc.c.h:1759:5: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] 1759 | volatile unsigned int* lp = &sl->l; | ^~~~~~~~ Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- compat/nedmalloc/malloc.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h index b833ff9225e73a..88c131ca93ade1 100644 --- a/compat/nedmalloc/malloc.c.h +++ b/compat/nedmalloc/malloc.c.h @@ -1755,10 +1755,10 @@ static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { assert(sl->l != 0); assert(sl->threadid == CURRENT_THREAD); if (--sl->c == 0) { - sl->threadid = 0; volatile unsigned int* lp = &sl->l; int prev = 0; int ret; + sl->threadid = 0; __asm__ __volatile__ ("lock; xchgl %0, %1" : "=r" (ret) : "m" (*(lp)), "0"(prev) From 70597e838601ee6be5c43ece255c9df47f8fe9a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Wed, 7 Aug 2019 
15:09:02 +0200 Subject: [PATCH 086/710] nedmalloc: avoid compiler warning about unused value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cast the evaluated value of the macro INITIAL_LOCK to void to instruct the compiler that we're not interested in said value nor the following warning: In file included from compat/nedmalloc/nedmalloc.c:63: compat/nedmalloc/malloc.c.h: In function ‘init_user_mstate’: compat/nedmalloc/malloc.c.h:1706:62: error: right-hand operand of comma expression has no effect [-Werror=unused-value] 1706 | #define INITIAL_LOCK(sl) (memset(sl, 0, sizeof(MLOCK_T)), 0) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~ compat/nedmalloc/malloc.c.h:5020:3: note: in expansion of macro ‘INITIAL_LOCK’ 5020 | INITIAL_LOCK(&m->mutex); | ^~~~~~~~~~~~ Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- compat/nedmalloc/malloc.c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h index 88c131ca93ade1..913434959002f2 100644 --- a/compat/nedmalloc/malloc.c.h +++ b/compat/nedmalloc/malloc.c.h @@ -3066,7 +3066,7 @@ static int init_mparams(void) { #if !ONLY_MSPACES /* Set up lock for main malloc area */ gm->mflags = mparams.default_mflags; - INITIAL_LOCK(&gm->mutex); + (void)INITIAL_LOCK(&gm->mutex); #endif #if (FOOTERS && !INSECURE) @@ -5017,7 +5017,7 @@ static mstate init_user_mstate(char* tbase, size_t tsize) { mchunkptr msp = align_as_chunk(tbase); mstate m = (mstate)(chunk2mem(msp)); memset(m, 0, msize); - INITIAL_LOCK(&m->mutex); + (void)INITIAL_LOCK(&m->mutex); msp->head = (msize|PINUSE_BIT|CINUSE_BIT); m->seg.base = m->least_addr = tbase; m->seg.size = m->footprint = m->max_footprint = tsize; From f78f6c7e0ca91a48a4b519cc2cf1197d16810f7a Mon Sep 17 00:00:00 2001 From: Josh Steadmon Date: Wed, 7 Aug 2019 11:57:05 -0700 Subject: [PATCH 087/710] t7503: verify proper hook execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit t7503 did not verify that the expected hooks actually ran during testing. Fix that by making the hook scripts write their $0 into a file so that we can compare actual execution vs. expected execution. While we're at it, do some test style cleanups, such as using write_script() and doing setup inside a test_expect_success block. Improved-by: Martin Ågren Signed-off-by: Martin Ågren Signed-off-by: Josh Steadmon Signed-off-by: Junio C Hamano --- t/t7503-pre-commit-hook.sh | 157 +++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 68 deletions(-) diff --git a/t/t7503-pre-commit-hook.sh b/t/t7503-pre-commit-hook.sh index 984889b39d3f8e..6aa83204c25350 100755 --- a/t/t7503-pre-commit-hook.sh +++ b/t/t7503-pre-commit-hook.sh @@ -4,124 +4,144 @@ test_description='pre-commit hook' . ./test-lib.sh -test_expect_success 'with no hook' ' +HOOKDIR="$(git rev-parse --git-dir)/hooks" +PRECOMMIT="$HOOKDIR/pre-commit" + +# Prepare sample scripts that write their $0 to actual_hooks +test_expect_success 'sample script setup' ' + mkdir -p "$HOOKDIR" && + write_script "$HOOKDIR/success.sample" <<-\EOF && + echo $0 >>actual_hooks + exit 0 + EOF + write_script "$HOOKDIR/fail.sample" <<-\EOF && + echo $0 >>actual_hooks + exit 1 + EOF + write_script "$HOOKDIR/non-exec.sample" <<-\EOF && + echo $0 >>actual_hooks + exit 1 + EOF + chmod -x "$HOOKDIR/non-exec.sample" && + write_script "$HOOKDIR/require-prefix.sample" <<-\EOF && + echo $0 >>actual_hooks + test $GIT_PREFIX = "success/" + EOF + write_script "$HOOKDIR/check-author.sample" <<-\EOF + echo $0 >>actual_hooks + test "$GIT_AUTHOR_NAME" = "New Author" && + test "$GIT_AUTHOR_EMAIL" = "newauthor@example.com" + EOF +' - echo "foo" > file && +test_expect_success 'with no hook' ' + test_when_finished "rm -f actual_hooks" && + echo "foo" >file && git add file && - git commit -m "first" - + git commit -m "first" && + test_path_is_missing actual_hooks ' test_expect_success '--no-verify with no 
hook' ' - - echo "bar" > file && + test_when_finished "rm -f actual_hooks" && + echo "bar" >file && git add file && - git commit --no-verify -m "bar" - + git commit --no-verify -m "bar" && + test_path_is_missing actual_hooks ' -# now install hook that always succeeds -HOOKDIR="$(git rev-parse --git-dir)/hooks" -HOOK="$HOOKDIR/pre-commit" -mkdir -p "$HOOKDIR" -cat > "$HOOK" <> file && + test_when_finished "rm -f \"$PRECOMMIT\" expected_hooks actual_hooks" && + cp "$HOOKDIR/success.sample" "$PRECOMMIT" && + echo "$PRECOMMIT" >expected_hooks && + echo "more" >>file && git add file && - git commit -m "more" - + git commit -m "more" && + test_cmp expected_hooks actual_hooks ' test_expect_success '--no-verify with succeeding hook' ' - - echo "even more" >> file && + test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && + cp "$HOOKDIR/success.sample" "$PRECOMMIT" && + echo "even more" >>file && git add file && - git commit --no-verify -m "even more" - + git commit --no-verify -m "even more" && + test_path_is_missing actual_hooks ' -# now a hook that fails -cat > "$HOOK" <> file && + test_when_finished "rm -f \"$PRECOMMIT\" expected_hooks actual_hooks" && + cp "$HOOKDIR/fail.sample" "$PRECOMMIT" && + echo "$PRECOMMIT" >expected_hooks && + echo "another" >>file && git add file && - test_must_fail git commit -m "another" - + test_must_fail git commit -m "another" && + test_cmp expected_hooks actual_hooks ' test_expect_success '--no-verify with failing hook' ' - - echo "stuff" >> file && + test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && + cp "$HOOKDIR/fail.sample" "$PRECOMMIT" && + echo "stuff" >>file && git add file && - git commit --no-verify -m "stuff" - + git commit --no-verify -m "stuff" && + test_path_is_missing actual_hooks ' -chmod -x "$HOOK" test_expect_success POSIXPERM 'with non-executable hook' ' - - echo "content" >> file && + test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && + cp "$HOOKDIR/non-exec.sample" "$PRECOMMIT" && + echo "content" 
>>file && git add file && - git commit -m "content" - + git commit -m "content" && + test_path_is_missing actual_hooks ' test_expect_success POSIXPERM '--no-verify with non-executable hook' ' - - echo "more content" >> file && + test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && + cp "$HOOKDIR/non-exec.sample" "$PRECOMMIT" && + echo "more content" >>file && git add file && - git commit --no-verify -m "more content" - + git commit --no-verify -m "more content" && + test_path_is_missing actual_hooks ' -chmod +x "$HOOK" - -# a hook that checks $GIT_PREFIX and succeeds inside the -# success/ subdirectory only -cat > "$HOOK" <> file && + test_when_finished "rm -rf \"$PRECOMMIT\" expected_hooks actual_hooks success" && + cp "$HOOKDIR/require-prefix.sample" "$PRECOMMIT" && + echo "$PRECOMMIT" >expected_hooks && + echo "more content" >>file && git add file && mkdir success && ( cd success && git commit -m "hook requires GIT_PREFIX = success/" ) && - rmdir success + test_cmp expected_hooks actual_hooks ' test_expect_success 'with failing hook requiring GIT_PREFIX' ' - - echo "more content" >> file && + test_when_finished "rm -rf \"$PRECOMMIT\" expected_hooks actual_hooks fail" && + cp "$HOOKDIR/require-prefix.sample" "$PRECOMMIT" && + echo "$PRECOMMIT" >expected_hooks && + echo "more content" >>file && git add file && mkdir fail && ( cd fail && test_must_fail git commit -m "hook must fail" ) && - rmdir fail && - git checkout -- file + git checkout -- file && + test_cmp expected_hooks actual_hooks ' test_expect_success 'check the author in hook' ' - write_script "$HOOK" <<-\EOF && - test "$GIT_AUTHOR_NAME" = "New Author" && - test "$GIT_AUTHOR_EMAIL" = "newauthor@example.com" + test_when_finished "rm -f \"$PRECOMMIT\" expected_hooks actual_hooks" && + cp "$HOOKDIR/check-author.sample" "$PRECOMMIT" && + cat >expected_hooks <<-EOF && + $PRECOMMIT + $PRECOMMIT + $PRECOMMIT EOF test_must_fail git commit --allow-empty -m "by a.u.thor" && ( @@ -133,7 +153,8 @@ 
test_expect_success 'check the author in hook' ' ) && git commit --author="New Author " \ --allow-empty -m "by new.author via command line" && - git show -s + git show -s && + test_cmp expected_hooks actual_hooks ' test_done From a1f3dd7eb303d924f90da30808f7702869430321 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Wed, 7 Aug 2019 11:57:06 -0700 Subject: [PATCH 088/710] merge: do no-verify like commit f8b863598c ("builtin/merge: honor commit-msg hook for merges", 2017-09-07) introduced the no-verify flag to merge for bypassing the commit-msg hook, though in a different way from the implementation in commit.c. Change the implementation in merge.c to be the same as in commit.c so that both do the same in the same way. This also changes the output of "git merge --help" to be more clear that the hook return code is respected by default. [js: * reworded commit message * squashed documentation changes from original series' patch 3/4 ] Signed-off-by: Michael J Gruber Signed-off-by: Josh Steadmon Signed-off-by: Junio C Hamano --- Documentation/git-merge.txt | 2 +- Documentation/merge-options.txt | 4 ++++ builtin/merge.c | 6 +++--- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/git-merge.txt b/Documentation/git-merge.txt index 01fd52dc706380..092529c619e29c 100644 --- a/Documentation/git-merge.txt +++ b/Documentation/git-merge.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git merge' [-n] [--stat] [--no-commit] [--squash] [--[no-]edit] - [-s ] [-X ] [-S[]] + [--no-verify] [-s ] [-X ] [-S[]] [--[no-]allow-unrelated-histories] [--[no-]rerere-autoupdate] [-m ] [-F ] [...] 'git merge' (--continue | --abort | --quit) diff --git a/Documentation/merge-options.txt b/Documentation/merge-options.txt index 79a00d2a4abd6f..d6a9f4b96faaf2 100644 --- a/Documentation/merge-options.txt +++ b/Documentation/merge-options.txt @@ -105,6 +105,10 @@ option can be used to override --squash. + With --squash, --commit is not allowed, and will fail. 
+--no-verify:: + This option bypasses the pre-merge and commit-msg hooks. + See also linkgit:githooks[5]. + -s :: --strategy=:: Use the given merge strategy; can be supplied more than diff --git a/builtin/merge.c b/builtin/merge.c index e2ccbc44e20417..4425a7a12e92b4 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -81,7 +81,7 @@ static int show_progress = -1; static int default_to_upstream = 1; static int signoff; static const char *sign_commit; -static int verify_msg = 1; +static int no_verify; static struct strategy all_strategy[] = { { "recursive", DEFAULT_TWOHEAD | NO_TRIVIAL }, @@ -287,7 +287,7 @@ static struct option builtin_merge_options[] = { N_("GPG sign commit"), PARSE_OPT_OPTARG, NULL, (intptr_t) "" }, OPT_BOOL(0, "overwrite-ignore", &overwrite_ignore, N_("update ignored files (default)")), OPT_BOOL(0, "signoff", &signoff, N_("add Signed-off-by:")), - OPT_BOOL(0, "verify", &verify_msg, N_("verify commit-msg hook")), + OPT_BOOL(0, "no-verify", &no_verify, N_("bypass commit-msg hook")), OPT_END() }; @@ -842,7 +842,7 @@ static void prepare_to_commit(struct commit_list *remoteheads) abort_commit(remoteheads, NULL); } - if (verify_msg && run_commit_hook(0 < option_edit, get_index_file(), + if (!no_verify && run_commit_hook(0 < option_edit, get_index_file(), "commit-msg", git_path_merge_msg(the_repository), NULL)) abort_commit(remoteheads, NULL); From 6098817fd7f64209664c701df30096dc0f4fb876 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Wed, 7 Aug 2019 11:57:07 -0700 Subject: [PATCH 089/710] git-merge: honor pre-merge-commit hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-merge does not honor the pre-commit hook when doing automatic merge commits, and for compatibility reasons this is going to stay. Introduce a pre-merge-commit hook which is called for an automatic merge commit just like pre-commit is called for a non-automatic merge commit (or any other commit). 
[js: * renamed hook from "pre-merge" to "pre-merge-commit" * only discard the index if the hook is actually present * expanded githooks documentation entry * clarified that hook should write messages to stderr * squashed test changes from the original series' patch 4/4 * modified tests to follow new pattern from this series' patch 1/4 * added a test case for non-executable merge hooks * added a test case for failed merges * when testing that the merge hook did not run, make sure we actually have a merge to perform (by resetting the "side" branch to its original state). * reworded commit message ] Improved-by: Martin Ågren Signed-off-by: Michael J Gruber Signed-off-by: Martin Ågren Signed-off-by: Josh Steadmon Signed-off-by: Junio C Hamano --- Documentation/githooks.txt | 21 +++++ builtin/merge.c | 12 +++ ...-pre-commit-and-pre-merge-commit-hooks.sh} | 84 ++++++++++++++++++- templates/hooks--pre-merge-commit.sample | 13 +++ 4 files changed, 129 insertions(+), 1 deletion(-) rename t/{t7503-pre-commit-hook.sh => t7503-pre-commit-and-pre-merge-commit-hooks.sh} (63%) create mode 100755 templates/hooks--pre-merge-commit.sample diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 82cd573776cec6..d9da474fb01c2e 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -103,6 +103,27 @@ The default 'pre-commit' hook, when enabled--and with the `hooks.allownonascii` config option unset or set to false--prevents the use of non-ASCII filenames. +pre-merge-commit +~~~~~~~~~~~~~~~~ + +This hook is invoked by linkgit:git-merge[1]. It takes no parameters, and is +invoked after the merge has been carried out successfully and before +obtaining the proposed commit log message to +make a commit. Exiting with a non-zero status from this script +causes the `git merge` command to abort before creating a commit. + +The default 'pre-merge-commit' hook, when enabled, runs the +'pre-commit' hook, if the latter is enabled. 
+ +This hook is invoked with the environment variable +`GIT_EDITOR=:` if the command will not bring up an editor +to modify the commit message. + +If the merge cannot be carried out automatically, the conflicts +need to be resolved and the result committed separately (see +linkgit:git-merge[1]). At that point, this hook will not be executed, +but the 'pre-commit' hook will, if it is enabled. + prepare-commit-msg ~~~~~~~~~~~~~~~~~~ diff --git a/builtin/merge.c b/builtin/merge.c index 4425a7a12e92b4..bf0ae68c40caca 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -816,6 +816,18 @@ static void write_merge_heads(struct commit_list *); static void prepare_to_commit(struct commit_list *remoteheads) { struct strbuf msg = STRBUF_INIT; + const char *index_file = get_index_file(); + + if (run_commit_hook(0 < option_edit, index_file, "pre-merge-commit", NULL)) + abort_commit(remoteheads, NULL); + /* + * Re-read the index as pre-merge-commit hook could have updated it, + * and write it out as a tree. We must do this before we invoke + * the editor and after we invoke run_status above. + */ + if (find_hook("pre-merge-commit")) + discard_cache(); + read_cache_from(index_file); strbuf_addbuf(&msg, &merge_msg); if (squash) BUG("the control must not reach here under --squash"); diff --git a/t/t7503-pre-commit-hook.sh b/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh similarity index 63% rename from t/t7503-pre-commit-hook.sh rename to t/t7503-pre-commit-and-pre-merge-commit-hooks.sh index 6aa83204c25350..7a5434c7ab8580 100755 --- a/t/t7503-pre-commit-hook.sh +++ b/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh @@ -1,11 +1,12 @@ #!/bin/sh -test_description='pre-commit hook' +test_description='pre-commit and pre-merge-commit hooks' . 
./test-lib.sh HOOKDIR="$(git rev-parse --git-dir)/hooks" PRECOMMIT="$HOOKDIR/pre-commit" +PREMERGE="$HOOKDIR/pre-merge-commit" # Prepare sample scripts that write their $0 to actual_hooks test_expect_success 'sample script setup' ' @@ -34,6 +35,30 @@ test_expect_success 'sample script setup' ' EOF ' +test_expect_success 'root commit' ' + echo "root" >file && + git add file && + git commit -m "zeroth" && + git checkout -b side && + echo "foo" >foo && + git add foo && + git commit -m "make it non-ff" && + git branch side-orig side && + git checkout master +' + +test_expect_success 'setup conflicting branches' ' + test_when_finished "git checkout master" && + git checkout -b conflicting-a master && + echo a >conflicting && + git add conflicting && + git commit -m conflicting-a && + git checkout -b conflicting-b master && + echo b >conflicting && + git add conflicting && + git commit -m conflicting-b +' + test_expect_success 'with no hook' ' test_when_finished "rm -f actual_hooks" && echo "foo" >file && @@ -42,6 +67,15 @@ test_expect_success 'with no hook' ' test_path_is_missing actual_hooks ' +test_expect_success 'with no hook (merge)' ' + test_when_finished "rm -f actual_hooks" && + git branch -f side side-orig && + git checkout side && + git merge -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success '--no-verify with no hook' ' test_when_finished "rm -f actual_hooks" && echo "bar" >file && @@ -60,6 +94,34 @@ test_expect_success 'with succeeding hook' ' test_cmp expected_hooks actual_hooks ' +test_expect_success 'with succeeding hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" expected_hooks actual_hooks" && + cp "$HOOKDIR/success.sample" "$PREMERGE" && + echo "$PREMERGE" >expected_hooks && + git checkout side && + git merge -m "merge master" master && + git checkout master && + test_cmp expected_hooks actual_hooks +' + +test_expect_success 'automatic merge fails; both hooks are available' ' + 
test_when_finished "rm -f \"$PREMERGE\" \"$PRECOMMIT\"" && + test_when_finished "rm -f expected_hooks actual_hooks" && + test_when_finished "git checkout master" && + cp "$HOOKDIR/success.sample" "$PREMERGE" && + cp "$HOOKDIR/success.sample" "$PRECOMMIT" && + + git checkout conflicting-a && + test_must_fail git merge -m "merge conflicting-b" conflicting-b && + test_path_is_missing actual_hooks && + + echo "$PRECOMMIT" >expected_hooks && + echo a+b >conflicting && + git add conflicting && + git commit -m "resolve conflict" && + test_cmp expected_hooks actual_hooks +' + test_expect_success '--no-verify with succeeding hook' ' test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && cp "$HOOKDIR/success.sample" "$PRECOMMIT" && @@ -88,6 +150,16 @@ test_expect_success '--no-verify with failing hook' ' test_path_is_missing actual_hooks ' +test_expect_success 'with failing hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" expected_hooks actual_hooks" && + cp "$HOOKDIR/fail.sample" "$PREMERGE" && + echo "$PREMERGE" >expected_hooks && + git checkout side && + test_must_fail git merge -m "merge master" master && + git checkout master && + test_cmp expected_hooks actual_hooks +' + test_expect_success POSIXPERM 'with non-executable hook' ' test_when_finished "rm -f \"$PRECOMMIT\" actual_hooks" && cp "$HOOKDIR/non-exec.sample" "$PRECOMMIT" && @@ -106,6 +178,16 @@ test_expect_success POSIXPERM '--no-verify with non-executable hook' ' test_path_is_missing actual_hooks ' +test_expect_success POSIXPERM 'with non-executable hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" actual_hooks" && + cp "$HOOKDIR/non-exec.sample" "$PREMERGE" && + git branch -f side side-orig && + git checkout side && + git merge -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success 'with hook requiring GIT_PREFIX' ' test_when_finished "rm -rf \"$PRECOMMIT\" expected_hooks actual_hooks success" && cp "$HOOKDIR/require-prefix.sample" 
"$PRECOMMIT" && diff --git a/templates/hooks--pre-merge-commit.sample b/templates/hooks--pre-merge-commit.sample new file mode 100755 index 00000000000000..399eab1924e39d --- /dev/null +++ b/templates/hooks--pre-merge-commit.sample @@ -0,0 +1,13 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git merge" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message to +# stderr if it wants to stop the merge commit. +# +# To enable this hook, rename this file to "pre-merge-commit". + +. git-sh-setup +test -x "$GIT_DIR/hooks/pre-commit" && + exec "$GIT_DIR/hooks/pre-commit" +: From bc40ce4de612942e9a6a755f7432806ee095bd82 Mon Sep 17 00:00:00 2001 From: Michael J Gruber Date: Wed, 7 Aug 2019 11:57:08 -0700 Subject: [PATCH 090/710] merge: --no-verify to bypass pre-merge-commit hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Analogous to commit, introduce a '--no-verify' option which bypasses the pre-merge-commit hook. The shorthand '-n' is taken by '--no-stat' already. [js: * reworded commit message to reflect current state of --no-stat flag and new hook name * fixed flag documentation to reflect new hook name * cleaned up trailing whitespace * squashed test changes from the original series' patch 4/4 * modified tests to follow pattern from this series' patch 1/4 * added a test case for --no-verify with non-executable hook * when testing that the merge hook did not run, make sure we actually have a merge to perform (by resetting the "side" branch to its original state). 
] Improved-by: Martin Ågren Signed-off-by: Michael J Gruber Signed-off-by: Martin Ågren Signed-off-by: Josh Steadmon Signed-off-by: Junio C Hamano --- Documentation/githooks.txt | 3 +- builtin/merge.c | 4 +- ...3-pre-commit-and-pre-merge-commit-hooks.sh | 39 +++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index d9da474fb01c2e..57d6e2b98da7a0 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -106,7 +106,8 @@ the use of non-ASCII filenames. pre-merge-commit ~~~~~~~~~~~~~~~~ -This hook is invoked by linkgit:git-merge[1]. It takes no parameters, and is +This hook is invoked by linkgit:git-merge[1], and can be bypassed +with the `--no-verify` option. It takes no parameters, and is invoked after the merge has been carried out successfully and before obtaining the proposed commit log message to make a commit. Exiting with a non-zero status from this script diff --git a/builtin/merge.c b/builtin/merge.c index bf0ae68c40caca..c9746e37b86fa1 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -287,7 +287,7 @@ static struct option builtin_merge_options[] = { N_("GPG sign commit"), PARSE_OPT_OPTARG, NULL, (intptr_t) "" }, OPT_BOOL(0, "overwrite-ignore", &overwrite_ignore, N_("update ignored files (default)")), OPT_BOOL(0, "signoff", &signoff, N_("add Signed-off-by:")), - OPT_BOOL(0, "no-verify", &no_verify, N_("bypass commit-msg hook")), + OPT_BOOL(0, "no-verify", &no_verify, N_("bypass pre-merge-commit and commit-msg hooks")), OPT_END() }; @@ -818,7 +818,7 @@ static void prepare_to_commit(struct commit_list *remoteheads) struct strbuf msg = STRBUF_INIT; const char *index_file = get_index_file(); - if (run_commit_hook(0 < option_edit, index_file, "pre-merge-commit", NULL)) + if (!no_verify && run_commit_hook(0 < option_edit, index_file, "pre-merge-commit", NULL)) abort_commit(remoteheads, NULL); /* * Re-read the index as pre-merge-commit hook could have 
updated it, diff --git a/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh b/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh index 7a5434c7ab8580..b3485450a2059d 100755 --- a/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh +++ b/t/t7503-pre-commit-and-pre-merge-commit-hooks.sh @@ -84,6 +84,15 @@ test_expect_success '--no-verify with no hook' ' test_path_is_missing actual_hooks ' +test_expect_success '--no-verify with no hook (merge)' ' + test_when_finished "rm -f actual_hooks" && + git branch -f side side-orig && + git checkout side && + git merge --no-verify -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success 'with succeeding hook' ' test_when_finished "rm -f \"$PRECOMMIT\" expected_hooks actual_hooks" && cp "$HOOKDIR/success.sample" "$PRECOMMIT" && @@ -131,6 +140,16 @@ test_expect_success '--no-verify with succeeding hook' ' test_path_is_missing actual_hooks ' +test_expect_success '--no-verify with succeeding hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" actual_hooks" && + cp "$HOOKDIR/success.sample" "$PREMERGE" && + git branch -f side side-orig && + git checkout side && + git merge --no-verify -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success 'with failing hook' ' test_when_finished "rm -f \"$PRECOMMIT\" expected_hooks actual_hooks" && cp "$HOOKDIR/fail.sample" "$PRECOMMIT" && @@ -160,6 +179,16 @@ test_expect_success 'with failing hook (merge)' ' test_cmp expected_hooks actual_hooks ' +test_expect_success '--no-verify with failing hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" actual_hooks" && + cp "$HOOKDIR/fail.sample" "$PREMERGE" && + git branch -f side side-orig && + git checkout side && + git merge --no-verify -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success POSIXPERM 'with non-executable hook' ' test_when_finished "rm -f \"$PRECOMMIT\" 
actual_hooks" && cp "$HOOKDIR/non-exec.sample" "$PRECOMMIT" && @@ -188,6 +217,16 @@ test_expect_success POSIXPERM 'with non-executable hook (merge)' ' test_path_is_missing actual_hooks ' +test_expect_success POSIXPERM '--no-verify with non-executable hook (merge)' ' + test_when_finished "rm -f \"$PREMERGE\" actual_hooks" && + cp "$HOOKDIR/non-exec.sample" "$PREMERGE" && + git branch -f side side-orig && + git checkout side && + git merge --no-verify -m "merge master" master && + git checkout master && + test_path_is_missing actual_hooks +' + test_expect_success 'with hook requiring GIT_PREFIX' ' test_when_finished "rm -rf \"$PRECOMMIT\" expected_hooks actual_hooks success" && cp "$HOOKDIR/require-prefix.sample" "$PRECOMMIT" && From 5440eb0ea2651c45a0e46f2335ecbb8d1f42c584 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Sun, 4 Aug 2019 20:09:19 +0530 Subject: [PATCH 091/710] git-gui: call do_quit before destroying the main window If the toplevel window for the window being destroyed is the main window (aka "."), then simply destroying it means the cleanup tasks are not executed (like saving the commit message buffer, saving window state, etc.) All this is handled by do_quit. Call it instead of directly destroying the main window. For other toplevel windows, the old behavior remains. Signed-off-by: Pratyush Yadav Signed-off-by: Junio C Hamano --- git-gui.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/git-gui.sh b/git-gui.sh index 5bc21b878d413e..09c1b3c097a0a1 100755 --- a/git-gui.sh +++ b/git-gui.sh @@ -3006,8 +3006,23 @@ unset doc_path doc_url wm protocol . 
WM_DELETE_WINDOW do_quit bind all <$M1B-Key-q> do_quit bind all <$M1B-Key-Q> do_quit -bind all <$M1B-Key-w> {destroy [winfo toplevel %W]} -bind all <$M1B-Key-W> {destroy [winfo toplevel %W]} + +set m1b_w_script { + set toplvl_win [winfo toplevel %W] + + # If we are destroying the main window, we should call do_quit to take + # care of cleanup before exiting the program. + if {$toplvl_win eq "."} { + do_quit + } else { + destroy $toplvl_win + } +} + +bind all <$M1B-Key-w> $m1b_w_script +bind all <$M1B-Key-W> $m1b_w_script + +unset m1b_w_script set subcommand_args {} proc usage {} { From da4589ce7ecb84b51b2819542f9555848dccf726 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 8 Aug 2019 07:19:00 -0700 Subject: [PATCH 092/710] trace2: trim whitespace in region messages in perf target format Avoid unnecessary trailing whitespace in "region_enter" and "region_leave" messages in perf target format. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2/tr2_tgt_perf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c index 4a9d99218bf1be..fb845cb62774de 100644 --- a/trace2/tr2_tgt_perf.c +++ b/trace2/tr2_tgt_perf.c @@ -452,8 +452,11 @@ static void fn_region_enter_printf_va_fl(const char *file, int line, struct strbuf buf_payload = STRBUF_INIT; if (label) - strbuf_addf(&buf_payload, "label:%s ", label); - maybe_append_string_va(&buf_payload, fmt, ap); + strbuf_addf(&buf_payload, "label:%s", label); + if (fmt && *fmt) { + strbuf_addch(&buf_payload, ' '); + maybe_append_string_va(&buf_payload, fmt, ap); + } perf_io_write_fl(file, line, event_name, repo, &us_elapsed_absolute, NULL, category, &buf_payload); @@ -469,8 +472,11 @@ static void fn_region_leave_printf_va_fl( struct strbuf buf_payload = STRBUF_INIT; if (label) - strbuf_addf(&buf_payload, "label:%s ", label); - maybe_append_string_va(&buf_payload, fmt, ap); + strbuf_addf(&buf_payload, "label:%s", label); + if (fmt &&
*fmt) { + strbuf_addch(&buf_payload, ' ' ); + maybe_append_string_va(&buf_payload, fmt, ap); + } perf_io_write_fl(file, line, event_name, repo, &us_elapsed_absolute, &us_elapsed_region, category, &buf_payload); From 04f10d332f47aed7d93ff3ed5c2bb3a366e44693 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 8 Aug 2019 07:19:01 -0700 Subject: [PATCH 093/710] trace2: remove dead code in maybe_add_string_va() Remove an unnecessary "if" block in maybe_add_string_va(). Commit "ad006fe419e trace2: NULL is not allowed for va_list" changed "if (fmt && *fmt && ap)" to just "if (fmt && *fmt)" because it isn't safe to treat 'ap' as a pointer. This made the "if" block following it unnecessary. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2/tr2_tgt_event.c | 5 ----- trace2/tr2_tgt_normal.c | 5 ----- trace2/tr2_tgt_perf.c | 5 ----- 3 files changed, 15 deletions(-) diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c index c2852d1bd2bd85..9bcac20d1b5a3d 100644 --- a/trace2/tr2_tgt_event.c +++ b/trace2/tr2_tgt_event.c @@ -205,11 +205,6 @@ static void maybe_add_string_va(struct json_writer *jw, const char *field_name, strbuf_release(&buf); return; } - - if (fmt && *fmt) { - jw_object_string(jw, field_name, fmt); - return; - } } static void fn_error_va_fl(const char *file, int line, const char *fmt, diff --git a/trace2/tr2_tgt_normal.c b/trace2/tr2_tgt_normal.c index 00b116d797c844..47a18825573b63 100644 --- a/trace2/tr2_tgt_normal.c +++ b/trace2/tr2_tgt_normal.c @@ -135,11 +135,6 @@ static void maybe_append_string_va(struct strbuf *buf, const char *fmt, va_end(copy_ap); return; } - - if (fmt && *fmt) { - strbuf_addstr(buf, fmt); - return; - } } static void fn_error_va_fl(const char *file, int line, const char *fmt, diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c index fb845cb62774de..8e52395de2d1ba 100644 --- a/trace2/tr2_tgt_perf.c +++ b/trace2/tr2_tgt_perf.c @@ -239,11 +239,6 @@ static void maybe_append_string_va(struct strbuf *buf, 
const char *fmt, va_end(copy_ap); return; } - - if (fmt && *fmt) { - strbuf_addstr(buf, fmt); - return; - } } static void fn_error_va_fl(const char *file, int line, const char *fmt, From ad43e37839ea6a3cf369af074a723a60cd66233b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 8 Aug 2019 07:19:02 -0700 Subject: [PATCH 094/710] trace2: trim trailing whitespace in normal format error message Avoid creating unnecessary trailing whitespace in normal target format error messages when the message is omitted. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2/tr2_tgt_normal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/trace2/tr2_tgt_normal.c b/trace2/tr2_tgt_normal.c index 47a18825573b63..213724d5cb9264 100644 --- a/trace2/tr2_tgt_normal.c +++ b/trace2/tr2_tgt_normal.c @@ -142,8 +142,11 @@ static void fn_error_va_fl(const char *file, int line, const char *fmt, { struct strbuf buf_payload = STRBUF_INIT; - strbuf_addstr(&buf_payload, "error "); - maybe_append_string_va(&buf_payload, fmt, ap); + strbuf_addstr(&buf_payload, "error"); + if (fmt && *fmt) { + strbuf_addch(&buf_payload, ' '); + maybe_append_string_va(&buf_payload, fmt, ap); + } normal_io_write_fl(file, line, &buf_payload); strbuf_release(&buf_payload); } From c2b890aca50885ce7503dfe4bdae4ac83bbb2331 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 9 Aug 2019 08:00:55 -0700 Subject: [PATCH 095/710] quote: add sq_append_quote_argv_pretty() sq_quote_argv_pretty() builds a "pretty" string from the given argv. It inserts whitespace before each value, rather than just between them, so the resulting string always has a leading space. Lets give callers an option to not have the leading space or have to ltrim() it later. Create sq_append_quote_argv_pretty() to convert an argv into a pretty, quoted if necessary, string with space delimiters and without a leading space. 
Convert the existing sq_quote_argv_pretty() to use this new routine while preserving the leading space behavior. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- quote.c | 18 +++++++++++++++++- quote.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/quote.c b/quote.c index 7f2aa6faa43fed..c8ba6b397a0003 100644 --- a/quote.c +++ b/quote.c @@ -84,12 +84,28 @@ void sq_quote_argv(struct strbuf *dst, const char **argv) } } +/* + * Legacy function to append each argv value, quoted as necessasry, + * with whitespace before each value. This results in a leading + * space in the result. + */ void sq_quote_argv_pretty(struct strbuf *dst, const char **argv) +{ + if (argv[0]) + strbuf_addch(dst, ' '); + sq_append_quote_argv_pretty(dst, argv); +} + +/* + * Append each argv value, quoted as necessary, with whitespace between them. + */ +void sq_append_quote_argv_pretty(struct strbuf *dst, const char **argv) { int i; for (i = 0; argv[i]; i++) { - strbuf_addch(dst, ' '); + if (i > 0) + strbuf_addch(dst, ' '); sq_quote_buf_pretty(dst, argv[i]); } } diff --git a/quote.h b/quote.h index fb08dc085cca25..ca8ee3144a6ad2 100644 --- a/quote.h +++ b/quote.h @@ -40,6 +40,7 @@ void sq_quotef(struct strbuf *, const char *fmt, ...); */ void sq_quote_buf_pretty(struct strbuf *, const char *src); void sq_quote_argv_pretty(struct strbuf *, const char **argv); +void sq_append_quote_argv_pretty(struct strbuf *dst, const char **argv); /* This unwraps what sq_quote() produces in place, but returns * NULL if the input does not look like what sq_quote would have From e34430556c8cb7c99a561125df3c8f595c385e7e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 9 Aug 2019 08:00:56 -0700 Subject: [PATCH 096/710] trace2: cleanup whitespace in normal format Make use of new sq_append_quote_argv_pretty() to normalize how we handle leading whitespace in normal format messages. 
Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- trace2/tr2_tgt_normal.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/trace2/tr2_tgt_normal.c b/trace2/tr2_tgt_normal.c index 213724d5cb9264..438ed05a408e62 100644 --- a/trace2/tr2_tgt_normal.c +++ b/trace2/tr2_tgt_normal.c @@ -87,7 +87,7 @@ static void fn_start_fl(const char *file, int line, struct strbuf buf_payload = STRBUF_INIT; strbuf_addstr(&buf_payload, "start "); - sq_quote_argv_pretty(&buf_payload, argv); + sq_append_quote_argv_pretty(&buf_payload, argv); normal_io_write_fl(file, line, &buf_payload); strbuf_release(&buf_payload); } @@ -186,8 +186,8 @@ static void fn_alias_fl(const char *file, int line, const char *alias, { struct strbuf buf_payload = STRBUF_INIT; - strbuf_addf(&buf_payload, "alias %s ->", alias); - sq_quote_argv_pretty(&buf_payload, argv); + strbuf_addf(&buf_payload, "alias %s -> ", alias); + sq_append_quote_argv_pretty(&buf_payload, argv); normal_io_write_fl(file, line, &buf_payload); strbuf_release(&buf_payload); } @@ -198,12 +198,12 @@ static void fn_child_start_fl(const char *file, int line, { struct strbuf buf_payload = STRBUF_INIT; - strbuf_addf(&buf_payload, "child_start[%d] ", cmd->trace2_child_id); + strbuf_addf(&buf_payload, "child_start[%d]", cmd->trace2_child_id); if (cmd->dir) { - strbuf_addstr(&buf_payload, " cd"); + strbuf_addstr(&buf_payload, " cd "); sq_quote_buf_pretty(&buf_payload, cmd->dir); - strbuf_addstr(&buf_payload, "; "); + strbuf_addstr(&buf_payload, ";"); } /* @@ -211,9 +211,10 @@ static void fn_child_start_fl(const char *file, int line, * See trace_add_env() in run-command.c as used by original trace.c */ + strbuf_addch(&buf_payload, ' '); if (cmd->git_cmd) - strbuf_addstr(&buf_payload, "git"); - sq_quote_argv_pretty(&buf_payload, cmd->argv); + strbuf_addstr(&buf_payload, "git "); + sq_append_quote_argv_pretty(&buf_payload, cmd->argv); normal_io_write_fl(file, line, &buf_payload); 
strbuf_release(&buf_payload); @@ -238,9 +239,11 @@ static void fn_exec_fl(const char *file, int line, uint64_t us_elapsed_absolute, struct strbuf buf_payload = STRBUF_INIT; strbuf_addf(&buf_payload, "exec[%d] ", exec_id); - if (exe) + if (exe) { strbuf_addstr(&buf_payload, exe); - sq_quote_argv_pretty(&buf_payload, argv); + strbuf_addch(&buf_payload, ' '); + } + sq_append_quote_argv_pretty(&buf_payload, argv); normal_io_write_fl(file, line, &buf_payload); strbuf_release(&buf_payload); } From 742ed63345a6e26399fc08e9c711e9f3fd23cc2d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 9 Aug 2019 08:00:57 -0700 Subject: [PATCH 097/710] trace2: cleanup whitespace in perf format Make use of new sq_append_quote_argv_pretty() to normalize how we handle leading whitespace in perf format messages. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- t/t0211-trace2-perf.sh | 4 ++-- trace2/tr2_tgt_perf.c | 31 ++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index 2c3ad6e8c186d7..6ee8ee3b6729f0 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -130,11 +130,11 @@ test_expect_success 'perf stream, child processes' ' d0|main|version|||||$V d0|main|start||_T_ABS_|||_EXE_ trace2 004child test-tool trace2 004child test-tool trace2 001return 0 d0|main|cmd_name|||||trace2 (trace2) - d0|main|child_start||_T_ABS_|||[ch0] class:? argv: test-tool trace2 004child test-tool trace2 001return 0 + d0|main|child_start||_T_ABS_|||[ch0] class:? argv:[test-tool trace2 004child test-tool trace2 001return 0] d1|main|version|||||$V d1|main|start||_T_ABS_|||_EXE_ trace2 004child test-tool trace2 001return 0 d1|main|cmd_name|||||trace2 (trace2/trace2) - d1|main|child_start||_T_ABS_|||[ch0] class:? argv: test-tool trace2 001return 0 + d1|main|child_start||_T_ABS_|||[ch0] class:? 
argv:[test-tool trace2 001return 0] d2|main|version|||||$V d2|main|start||_T_ABS_|||_EXE_ trace2 001return 0 d2|main|cmd_name|||||trace2 (trace2/trace2/trace2) diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c index 8e52395de2d1ba..fd979db4ad8004 100644 --- a/trace2/tr2_tgt_perf.c +++ b/trace2/tr2_tgt_perf.c @@ -184,7 +184,7 @@ static void fn_start_fl(const char *file, int line, const char *event_name = "start"; struct strbuf buf_payload = STRBUF_INIT; - sq_quote_argv_pretty(&buf_payload, argv); + sq_append_quote_argv_pretty(&buf_payload, argv); perf_io_write_fl(file, line, event_name, NULL, &us_elapsed_absolute, NULL, NULL, &buf_payload); @@ -299,8 +299,9 @@ static void fn_alias_fl(const char *file, int line, const char *alias, const char *event_name = "alias"; struct strbuf buf_payload = STRBUF_INIT; - strbuf_addf(&buf_payload, "alias:%s argv:", alias); - sq_quote_argv_pretty(&buf_payload, argv); + strbuf_addf(&buf_payload, "alias:%s argv:[", alias); + sq_append_quote_argv_pretty(&buf_payload, argv); + strbuf_addch(&buf_payload, ']'); perf_io_write_fl(file, line, event_name, NULL, NULL, NULL, NULL, &buf_payload); @@ -329,10 +330,14 @@ static void fn_child_start_fl(const char *file, int line, sq_quote_buf_pretty(&buf_payload, cmd->dir); } - strbuf_addstr(&buf_payload, " argv:"); - if (cmd->git_cmd) - strbuf_addstr(&buf_payload, " git"); - sq_quote_argv_pretty(&buf_payload, cmd->argv); + strbuf_addstr(&buf_payload, " argv:["); + if (cmd->git_cmd) { + strbuf_addstr(&buf_payload, "git"); + if (cmd->argv[0]) + strbuf_addch(&buf_payload, ' '); + } + sq_append_quote_argv_pretty(&buf_payload, cmd->argv); + strbuf_addch(&buf_payload, ']'); perf_io_write_fl(file, line, event_name, NULL, &us_elapsed_absolute, NULL, NULL, &buf_payload); @@ -383,10 +388,14 @@ static void fn_exec_fl(const char *file, int line, uint64_t us_elapsed_absolute, struct strbuf buf_payload = STRBUF_INIT; strbuf_addf(&buf_payload, "id:%d ", exec_id); - strbuf_addstr(&buf_payload, "argv:"); - 
if (exe) - strbuf_addf(&buf_payload, " %s", exe); - sq_quote_argv_pretty(&buf_payload, argv); + strbuf_addstr(&buf_payload, "argv:["); + if (exe) { + strbuf_addstr(&buf_payload, exe); + if (argv[0]) + strbuf_addch(&buf_payload, ' '); + } + sq_append_quote_argv_pretty(&buf_payload, argv); + strbuf_addch(&buf_payload, ']'); perf_io_write_fl(file, line, event_name, NULL, &us_elapsed_absolute, NULL, NULL, &buf_payload); From 362f8b280c0cdf2b7bbb0a8575e617a105f0a3ff Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 12 Aug 2019 11:58:03 -0400 Subject: [PATCH 098/710] t/perf: rename duplicate-numbered test script There are two perf scripts numbered p5600, but with otherwise different names ("clone-reference" versus "partial-clone"). We store timing results in files named after the whole script, so internally we don't get confused between the two. But "aggregate.perl" just prints the test number for each result, giving multiple entries for "5600.3". It also makes it impossible to skip one test but not the other with GIT_SKIP_TESTS. Let's renumber the one that appeared later (by date -- the source of the problem is that the two were developed on independent branches). For the non-perf test suite, our test-lint rule would have complained about this when the two were merged, but t/perf never learned that trick. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/perf/{p5600-clone-reference.sh => p5601-clone-reference.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename t/perf/{p5600-clone-reference.sh => p5601-clone-reference.sh} (100%) diff --git a/t/perf/p5600-clone-reference.sh b/t/perf/p5601-clone-reference.sh similarity index 100% rename from t/perf/p5600-clone-reference.sh rename to t/perf/p5601-clone-reference.sh From d1387d389549fe6e1b97ad88a0f7dbf3afe96c16 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 12 Aug 2019 10:17:47 -0700 Subject: [PATCH 099/710] git-fast-import.txt: clarify that multiple merge commits are allowed The grammar for commits used a '?' rather than a '*' on the `merge` directive line, despite the fact that the code allows multiple `merge` directives in order to support n-way merges. In fact, elsewhere in git-fast-import.txt there is an explicit declaration that "an unlimited number of `merge` commands per commit are permitted by fast-import". Fix the grammar to match the intent and implementation. Reported-by: Joachim Klein Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index d65cdb3d08fd74..28b447a3e64406 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -390,7 +390,7 @@ change to the project. 'committer' (SP )? SP LT GT SP LF data ('from' SP LF)? - ('merge' SP LF)? + ('merge' SP LF)* (filemodify | filedelete | filecopy | filerename | filedeleteall | notemodify)* LF? .... 
From 9827d4c185e4da728f51cd77c54a38c9de62495f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 12 Aug 2019 16:50:21 -0400 Subject: [PATCH 100/710] packfile: drop release_pack_memory() Long ago, in 97bfeb34df (Release pack windows before reporting out of memory., 2006-12-24), we taught xmalloc() and friends to try unmapping pack windows when malloc() failed. It's unlikely that this helps a lot in practice, and it has some downsides. First, the downsides: 1. It makes xmalloc() not thread-safe. We've worked around this in pack-objects.c, which installs its own locking version of the try_to_free_routine(). But other threaded code doesn't. 2. It makes the system as a whole harder to reason about. Functions which allocate heap memory under the hood may have farther-reaching effects than expected. That might be worth the tradeoff if there's a benefit. But in practice, it seems unlikely. We're generally dealing with mmap'd files, so the OS is going to do a much better job at responding to memory pressure by dropping individual pages (the exception is systems with NO_MMAP, but even there the OS can probably respond just as well with swapping). So the only thing we're really freeing is address space. On 64-bit systems, we have plenty of that to go around. On 32-bit systems, it could possibly help. But around the same time we made two other changes: 77ccc5bbd1 (Introduce new config option for mmap limit., 2006-12-23) and 60bb8b1453 (Fully activate the sliding window pack access., 2006-12-23). Together that means that a 32-bit system should have no more than 256MB total of packed-git mmaps at one time, split between a few 32MB windows. It's unlikely we have any address space problems since then, but we don't have any data since the features were all added at the same time. Likewise, xmmap() will try to free memory. At first glance, it seems like we'd need this (when we try to mmap a new window, we might need to close an old one to save address space on a 32-bit system).
But we're saved again by core.packedGitLimit: if we're going to exceed our 256MB limit, we'll close an existing window before we even call mmap(). So it seems unlikely that this feature is actually doing anything useful. And while we don't have reports of it harming anything (probably because it rarely if ever kicks in), it would be nice to simplify the system overall. This patch drops the whole try_to_free system from xmalloc(), as well as the manual pack memory release in xmmap(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 11 -------- git-compat-util.h | 3 -- packfile.c | 18 ------------ sha1-file.c | 8 ++---- trace.c | 2 -- wrapper.c | 63 +++++++++--------------------------------- 6 files changed, 15 insertions(+), 90 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 76ce9069467e06..93e92876aa7637 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2342,15 +2342,6 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, free(array); } -static void try_to_free_from_threads(size_t size) -{ - packing_data_lock(&to_pack); - release_pack_memory(size); - packing_data_unlock(&to_pack); -} - -static try_to_free_t old_try_to_free_routine; - /* * The main object list is split into smaller lists, each is handed to * one worker. 
@@ -2391,12 +2382,10 @@ static void init_threaded_search(void) pthread_mutex_init(&cache_mutex, NULL); pthread_mutex_init(&progress_mutex, NULL); pthread_cond_init(&progress_cond, NULL); - old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads); } static void cleanup_threaded_search(void) { - set_try_to_free_routine(old_try_to_free_routine); pthread_cond_destroy(&progress_cond); pthread_mutex_destroy(&cache_mutex); pthread_mutex_destroy(&progress_mutex); diff --git a/git-compat-util.h b/git-compat-util.h index 83be89de0aac7c..f0d13e4e28470a 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -818,9 +818,6 @@ const char *inet_ntop(int af, const void *src, char *dst, size_t size); int git_atexit(void (*handler)(void)); #endif -typedef void (*try_to_free_t)(size_t); -try_to_free_t set_try_to_free_routine(try_to_free_t); - static inline size_t st_add(size_t a, size_t b) { if (unsigned_add_overflows(a, b)) diff --git a/packfile.c b/packfile.c index fc43a6c52c75a3..d98ac2287683f9 100644 --- a/packfile.c +++ b/packfile.c @@ -287,13 +287,6 @@ static int unuse_one_window(struct packed_git *current) return 0; } -void release_pack_memory(size_t need) -{ - size_t cur = pack_mapped; - while (need >= (cur - pack_mapped) && unuse_one_window(NULL)) - ; /* nothing */ -} - void close_pack_windows(struct packed_git *p) { while (p->windows) { @@ -710,23 +703,12 @@ void unuse_pack(struct pack_window **w_cursor) } } -static void try_to_free_pack_memory(size_t size) -{ - release_pack_memory(size); -} - struct packed_git *add_packed_git(const char *path, size_t path_len, int local) { - static int have_set_try_to_free_routine; struct stat st; size_t alloc; struct packed_git *p; - if (!have_set_try_to_free_routine) { - have_set_try_to_free_routine = 1; - set_try_to_free_routine(try_to_free_pack_memory); - } - /* * Make sure a corresponding .pack file exists and that * the index looks sane. 
diff --git a/sha1-file.c b/sha1-file.c index 487ea35d2d3984..4895408e1ed309 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -952,12 +952,8 @@ void *xmmap_gently(void *start, size_t length, mmap_limit_check(length); ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) { - if (!length) - return NULL; - release_pack_memory(length); - ret = mmap(start, length, prot, flags, fd, offset); - } + if (ret == MAP_FAILED && !length) + ret = NULL; return ret; } diff --git a/trace.c b/trace.c index fa4a2e7120e405..b3ef0e627f8cec 100644 --- a/trace.c +++ b/trace.c @@ -88,8 +88,6 @@ static int prepare_trace_line(const char *file, int line, if (!trace_want(key)) return 0; - set_try_to_free_routine(NULL); /* is never reset */ - /* unit tests may want to disable additional trace output */ if (trace_want(&trace_bare)) return 1; diff --git a/wrapper.c b/wrapper.c index 1e45ab7b92749b..c55d7722d7be5a 100644 --- a/wrapper.c +++ b/wrapper.c @@ -4,12 +4,6 @@ #include "cache.h" #include "config.h" -static void do_nothing(size_t size) -{ -} - -static void (*try_to_free_routine)(size_t size) = do_nothing; - static int memory_limit_check(size_t size, int gentle) { static size_t limit = 0; @@ -30,24 +24,11 @@ static int memory_limit_check(size_t size, int gentle) return 0; } -try_to_free_t set_try_to_free_routine(try_to_free_t routine) -{ - try_to_free_t old = try_to_free_routine; - if (!routine) - routine = do_nothing; - try_to_free_routine = routine; - return old; -} - char *xstrdup(const char *str) { char *ret = strdup(str); - if (!ret) { - try_to_free_routine(strlen(str) + 1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } + if (!ret) + die("Out of memory, strdup failed"); return ret; } @@ -61,19 +42,13 @@ static void *do_xmalloc(size_t size, int gentle) if (!ret && !size) ret = malloc(1); if (!ret) { - try_to_free_routine(size); - ret = malloc(size); - if (!ret && !size) - ret = malloc(1); - if (!ret) { - if (!gentle) - die("Out of memory, 
malloc failed (tried to allocate %lu bytes)", - (unsigned long)size); - else { - error("Out of memory, malloc failed (tried to allocate %lu bytes)", - (unsigned long)size); - return NULL; - } + if (!gentle) + die("Out of memory, malloc failed (tried to allocate %lu bytes)", + (unsigned long)size); + else { + error("Out of memory, malloc failed (tried to allocate %lu bytes)", + (unsigned long)size); + return NULL; } } #ifdef XMALLOC_POISON @@ -138,14 +113,8 @@ void *xrealloc(void *ptr, size_t size) ret = realloc(ptr, size); if (!ret && !size) ret = realloc(ptr, 1); - if (!ret) { - try_to_free_routine(size); - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } @@ -160,14 +129,8 @@ void *xcalloc(size_t nmemb, size_t size) ret = calloc(nmemb, size); if (!ret && (!nmemb || !size)) ret = calloc(1, 1); - if (!ret) { - try_to_free_routine(nmemb * size); - ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); - } + if (!ret) + die("Out of memory, calloc failed"); return ret; } From 08a12175d8b725d80efac41328780b57c4cfa6b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:42 +0200 Subject: [PATCH 101/710] completion: fix a typo in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index e087c4bf0085ad..cd9d8e1940a88b 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -524,7 +524,7 @@ __git_index_files () # Even when a directory name itself does not contain # any special 
characters, it will still be quoted if # any of its (stripped) trailing path components do. - # Because of this we may have seen the same direcory + # Because of this we may have seen the same directory # both quoted and unquoted. if (p in paths) # We have seen the same directory unquoted, From 840d7e5b3ca97f831b5af804a135a9cc79d9b919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:43 +0200 Subject: [PATCH 102/710] completion: complete more values of more 'color.*' configuration variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most 'color.*' configuration variables, with the sole exception of 'color.pager', accept the same set of values, but our completion script recognizes only about half of them. We could explicitly add all those missing variables, but let's try to reduce future maintenance burden, and use the catch-all 'color.*' pattern instead, so this list won't get out of sync when a similar new configuration variable accepting the same values is introduced [1]. Furthermore, their documentation explicitly mentions that they all accept the standard boolean values 'false' and 'true' as well, so list these, too, among the possible values. [1] OTOH, there will be a maintenance burden if ever a new 'color.something' is introduced which doesn't accept the same set of values. We'll see which one happens first... 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index cd9d8e1940a88b..c59347daeeefdf 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2277,11 +2277,6 @@ _git_config () __gitcomp "$__git_merge_strategies" return ;; - color.branch|color.diff|color.interactive|\ - color.showbranch|color.status|color.ui) - __gitcomp "always never auto" - return - ;; color.pager) __gitcomp "false true" return @@ -2293,6 +2288,10 @@ _git_config () " return ;; + color.*) + __gitcomp "false true always never auto" + return + ;; diff.submodule) __gitcomp "$__git_diff_submodule_formats" return From 7a09a8f093eef940eef7d012907c051974ada254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:44 +0200 Subject: [PATCH 103/710] completion: add tests for 'git config' completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The next patches will change/refactor the way we complete configuration variable names and values, so add a few tests to cover the basics, namely the completion of matching configuration sections, full variable names, and their values. Note that the test checking the completion of configuration sections is currently failing, though it's not a sign of an actual bug. If a section contains multiple variables, then that section is currently repeated as many times as the number of variables in there. This is not a correctness issue in practice, because Bash's completion facilities remove all repetitions anyway. Consequently, we could list all those repeated sections in the expected output of this test as well, but then it would have to be updated whenever a new configuration variable is added to those sections. 
Instead, list each matching configuration section only once, mark the test as failing for now, and the next patch will update the completion script to avoid those repetitions. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- t/t9902-completion.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 75512c340366f3..e15be1164d1945 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1698,6 +1698,27 @@ do ' done +test_expect_failure 'git config - section' ' + test_completion "git config br" <<-\EOF + branch.Z + browser.Z + EOF +' + +test_expect_success 'git config - variable name' ' + test_completion "git config log.d" <<-\EOF + log.date Z + log.decorate Z + EOF +' + +test_expect_success 'git config - value' ' + test_completion "git config color.pager " <<-\EOF + false Z + true Z + EOF +' + test_expect_success 'sourcing the completion script clears cached commands' ' __git_compute_all_commands && verbose test -n "$__git_all_commands" && From d9438873c4dd80057a86f4b9a082db5ec75275cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:45 +0200 Subject: [PATCH 104/710] completion: deduplicate configuration sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of configuration variables listed by the completion script grew quite a bit when we started to auto-generate it from the documentation [1], so we now complete them in two steps: first we list only the section names, then the rest [2]. To get the section names we simply strip everything following the first dot in each variable name, resulting in a lot of repeated section names, because most sections contain more than one configuration variable. This is not a correctness issue in practice, because Bash's completion facilities remove all repetitions anyway, but these repetitions make testing a bit harder.
Replace the small 'sed' script removing subsections and variable names with an 'awk' script that does the same, and in addition removes any repeated configuration sections as well (by first creating and filling an associative array indexed by all encountered configuration sections, and then iterating over this array and printing the indices, i.e. the unique section names). This change makes the failing 'git config - section' test in 't9902-completion.sh' pass. Note that this changes the order of section names in the output, and makes it downright nondeterministic, but this is not an issue, because Bash sorts them before presenting them to the user, and our completion tests sort them as well before comparing with the expected output. Yeah, it would be simpler and shorter to just append '| sort -u' to that command, but that would incur the overhead of one more external process and pipeline stage every time a user completes configuration sections. [1] e17ca92637 (completion: drop the hard coded list of config vars, 2018-05-26) [2] f22f682695 (completion: complete general config vars in two steps, 2018-05-27) Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 10 +++++++++- t/t9902-completion.sh | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index c59347daeeefdf..f89324d84faf09 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2398,7 +2398,15 @@ _git_config () ;; *) __git_compute_config_vars - __gitcomp "$(echo "$__git_config_vars" | sed 's/\.[^ ]*/./g')" + __gitcomp "$(echo "$__git_config_vars" | + awk -F . '{ + sections[$1] = 1 + } + END { + for (s in sections) + print s "."
+ } + ')" esac } diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index e15be1164d1945..008fba7c896388 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -1698,7 +1698,7 @@ do ' done -test_expect_failure 'git config - section' ' +test_expect_success 'git config - section' ' test_completion "git config br" <<-\EOF branch.Z browser.Z From 2675ea1cc0f5d542d9bde4e8a458ac726bf30f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:46 +0200 Subject: [PATCH 105/710] completion: use 'sort -u' to deduplicate config variable names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The completion script runs the classic '| sort | uniq' pipeline to deduplicate the output of 'git help --config-for-completion'. 'sort -u' does the same, but uses one less external process and pipeline stage. Not a big win, as it's only run once as the list of supported configuration variables is initialized, but at least it sets a better example for others to follow.
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index f89324d84faf09..b51cb31ea1112d 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2225,7 +2225,7 @@ __git_config_vars= __git_compute_config_vars () { test -n "$__git_config_vars" || - __git_config_vars="$(git help --config-for-completion | sort | uniq)" + __git_config_vars="$(git help --config-for-completion | sort -u)" } _git_config () From d9ee1e061783be43d10dee996b85bac0f9223ec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:47 +0200 Subject: [PATCH 106/710] completion: simplify inner 'case' pattern in __gitcomp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The second '*' in the '--*=*' pattern of the inner 'case' statement of the __gitcomp() helper function never matches anything, so let's use '--*=' instead. The purpose of that inner case statement is to decide when to append a trailing space to the listed options and when not. When an option requires a stuck argument, i.e. '--option=', then the trailing space should not be added, so the user can continue typing the required argument right away. That '--*=*' pattern is supposed to match these options, but for this purpose that second '*' is unnecessary, a '--*=' pattern works just as well. That second '*' would only make a difference in case of a possible completion word like '--option=value', but our completion script never passes such a word to __gitcomp(), because the '--option=' and its 'value' must be completed separately. 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index b51cb31ea1112d..fc437bf3eb0e3f 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -340,7 +340,7 @@ __gitcomp () c="$c${4-}" if [[ $c == "$cur_"* ]]; then case $c in - --*=*|*.) ;; + --*=|*.) ;; *) c="$c " ;; esac COMPREPLY[i++]="${2-}$c" @@ -360,7 +360,7 @@ __gitcomp () c="$c${4-}" if [[ $c == "$cur_"* ]]; then case $c in - --*=*|*.) ;; + --*=|*.) ;; *) c="$c " ;; esac COMPREPLY[i++]="${2-}$c" From 42d0efec592ecd6bc8858ffb5d64fb0bbca827d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:48 +0200 Subject: [PATCH 107/710] completion: split _git_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _git_config() contains two enormous case statements, one to complete configuration sections and variable names, and the other to complete their values. Split these out into two separate helper functions, so in the next patches we can use them to implement completion for 'git -c '. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 39 ++++++++++++++++++++------ 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index fc437bf3eb0e3f..3e9c5b6b71a052 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2228,7 +2228,8 @@ __git_compute_config_vars () __git_config_vars="$(git help --config-for-completion | sort -u)" } -_git_config () +# Completes possible values of various configuration variables. 
+__git_complete_config_variable_value () { local varname @@ -2320,19 +2321,16 @@ _git_config () __gitcomp "7bit 8bit quoted-printable base64" return ;; - --get|--get-all|--unset|--unset-all) - __gitcomp_nl "$(__git_config_get_set_variables)" - return - ;; *.*) return ;; esac +} + +# Completes configuration sections, subsections, variable names. +__git_complete_config_variable_name () +{ case "$cur" in - --*) - __gitcomp_builtin config - return - ;; branch.*.*) local pfx="${cur%.*}." cur_="${cur##*.}" __gitcomp "remote pushRemote merge mergeOptions rebase" "$pfx" "$cur_" @@ -2407,6 +2405,29 @@ _git_config () print s "." } ')" + ;; + esac +} + +_git_config () +{ + case "$prev" in + --get|--get-all|--unset|--unset-all) + __gitcomp_nl "$(__git_config_get_set_variables)" + return + ;; + *.*) + __git_complete_config_variable_value + return + ;; + esac + case "$cur" in + --*) + __gitcomp_builtin config + ;; + *) + __git_complete_config_variable_name + ;; esac } From e1e00089da9f616d23f0ca3bb183258e9013c469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 13 Aug 2019 14:26:49 +0200 Subject: [PATCH 108/710] completion: complete configuration sections and variable names for 'git -c' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'git config' expects a configuration variable's name and value in separate arguments, so we let the __gitcomp() helper append a space character to each variable name by default, like we do for most other things (--options, refs, paths, etc.). 'git -c', however, expects them in a single option joined by a '=' character, i.e. 'section.name=value', so we should append a '=' character to each fully completed variable name, but no space, so the user can continue typing the value right away. Add an option to the __git_complete_config_variable_name() function to allow callers to specify an alternate suffix to add, and use it to append that '=' character to configuration variables. 
Update the __gitcomp() helper function to not append a trailing space to any completion words ending with a '=', not just to those options with a stuck argument. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 59 ++++++++++++++++++++------ t/t9902-completion.sh | 14 ++++++ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 3e9c5b6b71a052..367b1c50f450e0 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -360,7 +360,7 @@ __gitcomp () c="$c${4-}" if [[ $c == "$cur_"* ]]; then case $c in - --*=|*.) ;; + *=|*.) ;; *) c="$c " ;; esac COMPREPLY[i++]="${2-}$c" @@ -2328,18 +2328,33 @@ __git_complete_config_variable_value () } # Completes configuration sections, subsections, variable names. +# +# Usage: __git_complete_config_variable_name [