Discussion:
[PATCH 04/17] lib: read reference terms into message struct.
David Bremner
2018-09-04 10:57:10 UTC
Permalink
The plan is to use these in resolving threads.
---
lib/message.cc | 18 ++++++++++++++++++
lib/notmuch-private.h | 3 +++
2 files changed, 21 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 23266c1a..9d090bcf 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -41,6 +41,7 @@ struct _notmuch_message {
notmuch_message_file_t *message_file;
notmuch_string_list_t *property_term_list;
notmuch_string_map_t *property_map;
+ notmuch_string_list_t *reference_list;
notmuch_message_list_t *replies;
unsigned long flags;
/* For flags that are initialized on-demand, lazy_flags indicates
@@ -129,6 +130,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
message->author = NULL;
message->property_term_list = NULL;
message->property_map = NULL;
+ message->reference_list = NULL;

message->replies = _notmuch_message_list_create (message);
if (unlikely (message->replies == NULL)) {
@@ -349,6 +351,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
*type_prefix = _find_prefix ("type"),
*filename_prefix = _find_prefix ("file-direntry"),
*property_prefix = _find_prefix ("property"),
+ *reference_prefix = _find_prefix ("reference"),
*replyto_prefix = _find_prefix ("replyto");

/* We do this all in a single pass because Xapian decompresses the
@@ -413,6 +416,14 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
_notmuch_database_get_terms_with_prefix (message, i, end,
property_prefix);

+ /* get references */
+ assert (strcmp (property_prefix, reference_prefix) < 0);
+ if (!message->reference_list) {
+ message->reference_list =
+ _notmuch_database_get_terms_with_prefix (message, i, end,
+ reference_prefix);
+ }
+
/* Get reply to */
assert (strcmp (property_prefix, replyto_prefix) < 0);
if (!message->in_reply_to)
@@ -588,6 +599,13 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}

+const notmuch_string_list_t *
+_notmuch_message_get_references (notmuch_message_t *message)
+{
+ _notmuch_message_ensure_metadata (message, message->reference_list);
+ return message->reference_list;
+}
+
static int
_cmpmsg (const void *pa, const void *pb)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 39fc4757..bd9d2747 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -583,6 +583,9 @@ _notmuch_string_list_append (notmuch_string_list_t *list,
void
_notmuch_string_list_sort (notmuch_string_list_t *list);

+const notmuch_string_list_t *
+_notmuch_message_get_references(notmuch_message_t *message);
+
/* string-map.c */
typedef struct _notmuch_string_map notmuch_string_map_t;
typedef struct _notmuch_string_map_iterator notmuch_string_map_iterator_t;
--
2.11.0
David Bremner
2018-09-04 10:57:07 UTC
Permalink
There are 3 threads here, two synthetic, and one anonymized one using
data from Gregor. They test various aspects of thread
ordering/construction in the presence of replies to ghost messages.
---
.../ghost-root/1529425589.M615261P21663.len:2,S | 9 +++++++++
.../ghost-root/1532672447.R3166642290392477575.len:2,S | 17 +++++++++++++++++
.../ghost-root/1532672447.R6968667928580738175.len:2,S | 18 ++++++++++++++++++
test/corpora/threading/ghost-root/child | 9 +++++++++
test/corpora/threading/ghost-root/fake-root | 9 +++++++++
test/corpora/threading/ghost-root/grand-child | 9 +++++++++
test/corpora/threading/ghost-root/grand-child2 | 9 +++++++++
test/corpora/threading/ghost-root/great-grand-child | 9 +++++++++
test/corpora/threading/ghost-root/real-root | 7 +++++++
9 files changed, 96 insertions(+)
create mode 100644 test/corpora/threading/ghost-root/1529425589.M615261P21663.len:2,S
create mode 100644 test/corpora/threading/ghost-root/1532672447.R3166642290392477575.len:2,S
create mode 100644 test/corpora/threading/ghost-root/1532672447.R6968667928580738175.len:2,S
create mode 100644 test/corpora/threading/ghost-root/child
create mode 100644 test/corpora/threading/ghost-root/fake-root
create mode 100644 test/corpora/threading/ghost-root/grand-child
create mode 100644 test/corpora/threading/ghost-root/grand-child2
create mode 100644 test/corpora/threading/ghost-root/great-grand-child
create mode 100644 test/corpora/threading/ghost-root/real-root

diff --git a/test/corpora/threading/ghost-root/1529425589.M615261P21663.len:2,S b/test/corpora/threading/ghost-root/1529425589.M615261P21663.len:2,S
new file mode 100644
index 00000000..62bf98db
--- /dev/null
+++ b/test/corpora/threading/ghost-root/1529425589.M615261P21663.len:2,S
@@ -0,0 +1,9 @@
+From: Gregor Zattler <***@xxxxxxx-xxxxxxxxx.de>
+To: xxx request tracker <rt-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Subject: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
+Date: Tue, 19 Jun 2016 18:26:26 +0200
+Message-ID: <***@len.workgroup>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
diff --git a/test/corpora/threading/ghost-root/1532672447.R3166642290392477575.len:2,S b/test/corpora/threading/ghost-root/1532672447.R3166642290392477575.len:2,S
new file mode 100644
index 00000000..b79eaf7a
--- /dev/null
+++ b/test/corpora/threading/ghost-root/1532672447.R3166642290392477575.len:2,S
@@ -0,0 +1,17 @@
+Return-Path: <prvs=701fd58e1=www-***@support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Subject: [support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] AutoReply: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
+From: " via RT" <rt-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Reply-To: rt-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de
+In-Reply-To: <***@len.workgroup>
+References: <RT-Ticket-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+ <***@len.workgroup>
+Message-ID: <rt-4.2.8-22046-1529425595-591.33575-211-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+To: ***@xxxxxxx-xxxxxxxxx.de
+Content-Type: text/plain; charset="utf-8"
+Date: Tue, 19 Jun 2016 18:26:36 +0200
+MIME-Version: 1.0
+Content-Transfer-Encoding: 8bit
+
+
+
+
diff --git a/test/corpora/threading/ghost-root/1532672447.R6968667928580738175.len:2,S b/test/corpora/threading/ghost-root/1532672447.R6968667928580738175.len:2,S
new file mode 100644
index 00000000..343a855e
--- /dev/null
+++ b/test/corpora/threading/ghost-root/1532672447.R6968667928580738175.len:2,S
@@ -0,0 +1,18 @@
+Return-Path: <prvs=708ebe06b=www-***@support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Subject: [support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] Resolved: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
+From: " via RT" <rt-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Reply-To: rt-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de
+References: <RT-Ticket-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+Message-ID: <rt-4.2.8-6644-1530017064-1465.33575-215-***@xxxxxxx-xxxxxxxxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de>
+To: ***@xxxxxxx-xxxxxxxxx.de
+Content-Type: text/plain; charset="utf-8"
+Date: Tue, 26 Jun 2016 14:44:24 +0200
+MIME-Version: 1.0
+Content-Transfer-Encoding: 8bit
+
+
+
+
+According to our records, your request has been resolved. If you have any
+further questions or concerns, please respond to this message.
+
diff --git a/test/corpora/threading/ghost-root/child b/test/corpora/threading/ghost-root/child
new file mode 100644
index 00000000..4c36af95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/child
@@ -0,0 +1,9 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: child message
+Message-ID: <001-***@example.org>
+In-Reply-To: <000-real-***@example.org>
+References: <000-real-***@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
new file mode 100644
index 00000000..a698185d
--- /dev/null
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -0,0 +1,9 @@
+From: Mallory <***@example.org>
+To: Daniel <***@example.org>
+Subject: fake root message
+Message-ID: <001-fake-message-***@example.org>
+In-Reply-to: <nonexistent-***@example.org>
+References: <000-real-***@example.org> <001-***@example.org> <nonexistent-***@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has an in-reply-to pointing to a non-existent message
diff --git a/test/corpora/threading/ghost-root/grand-child b/test/corpora/threading/ghost-root/grand-child
new file mode 100644
index 00000000..5f77ac36
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child
@@ -0,0 +1,9 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: grand-child message
+Message-ID: <001-grand-***@example.org>
+In-Reply-To: <001-***@example.org>
+References: <000-real-***@example.org> <001-***@example.org>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/grand-child2 b/test/corpora/threading/ghost-root/grand-child2
new file mode 100644
index 00000000..59682a95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child2
@@ -0,0 +1,9 @@
+From: Daniel <***@example.org>
+To: Alice <***@example.org>
+Subject: grand-child message 2
+Message-ID: <001-grand-***@example.org>
+In-Reply-To: <001-***@example.org>
+References: <000-real-***@example.org> <001-***@example.org>
+Date: Fri, 17 Jun 2016 22:34:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/great-grand-child b/test/corpora/threading/ghost-root/great-grand-child
new file mode 100644
index 00000000..287a8954
--- /dev/null
+++ b/test/corpora/threading/ghost-root/great-grand-child
@@ -0,0 +1,9 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: great grand-child message
+Message-ID: <001-great-grand-***@example.org>
+In-Reply-To: <001-grand-***@example.org>
+References: <000-real-***@example.org> <001-grand-***@example.org>
+Date: Fri, 17 Jun 2016 22:44:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/real-root b/test/corpora/threading/ghost-root/real-root
new file mode 100644
index 00000000..f1b16a0c
--- /dev/null
+++ b/test/corpora/threading/ghost-root/real-root
@@ -0,0 +1,7 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: root message
+Message-ID: <000-real-***@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no in-reply-to
--
2.11.0
David Bremner
2018-09-04 10:57:09 UTC
Permalink
For non-root messages, this should not should anything currently, as
the messages are already added in date order. In the future we will
add some non-root messages in a second pass out of order and the
sorting will be useful. It does fix the order of multiple
root-messages (although it is overkill for that).
---
lib/message.cc | 52 +++++++++++++++++++++++++++++++++++++++++++++
lib/notmuch-private.h | 3 +++
lib/thread.cc | 6 ++++++
test/T510-thread-replies.sh | 2 --
4 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/lib/message.cc b/lib/message.cc
index 153e4bed..23266c1a 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -588,6 +588,58 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}

+static int
+_cmpmsg (const void *pa, const void *pb)
+{
+ notmuch_message_t **a = (notmuch_message_t **) pa;
+ notmuch_message_t **b = (notmuch_message_t **) pb;
+ time_t time_a = notmuch_message_get_date (*a);
+ time_t time_b = notmuch_message_get_date (*b);
+
+ if (time_a == time_b)
+ return 0;
+ else if (time_a < time_b)
+ return -1;
+ else
+ return 1;
+}
+
+notmuch_message_list_t *
+_notmuch_message_sort_subtrees (void *ctx, notmuch_message_list_t *list)
+{
+
+ size_t count = 0;
+ size_t capacity = 16;
+
+ if (! list)
+ return list;
+
+ void *local = talloc_new (NULL);
+ notmuch_message_list_t *new_list = _notmuch_message_list_create (ctx);
+ notmuch_message_t **message_array = talloc_zero_array (local, notmuch_message_t *, capacity);
+
+ for (notmuch_messages_t *messages = _notmuch_messages_create (list);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages)) {
+ notmuch_message_t *root = notmuch_messages_get (messages);
+ if (count >= capacity) {
+ capacity *= 2;
+ message_array = talloc_realloc (local, message_array, notmuch_message_t *, capacity);
+ }
+ message_array[count++] = root;
+ root->replies = _notmuch_message_sort_subtrees (root, root->replies);
+ }
+
+ qsort (message_array, count, sizeof (notmuch_message_t *), _cmpmsg);
+ for (size_t i = 0; i < count; i++) {
+ _notmuch_message_list_add_message (new_list, message_array[i]);
+ }
+
+ talloc_free (local);
+ talloc_free (list);
+ return new_list;
+}
+
notmuch_messages_t *
notmuch_message_get_replies (notmuch_message_t *message)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 3764a6a9..39fc4757 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -539,6 +539,9 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
const char *
_notmuch_message_get_thread_id_only(notmuch_message_t *message);

+notmuch_message_list_t *
+_notmuch_message_sort_subtrees (void *ctx, notmuch_message_list_t *list);
+
/* sha1.c */

char *
diff --git a/lib/thread.cc b/lib/thread.cc
index e961c76b..b599a97d 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -429,6 +429,12 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_list_add_message (thread->toplevel_list, message);
}

+ /* XXX this could be made conditional on messages being inserted
+ * (out of order) in later passes
+ */
+ thread->toplevel_list = _notmuch_message_sort_subtrees (thread, thread->toplevel_list);
+
+
/* XXX: After scanning through the entire list looking for parents
* via "In-Reply-To", we should do a second pass that looks at the
* list of messages IDs in the "References" header instead. (And
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 753c7ae1..72af50df 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -192,7 +192,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT

test_begin_subtest "reply to ghost (RT)"
-test_subtest_known_broken
notmuch show --entire-thread=true id:***@len.workgroup | grep ^Subject: | head -1 > OUTPUT
cat <<EOF > EXPECTED
Subject: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
@@ -200,7 +199,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT

test_begin_subtest "reply to ghost (RT/tree view)"
-test_subtest_known_broken
test_emacs '(notmuch-tree "id:***@len.workgroup")
(notmuch-test-wait)
(test-output)
--
2.11.0
David Bremner
2018-09-04 10:57:08 UTC
Permalink
This documents the bug discussed at

id:***@len.workgroup

The underlying issue is that the reply to a ghost (missing) message is
falsely classified as a root message in _resolve_thread_relationships.

There are two pairs of tests; in each case the the first test is
simpler / more robust, but also easier to fool.
---
test/T510-thread-replies.sh | 48 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)

diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 6837ff17..753c7ae1 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -164,5 +164,53 @@ expected='[[[{"id": "XXXXX", "match": true, "excluded": false,
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"

+add_email_corpus threading
+
+test_begin_subtest "reply to ghost"
+test_subtest_known_broken
+notmuch show --entire-thread=true id:000-real-***@example.org | grep ^Subject: | head -1 > OUTPUT
+cat <<EOF > EXPECTED
+Subject: root message
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "id:000-real-***@example.org")
+ (notmuch-test-wait)
+ (test-output)
+ (delete-other-windows)'
+cat <<EOF > EXPECTED
+ 2016-06-17 Alice ┬►root message (inbox unread)
+ 2016-06-18 Alice ╰┬►child message (inbox unread)
+ 2016-06-17 Mallory ├─►fake root message (inbox unread)
+ 2016-06-18 Alice ├┬►grand-child message (inbox unread)
+ 2016-06-18 Alice │╰─►great grand-child message (inbox unread)
+ 2016-06-18 Daniel ╰─►grand-child message 2 (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (RT)"
+test_subtest_known_broken
+notmuch show --entire-thread=true id:***@len.workgroup | grep ^Subject: | head -1 > OUTPUT
+cat <<EOF > EXPECTED
+Subject: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (RT/tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "id:***@len.workgroup")
+ (notmuch-test-wait)
+ (test-output)
+ (delete-other-windows)'
+cat <<EOF > EXPECTED
+ 2016-06-19 Gregor Zattler ┬┬►FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx (inbox unread)
+ 2016-06-19 via RT │╰─►[support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] AutoReply: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx (inbox unread)
+ 2016-06-26 via RT ╰─►[support.xxxxxxxxxxx-xxxxxxxxx-xxxxxxxxx.de #33575] Resolved: FYI: xxxx xxxxxxx xxxxxxxxxxxx xxx (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT

test_done
--
2.11.0
David Bremner
2018-09-04 10:57:14 UTC
Permalink
This is a review suggestion [1] of Tomi. I decided not to squash it
so that the code movement remains clear.

[1]: id:***@guru.guru-group.fi
---
lib/thread.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 62dbd3b3..4711319d 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -402,7 +402,7 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
THREAD_DEBUG("checking message = %s in_reply_to=%s\n",
notmuch_message_get_message_id (message), in_reply_to);

- if (in_reply_to && strlen (in_reply_to) &&
+ if (in_reply_to && (! EMPTY_STRING(in_reply_to)) &&
g_hash_table_lookup_extended (thread->message_hash,
in_reply_to, NULL,
(void **) &parent)) {
--
2.11.0
David Bremner
2018-09-04 10:57:17 UTC
Permalink
This is part 1/2 of changing the reparenting of alleged toplevel
messages to use a "deep" reference rather than just the first one
found.
---
lib/thread.cc | 34 +++++++++++++++++++++++++++-------
1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index bca1fb8f..4f85a829 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -416,20 +416,40 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
static void
_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
{
- bool found = false;
+ size_t max_depth = 0;
+ notmuch_message_t *new_parent;
notmuch_message_t *parent = NULL;
const notmuch_string_list_t *references =
_notmuch_message_get_references (message);
+
+ THREAD_DEBUG("trying to reparent via references: %s\n",
+ notmuch_message_get_message_id (message));
+
for (notmuch_string_node_t *ref_node = references->head;
- ! found && ref_node; ref_node = ref_node->next) {
- if ((found = g_hash_table_lookup_extended (thread->message_hash,
- ref_node->string, NULL,
- (void **) &parent))) {
- _notmuch_message_add_reply (parent, message);
+ ref_node; ref_node = ref_node->next) {
+ THREAD_DEBUG("checking reference=%s\n", ref_node->string);
+ if ((g_hash_table_lookup_extended (thread->message_hash,
+ ref_node->string, NULL,
+ (void **) &new_parent))) {
+ size_t new_depth = _notmuch_message_get_thread_depth (new_parent);
+ THREAD_DEBUG("got depth %lu\n", new_depth);
+ if (new_depth > max_depth || !parent) {
+ THREAD_DEBUG("adding at depth %lu parent=%s\n", new_depth, ref_node->string);
+ max_depth = new_depth;
+ parent = new_parent;
+ }
}
}
- if (! found)
+ if (parent) {
+ THREAD_DEBUG("adding reply %s to parent=%s\n",
+ notmuch_message_get_message_id (message),
+ notmuch_message_get_message_id (parent));
+ _notmuch_message_add_reply (parent, message);
+ } else {
+ THREAD_DEBUG("adding as toplevel %s\n",
+ notmuch_message_get_message_id (message));
_notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
}

static void
--
2.11.0
David Bremner
2018-09-04 10:57:20 UTC
Permalink
In a future commit, we will start trusting In-Reply-To's when they
look sane (i.e. a single message-id). Modify these tests so they will
keep passing (i.e. keep choosing References) when that happens.
---
test/T510-thread-replies.sh | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 8ff2ade9..1dfb86c9 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -45,10 +45,10 @@ expected='[[[{"id": "***@one.com",
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"

-test_begin_subtest "Prefer References to In-Reply-To"
+test_begin_subtest "Prefer References to dodgy In-Reply-To"
add_message '[id]="***@two.com"' \
'[subject]=two'
-add_message '[in-reply-to]="<***@baz.com>"' \
+add_message '[in-reply-to]="Your message of December 31 1999 <***@baz.com>"' \
'[references]="<***@two.com>"' \
'[subject]="Re: two"'
output=$(notmuch show --format=json 'subject:two' | notmuch_json_show_sanitize)
@@ -101,12 +101,12 @@ expected='[[[{"id": "***@three.com", "match": true, "excluded": false,
expected=`echo "$expected" | notmuch_json_show_sanitize`
test_expect_equal_json "$output" "$expected"

-test_begin_subtest "Use last Reference"
+test_begin_subtest "Use last Reference when In-Reply-To is dodgy"
add_message '[id]="***@four.com"' \
'[subject]="four"'
add_message '[id]="***@four.com"' \
'[subject]="not-four"'
-add_message '[in-reply-to]="<***@four.com>"' \
+add_message '[in-reply-to]="<***@four.com> (RFC822 4lyfe)"' \
'[references]="<***@four.com> <***@four.com>"' \
'[subject]="neither"'
output=$(notmuch show --format=json 'subject:four' | notmuch_json_show_sanitize)
--
2.11.0
David Bremner
2018-09-04 10:57:11 UTC
Permalink
This is analogous to DEBUG_DATABASE_SANITY, and is intended to help
debugging and to help users submit bug reports.
---
lib/notmuch-private.h | 1 +
lib/thread.cc | 6 ++++++
2 files changed, 7 insertions(+)

diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index bd9d2747..02cc0e09 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -56,6 +56,7 @@ NOTMUCH_BEGIN_DECLS

#ifdef DEBUG
# define DEBUG_DATABASE_SANITY 1
+# define DEBUG_THREADING 1
# define DEBUG_QUERY 1
#endif

diff --git a/lib/thread.cc b/lib/thread.cc
index b599a97d..6d15c49b 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -24,6 +24,12 @@
#include <gmime/gmime.h>
#include <glib.h> /* GHashTable */

+#ifdef DEBUG_THREADING
+#define THREAD_DEBUG(format, ...) fprintf(stderr, format " (%s).\n", ##__VA_ARGS__, __location__)
+#else
+#define THREAD_DEBUG(format, ...) do {} while (0) /* ignored */
+#endif
+
#define EMPTY_STRING(s) ((s)[0] == '\0')

struct _notmuch_thread {
--
2.11.0
David Bremner
2018-09-04 10:57:19 UTC
Permalink
The current scheme of choosing the replyto (i.e. the default parent
for threading purposes) does not work well for mailers that put
the oldest Reference last.
---
test/T510-thread-replies.sh | 14 ++++++++++++++
test/corpora/threading/parent-priority/cur/child | 11 +++++++++++
test/corpora/threading/parent-priority/cur/grand-child | 10 ++++++++++
test/corpora/threading/parent-priority/cur/root | 7 +++++++
4 files changed, 42 insertions(+)
create mode 100644 test/corpora/threading/parent-priority/cur/child
create mode 100644 test/corpora/threading/parent-priority/cur/grand-child
create mode 100644 test/corpora/threading/parent-priority/cur/root

diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 915e68ef..8ff2ade9 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -209,4 +209,18 @@ End of search results.
EOF
test_expect_equal_file EXPECTED OUTPUT

+test_begin_subtest "trusting reply-to (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "id:B00-***@example.org")
+ (notmuch-test-wait)
+ (test-output)
+ (delete-other-windows)'
+cat <<EOF > EXPECTED
+ 2016-06-17 Alice ┬►root message (inbox unread)
+ 2016-06-18 Alice ╰┬►child message (inbox unread)
+ 2016-06-18 Alice ╰─►grand-child message (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
diff --git a/test/corpora/threading/parent-priority/cur/child b/test/corpora/threading/parent-priority/cur/child
new file mode 100644
index 00000000..23ee6495
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/child
@@ -0,0 +1,11 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: child message
+Message-ID: <B01-***@example.org>
+In-Reply-To: <B00-***@example.org>
+References: <B00--***@example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+This is a normal-ish reply, and has both a references header and an
+in-reply-to header.
+
diff --git a/test/corpora/threading/parent-priority/cur/grand-child b/test/corpora/threading/parent-priority/cur/grand-child
new file mode 100644
index 00000000..028371d4
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/grand-child
@@ -0,0 +1,10 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: grand-child message
+Message-ID: <B01-grand-***@example.org>
+In-Reply-To: <B01-***@example.org>
+References: <B01-***@example.org> <B00-***@example.org>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+This has the references headers in the wrong order, with oldest first.
+Debbugs does this.
diff --git a/test/corpora/threading/parent-priority/cur/root b/test/corpora/threading/parent-priority/cur/root
new file mode 100644
index 00000000..3990843d
--- /dev/null
+++ b/test/corpora/threading/parent-priority/cur/root
@@ -0,0 +1,7 @@
+From: Alice <***@example.org>
+To: Daniel <***@example.org>
+Subject: root message
+Message-ID: <B00-***@example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
--
2.11.0
David Bremner
2018-09-04 10:57:12 UTC
Permalink
There is no public notmuch_message_list_t public interface, so to this
is added to the private API. We use it immediately in thread.cc;
future commits will use it further.
---
lib/messages.c | 9 +++++++++
lib/notmuch-private.h | 3 +++
lib/thread.cc | 2 +-
3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/lib/messages.c b/lib/messages.c
index a88f974f..ba8a9777 100644
--- a/lib/messages.c
+++ b/lib/messages.c
@@ -56,6 +56,15 @@ _notmuch_message_list_add_message (notmuch_message_list_t *list,
list->tail = &node->next;
}

+bool
+_notmuch_message_list_empty (notmuch_message_list_t *list)
+{
+ if (list == NULL)
+ return TRUE;
+
+ return (list->head == NULL);
+}
+
notmuch_messages_t *
_notmuch_messages_create (notmuch_message_list_t *list)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 02cc0e09..590a3451 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -477,6 +477,9 @@ struct _notmuch_messages {
notmuch_message_list_t *
_notmuch_message_list_create (const void *ctx);

+bool
+_notmuch_message_list_empty (notmuch_message_list_t *list);
+
void
_notmuch_message_list_add_message (notmuch_message_list_t *list,
notmuch_message_t *message);
diff --git a/lib/thread.cc b/lib/thread.cc
index 6d15c49b..a0ce4be3 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -425,7 +425,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
if (first_node) {
message = first_node->message;
in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (thread->toplevel_list->head &&
+ if (! _notmuch_message_list_empty (thread->toplevel_list) &&
in_reply_to && strlen (in_reply_to) &&
g_hash_table_lookup_extended (thread->message_hash,
in_reply_to, NULL,
--
2.11.0
David Bremner
2018-09-04 10:57:15 UTC
Permalink
This is mainly to lay out the structure of the final code. The problem
isn't really solved yet, although some very simple cases are
better (hence the fixed test). We need two passes through the messages
because we need to be careful not to re-parent too many messages and
end up without any toplevel messages.
---
lib/thread.cc | 43 +++++++++++++++++++++++++++++++++++++++++--
test/T510-thread-replies.sh | 1 -
2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 4711319d..bca1fb8f 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -414,21 +414,51 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
}

static void
+_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
+{
+ bool found = false;
+ notmuch_message_t *parent = NULL;
+ const notmuch_string_list_t *references =
+ _notmuch_message_get_references (message);
+ for (notmuch_string_node_t *ref_node = references->head;
+ ! found && ref_node; ref_node = ref_node->next) {
+ if ((found = g_hash_table_lookup_extended (thread->message_hash,
+ ref_node->string, NULL,
+ (void **) &parent))) {
+ _notmuch_message_add_reply (parent, message);
+ }
+ }
+ if (! found)
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+}
+
+static void
_resolve_thread_relationships (notmuch_thread_t *thread)
{
notmuch_message_node_t *node, *first_node;
notmuch_message_t *message;
+ void *local;
+ notmuch_message_list_t *maybe_toplevel_list;

first_node = thread->message_list->head;
if (! first_node)
return;

+ local = talloc_new (thread);
+ maybe_toplevel_list = _notmuch_message_list_create (local);
+
for (node = first_node->next; node; node = node->next) {
message = node->message;
if (! _parent_via_in_reply_to (thread, message))
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+ _notmuch_message_list_add_message (maybe_toplevel_list, message);
}

+ for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ _parent_or_toplevel (thread, message);
+ }
/*
* if we reach the end of the list without finding a top-level
* message, that means the thread is a cycle (or set of cycles)
@@ -439,7 +469,15 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
message = first_node->message;
if (_notmuch_message_list_empty(thread->toplevel_list) ||
! _parent_via_in_reply_to (thread, message)) {
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+ /*
+ * If the oldest message happens to be in-reply-to a
+ * missing message, we only check for references if there
+ * is some other candidate for root message.
+ */
+ if (! _notmuch_message_list_empty (thread->toplevel_list))
+ _parent_or_toplevel (thread, message);
+ else
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
}
}

@@ -459,6 +497,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
* correctly in the thread even when an intermediate message is
* missing from the thread.
*/
+ talloc_free (local);
}

/* Create a new notmuch_thread_t object by finding the thread
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 72af50df..c244054a 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -167,7 +167,6 @@ test_expect_equal_json "$output" "$expected"
add_email_corpus threading

test_begin_subtest "reply to ghost"
-test_subtest_known_broken
notmuch show --entire-thread=true id:000-real-***@example.org | grep ^Subject: | head -1 > OUTPUT
cat <<EOF > EXPECTED
Subject: root message
--
2.11.0
David Bremner
2018-09-04 10:57:13 UTC
Permalink
This is not a complete win in code-size, but it makes the code (which
is about to get more complicated) easier to follow.
---
lib/thread.cc | 42 +++++++++++++++++++++++++-----------------
1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index a0ce4be3..62dbd3b3 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -393,12 +393,31 @@ _thread_add_matched_message (notmuch_thread_t *thread,
_thread_add_matched_author (thread, _notmuch_message_get_author (hashed_message));
}

+static bool
+_parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
+ notmuch_message_t *parent;
+ const char *in_reply_to;
+
+ in_reply_to = _notmuch_message_get_in_reply_to (message);
+ THREAD_DEBUG("checking message = %s in_reply_to=%s\n",
+ notmuch_message_get_message_id (message), in_reply_to);
+
+ if (in_reply_to && strlen (in_reply_to) &&
+ g_hash_table_lookup_extended (thread->message_hash,
+ in_reply_to, NULL,
+ (void **) &parent)) {
+ _notmuch_message_add_reply (parent, message);
+ return true;
+ } else {
+ return false;
+ }
+}
+
static void
_resolve_thread_relationships (notmuch_thread_t *thread)
{
notmuch_message_node_t *node, *first_node;
- notmuch_message_t *message, *parent;
- const char *in_reply_to;
+ notmuch_message_t *message;

first_node = thread->message_list->head;
if (! first_node)
@@ -406,13 +425,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)

for (node = first_node->next; node; node = node->next) {
message = node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (in_reply_to && strlen (in_reply_to) &&
- g_hash_table_lookup_extended (thread->message_hash,
- in_reply_to, NULL,
- (void **) &parent))
- _notmuch_message_add_reply (parent, message);
- else
+ if (! _parent_via_in_reply_to (thread, message))
_notmuch_message_list_add_message (thread->toplevel_list, message);
}

@@ -424,15 +437,10 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
*/
if (first_node) {
message = first_node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (! _notmuch_message_list_empty (thread->toplevel_list) &&
- in_reply_to && strlen (in_reply_to) &&
- g_hash_table_lookup_extended (thread->message_hash,
- in_reply_to, NULL,
- (void **) &parent))
- _notmuch_message_add_reply (parent, message);
- else
+ if (_notmuch_message_list_empty(thread->toplevel_list) ||
+ ! _parent_via_in_reply_to (thread, message)) {
_notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
}

/* XXX this could be made conditional on messages being inserted
--
2.11.0
David Bremner
2018-09-04 10:57:16 UTC
Permalink
This will be used in reparenting messages without useful in-reply-to,
but with useful references
---
lib/message.cc | 23 +++++++++++++++++++++++
lib/notmuch-private.h | 6 ++++++
2 files changed, 29 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 9d090bcf..6f2f6345 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -32,6 +32,7 @@ struct _notmuch_message {
int frozen;
char *message_id;
char *thread_id;
+ size_t thread_depth;
char *in_reply_to;
notmuch_string_list_t *tag_list;
notmuch_string_list_t *filename_term_list;
@@ -118,6 +119,9 @@ _notmuch_message_create_for_document (const void *talloc_owner,
/* the message is initially not synchronized with Xapian */
message->last_view = 0;

+ /* Calculated after the thread structure is computed */
+ message->thread_depth = 0;
+
/* Each of these will be lazily created as needed. */
message->message_id = NULL;
message->thread_id = NULL;
@@ -599,6 +603,25 @@ _notmuch_message_add_reply (notmuch_message_t *message,
_notmuch_message_list_add_message (message->replies, reply);
}

+size_t
+_notmuch_message_get_thread_depth (notmuch_message_t *message) {
+ return message->thread_depth;
+}
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+ size_t depth)
+{
+ message->thread_depth = depth;
+
+ for (notmuch_messages_t *messages = _notmuch_messages_create (message->replies);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages)) {
+ notmuch_message_t *child = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (child, depth+1);
+ }
+}
+
const notmuch_string_list_t *
_notmuch_message_get_references (notmuch_message_t *message)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 590a3451..94fc54d7 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -543,6 +543,12 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
const char *
_notmuch_message_get_thread_id_only(notmuch_message_t *message);

+size_t _notmuch_message_get_thread_depth (notmuch_message_t *message);
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+ size_t depth);
+
notmuch_message_list_t *
_notmuch_message_sort_subtrees (void *ctx, notmuch_message_list_t *list);
--
2.11.0
David Bremner
2018-09-04 10:57:21 UTC
Permalink
It's only few lines, but we already define the function, so make it
usable elsewhere
---
util/string-util.c | 2 +-
util/string-util.h | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/util/string-util.c b/util/string-util.c
index b0108811..fc2058e0 100644
--- a/util/string-util.c
+++ b/util/string-util.c
@@ -141,7 +141,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
return 0;
}

-static const char*
+const char*
skip_space (const char *str)
{
while (*str && isspace ((unsigned char) *str))
diff --git a/util/string-util.h b/util/string-util.h
index 97770614..4c110a20 100644
--- a/util/string-util.h
+++ b/util/string-util.h
@@ -77,6 +77,8 @@ unsigned int strcase_hash (const void *ptr);

void strip_trailing (char *str, char ch);

+const char* skip_space (const char *str);
+
#ifdef __cplusplus
}
#endif
--
2.11.0
David Bremner
2018-09-04 10:57:18 UTC
Permalink
We (finally) implement the XXX comment. It requires a bit of care not
to reparent all of the possible toplevel messages.

_notmuch_messages_has_next is not ready to be a public function yet,
since it punts on the mset case. We know in the one case it is called,
the notmuch_messages_t is just a regular list / iterator.
---
lib/messages.c | 12 ++++++++++
lib/notmuch-private.h | 3 +++
lib/thread.cc | 53 ++++++++++++++++++++++-----------------------
test/T510-thread-replies.sh | 1 -
4 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/lib/messages.c b/lib/messages.c
index ba8a9777..04fa19f8 100644
--- a/lib/messages.c
+++ b/lib/messages.c
@@ -110,6 +110,18 @@ notmuch_messages_valid (notmuch_messages_t *messages)
return (messages->iterator != NULL);
}

+bool
+_notmuch_messages_has_next (notmuch_messages_t *messages)
+{
+ if (! notmuch_messages_valid (messages))
+ return false;
+
+ if (! messages->is_of_list_type)
+ INTERNAL_ERROR("_notmuch_messages_has_next not implimented for msets");
+
+ return (messages->iterator->next != NULL);
+}
+
notmuch_message_t *
notmuch_messages_get (notmuch_messages_t *messages)
{
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 94fc54d7..9eca0789 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -487,6 +487,9 @@ _notmuch_message_list_add_message (notmuch_message_list_t *list,
notmuch_messages_t *
_notmuch_messages_create (notmuch_message_list_t *list);

+bool
+_notmuch_messages_has_next (notmuch_messages_t *messages);
+
/* query.cc */

bool
diff --git a/lib/thread.cc b/lib/thread.cc
index 4f85a829..47c90664 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -473,12 +473,6 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
_notmuch_message_list_add_message (maybe_toplevel_list, message);
}

- for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
- notmuch_messages_valid (roots);
- notmuch_messages_move_to_next (roots)) {
- notmuch_message_t *message = notmuch_messages_get (roots);
- _parent_or_toplevel (thread, message);
- }
/*
* if we reach the end of the list without finding a top-level
* message, that means the thread is a cycle (or set of cycles)
@@ -487,36 +481,41 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
*/
if (first_node) {
message = first_node->message;
- if (_notmuch_message_list_empty(thread->toplevel_list) ||
+ THREAD_DEBUG("checking first message %s\n",
+ notmuch_message_get_message_id (message));
+
+ if (_notmuch_message_list_empty (maybe_toplevel_list) ||
! _parent_via_in_reply_to (thread, message)) {
- /*
- * If the oldest message happens to be in-reply-to a
- * missing message, we only check for references if there
- * is some other candidate for root message.
- */
- if (! _notmuch_message_list_empty (thread->toplevel_list))
- _parent_or_toplevel (thread, message);
- else
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+
+ THREAD_DEBUG("adding first message as toplevel = %s\n",
+ notmuch_message_get_message_id (message));
+ _notmuch_message_list_add_message (maybe_toplevel_list, message);
}
}

+ for (notmuch_messages_t *messages = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages))
+ {
+ notmuch_message_t *message = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (message, 0);
+ }
+
+ for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ if (_notmuch_messages_has_next (roots) || ! _notmuch_message_list_empty (thread->toplevel_list))
+ _parent_or_toplevel (thread, message);
+ else
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
+
/* XXX this could be made conditional on messages being inserted
* (out of order) in later passes
*/
thread->toplevel_list = _notmuch_message_sort_subtrees (thread, thread->toplevel_list);

-
- /* XXX: After scanning through the entire list looking for parents
- * via "In-Reply-To", we should do a second pass that looks at the
- * list of messages IDs in the "References" header instead. (And
- * for this the parent would be the "deepest" message of all the
- * messages found in the "References" list.)
- *
- * Doing this will allow messages and sub-threads to be positioned
- * correctly in the thread even when an intermediate message is
- * missing from the thread.
- */
talloc_free (local);
}

diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index c244054a..915e68ef 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -174,7 +174,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT

test_begin_subtest "reply to ghost (tree view)"
-test_subtest_known_broken
test_emacs '(notmuch-tree "id:000-real-***@example.org")
(notmuch-test-wait)
(test-output)
--
2.11.0
David Bremner
2018-09-04 10:57:22 UTC
Permalink
The idea is that if a message-id parses with this function, the MUA
generating it was probably sane, and in particular it's probably safe
to use the result as a parent from In-Reply-to.
---
lib/message-id.c | 30 ++++++++++++++++++++
lib/notmuch-private.h | 14 ++++++++++
test/Makefile.local | 6 +++-
test/T710-message-id.sh | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
test/message-id-parse.c | 26 ++++++++++++++++++
5 files changed, 148 insertions(+), 1 deletion(-)
create mode 100755 test/T710-message-id.sh
create mode 100644 test/message-id-parse.c

diff --git a/lib/message-id.c b/lib/message-id.c
index d7541d50..e71ce9f4 100644
--- a/lib/message-id.c
+++ b/lib/message-id.c
@@ -1,4 +1,5 @@
#include "notmuch-private.h"
+#include "string-util.h"

/* Advance 'str' past any whitespace or RFC 822 comments. A comment is
* a (potentially nested) parenthesized sequence with '\' used to
@@ -94,3 +95,32 @@ _notmuch_message_id_parse (void *ctx, const char *message_id, const char **next)

return result;
}
+
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id)
+{
+ const char *s, *end;
+
+ if (message_id == NULL || *message_id == '\0')
+ return NULL;
+
+ s = skip_space (message_id);
+ if (*s == '<')
+ s++;
+ else
+ return NULL;
+
+ for (end = s; *end && *end != '>'; end++)
+ if (isspace (*end))
+ return NULL;
+
+ if (*end != '>')
+ return NULL;
+ else {
+ const char *last = skip_space (end + 1);
+ if (*last != '\0')
+ return NULL;
+ }
+
+ return talloc_strndup (ctx, s, end - s);
+}
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 9eca0789..df32d39c 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -533,6 +533,20 @@ _notmuch_query_count_documents (notmuch_query_t *query,
char *
_notmuch_message_id_parse (void *ctx, const char *message_id, const char **next);

+/* Parse a message-id, discarding leading and trailing whitespace, and
+ * '<' and '>' delimiters.
+ *
+ * Apply a probably-stricter-than RFC definition of what is allowed in
+ * a message-id. In particular, forbid whitespace.
+ *
+ * Returns a newly talloc'ed string belonging to 'ctx'.
+ *
+ * Returns NULL if there is any error parsing the message-id.
+ */
+
+char *
+_notmuch_message_id_parse_strict (void *ctx, const char *message_id);
+

/* message.cc */

diff --git a/test/Makefile.local b/test/Makefile.local
index 1a0ab813..1cf09778 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -15,6 +15,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o)
$(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS)

+$(dir)/message-id-parse: $(dir)/message-id-parse.o lib/libnotmuch.a util/libnotmuch_util.a
+ $(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)
+
$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libnotmuch_util.a
$(call quiet,CC) $^ -o $@ $(LDFLAGS) $(TALLOC_LDFLAGS)

@@ -50,7 +53,8 @@ test_main_srcs=$(dir)/arg-test.c \
$(dir)/smtp-dummy.c \
$(dir)/symbol-test.cc \
$(dir)/make-db-version.cc \
- $(dir)/ghost-report.cc
+ $(dir)/ghost-report.cc \
+ $(dir)/message-id-parse.c

test_srcs=$(test_main_srcs) $(dir)/database-test.c

diff --git a/test/T710-message-id.sh b/test/T710-message-id.sh
new file mode 100755
index 00000000..e73d6ba9
--- /dev/null
+++ b/test/T710-message-id.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+test_description="message id parsing"
+
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "good message ids"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<***@nikula.org>
+<***@strange.none>
+<1258787708-21121-2-git-send-email-***@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: ***@nikula.org
+GOOD: ***@strange.none
+GOOD: 1258787708-21121-2-git-send-email-***@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "leading and trailing space is OK"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+ <***@nikula.org>
+<***@strange.none>
+ <1258787708-21121-2-git-send-email-***@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+GOOD: ***@nikula.org
+GOOD: ***@strange.none
+GOOD: 1258787708-21121-2-git-send-email-***@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "<> delimeters are required"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+***@nikula.org>
+<***@strange.none
+1258787708-21121-2-git-send-email-***@keithp.com
+EOF
+cat <<EOF >EXPECTED
+BAD: ***@nikula.org>
+BAD: <***@strange.none
+BAD: 1258787708-21121-2-git-send-email-***@keithp.com
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "embedded whitespace is forbidden"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 ***@nikula.org>
+<1530507300.raoomurnbf.astroid @strange.none>
+<1258787708-21121- 2-git-send-email-***@keithp.com>
+EOF
+cat <<EOF >EXPECTED
+BAD: <018b1a8f2d1df62e804ce88b65401304832dfbbf.1346614915 ***@nikula.org>
+BAD: <1530507300.raoomurnbf.astroid @strange.none>
+BAD: <1258787708-21121- 2-git-send-email-***@keithp.com>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_begin_subtest "folded real life bad In-Reply-To values"
+${TEST_DIRECTORY}/message-id-parse <<EOF >OUTPUT
+<***@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+<***@gaara.hadrons.org> <***@seneca.home.org>
+Your message of Tue, 09 Dec 2014 13:21:11 +0100. <***@liber>
+EOF
+cat <<EOF >EXPECTED
+BAD: <***@chiark.greenend.org.uk> (Ian Jackson's message of "Mon, 5 Dec 2016 14:41:01 +0000")
+BAD: <***@gaara.hadrons.org> <***@seneca.home.org>
+BAD: Your message of Tue, 09 Dec 2014 13:21:11 +0100. <***@liber>
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+
+test_done
diff --git a/test/message-id-parse.c b/test/message-id-parse.c
new file mode 100644
index 00000000..752eb1fd
--- /dev/null
+++ b/test/message-id-parse.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <talloc.h>
+#include "notmuch-private.h"
+
+int
+main (unused (int argc), unused (char **argv))
+{
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t nread;
+ void *local = talloc_new (NULL);
+
+ while ((nread = getline (&line, &len, stdin)) != -1) {
+ int last = strlen (line) - 1;
+ if (line[last] == '\n')
+ line[last] = '\0';
+
+ char *mid = _notmuch_message_id_parse_strict (local, line);
+ if (mid)
+ printf ("GOOD: %s\n", mid);
+ else
+ printf ("BAD: %s\n", line);
+ }
+
+ talloc_free (local);
+}
--
2.11.0
David Bremner
2018-09-04 10:57:23 UTC
Permalink
As reported by Sean Whitton, there are mailers (in particular the
Debian Bug Tracking System) that have sensible In-Reply-To headers,
but un-useful-for-notmuch References (in particular with the BTS, the
oldest reference is last). I looked at a sample of about 200K
messages, and only about 0.5% these had something other than a single
message-id in In-Reply-To. On this basis, if we see a single
message-id in In-Reply-To, consider that as authoritative.
---
lib/add-message.cc | 20 +++++++++++++++-----
test/T510-thread-replies.sh | 1 -
2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/lib/add-message.cc b/lib/add-message.cc
index f5fac8be..da37032c 100644
--- a/lib/add-message.cc
+++ b/lib/add-message.cc
@@ -227,7 +227,7 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
const char **thread_id)
{
GHashTable *parents = NULL;
- const char *refs, *in_reply_to, *in_reply_to_message_id;
+ const char *refs, *in_reply_to, *in_reply_to_message_id, *strict_message_id = NULL;
const char *last_ref_message_id, *this_message_id;
GList *l, *keys = NULL;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
@@ -242,14 +242,24 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
parents, refs);

in_reply_to = _notmuch_message_file_get_header (message_file, "in-reply-to");
+ if (in_reply_to)
+ strict_message_id = _notmuch_message_id_parse_strict (message,
+ in_reply_to);
+
in_reply_to_message_id = parse_references (message,
this_message_id,
parents, in_reply_to);

- /* For the parent of this message, use the last message ID of the
- * References header, if available. If not, fall back to the
- * first message ID in the In-Reply-To header. */
- if (last_ref_message_id) {
+ /* For the parent of this message, use
+ * 1) the In-Reply-To header, if it looks sane, otherwise
+ * 2) the last message ID of the References header, if available.
+ * 3) Otherwise, fall back to the first message ID in
+ * the In-Reply-To header.
+ */
+
+ if (strict_message_id) {
+ _notmuch_message_add_term (message, "replyto", strict_message_id);
+ } else if (last_ref_message_id) {
_notmuch_message_add_term (message, "replyto",
last_ref_message_id);
} else if (in_reply_to_message_id) {
diff --git a/test/T510-thread-replies.sh b/test/T510-thread-replies.sh
index 1dfb86c9..5d6bea7e 100755
--- a/test/T510-thread-replies.sh
+++ b/test/T510-thread-replies.sh
@@ -210,7 +210,6 @@ EOF
test_expect_equal_file EXPECTED OUTPUT

test_begin_subtest "trusting reply-to (tree view)"
-test_subtest_known_broken
test_emacs '(notmuch-tree "id:B00-***@example.org")
(notmuch-test-wait)
(test-output)
--
2.11.0
David Bremner
2018-09-06 11:11:58 UTC
Permalink
This is mainly coding style issues pointed out by Tomi, along with
moving some of the low level list access into two new functions. The
interdiff is a bit noisy since I dropped the creation of
debug_print.{c,h}.
I'm going to optimistically claim this version is ready to go, but
feel free to point out any issues.
I have pushed this version. Let the complaining from non-reviewers
commence ;).

d

Loading...