Discussion:
Proposed New sort API
David Bremner
2017-12-10 15:49:54 UTC
Permalink
I started looking at William's sorting patches [1], but the
proliferation of sorting options bugged me a bit. I decided to sketch
out a new more flexible API.

In the new API, there is is a sort "key", currently mapped one-to-one
to value slots, but potentially could do more sophisticated things
like making keys up dynamically (e.g. preprocessing subjects).

The second parameter is a sort "type". Currently this is just
ascending or descending, but other potential options include
sort_by_relevance_then_value

[1]: id:20170926053547.18564-1-***@jb55.com
David Bremner
2017-12-10 15:49:55 UTC
Permalink
---
lib/notmuch.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lib/query.cc | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 124 insertions(+)

diff --git a/lib/notmuch.h b/lib/notmuch.h
index 39759b7a..ae592e93 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -773,6 +773,10 @@ notmuch_query_create (notmuch_database_t *database,
* Sort values for notmuch_query_set_sort.
*/
typedef enum {
+ /**
+ * Value was not set
+ */
+ NOTMUCH_SORT_UNSET = -1,
/**
* Oldest first.
*/
@@ -791,6 +795,42 @@ typedef enum {
NOTMUCH_SORT_UNSORTED
} notmuch_sort_t;

+/**
+ * Sort key values for notmuch_query_set_sort_key
+ */
+typedef enum {
+ /**
+ * Do not sort.
+ */
+ NOTMUCH_SORT_KEY_NONE=0,
+ /**
+ * Sort by timestamp (from Date: header)
+ */
+ NOTMUCH_SORT_KEY_TIMESTAMP,
+ /**
+ * Sort by message-id.
+ */
+ NOTMUCH_SORT_KEY_MESSAGE_ID,
+} notmuch_sort_key_t;
+
+/**
+ * Sort type values for notmuch_query_set_sort_type
+ */
+typedef enum {
+ /**
+ * Do not sort.
+ */
+ NOTMUCH_SORT_TYPE_NONE=0,
+ /**
+ * Ascending order
+ */
+ NOTMUCH_SORT_TYPE_ASCENDING,
+ /**
+ * Descending order
+ */
+ NOTMUCH_SORT_TYPE_DESCENDING,
+} notmuch_sort_type_t;
+
/**
* Return the query_string of this query. See notmuch_query_create.
*/
@@ -860,6 +900,32 @@ notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort);
notmuch_sort_t
notmuch_query_get_sort (const notmuch_query_t *query);

+/**
+ * Specify the sort key for this query.
+ */
+void
+notmuch_query_set_sort_key (notmuch_query_t *query, notmuch_sort_key_t key);
+
+/**
+ * Return the sort_key specified for this query. See
+ * notmuch_query_set_sort_key.
+ */
+notmuch_sort_key_t
+notmuch_query_get_sort_key (const notmuch_query_t *query);
+
+/**
+ * Specify the sort type for this query.
+ */
+void
+notmuch_query_set_sort_type (notmuch_query_t *query, notmuch_sort_type_t type);
+
+/**
+ * Return the sort_type specified for this query. See
+ * notmuch_query_set_sort_type.
+ */
+notmuch_sort_type_t
+notmuch_query_get_sort_type (const notmuch_query_t *query);
+
/**
* Add a tag that will be excluded from the query results by default.
* This exclusion will be ignored if this tag appears explicitly in
diff --git a/lib/query.cc b/lib/query.cc
index d633fa3d..c3b228fb 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -27,6 +27,8 @@ struct _notmuch_query {
notmuch_database_t *notmuch;
const char *query_string;
notmuch_sort_t sort;
+ notmuch_sort_key_t sort_key;
+ notmuch_sort_type_t sort_type;
notmuch_string_list_t *exclude_terms;
notmuch_exclude_t omit_excluded;
bool parsed;
@@ -107,6 +109,9 @@ notmuch_query_create (notmuch_database_t *notmuch,

query->sort = NOTMUCH_SORT_NEWEST_FIRST;

+ query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+ query->sort_type = NOTMUCH_SORT_TYPE_DESCENDING;
+
query->exclude_terms = _notmuch_string_list_create (query);

query->omit_excluded = NOTMUCH_EXCLUDE_TRUE;
@@ -168,7 +173,33 @@ notmuch_query_set_omit_excluded (notmuch_query_t *query,
void
notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
{
+ /* only for use by _get_sort */
query->sort = sort;
+
+ /* translate to new API */
+ switch (sort) {
+ case NOTMUCH_SORT_UNSET:
+ /* this probably indicates an error, but it seems relatively
+ * harmless, and this code path is deprecated */
+ break;
+ case NOTMUCH_SORT_OLDEST_FIRST:
+ query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+ query->sort_type = NOTMUCH_SORT_TYPE_ASCENDING;
+ break;
+ case NOTMUCH_SORT_NEWEST_FIRST:
+ query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+ query->sort_type = NOTMUCH_SORT_TYPE_DESCENDING;
+ break;
+ case NOTMUCH_SORT_MESSAGE_ID:
+ query->sort_key = NOTMUCH_SORT_KEY_MESSAGE_ID;
+ query->sort_type = NOTMUCH_SORT_TYPE_ASCENDING;
+ break;
+ case NOTMUCH_SORT_UNSORTED:
+ query->sort_key = NOTMUCH_SORT_KEY_NONE;
+ query->sort_type = NOTMUCH_SORT_TYPE_NONE;
+ break;
+ }
+
}

notmuch_sort_t
@@ -177,6 +208,32 @@ notmuch_query_get_sort (const notmuch_query_t *query)
return query->sort;
}

+void
+notmuch_query_set_sort_key (notmuch_query_t *query, notmuch_sort_key_t sort_key)
+{
+ query->sort = NOTMUCH_SORT_UNSET;
+ query->sort_key = sort_key;
+}
+
+notmuch_sort_key_t
+notmuch_query_get_sort_key (const notmuch_query_t *query)
+{
+ return query->sort_key;
+}
+
+void
+notmuch_query_set_sort_type (notmuch_query_t *query, notmuch_sort_type_t sort_type)
+{
+ query->sort = NOTMUCH_SORT_UNSET;
+ query->sort_type = sort_type;
+}
+
+notmuch_sort_type_t
+notmuch_query_get_sort_type (const notmuch_query_t *query)
+{
+ return query->sort_type;
+}
+
notmuch_status_t
notmuch_query_add_tag_exclude (notmuch_query_t *query, const char *tag)
{
@@ -331,6 +388,7 @@ _notmuch_query_search_documents (notmuch_query_t *query,
enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, false);
break;
case NOTMUCH_SORT_UNSORTED:
+ case NOTMUCH_SORT_UNSET:
break;
}
--
2.15.0
William Casarin
2018-10-08 20:51:20 UTC
Permalink
Post by David Bremner
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -773,6 +773,10 @@ notmuch_query_create (notmuch_database_t *database,
* Sort values for notmuch_query_set_sort.
*/
typedef enum {
+ /**
+ * Value was not set
+ */
+ NOTMUCH_SORT_UNSET = -1,
/**
* Oldest first.
*/
@@ -791,6 +795,42 @@ typedef enum {
NOTMUCH_SORT_UNSORTED
} notmuch_sort_t;
I'm adding a few new keys (from, subject, etc). I don't want to
duplicate them in the old notmuch_sort_t enum, I assume to move to the
new API there should be something like this as well?


diff --git a/lib/notmuch.h b/lib/notmuch.h
index d26cc09b..81a9d72f 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -787,10 +787,14 @@ typedef enum {
NOTMUCH_SORT_NEWEST_FIRST,
/**
* Sort by message-id.
*/
NOTMUCH_SORT_MESSAGE_ID,
+ /**
+ * Sort by keys.
+ */
+ NOTMUCH_SORT_KEYS,
/**
* Do not sort.
*/
NOTMUCH_SORT_UNSORTED
} notmuch_sort_t;


I've been wanting to sort by multiple keys recently so I figured
generalizing this to multiple (key, order) pairs might be a good idea
now that we're rethinking the sort api.

so then if I wanted to add my current changes I could just add a few
more keys like so:


diff --git a/lib/notmuch.h b/lib/notmuch.h
index a035bdb2..d26cc09b 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -809,10 +809,18 @@ typedef enum {
NOTMUCH_SORT_KEY_TIMESTAMP,
/**
* Sort by message-id.
*/
NOTMUCH_SORT_KEY_MESSAGE_ID,
+ /**
+ * Sort by subject.
+ */
+ NOTMUCH_SORT_KEY_SUBJECT,
+ /**
+ * Sort by from.
+ */
+ NOTMUCH_SORT_KEY_FROM,
} notmuch_sort_key_t;


Then we could eventually do something like:

--sort date:desc,from:asc

Which would mean: sort by date, and then by from.

Perhaps it could do something sensible by default without order
qualifiers as well:

--sort date,from

LMK what you think


Will
David Bremner
2018-10-20 11:51:53 UTC
Permalink
Post by William Casarin
I'm adding a few new keys (from, subject, etc). I don't want to
duplicate them in the old notmuch_sort_t enum, I assume to move to the
new API there should be something like this as well?
It's been a while since I was thinking about this, but I suspect my
intent was that new keys could be supported (only) by the new API.
We basically want to deprecate the old API as soon as the new one exists
to transition to.
Post by William Casarin
--sort date:desc,from:asc
Which would mean: sort by date, and then by from.
I think the "notmuch way" would be to spell out descending and
ascending, and rely on completion to make that less painful to
type. I only mention it because I was confused by what "desc" and "asc"
stood for. For some reason descriptor and ascii jumped to my
pre-caffeinated mind.
Post by William Casarin
Perhaps it could do something sensible by default without order
--sort date,from
I would hope so, yes.
David Bremner
2017-12-10 15:49:56 UTC
Permalink
---
lib/query.cc | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/query.cc b/lib/query.cc
index c3b228fb..0e529ca8 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -377,18 +377,18 @@ _notmuch_query_search_documents (notmuch_query_t *query,

enquire.set_weighting_scheme (Xapian::BoolWeight());

- switch (query->sort) {
- case NOTMUCH_SORT_OLDEST_FIRST:
- enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, false);
- break;
- case NOTMUCH_SORT_NEWEST_FIRST:
- enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, true);
+ bool sort_reverse = false;
+ if (query->sort_type == NOTMUCH_SORT_TYPE_DESCENDING)
+ sort_reverse = true;
+
+ switch (query->sort_key) {
+ case NOTMUCH_SORT_KEY_TIMESTAMP:
+ enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, sort_reverse);
break;
- case NOTMUCH_SORT_MESSAGE_ID:
- enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, false);
+ case NOTMUCH_SORT_KEY_MESSAGE_ID:
+ enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, sort_reverse);
break;
- case NOTMUCH_SORT_UNSORTED:
- case NOTMUCH_SORT_UNSET:
+ case NOTMUCH_SORT_KEY_NONE:
break;
}
--
2.15.0
William Casarin
2017-12-10 19:37:49 UTC
Permalink
Post by David Bremner
I started looking at William's sorting patches [1], but the
proliferation of sorting options bugged me a bit. I decided to sketch
out a new more flexible API.
In the new API, there is is a sort "key", currently mapped one-to-one
to value slots, but potentially could do more sophisticated things
like making keys up dynamically (e.g. preprocessing subjects).
The second parameter is a sort "type". Currently this is just
ascending or descending, but other potential options include
sort_by_relevance_then_value
This is great, this is what I was thinking about as well. I'll try
refactoring my sorting branch on top of these changes.
--
https://jb55.com
William Casarin
2018-10-09 03:05:17 UTC
Permalink
Post by David Bremner
I started looking at William's sorting patches [1], but the
proliferation of sorting options bugged me a bit. I decided to sketch
out a new more flexible API.
In the new API, there is is a sort "key", currently mapped one-to-one
to value slots, but potentially could do more sophisticated things
like making keys up dynamically (e.g. preprocessing subjects).
The second parameter is a sort "type". Currently this is just
ascending or descending, but other potential options include
sort_by_relevance_then_value
Another thought I had that I wanted to throw out there for
consideration. It would be nice to be able to sort by "popular" threads,
aka sort by the number of messages in each thread. Not sure if this is
an easy thing to do at the query level?


Cheers,
Will
David Bremner
2018-10-20 11:43:54 UTC
Permalink
Post by William Casarin
Post by David Bremner
I started looking at William's sorting patches [1], but the
proliferation of sorting options bugged me a bit. I decided to sketch
out a new more flexible API.
In the new API, there is is a sort "key", currently mapped one-to-one
to value slots, but potentially could do more sophisticated things
like making keys up dynamically (e.g. preprocessing subjects).
The second parameter is a sort "type". Currently this is just
ascending or descending, but other potential options include
sort_by_relevance_then_value
Another thought I had that I wanted to throw out there for
consideration. It would be nice to be able to sort by "popular" threads,
aka sort by the number of messages in each thread. Not sure if this is
an easy thing to do at the query level?
I don't see how to manage it as a xapian query. In the current database
schema xapian doesn't really know about threads, except that messages
know what thread they belong to.
William Casarin
2018-10-20 17:15:30 UTC
Permalink
Post by David Bremner
Post by William Casarin
Another thought I had that I wanted to throw out there for
consideration. It would be nice to be able to sort by "popular" threads,
aka sort by the number of messages in each thread. Not sure if this is
an easy thing to do at the query level?
I don't see how to manage it as a xapian query. In the current database
schema xapian doesn't really know about threads, except that messages
know what thread they belong to.
I noticed notmuch-search shows the number of messages in the thread,
perhaps you could just sort on that value when returning the results?
David Bremner
2018-10-21 11:44:07 UTC
Permalink
Post by William Casarin
Post by David Bremner
Post by William Casarin
Another thought I had that I wanted to throw out there for
consideration. It would be nice to be able to sort by "popular" threads,
aka sort by the number of messages in each thread. Not sure if this is
an easy thing to do at the query level?
I don't see how to manage it as a xapian query. In the current database
schema xapian doesn't really know about threads, except that messages
know what thread they belong to.
I noticed notmuch-search shows the number of messages in the thread,
perhaps you could just sort on that value when returning the results?
AFAIU, currently there is no explicit sorting of threads returned, any
sorting is a side effect of the sorting of the underlying
messages. Since the threads are generated lazily, for queries generating
a large number of threads such a post thread generation sorting would
generate a noticeable delay. On the other hand, if it was optional, I
guess that would be OK. It would introduce some complications and new
code paths orthogonal to the other stuff we've been talking about.

d

Loading...