diff --git a/.gitignore b/.gitignore index 9718006..190ccbe 100644 --- a/.gitignore +++ b/.gitignore @@ -18,8 +18,6 @@ dat/ncdu/changes2.log dat/ncdu/changes2.md dat/ncdu/man.md dat/ncdu/man.pod -dat/ncdu/man2.md -dat/ncdu/man2.pod dat/nginx-confgen/changes.log dat/nginx-confgen/changes.md dat/nginx-confgen/man.md @@ -55,6 +53,7 @@ pub/doc/ncdu2.html pub/doc/pwlookup.html pub/doc/sqlaccess.html pub/doc/sqlobject.html +pub/doc/vndbtitles.html pub/dump.html pub/dump/awshrink.html pub/dump/btrfssize.html @@ -85,7 +84,6 @@ pub/ncdu/changes2.html pub/ncdu/feed.atom pub/ncdu/jsonfmt.html pub/ncdu/man.html -pub/ncdu/man2.html pub/ncdu/scr.html pub/nginx-confgen.html pub/nginx-confgen/changes.html diff --git a/Makefile b/Makefile index ed2bac1..a2b7ae3 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ PAGES=\ "doc/pwlookup.md"\ "doc/sqlaccess.md"\ "doc/sqlobject.md"\ + "doc/vndbtitles.md"\ "dump.md"\ "dump/awshrink.md"\ "dump/btrfssize.md"\ @@ -49,8 +50,7 @@ PAGES=\ "ncdu/changes.log https://g.blicky.net/ncdu.git/plain/ChangeLog?h=master Ncdu 1.x Release History"\ "ncdu/changes2.log https://g.blicky.net/ncdu.git/plain/ChangeLog?h=zig Ncdu 2.x Release History"\ "ncdu/jsonfmt.md"\ - "ncdu/man.pod https://g.blicky.net/ncdu.git/plain/doc/ncdu.pod?h=master Ncdu 1.x Manual"\ - "ncdu/man2.pod https://g.blicky.net/ncdu.git/plain/ncdu.pod?h=zig Ncdu 2.x Manual"\ + "ncdu/man.pod https://g.blicky.net/ncdu.git/plain/ncdu.pod?h=zig Ncdu Manual"\ "ncdu/scr.md"\ "nginx-confgen.md"\ "nginx-confgen/changes.log https://g.blicky.net/nginx-confgen.git/plain/ChangeLog Nginx-confgen Release History"\ diff --git a/dat/doc.md b/dat/doc.md index c9fa68e..af14134 100644 --- a/dat/doc.md +++ b/dat/doc.md @@ -6,6 +6,10 @@ rare occasions are published on this page. ## Articles That May As Well Be Considered Blog Posts +`2022-12-23` - [Overengineering Title Preferences for VNDB](/doc/vndbtitles) +: A detailed look into the implementation of title preferences on VNDB and a + bunch of PostgreSQL query optimization experiments. + `2021-07-22` - [Ncdu 2: Less hungry and more Ziggy](/doc/ncdu2) `2019-08-13` - [From SQL to Nested Data Structures](/doc/sqlobject) diff --git a/dat/doc/vndbtitles.md b/dat/doc/vndbtitles.md new file mode 100644 index 0000000..fc558c6 --- /dev/null +++ b/dat/doc/vndbtitles.md @@ -0,0 +1,614 @@ +% Overengineering Title Preferences for VNDB + +(Published on **2022-12-23**) + +Alternative title: Fighting with the PostgreSQL query planner. + +What people want me to work on: cool new features and user interface updates +for [VNDB](https://vndb.org/).
+What I actually work on: rewriting existing features and silly microbenchmarks. + +In this post I elaborate on the technical implementation of one particular site +feature and my attempts to improve on it. + + +# Context: Title Preferences + +The VNDB schema has these two tables: + +```sql +CREATE TABLE vn ( + id vndbid PRIMARY KEY, + olang language NOT NULL, -- "original language" + -- and more +); + +CREATE TABLE vn_titles ( + id vndbid NOT NULL, -- refers to vn.id + lang language NOT NULL, + official boolean NOT NULL, + title text NOT NULL, + latin text, + PRIMARY KEY(id, lang) +); +``` + +That is to say, we've got visual novel (`vn`) entries, and each entry can have +a bunch of titles, one for each language it has been published in. The +`vn.olang` column tells us which language the VN has originally been published +in and hence which title to consider as primary. Titles can be unofficial and +titles written in a non-latin script (CJK, Cyrillic, etc) also have a +transcription to the latin alphabet. + +When a VN entry is displayed on the website, the `latin` title of the original +language (or just the `title` if `latin` is NULL) is used as the main title, +and, when applicable, the `title` in the original script is used as an +alternative title. The alternative title is often displayed below the main +title or used as HTML "title" attribute in listings and links. To take +[G-senjou no Maou](https://vndb.org/v211) as an example: + +![Annotated screenshot of the VNDB G-senjou no Maou page](/img/vndbtitles.jpg) + +People who can read Japanese will generally not care for that latin +transcription, while other people may prefer to see an English title when one +is available. Hence, [VNDB lets users configure which titles to +display](https://vndb.org/t17804). These settings are pretty flexible: users +can list multiple languages, assign a priority to each and indicate whether to +accept only original or official titles. The selection algorithm for the main +and alternative titles can be configured separately. Here's an [elaborate +example](/img/vndbtitleprefs.png). + + +# More Context: Current Implementation + +To ease the migration from the old situation where a VN entry only had a single +title, I implemented this feature in SQL by (ab)using VIEWs. + +The database schema now has the following view, which acts as an alias to the +`vn` table and adds a `title` and `alttitle` column: + +```sql +CREATE VIEW vnt AS + SELECT v.* + , COALESCE(vo.latin, vo.title) AS title + , CASE WHEN vo.latin IS NULL THEN '' ELSE vo.title END AS alttitle + FROM vn v + JOIN vn_titles vo ON vo.id = v.id AND vo.lang = v.olang; +``` + +Adjusting the code to select from this view instead of the `vn` table was +trivial and the performance is perfectly fine. With that view in place, there's +a simple path towards supporting title preferences: just temporarily replace +that view with a different query that implements the user's preferred title +selection. This is made even easier because, in PostgreSQL, temporary views are +defined inside a special session-local schema and that schema is searched +before the others. So all that is needed to switch to a configuration where +English is preferred for the main title is the following query^[Sadly, while +the "CREATE TABLE" syntax supports an "ON COMMIT DROP" clause to get rid of the +table after the transaction has completed, this feature is not available for +views, so the view needs to be dropped or replaced manually to switch back to +the default configuration.]. + +```sql +CREATE OR REPLACE TEMPORARY VIEW vnt AS + SELECT v.* + , COALESCE(ve.title, vo.latin, vo.title) AS title + , CASE WHEN vo.latin IS NULL THEN '' ELSE vo.title END AS alttitle + FROM vn v + JOIN vn_titles vo ON vo.id = v.id AND vo.lang = v.olang + LEFT JOIN vn_titles ve ON ve.id = v.id AND ve.lang = 'en'; +``` + +While abusing views for configuration is perhaps a bit weird, this particular +solution was motivated by two other constraints: + +**"If you're generating the SQL from the application anyway, couldn't you +inline the preferences as a subquery rather than using a view?"** That would be +a nicer solution, but there's a problem. I also have this `item_info()` support +function that takes an identifier of any database object - including VNs - and +returns some generic information about that object - including the title. That +title should also take preferences into account, so I either need to have a way +to pass the preferences as a function argument or... simply have it select from +a `VIEW`. The `item_info()` function is also too useful to just let go. + +**"How about just passing all available titles to the application and implement +the preferences there?"** Would not be a terrible solution either, except that +I also need a way to sort large-ish lists on the displayed title. I'd rather +not do that in the application as performance would be a nightmare. + +That is not to say the current abuse of views is necessarily a *great* solution +either, it comes with two main drawbacks: + +- The `CREATE VIEW` command isn't super slow, but also not instant. It has to + be run at the start of each pageview (when switching from a previous + configuration, at least), and we'll need to issue a command for each database + type that supports title preferences. Currently that's only two (VNs and + releases), but I'd like to increase that number and that will further + increase page generation times even for pages that don't display any titles + at all. +- There are some scenarios where we need to query an object title (through + `item_info()` or other means) *without* applying the user's preferences, but + switching preferences for each query is much too slow. My current workaround + is to remove the temporary schema from the `search_path` so that the + temporary view is not visible to the query being run, but that's a pretty + shitty hack. + + +# A Little Experiment + +In search for a better solution, I was wondering if I could implement the title +selection *entirely* within SQL. That is, in such a way that the users' +preferences are passed around as some value and that the query to fetch the +correct title is then generated within SQL. PostgreSQL supports writing +functions that can be used in the context of a table or view, and I knew it +could, if stars aligned correctly, also optimize queries through function +boundaries. My initial attempt to write such a function looked like this: + +```sql +CREATE OR REPLACE FUNCTION vnt(p langprefs) RETURNS SETOF vnt_type AS $$ + SELECT v.* + , coalesce( + CASE WHEN p.t1_latin THEN t1.latin ELSE NULL END, t1.title, + CASE WHEN p.t2_latin THEN t2.latin ELSE NULL END, t2.title, + CASE WHEN p.t3_latin THEN t3.latin ELSE NULL END, t3.title, + CASE WHEN p.t4_latin THEN t4.latin ELSE NULL END, t4.title, + CASE WHEN p.t5_latin THEN t5.latin ELSE NULL END, t5.title + ) AS title + , coalesce( + CASE WHEN p.a1_latin THEN a1.latin ELSE NULL END, a1.title, + CASE WHEN p.a2_latin THEN a2.latin ELSE NULL END, a2.title, + CASE WHEN p.a3_latin THEN a3.latin ELSE NULL END, a3.title, + CASE WHEN p.a4_latin THEN a4.latin ELSE NULL END, a4.title, + CASE WHEN p.a5_latin THEN a5.latin ELSE NULL END, a5.title + ) AS alttitle + FROM vn v + LEFT JOIN vn_titles t1 ON t1.id = v.id AND t1.lang = COALESCE(p.t1_lang, v.olang) AND (NOT p.t1_official OR t1.official) + LEFT JOIN vn_titles t2 ON t2.id = v.id AND t2.lang = COALESCE(p.t2_lang, v.olang) AND (NOT p.t2_official OR t2.official) AND p.t1_lang IS NOT NULL + LEFT JOIN vn_titles t3 ON t3.id = v.id AND t3.lang = COALESCE(p.t3_lang, v.olang) AND (NOT p.t3_official OR t3.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL + LEFT JOIN vn_titles t4 ON t4.id = v.id AND t4.lang = COALESCE(p.t4_lang, v.olang) AND (NOT p.t4_official OR t4.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL AND p.t3_lang IS NOT NULL + LEFT JOIN vn_titles t5 ON t5.id = v.id AND t5.lang = COALESCE(p.t5_lang, v.olang) AND (NOT p.t5_official OR t5.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL AND p.t3_lang IS NOT NULL AND p.t4_lang IS NOT NULL + LEFT JOIN vn_titles a1 ON a1.id = v.id AND a1.lang = COALESCE(p.a1_lang, v.olang) AND (NOT p.a1_official OR a1.official) + LEFT JOIN vn_titles a2 ON a2.id = v.id AND a2.lang = COALESCE(p.a2_lang, v.olang) AND (NOT p.a2_official OR a2.official) AND p.a1_lang IS NOT NULL + LEFT JOIN vn_titles a3 ON a3.id = v.id AND a3.lang = COALESCE(p.a3_lang, v.olang) AND (NOT p.a3_official OR a3.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL + LEFT JOIN vn_titles a4 ON a4.id = v.id AND a4.lang = COALESCE(p.a4_lang, v.olang) AND (NOT p.a4_official OR a4.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL AND p.a3_lang IS NOT NULL + LEFT JOIN vn_titles a5 ON a5.id = v.id AND a5.lang = COALESCE(p.a5_lang, v.olang) AND (NOT p.a5_official OR a5.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL AND p.a3_lang IS NOT NULL AND p.a4_lang IS NOT NULL +$$ LANGUAGE SQL STABLE; +``` + +Not the most elegant function ever written and also not fully correct, but it +serves as a starting point. It can be used in place of the `vnt` view: + +```sql +SELECT id, title, alttitle + FROM vnt('(en,,,,,,,,,,t,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)'::langprefs) x + WHERE NOT hidden AND id < 'v10'; +``` + +The configuration I'm using here is the same as the English-main-title example +used above. To my surprise, the corresponding query plan isn't even awful: + +``` + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Nested Loop Left Join (cost=655.67..4101.26 rows=195 width=68) (actual time=0.053..3.255 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.67..4099.31 rows=195 width=315) (actual time=0.052..3.252 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.67..4097.36 rows=195 width=284) (actual time=0.052..3.250 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.67..4095.41 rows=195 width=253) (actual time=0.052..3.249 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.67..4093.46 rows=195 width=222) (actual time=0.052..3.247 rows=9 loops=1) + -> Nested Loop Left Join (cost=655.38..2933.01 rows=195 width=195) (actual time=0.049..3.236 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.38..2931.06 rows=195 width=164) (actual time=0.049..3.235 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.38..2929.11 rows=195 width=133) (actual time=0.048..3.233 rows=9 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=655.38..2927.16 rows=195 width=102) (actual time=0.048..3.231 rows=9 loops=1) + -> Hash Right Join (cost=655.09..1766.71 rows=195 width=71) (actual time=0.042..3.215 rows=9 loops=1) + Hash Cond: (t1.id = v.id) + -> Seq Scan on vn_titles t1 (cost=0.00..1081.40 rows=11514 width=67) (actual time=0.002..2.689 rows=11535 loops=1) + Filter: (lang = 'en'::language) + Rows Removed by Filter: 33617 + -> Hash (cost=652.65..652.65 rows=195 width=8) (actual time=0.018..0.019 rows=9 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 9kB + -> Bitmap Heap Scan on vn v (cost=5.86..652.65 rows=195 width=8) (actual time=0.008..0.016 rows=9 loops=1) + Recheck Cond: (id < 'v10'::vndbid) + Filter: (NOT hidden) + Heap Blocks: exact=9 + -> Bitmap Index Scan on vn_pkey (cost=0.00..5.81 rows=203 width=0) (actual time=0.003..0.003 rows=9 loops=1) + Index Cond: (id < 'v10'::vndbid) + -> Index Scan using vn_titles_pkey on vn_titles t2 (cost=0.29..5.95 rows=1 width=39) (actual time=0.001..0.001 rows=1 loops=9) + Index Cond: ((id = v.id) AND (lang = COALESCE(v.olang))) + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Index Scan using vn_titles_pkey on vn_titles a1 (cost=0.29..5.95 rows=1 width=39) (actual time=0.001..0.001 rows=1 loops=9) + Index Cond: ((id = v.id) AND (lang = COALESCE(v.olang))) + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=9) + One-Time Filter: false + Planning Time: 1.558 ms + Execution Time: 3.329 ms +``` + +I mean, it's neither the smallest nor the best^[The planner made a bad estimate +about the number of rows returned by the "`id < 'v10'`" filter and ended up +choosing a sequential scan on `vn_titles` when a few index lookups would +probably have been faster, but that's not important for now.] plan, but: + +- The big ugly query inside the `vnt()` function was properly "inlined" into + the `SELECT` statement where it was used, which allowed the query planner to + move the conditions from the `WHERE` clause to the point where rows from the + `vn` table are being read, which in turn allows it to use an index. Without + this optimization, that simple query would have had to read all titles from + all VN entries from disk, only to then throw almost everything away again. +- There's a lot of joins, but the planner had correctly inferred that they'll + never match anything and has optimized those out with a "One-Time Filter: + false". + +Honestly, that's pretty awesome. Let's try sorting on the title. + +```sql +SELECT id, title, alttitle + FROM vnt('(en,,,,,,,,,,t,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)'::langprefs) x + WHERE NOT hidden + ORDER BY title LIMIT 100 +``` + +``` + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Limit (cost=13010.96..13011.21 rows=100 width=68) (actual time=108.074..108.085 rows=100 loops=1) + -> Sort (cost=13010.96..13108.95 rows=39196 width=68) (actual time=108.073..108.079 rows=100 loops=1) + Sort Key: (COALESCE(t1.latin, t1.title, t2.title, title, title, title)) + Sort Method: top-N heapsort Memory: 44kB + -> Nested Loop Left Join (cost=4516.79..11512.92 rows=39196 width=68) (actual time=19.879..88.257 rows=39128 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=4516.79..11120.96 rows=39196 width=315) (actual time=19.878..82.548 rows=39128 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=4516.79..10729.00 rows=39196 width=284) (actual time=19.878..77.113 rows=39128 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=4516.79..10337.04 rows=39196 width=253) (actual time=19.877..71.686 rows=39128 loops=1) + Join Filter: false + -> Hash Left Join (cost=4516.79..9945.08 rows=39196 width=222) (actual time=19.877..66.404 rows=39128 loops=1) + Hash Cond: ((v.id = a1.id) AND (COALESCE(v.olang) = a1.lang)) + -> Nested Loop Left Join (cost=2870.99..8093.50 rows=39196 width=195) (actual time=11.921..50.437 rows=39128 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=2870.99..7701.54 rows=39196 width=164) (actual time=11.920..45.003 rows=39128 loops=1) + Join Filter: false + -> Nested Loop Left Join (cost=2870.99..7309.58 rows=39196 width=133) (actual time=11.920..39.607 rows=39128 loops=1) + Join Filter: false + -> Hash Left Join (cost=2870.99..6917.62 rows=39196 width=102) (actual time=11.919..33.959 rows=39128 loops=1) + Hash Cond: ((v.id = t2.id) AND (COALESCE(v.olang) = t2.lang)) + -> Hash Left Join (cost=1225.19..5066.04 rows=39196 width=71) (actual time=3.964..18.276 rows=39128 loops=1) + Hash Cond: (v.id = t1.id) + -> Seq Scan on vn v (cost=0.00..3737.95 rows=39196 width=8) (actual time=0.003..9.290 rows=39128 loops=1) + Filter: (NOT hidden) + Rows Removed by Filter: 1667 + -> Hash (cost=1081.40..1081.40 rows=11503 width=67) (actual time=3.958..3.958 rows=11535 loops=1) + Buckets: 16384 Batches: 1 Memory Usage: 783kB + -> Seq Scan on vn_titles t1 (cost=0.00..1081.40 rows=11503 width=67) (actual time=0.002..2.924 rows=11535 loops=1) + Filter: (lang = 'en'::language) + Rows Removed by Filter: 33617 + -> Hash (cost=968.52..968.52 rows=45152 width=39) (actual time=7.933..7.934 rows=45152 loops=1) + Buckets: 65536 Batches: 1 Memory Usage: 3749kB + -> Seq Scan on vn_titles t2 (cost=0.00..968.52 rows=45152 width=39) (actual time=0.001..3.368 rows=45152 loops=1) + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Hash (cost=968.52..968.52 rows=45152 width=39) (actual time=7.933..7.933 rows=45152 loops=1) + Buckets: 65536 Batches: 1 Memory Usage: 3749kB + -> Seq Scan on vn_titles a1 (cost=0.00..968.52 rows=45152 width=39) (actual time=0.001..3.385 rows=45152 loops=1) + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + -> Result (cost=0.00..0.00 rows=0 width=31) (actual time=0.000..0.000 rows=0 loops=39128) + One-Time Filter: false + Planning Time: 0.768 ms + Execution Time: 108.133 ms +``` + +Hmm, wait, all those joins that I had thought the planner had nicely optimized +away apparently still take up 5ms+ of processing time each. That's... not +ideal. Let's look at the `vnt` view solution for comparison: + +``` + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + Limit (cost=8415.66..8415.91 rows=100 width=68) (actual time=37.743..37.752 rows=100 loops=1) + -> Sort (cost=8415.66..8513.65 rows=39196 width=68) (actual time=37.742..37.746 rows=100 loops=1) + Sort Key: (COALESCE(ve.title, vo.latin, vo.title)) + Sort Method: top-N heapsort Memory: 42kB + -> Hash Join (cost=2870.99..6917.62 rows=39196 width=68) (actual time=12.355..31.546 rows=39128 loops=1) + Hash Cond: ((v.id = vo.id) AND (v.olang = vo.lang)) + -> Hash Left Join (cost=1225.19..5066.04 rows=39196 width=39) (actual time=3.694..16.147 rows=39128 loops=1) + Hash Cond: (v.id = ve.id) + -> Seq Scan on vn v (cost=0.00..3737.95 rows=39196 width=8) (actual time=0.002..8.232 rows=39128 loops=1) + Filter: (NOT hidden) + Rows Removed by Filter: 1667 + -> Hash (cost=1081.40..1081.40 rows=11503 width=35) (actual time=3.685..3.686 rows=11535 loops=1) + Buckets: 16384 Batches: 1 Memory Usage: 782kB + -> Seq Scan on vn_titles ve (cost=0.00..1081.40 rows=11503 width=35) (actual time=0.001..2.598 rows=11535 loops=1) + Filter: (lang = 'en'::language) + Rows Removed by Filter: 33617 + -> Hash (cost=968.52..968.52 rows=45152 width=71) (actual time=8.652..8.652 rows=45152 loops=1) + Buckets: 65536 Batches: 1 Memory Usage: 4665kB + -> Seq Scan on vn_titles vo (cost=0.00..968.52 rows=45152 width=71) (actual time=0.003..3.676 rows=45152 loops=1) + Planning Time: 0.217 ms + Execution Time: 37.778 ms +``` + +Ouch, much faster. One reason it's faster is because there's only a single join +to fetch the title for the original language, rather than two in the `vnt()` +function. That can be worked around by special-casing a join on the original +language, but the other reason it's faster is that all those silly unused joins +don't exist at all, and I hadn't a clue how to fix that. + +([Full script](/download/code/vntitles1.sql) of this experiment for those who +want to follow along, it should work on the [public database +dumps](https://vndb.org/d14#5) with minimal modifications) + + +# A Little Detour: Custom Aggregate + +So my mind wandered off to try another approach. What if we simply fetch all +the relevant titles for the VN entry and throw them through a custom aggregate +function that selects the appropriate title? So I gave that a try: + +```sql +CREATE TYPE vntitle_state AS ( + rank smallint, + title text +); + +CREATE OR REPLACE FUNCTION vntitle_sfunc(state vntitle_state, t vn_titles, olang language, p langprefs) RETURNS vntitle_state AS $$ +BEGIN + IF state.rank > 1 AND COALESCE(p.t1_lang, olang) = t.lang AND (NOT p.t1_official OR t.official) THEN + RETURN ROW(1::smallint, COALESCE(CASE WHEN p.t1_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 2 AND COALESCE(p.t2_lang, olang) = t.lang AND (NOT p.t2_official OR t.official) THEN + RETURN ROW(2::smallint, COALESCE(CASE WHEN p.t2_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 3 AND COALESCE(p.t3_lang, olang) = t.lang AND (NOT p.t3_official OR t.official) THEN + RETURN ROW(3::smallint, COALESCE(CASE WHEN p.t3_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 4 AND COALESCE(p.t4_lang, olang) = t.lang AND (NOT p.t4_official OR t.official) THEN + RETURN ROW(4::smallint, COALESCE(CASE WHEN p.t4_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 5 AND COALESCE(p.t5_lang, olang) = t.lang AND (NOT p.t5_official OR t.official) THEN + RETURN ROW(5::smallint, COALESCE(CASE WHEN p.t5_latin THEN t.latin ELSE NULL END, t.title)); + END IF; + RETURN state; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE AGGREGATE vntitle(vn_titles, language, langprefs) ( + SFUNC = vntitle_sfunc, + STYPE = vntitle_state, + INITCOND = '(6,)' +); +``` + +Requires rewriting our SELECT query a bit, but it's still usable: + +```sql +SELECT v.id, vntitle(t, v.olang, '(en,,,,,f,f,f,f,f,f,f,f,f,f)'::langprefs) title + FROM vn v + JOIN vn_titles t ON t.id = v.id + WHERE NOT hidden AND v.id < 'v10' + GROUP BY v.id; +``` + +Uh, that simple example already takes 10ms, making it slower than the `vnt()` +approach. It's even worse if we want to sort on the title: 114ms. I replaced my +custom aggregate with the built-in `max()` to see if it was just my custom +function being slow, but even then we're at 4.2ms and 52ms, both slower than +the original `vnt` view. + +Nope, bad idea (but I kept the [full script](/download/code/vntitles2.sql) +anyway). + + +# Another Little Detour: SupportRequestSimplify + +One of the neat things about PostgreSQL is that it exposes much of its internal +workings through a C extension API. If you're running into the limitations of +SQL, there's always the option of dropping into C. The C API exposes [a planner +support function](https://www.postgresql.org/docs/15/xfunc-optimization.html), +which, among other things, has a "`SupportRequestSimplify`" operation that lets +you rewrite a function call into an arbitrary SQL expression tree at planning +time. + +So what I could do, I thought, is write a C support function that rewrites +calls to `vnt()` with the appropriate optimized SQL statements; thus not +emitting any unnecessary JOINs in the first place. + +I did write a little function to verify that the `SupportRequestSimplify` +operation is being called for functions in table context - it is - and I looked +into having it generate and return a proper table expression, but browsing +through the PostgreSQL source code I could not find a single instance of the +simplify operation being used in such a way. It only seems to be used (and +intended) for simplifying certain types of scalar expressions, so I'm not sure +to what extend it's possible to have it construct complex SQL statements. + +Before I was able to dig deeper and really give it a try, I came to my senses +and realized that I don't want to write and maintain complex C code generating +custom query trees for each type of database object. I haven't gone quite that +mad... yet. + + +# Back to A Little Experiment + +While I was busying myself with the above detours, I kept thinking about ways +to optimize the earlier `vnt()` function. What if, instead of trying to +convince the planner that those unused JOINs didn't return any *rows*, I could +convince it that I didn't care about the join by not referencing any of its +*columns*, instead? Turns out that works, if the conditions are right. One of +those conditions is that the ON filter of the JOIN must exactly match a unique +index (or primary key) of the joined table, which is `(id,lang)` in our case. +Adding any more filters causes the optimization to fail, so part of the title +selection logic had to move to the SELECT part of the query. Apparently the +join filter may not be NULL, either. + +With that trick applied, I also implemented proper support for selecting titles +that must match the original language and implemented the original language as +a special-case so that the same JOIN can be used for both the main and +alternative title. The end result isn't any more elegant: + +```sql +CREATE OR REPLACE FUNCTION vnt(p langprefs) RETURNS SETOF vnt_type AS $$ + SELECT v.* + -- The language selection logic below is specially written so that the planner can remove references to joined tables corresponding to NULL languages. + , COALESCE( + CASE WHEN p.t1_lang IS NULL OR (p.t1_official AND NOT t1.official) OR (p.t1_official IS NULL AND t1.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t1_latin THEN t1.latin ELSE NULL END, t1.title) END, + CASE WHEN p.t2_lang IS NULL OR (p.t2_official AND NOT t2.official) OR (p.t2_official IS NULL AND t2.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t2_latin THEN t2.latin ELSE NULL END, t2.title) END, + CASE WHEN p.t3_lang IS NULL OR (p.t3_official AND NOT t3.official) OR (p.t3_official IS NULL AND t3.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t3_latin THEN t3.latin ELSE NULL END, t3.title) END, + CASE WHEN p.t4_lang IS NULL OR (p.t4_official AND NOT t4.official) OR (p.t4_official IS NULL AND t4.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t4_latin THEN t4.latin ELSE NULL END, t4.title) END, + CASE WHEN p IS NULL OR p.to_latin THEN ol.latin ELSE NULL END, ol.title + ) AS title + , COALESCE( + CASE WHEN p.a1_lang IS NULL OR (p.a1_official AND NOT a1.official) OR (p.a1_official IS NULL AND a1.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a1_latin THEN a1.latin ELSE NULL END, a1.title) END, + CASE WHEN p.a2_lang IS NULL OR (p.a2_official AND NOT a2.official) OR (p.a2_official IS NULL AND a2.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a2_latin THEN a2.latin ELSE NULL END, a2.title) END, + CASE WHEN p.a3_lang IS NULL OR (p.a3_official AND NOT a3.official) OR (p.a3_official IS NULL AND a3.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a3_latin THEN a3.latin ELSE NULL END, a3.title) END, + CASE WHEN p.a4_lang IS NULL OR (p.a4_official AND NOT a4.official) OR (p.a4_official IS NULL AND a4.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a4_latin THEN a4.latin ELSE NULL END, a4.title) END, + CASE WHEN p.ao_latin THEN ol.latin ELSE NULL END, ol.title + ) AS alttitle + FROM vn v + JOIN vn_titles ol ON ol.id = v.id AND ol.lang = v.olang + -- The COALESCE() below is kind of meaningless, but apparently the query planner can't optimize out JOINs with NULL conditions. + LEFT JOIN vn_titles t1 ON t1.id = v.id AND t1.lang = COALESCE(p.t1_lang, 'en') + LEFT JOIN vn_titles t2 ON t2.id = v.id AND t2.lang = COALESCE(p.t2_lang, 'en') + LEFT JOIN vn_titles t3 ON t3.id = v.id AND t3.lang = COALESCE(p.t3_lang, 'en') + LEFT JOIN vn_titles t4 ON t4.id = v.id AND t4.lang = COALESCE(p.t4_lang, 'en') + LEFT JOIN vn_titles a1 ON a1.id = v.id AND a1.lang = COALESCE(p.a1_lang, 'en') + LEFT JOIN vn_titles a2 ON a2.id = v.id AND a2.lang = COALESCE(p.a2_lang, 'en') + LEFT JOIN vn_titles a3 ON a3.id = v.id AND a3.lang = COALESCE(p.a3_lang, 'en') + LEFT JOIN vn_titles a4 ON a4.id = v.id AND a4.lang = COALESCE(p.a4_lang, 'en') +$$ LANGUAGE SQL STABLE; +``` + +Let's try the sort-by-title benchmark again: + +``` + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit (cost=8513.65..8513.90 rows=100 width=68) (actual time=39.216..39.226 rows=100 loops=1) + -> Sort (cost=8513.65..8611.64 rows=39196 width=68) (actual time=39.216..39.220 rows=100 loops=1) + Sort Key: (COALESCE(CASE WHEN (((NOT t1.official) AND NULL::boolean) OR (t1.lang <> v.olang)) THEN NULL::text ELSE COALESCE(t1.title) END, ol.latin, ol.title)) + Sort Method: top-N heapsort Memory: 43kB + -> Hash Join (cost=2870.99..7015.61 rows=39196 width=68) (actual time=12.583..32.991 rows=39128 loops=1) + Hash Cond: ((v.id = ol.id) AND (v.olang = ol.lang)) + -> Hash Left Join (cost=1225.19..5066.04 rows=39196 width=44) (actual time=3.841..16.826 rows=39128 loops=1) + Hash Cond: (v.id = t1.id) + -> Seq Scan on vn v (cost=0.00..3737.95 rows=39196 width=8) (actual time=0.002..8.535 rows=39128 loops=1) + Filter: (NOT hidden) + Rows Removed by Filter: 1667 + -> Hash (cost=1081.40..1081.40 rows=11503 width=40) (actual time=3.833..3.834 rows=11535 loops=1) + Buckets: 16384 Batches: 1 Memory Usage: 873kB + -> Seq Scan on vn_titles t1 (cost=0.00..1081.40 rows=11503 width=40) (actual time=0.001..2.622 rows=11535 loops=1) + Filter: (lang = 'en'::language) + Rows Removed by Filter: 33617 + -> Hash (cost=968.52..968.52 rows=45152 width=71) (actual time=8.731..8.732 rows=45152 loops=1) + Buckets: 65536 Batches: 1 Memory Usage: 4665kB + -> Seq Scan on vn_titles ol (cost=0.00..968.52 rows=45152 width=71) (actual time=0.003..3.744 rows=45152 loops=1) + Planning Time: 0.545 ms + Execution Time: 39.252 ms +``` + +Hey, that's the exact same query plan that we got with the original `vnt` view! + +SUCCESS! \\o/ + + +# A Failed Little Experiment? + +...or so I thought. But then I went ahead an implemented that function to a few +parts of the site and quickly ran into queries that ran quite a bit slower than +they used to. Turns out the query planner gives up on finding the most optimal +plan pretty soon once the queries start getting slightly more complex. Taking, +for example, the query to fetch the 4 random screenshots on the homepage: + +```sql +SELECT i.id, i.width, i.height, i.id AS vid, v.title + FROM (SELECT id, width, height FROM images i TABLESAMPLE SYSTEM (0.93) + WHERE i.c_weight > 0 + AND vndbid_type(i.id) = 'sf' + AND i.c_sexual_avg < 40 + AND i.c_violence_avg < 40 + ORDER BY random() + LIMIT 4) i(id) + JOIN vn_screenshots vs ON vs.scr = i.id + JOIN vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) v ON v.id = vs.id + WHERE NOT v.hidden + ORDER BY random() LIMIT 4; +``` + +A fairly staightforward query. Using the `vnt` view it runs in about 5ms, with +the wonderfully optimized `vnt()` function above, 37ms. The planner completely +misses an important optimization. While I'm sure I could find other workarounds +for that particular query, I found several similar cases in just a short time. +Apart from the work involved in revisiting all queries used by the site, the +workarounds necessary to re-optimize them again are going to be fairly ugly. + +Increasing +[join\_collapse\_limit](https://www.postgresql.org/docs/15/runtime-config-query.html#GUC-JOIN-COLLAPSE-LIMIT) +and +[from\_collapse\_limit](https://www.postgresql.org/docs/15/runtime-config-query.html#GUC-FROM-COLLAPSE-LIMIT) +appears to work, but I have queries that may have 3 or 4 `vnt()` calls and +optimizing those requires bumping the limit by a factor 4 or 5. I'm worried +about the impact of doing that site-wide, will have to do some more experiments +for that. + +This little experiment may not have been the biggest success, but there's still +promise. + +(Either way I got a [full script](/download/code/vntitles3.sql) for this +one, too) + + +# The Cliffhanger + +And that's when I decided that I wasn't going to make much progress and I might +as well spend some time writing an article about it. There's still a bunch of +other ideas worth exploring: + +- I could keep `vnt()` around for the few special cases where I *need* title + selection to be done entirely in SQL, while falling back to + application-generated subqueries for the more common case that I don't. + Downside: I need to maintain multiple different implementations of the same + thing. +- The simpler option: get rid of the `vn_titles` table and instead denormalize + the list of titles to a single column in the `vn` table. That should greatly + simplify implementing title selection and has the potential to be faster than + the current `vnt` view approach. Downside: I don't like it. +- There's always the madness option of trying SupportRequestSimplify. + +If you have any better ideas, I'd love to hear them as well. + + +# The Conclusion + +I wonder how other sites deal with features like this, other than "we don't" +and "we do but you can only select a single language option". Perhaps I just +want too much. + +And it's always fun when people request simple-sounding features ("Hey can you +implement language preferences? Super simple! kthnx") and you end up in a +wonderfully deep rabbit hole. I haven't even fully explored this particular +hole yet. + +For now, though, I'll be off exploring other ideas again. I'd like to end with +"To be continued...", but I'm lazy when it comes to writing so there may not be +a followup. diff --git a/dat/dump.md b/dat/dump.md index 11398a9..4958045 100644 --- a/dat/dump.md +++ b/dat/dump.md @@ -7,6 +7,17 @@ thought might be of useful to others as well. I also maintain a collection of miscellaneous C micro-libraries. Those are listed under the collective name of [Ylib](/ylib). +# Subpages + +- [Demoscene thingy](/dump/demo) +- [AWStats Data File Schrinker](/dump/awshrink) +- [GTK+ Mass File Renamer](/dump/grenamr) +- [Colours in NCurses](/dump/nccolour) +- [Insertion Performance Benchmarks](/dump/insbench) +- [btrfs-size.pl](/dump/btrfssize) + +# Miscellaneous + ## maildir.pl October 2012. A tiny weechat plugin to display the number of unread emails in a diff --git a/dat/index.md b/dat/index.md index 10b3a07..604facd 100644 --- a/dat/index.md +++ b/dat/index.md @@ -20,6 +20,10 @@ the incidental article on this site. Enjoy your stay! ## Announcements Atom feed +`2022-12-23` - New article: Overengineering Title Preferences for VNDB +: A detailed look into the implementation of title preferences on VNDB and a + bunch of PostgreSQL experiments. [Read more.](/doc/vndbtitles) + `2022-12-06` - ncdu 1.18 released : Backports configuration file support and a whole bunch of CLI flags from the 2.x branch. Also fixes drawing a pwoper background with the default diff --git a/dat/ncdu.md b/dat/ncdu.md index 61c69a2..ec295a9 100644 --- a/dat/ncdu.md +++ b/dat/ncdu.md @@ -8,6 +8,14 @@ POSIX-like environment with ncurses installed. ## Download +Static binaries +: Convenient static binaries for Linux. Download, extract and run; no + compilation or installation necessary: + [i486](/download/ncdu-2.2.1-linux-i386.tar.gz) - + [x86_64](/download/ncdu-2.2.1-linux-x86_64.tar.gz) - + [ARM](/download/ncdu-2.2.1-linux-arm.tar.gz) - + [AArch64](/download/ncdu-2.2.1-linux-aarch64.tar.gz). + Zig version (stable) : 2.2.1 ([ncdu-2.2.1.tar.gz](/download/ncdu-2.2.1.tar.gz) - [changes](/ncdu/changes2) - requires Zig 0.9.0 or 0.9.1) @@ -23,13 +31,6 @@ Zig version (stable) Zig](https://github.com/ziglang/zig/pull/11295) to fix a [possible crash](https://code.blicky.net/yorhel/ncdu/issues/192). - Convenient static binaries for Linux: - [i486](/download/ncdu-2.2.1-linux-i386.tar.gz), - [x86_64](/download/ncdu-2.2.1-linux-x86_64.tar.gz), - [ARM](/download/ncdu-2.2.1-linux-arm.tar.gz) and - [AArch64](/download/ncdu-2.2.1-linux-aarch64.tar.gz). - Download, extract and run; no compilation or installation necessary. - For users on Zig 0.10.0: see [#213](https://code.blicky.net/yorhel/ncdu/issues/213). C version (LTS) diff --git a/dat/ncdu/scr.md b/dat/ncdu/scr.md index ea40ccb..dcea1c4 100644 --- a/dat/ncdu/scr.md +++ b/dat/ncdu/scr.md @@ -1,8 +1,7 @@ % Ncdu Screenshots -These screenshots were made with ncdu 1.13 with the `--color=dark` option. -Colors are not available in older versions and (in 1.13) still disabled by -default. +These screenshots were made with ncdu 1.13 with the `--color=dark` option. This +color scheme is the default since 1.17. ## Scanning... diff --git a/pub/.htaccess b/pub/.htaccess index e10ff4d..b7c417b 100644 --- a/pub/.htaccess +++ b/pub/.htaccess @@ -17,6 +17,7 @@ RewriteRule ^dump/cechoserv/*$ /dump [L,R=permanent] RewriteRule ^dump/cyapong/*$ /dump [L,R=permanent] RewriteRule ^ncdu/issue(.*)$ /ncdu/bug$1 [L,R=permanent] RewriteRule ^ncdc/issue(.*)$ /ncdc/bug$1 [L,R=permanent] +RewriteRule ^ncdu/man2$ /ncdu/man [L,R=permanent] RewriteRule ^ncdu/bug/1$ https://code.blicky.net/yorhel/ncdu/issues/3 [L,R=permanent] RewriteRule ^ncdu/bug/2$ https://code.blicky.net/yorhel/ncdu/issues/4 [L,R=permanent] diff --git a/pub/download/code/vntitles1.sql b/pub/download/code/vntitles1.sql new file mode 100644 index 0000000..f92d395 --- /dev/null +++ b/pub/download/code/vntitles1.sql @@ -0,0 +1,92 @@ +BEGIN; + +-- The original VIEW solution for comparison: +CREATE OR REPLACE VIEW vnt AS + SELECT v.* + , COALESCE(ve.title, vo.latin, vo.title) AS title + , CASE WHEN vo.latin IS NULL THEN '' ELSE vo.title END AS alttitle + FROM vn v + JOIN vn_titles vo ON vo.id = v.id AND vo.lang = v.olang + LEFT JOIN vn_titles ve ON ve.id = v.id AND ve.lang = 'en'; + + +CREATE TABLE vnt_type (LIKE vn, title text, alttitle text); + +CREATE TYPE langprefs AS ( + t1_lang language, -- NULL for original language + t2_lang language, + t3_lang language, + t4_lang language, + t5_lang language, + a1_lang language, + a2_lang language, + a3_lang language, + a4_lang language, + a5_lang language, + t1_latin boolean, + t2_latin boolean, + t3_latin boolean, + t4_latin boolean, + t5_latin boolean, + a1_latin boolean, + a2_latin boolean, + a3_latin boolean, + a4_latin boolean, + a5_latin boolean, + t1_official boolean, -- always true for original language + t2_official boolean, + t3_official boolean, + t4_official boolean, + t5_official boolean, + a1_official boolean, + a2_official boolean, + a3_official boolean, + a4_official boolean, + a5_official boolean +); + +CREATE OR REPLACE FUNCTION vnt(p langprefs) RETURNS SETOF vnt_type AS $$ + SELECT v.* + , coalesce( + CASE WHEN p.t1_latin THEN t1.latin ELSE NULL END, t1.title, + CASE WHEN p.t2_latin THEN t2.latin ELSE NULL END, t2.title, + CASE WHEN p.t3_latin THEN t3.latin ELSE NULL END, t3.title, + CASE WHEN p.t4_latin THEN t4.latin ELSE NULL END, t4.title, + CASE WHEN p.t5_latin THEN t5.latin ELSE NULL END, t5.title + ) AS title + , coalesce( + CASE WHEN p.a1_latin THEN a1.latin ELSE NULL END, a1.title, + CASE WHEN p.a2_latin THEN a2.latin ELSE NULL END, a2.title, + CASE WHEN p.a3_latin THEN a3.latin ELSE NULL END, a3.title, + CASE WHEN p.a4_latin THEN a4.latin ELSE NULL END, a4.title, + CASE WHEN p.a5_latin THEN a5.latin ELSE NULL END, a5.title + ) AS alttitle + FROM vn v + LEFT JOIN vn_titles t1 ON t1.id = v.id AND t1.lang = COALESCE(p.t1_lang, v.olang) AND (NOT p.t1_official OR t1.official) + LEFT JOIN vn_titles t2 ON t2.id = v.id AND t2.lang = COALESCE(p.t2_lang, v.olang) AND (NOT p.t2_official OR t2.official) AND p.t1_lang IS NOT NULL + LEFT JOIN vn_titles t3 ON t3.id = v.id AND t3.lang = COALESCE(p.t3_lang, v.olang) AND (NOT p.t3_official OR t3.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL + LEFT JOIN vn_titles t4 ON t4.id = v.id AND t4.lang = COALESCE(p.t4_lang, v.olang) AND (NOT p.t4_official OR t4.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL AND p.t3_lang IS NOT NULL + LEFT JOIN vn_titles t5 ON t5.id = v.id AND t5.lang = COALESCE(p.t5_lang, v.olang) AND (NOT p.t5_official OR t5.official) AND p.t1_lang IS NOT NULL AND p.t2_lang IS NOT NULL AND p.t3_lang IS NOT NULL AND p.t4_lang IS NOT NULL + LEFT JOIN vn_titles a1 ON a1.id = v.id AND a1.lang = COALESCE(p.a1_lang, v.olang) AND (NOT p.a1_official OR a1.official) + LEFT JOIN vn_titles a2 ON a2.id = v.id AND a2.lang = COALESCE(p.a2_lang, v.olang) AND (NOT p.a2_official OR a2.official) AND p.a1_lang IS NOT NULL + LEFT JOIN vn_titles a3 ON a3.id = v.id AND a3.lang = COALESCE(p.a3_lang, v.olang) AND (NOT p.a3_official OR a3.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL + LEFT JOIN vn_titles a4 ON a4.id = v.id AND a4.lang = COALESCE(p.a4_lang, v.olang) AND (NOT p.a4_official OR a4.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL AND p.a3_lang IS NOT NULL + LEFT JOIN vn_titles a5 ON a5.id = v.id AND a5.lang = COALESCE(p.a5_lang, v.olang) AND (NOT p.a5_official OR a5.official) AND p.a1_lang IS NOT NULL AND p.a2_lang IS NOT NULL AND p.a3_lang IS NOT NULL AND p.a4_lang IS NOT NULL +$$ LANGUAGE SQL STABLE; + + +SET max_parallel_workers_per_gather = 0; + +EXPLAIN ANALYZE +SELECT id, title, alttitle + --FROM vnt('(en,,,,,,,,,,t,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)'::langprefs) x + FROM vnt + WHERE NOT hidden --AND x.id < 'v10'; + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT id, title, alttitle + --FROM vnt('(en,,,,,,,,,,t,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)'::langprefs) x + FROM vnt + WHERE NOT hidden --AND x.id < 'v10'; + ORDER BY title LIMIT 100; diff --git a/pub/download/code/vntitles2.sql b/pub/download/code/vntitles2.sql new file mode 100644 index 0000000..2cc7bac --- /dev/null +++ b/pub/download/code/vntitles2.sql @@ -0,0 +1,66 @@ +BEGIN; + +CREATE TYPE langprefs AS ( + t1_lang language, -- NULL for original language + t2_lang language, + t3_lang language, + t4_lang language, + t5_lang language, + t1_latin boolean, + t2_latin boolean, + t3_latin boolean, + t4_latin boolean, + t5_latin boolean, + t1_official boolean, -- always true for original language + t2_official boolean, + t3_official boolean, + t4_official boolean, + t5_official boolean +); + +CREATE TYPE vntitle_state AS ( + rank smallint, + title text +); + +CREATE OR REPLACE FUNCTION vntitle_sfunc(state vntitle_state, t vn_titles, olang language, p langprefs) RETURNS vntitle_state AS $$ +BEGIN + IF state.rank > 1 AND COALESCE(p.t1_lang, olang) = t.lang AND (NOT p.t1_official OR t.official) THEN + RETURN ROW(1::smallint, COALESCE(CASE WHEN p.t1_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 2 AND COALESCE(p.t2_lang, olang) = t.lang AND (NOT p.t2_official OR t.official) THEN + RETURN ROW(2::smallint, COALESCE(CASE WHEN p.t2_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 3 AND COALESCE(p.t3_lang, olang) = t.lang AND (NOT p.t3_official OR t.official) THEN + RETURN ROW(3::smallint, COALESCE(CASE WHEN p.t3_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 4 AND COALESCE(p.t4_lang, olang) = t.lang AND (NOT p.t4_official OR t.official) THEN + RETURN ROW(4::smallint, COALESCE(CASE WHEN p.t4_latin THEN t.latin ELSE NULL END, t.title)); + ELSIF state.rank > 5 AND COALESCE(p.t5_lang, olang) = t.lang AND (NOT p.t5_official OR t.official) THEN + RETURN ROW(5::smallint, COALESCE(CASE WHEN p.t5_latin THEN t.latin ELSE NULL END, t.title)); + END IF; + RETURN state; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + + +CREATE OR REPLACE AGGREGATE vntitle(vn_titles, language, langprefs) ( + SFUNC = vntitle_sfunc, + STYPE = vntitle_state, + INITCOND = '(6,)' +); + +SET max_parallel_workers_per_gather = 0; + +EXPLAIN ANALYZE +SELECT v.id, vntitle(t, v.olang, '(en,,,,,f,f,f,f,f,f,f,f,f,f)'::langprefs) title + FROM vn v + JOIN vn_titles t ON t.id = v.id + WHERE NOT hidden --AND v.id < 'v10' + GROUP BY v.id + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT v.id, max(t.title) as title + FROM vn v + JOIN vn_titles t ON t.id = v.id + WHERE NOT hidden --AND v.id < 'v10' + GROUP BY v.id + ORDER BY title LIMIT 100; diff --git a/pub/download/code/vntitles3.sql b/pub/download/code/vntitles3.sql new file mode 100644 index 0000000..14c179e --- /dev/null +++ b/pub/download/code/vntitles3.sql @@ -0,0 +1,122 @@ +BEGIN; + +-- The original VIEW solution for comparison: +CREATE OR REPLACE VIEW vnt AS + SELECT v.* + , COALESCE(ve.title, vo.latin, vo.title) AS title + , CASE WHEN vo.latin IS NULL THEN '' ELSE vo.title END AS alttitle + FROM vn v + JOIN vn_titles vo ON vo.id = v.id AND vo.lang = v.olang + LEFT JOIN vn_titles ve ON ve.id = v.id AND ve.lang = 'en'; + +CREATE TABLE vnt_type (LIKE vn, title text, alttitle text); + +CREATE TYPE langprefs AS ( + -- NULL langs means unused slot + t1_lang language, + t2_lang language, + t3_lang language, + t4_lang language, + a1_lang language, + a2_lang language, + a3_lang language, + a4_lang language, + -- These should never be NULL + t1_latin boolean, + t2_latin boolean, + t3_latin boolean, + t4_latin boolean, + to_latin boolean, -- Original language fallback + a1_latin boolean, + a2_latin boolean, + a3_latin boolean, + a4_latin boolean, + ao_latin boolean, + -- These have three possible options: + -- * NULL: Only if lang == original, i.e. skip this slot if it's not the original language + -- * true: Only if official + -- * false: Use this language regardless of official/original status + t1_official boolean, + t2_official boolean, + t3_official boolean, + t4_official boolean, + a1_official boolean, + a2_official boolean, + a3_official boolean, + a4_official boolean +); + + +CREATE OR REPLACE FUNCTION vnt(p langprefs) RETURNS SETOF vnt_type AS $$ + SELECT v.* + -- The language selection logic below is specially written so that the planner can remove references to joined tables corresponding to NULL languages. + , COALESCE( + CASE WHEN p.t1_lang IS NULL OR (p.t1_official AND NOT t1.official) OR (p.t1_official IS NULL AND t1.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t1_latin THEN t1.latin ELSE NULL END, t1.title) END, + CASE WHEN p.t2_lang IS NULL OR (p.t2_official AND NOT t2.official) OR (p.t2_official IS NULL AND t2.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t2_latin THEN t2.latin ELSE NULL END, t2.title) END, + CASE WHEN p.t3_lang IS NULL OR (p.t3_official AND NOT t3.official) OR (p.t3_official IS NULL AND t3.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t3_latin THEN t3.latin ELSE NULL END, t3.title) END, + CASE WHEN p.t4_lang IS NULL OR (p.t4_official AND NOT t4.official) OR (p.t4_official IS NULL AND t4.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.t4_latin THEN t4.latin ELSE NULL END, t4.title) END, + CASE WHEN p IS NULL OR p.to_latin THEN ol.latin ELSE NULL END, ol.title + ) AS title + , COALESCE( + CASE WHEN p.a1_lang IS NULL OR (p.a1_official AND NOT a1.official) OR (p.a1_official IS NULL AND a1.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a1_latin THEN a1.latin ELSE NULL END, a1.title) END, + CASE WHEN p.a2_lang IS NULL OR (p.a2_official AND NOT a2.official) OR (p.a2_official IS NULL AND a2.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a2_latin THEN a2.latin ELSE NULL END, a2.title) END, + CASE WHEN p.a3_lang IS NULL OR (p.a3_official AND NOT a3.official) OR (p.a3_official IS NULL AND a3.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a3_latin THEN a3.latin ELSE NULL END, a3.title) END, + CASE WHEN p.a4_lang IS NULL OR (p.a4_official AND NOT a4.official) OR (p.a4_official IS NULL AND a4.lang <> v.olang) THEN NULL ELSE COALESCE(CASE WHEN p.a4_latin THEN a4.latin ELSE NULL END, a4.title) END, + CASE WHEN p.ao_latin THEN ol.latin ELSE NULL END, ol.title + ) AS alttitle + FROM vn v + JOIN vn_titles ol ON ol.id = v.id AND ol.lang = v.olang + -- The COALESCE() below is kind of meaningless, but apparently the query planner can't optimize out JOINs with NULL conditions. + LEFT JOIN vn_titles t1 ON t1.id = v.id AND t1.lang = COALESCE(p.t1_lang, 'en') + LEFT JOIN vn_titles t2 ON t2.id = v.id AND t2.lang = COALESCE(p.t2_lang, 'en') + LEFT JOIN vn_titles t3 ON t3.id = v.id AND t3.lang = COALESCE(p.t3_lang, 'en') + LEFT JOIN vn_titles t4 ON t4.id = v.id AND t4.lang = COALESCE(p.t4_lang, 'en') + LEFT JOIN vn_titles a1 ON a1.id = v.id AND a1.lang = COALESCE(p.a1_lang, 'en') + LEFT JOIN vn_titles a2 ON a2.id = v.id AND a2.lang = COALESCE(p.a2_lang, 'en') + LEFT JOIN vn_titles a3 ON a3.id = v.id AND a3.lang = COALESCE(p.a3_lang, 'en') + LEFT JOIN vn_titles a4 ON a4.id = v.id AND a4.lang = COALESCE(p.a4_lang, 'en') +$$ LANGUAGE SQL STABLE; + +SET max_parallel_workers_per_gather = 0; + +EXPLAIN ANALYZE +SELECT id, title, alttitle + FROM vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) x +-- FROM vnt(NULL) x + WHERE NOT hidden --AND x.id < 'v10' + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT id, title, alttitle + FROM vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) x +-- FROM vnt(NULL) x + WHERE NOT hidden --AND x.id < 'v10' + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT id, title, alttitle +-- FROM vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) x + FROM vnt x + WHERE NOT hidden --AND x.id < 'v10' + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT id, title, alttitle +-- FROM vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) x + FROM vnt x + WHERE NOT hidden --AND x.id < 'v10' + ORDER BY title LIMIT 100; + +EXPLAIN ANALYZE +SELECT i.id, i.width, i.height, i.id AS vid, v.title + FROM (SELECT id, width, height FROM images i TABLESAMPLE SYSTEM (0.93) + WHERE i.c_weight > 0 + AND vndbid_type(i.id) = 'sf' + AND i.c_sexual_avg < 40 + AND i.c_violence_avg < 40 + ORDER BY random() + LIMIT 4) i(id) + JOIN vn_screenshots vs ON vs.scr = i.id + JOIN vnt('(en,,,,,,,,f,f,f,f,t,f,f,f,f,f,,,,,f,f,f,f)'::langprefs) v ON v.id = vs.id + WHERE NOT v.hidden + ORDER BY random() LIMIT '4'; diff --git a/pub/img/vndbtitleprefs.png b/pub/img/vndbtitleprefs.png new file mode 100644 index 0000000..18d0e0a Binary files /dev/null and b/pub/img/vndbtitleprefs.png differ diff --git a/pub/img/vndbtitles.jpg b/pub/img/vndbtitles.jpg new file mode 100644 index 0000000..66f0006 Binary files /dev/null and b/pub/img/vndbtitles.jpg differ diff --git a/pub/img/vndbtitles.xcf b/pub/img/vndbtitles.xcf new file mode 100644 index 0000000..c213210 Binary files /dev/null and b/pub/img/vndbtitles.xcf differ diff --git a/template.html b/template.html index bb9a0fc..8d1f131 100644 --- a/template.html +++ b/template.html @@ -26,17 +26,7 @@
  • Nginx-cfg
  • Ylib
  • Writing
  • -
  • Code dump - -
  • +
  • Code dump
  • Globster
  • $endif$ @@ -74,8 +64,6 @@
  • Info
  • Docs