By digoal
Feature Vector Table for the Content:
create table tbl_v_box_n (vid int primary key, vec float4[], score float4);
Feature vectors corresponding to the content: N dimensions (the tag capacity). On average, M tags are attached to each piece of content; each tag value is a random value within 100.
create or replace function gen_rand_vec(int, int) returns float4[] as $$
-- Returns a $1-dimensional vector in which, on average, $2 dimensions carry a random value within 100 and the rest are 0.
select array(
  select (case when random()*$1 <= $2 then (random()*100)::int else 0 end)::float4
  from generate_series(1,$1)
);
$$ language sql strict;
Insert 1 million records with 512 tags (the vector dimension). Each piece of content has weights in 10 random dimensions and 0 in the others: 5,000 central points are generated, and each row offsets the non-zero dimensions of its central point by a random value in the range -5 to +5.
do language plpgsql $$
declare
  x float4[];
begin
  for i in 1..5000 loop
    -- one central point per outer iteration
    x := gen_rand_vec(512,10);
    for y in 1..200 loop
      -- 200 rows per central point; each non-zero dimension is offset by a random value in [-5, +5]
      insert into tbl_v_box_n (vid, vec)
      select (i-1)*200+y, array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(x) t);
    end loop;
  end loop;
end;
$$;
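The nested loops above generate 5,000 central points × 200 rows each, i.e. 1,000,000 rows; a quick sanity check (not part of the original procedure):
select count(*) from tbl_v_box_n;  -- expected: 1000000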
Create the PASE plug-in:
create extension pase;
Create a vector index:
CREATE INDEX idx_tbl_v_box_n_1 ON tbl_v_box_n
USING
pase_hnsw(vec)
WITH
(dim = 512, base_nb_num = 16, ef_build = 40, ef_search = 200, base64_encoded = 0);
Or create an IVFFlat index:
CREATE INDEX idx_tbl_v_box_n_2 ON tbl_v_box_n
USING
pase_ivfflat(vec)
WITH
(clustering_type = 1, distance_type = 0, dimension = 512, base64_encoded = 0, clustering_params = "10,5000");
For 1 million records, the HNSW index creation takes 1,500 seconds, and IVFFlat takes 1,091 seconds.
Note: The IVFFlat index clusters the data around central points, so there must be enough data in the table when the index is created; otherwise, the clustered central points may deviate from the future data and cause performance problems.
If a large amount of data is available to train the central points in advance, the external clustering method can be used; /data/xxxfile is the file of pre-trained central points.
CREATE INDEX idx_tbl_v_box_n_2 ON tbl_v_box_n
USING
pase_ivfflat(vec)
WITH
(clustering_type = 0, distance_type = 0, dimension = 512, base64_encoded = 0, clustering_params = "/data/xxxfile");
Create the hll plug-in, which records the hashed IDs of content the user has already read (a per-user storage sketch follows the examples below):
create extension hll;
postgres=> select hll_add(hll_empty(), hll_hash_integer(1));
hll_add
--------------------------
\x128b7f8895a3f5af28cafe
(1 row)
postgres=> select '\x128b7f8895a3f5af28cafe'::hll = hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(1));
?column?
----------
t
(1 row)
postgres=> select '\x128b7f8895a3f5af28cafe'::hll = hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(2));
?column?
----------
f
(1 row)
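In practice, the read history is kept per user rather than as a literal. The following is a minimal sketch only, modeled on the video_user_read table referenced in the dynamic-SQL example near the end; the exact column layout and the meaning of w1/w2/w7 (taken here to be read windows of different lengths) are assumptions:
-- Per-user read history stored as hll values (sketch; column meaning assumed).
create table if not exists video_user_read (
  user_id int8 primary key,
  w1 hll default hll_empty(),   -- e.g. most recent read window (assumption)
  w2 hll default hll_empty(),   -- e.g. medium read window (assumption)
  w7 hll default hll_empty()    -- e.g. long read window (assumption)
);
-- Record that user 123 has read content id 456.
insert into video_user_read (user_id, w1)
values (123, hll_add(hll_empty(), hll_hash_bigint(456)))
on conflict (user_id)
do update set w1 = hll_add(video_user_read.w1, hll_hash_bigint(456));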
The user preference tag has 512 dimensions, of which about 50 have non-zero values:
select gen_rand_vec(512,50);
{0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0}
Recommend preferred content to the user while filtering out content that has already been read:
explain (analyze,verbose,timing,costs,buffers)
SELECT vid,
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase as distance
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100;
explain (analyze,verbose,timing,costs,buffers)
select array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100) t;
Or, with the IVFFlat index:
explain (analyze,verbose,timing,costs,buffers)
select array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <#> '0,0,0,0,0,0,0,0,0,0,0,0,0,5.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,50.68,0,62.11,0,0,0,0,0,0,0,0,0,0,78.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46.44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,81.15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.68,0,71.3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15.87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,76.17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,69.94,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0:1:0'::pase
LIMIT 100) t;
Description:
https://www.alibabacloud.com/help/en/doc-detail/147837.html
<?> is a PASE operator that calculates the similarity between the vector on its left and the vector on its right. The vector on the left must use the float4[] data type, and the vector on the right must use the PASE data type.
The PASE data type is defined in the PASE plug-in and provides up to three constructors; the preceding queries build it by casting a colon-separated text literal to pase, as described below.
The vector to the left must have the same number of dimensions as the vector to the right. Otherwise, the system reports similarity calculation errors.
<#> is the operator used by IVFFlat indexes.
The ORDER BY clause is required for an IVFFlat index to take effect; an IVFFlat index allows vectors to be sorted in ascending order of distance.
The PASE data type requires three parameters to specify a vector; the parameters are separated by colons (:). For example, 1,1,1:10:0 includes three parts: the vector itself (1,1,1) followed by two query-time parameters.
When using the IVFFlat index for a query, you can adjust the value of the second parameter to improve performance. However, pay attention to accuracy and to whether the number of returned records meets the requirement; if the number of returned records stays within the specified ef_search range but does not meet the requirement, widen the ef_search range.
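To show where the second parameter sits without repeating a 512-dimensional vector, here is a minimal, self-contained sketch on a hypothetical 3-dimensional table (tbl_demo is not part of the procedure above; only the layout of the pase literal matters):
-- Hypothetical 3-dimensional demo, only to illustrate the ':search_param:flag' suffix of the pase literal.
create table tbl_demo (id int primary key, vec float4[]);
insert into tbl_demo
select g, array[(random()*100)::float4, (random()*100)::float4, (random()*100)::float4]
from generate_series(1,1000) g;
create index idx_tbl_demo_hnsw on tbl_demo using pase_hnsw(vec)
with (dim = 3, base_nb_num = 16, ef_build = 40, ef_search = 200, base64_encoded = 0);
-- '10,20,30' is the query vector; 200 is the per-query search parameter (ef_search, raised from
-- the 40 used earlier to widen the candidate set); the trailing 0 is kept unchanged from the queries above.
select id from tbl_demo order by vec <?> '10,20,30:200:0'::pase limit 10;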
Stress Testing:
create or replace function test(int) returns void as $$
declare
begin
for i in 1..$1 loop
perform array_agg(vid) from (
SELECT vid
FROM tbl_v_box_n
where
'\x128b7f8895a3f5af28cafe'::hll <> hll_add('\x128b7f8895a3f5af28cafe'::hll, hll_hash_integer(vid))
ORDER BY
vec <?> '0,0,0,0,0,0,0,0,0,0,0,0,75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,13,72,0,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,0,0,0,0,92,0,0,0,0,0,45,0,4,0,0,0,0,0,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,0,0,0,0,0,0,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,0,0,0,0,0,0,94,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,0,0,90,0,84,0,0,0,0,0,0,0,0,0,0,62,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,4,0,0,0,0,49,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87,0,7,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,0,0,78,0,27,0,0,0,0,0,57,0,0,0,0,0,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,73,0,0,0,4,0,0,77,0,74,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,22,0,0,0,0,83,0,0,0,0,0,0,0,64,0,85,0,0,0,0,97,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,82,22,0,0,0,0,0,0,48,58,86,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,68,0,0,0,0,0,0,0,0,0,0,0,0,48,0,0,0:40:0'::pase
LIMIT 100) t;
end loop;
return;
end;
$$ language plpgsql strict;
vi test.sql
select test(100);
pgbench -M prepared -n -r -P 1 -f ./test.sql -c 20 -j 20 -T 120
progress: 1.0 s, 18.0 tps, lat 683.974 ms stddev 81.527
progress: 2.0 s, 21.0 tps, lat 722.688 ms stddev 100.880
progress: 3.0 s, 24.0 tps, lat 714.391 ms stddev 102.946
progress: 4.0 s, 28.0 tps, lat 860.122 ms stddev 141.135
progress: 5.0 s, 24.0 tps, lat 787.278 ms stddev 58.100
progress: 6.0 s, 21.0 tps, lat 786.168 ms stddev 91.312
progress: 7.0 s, 30.0 tps, lat 796.017 ms stddev 134.557
progress: 8.0 s, 24.0 tps, lat 791.886 ms stddev 84.715
progress: 9.0 s, 23.0 tps, lat 775.147 ms stddev 103.539
progress: 10.0 s, 27.0 tps, lat 810.352 ms stddev 66.641
progress: 11.0 s, 30.0 tps, lat 780.018 ms stddev 77.176
progress: 12.0 s, 23.0 tps, lat 773.011 ms stddev 65.566
progress: 13.0 s, 26.0 tps, lat 749.862 ms stddev 47.076
progress: 14.0 s, 24.0 tps, lat 805.899 ms stddev 112.102
progress: 15.0 s, 27.0 tps, lat 788.106 ms stddev 85.193
progress: 16.0 s, 24.0 tps, lat 793.259 ms stddev 83.253
progress: 17.0 s, 26.0 tps, lat 779.958 ms stddev 85.379
progress: 18.0 s, 26.0 tps, lat 765.178 ms stddev 78.601
On a 4-core machine, the QPS is about 2,300 (each pgbench transaction calls test(100), which runs 100 queries, so QPS ≈ TPS × 100).
Update Stress Testing:
with a as (
select array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(vec) t) as vv from tbl_v_box_n where vid=1
)
update tbl_v_box_n set vec=a.vv from a where vid=1;
vi up.sql
\set vid random(1,1000000)
with a as (
select array(select (case when t=0 then 0 else t + 5 - round((10*random())::numeric,2) end)::float4 from unnest(vec) t) as vv from tbl_v_box_n where vid=:vid
)
update tbl_v_box_n set vec=a.vv from a where vid=:vid;
pgbench -M prepared -n -r -P 1 -f ./up.sql -c 20 -j 20 -T 120
Update method of the user preference tag:
The read content affects user preferences:
now_vec = (vec_old*cnt_old + vec_new*cnt_new) / (cnt_old + cnt_new)
now_cnt = cnt_old + cnt_new
vec_old: The current feature vector of the user
cnt_old: The number of pieces of content the user has read. This value accumulates over time; you can adjust it downward to reduce the impact of old features.
vec_new: The feature vector central point of the user's newly read content
cnt_new: The number of pieces of content that have been recently read by the user
Process: Content IDs are obtained based on the similarity between the user vector and the content vectors. After the user has read several pieces of content, query the feature vectors of that content by ID, compute their central point (vec_new), and merge it into the user feature vector (vec_old); see the SQL sketch after the notes below.
Aging method (to prevent user preferences from becoming locked in): Decrease cnt_old when calculating the new feature vector. This lowers the weight of the user's old feature vector and shrinks the denominator (cnt_old + cnt_new), so the old values contribute less when the new feature vector is merged in.
Downgrade policy for cnt_old: Besides adjusting it based on time, you can also decide based on the user's recent skip rate; if the skip rate rises abnormally, reduce cnt_old.
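The following is a minimal SQL sketch of this merge. It assumes that u_like_tag (used in the dynamic-SQL example below) also carries a cnt column holding cnt_old, and that the IDs the user has just read are passed in as an array; it illustrates the formula above and is not the original implementation:
create or replace function merge_user_vec(i_uid int8, i_read_vids int[]) returns void as $$
declare
  v_new float4[];   -- vec_new: per-dimension centroid of the newly read content vectors
  c_new int;        -- cnt_new: number of newly read pieces of content
begin
  select count(*) into c_new from tbl_v_box_n where vid = any(i_read_vids);
  if c_new = 0 then
    return;
  end if;
  -- centroid of the newly read content, computed dimension by dimension
  select array_agg(avgv order by dim) into v_new
  from (
    select dim, avg(val)::float4 as avgv
    from tbl_v_box_n, unnest(vec) with ordinality as u(val, dim)
    where vid = any(i_read_vids)
    group by dim
  ) t;
  -- now_vec = (vec_old*cnt_old + vec_new*cnt_new) / (cnt_old + cnt_new); now_cnt = cnt_old + cnt_new
  update u_like_tag set
    vec = (select array_agg(((o*u_like_tag.cnt + n*c_new)/(u_like_tag.cnt + c_new))::float4 order by dim)
           from unnest(u_like_tag.vec, v_new) with ordinality as x(o, n, dim)),
    cnt = u_like_tag.cnt + c_new
  where user_id = i_uid;
end;
$$ language plpgsql strict;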
Example of Dynamic SQL Execution:
create or replace function get_v_ids(
i_uid int8,
i_ef int,
i_limit int,
i_tbl_prefix text,
i_suffix text,
i_idx_ops text
) returns int8[] as $$
declare
u_hll_w1 hll;
u_hll_w2 hll;
u_hll_w7 hll;
v_vec text;
res int8[];
query text;
begin
-- force the planner to use index scans for the vector search
set local enable_seqscan=off;
set local enable_bitmapscan=off;
set local enable_indexscan=on;
set local enable_indexonlyscan=on;
-- fetch the user's read-history hlls (three windows) and the user's preference vector
select coalesce(w1,hll_empty()),coalesce(w2,hll_empty()),coalesce(w7,hll_empty())
into u_hll_w1,u_hll_w2,u_hll_w7 from video_user_read where user_id=i_uid;
-- strip the curly braces from the float4[] text form to build the pase literal
select rtrim(ltrim(vec::text,'{'),'}') into v_vec from u_like_tag where user_id = i_uid;
-- appending an id hash with '||' leaves an hll unchanged only if the id is already recorded,
-- so '<>' keeps content the user has not read yet
query := format($_$
select array_agg(v_id order by video_weight desc) from
(
select v_id,video_weight from %I v
where %L::hll || hll_hash_bigint(v.v_id) <> %L::hll
or %L::hll || hll_hash_bigint(v.v_id) <> %L::hll
or %L::hll || hll_hash_bigint(v.v_id) <> %L::hll
order by vec %s (%L)::pase
limit %s
) t
$_$,
i_tbl_prefix||'_'||i_suffix,
u_hll_w1,u_hll_w1,
u_hll_w2,u_hll_w2,
u_hll_w7,u_hll_w7,
i_idx_ops,
v_vec||':'||i_ef||':0',
i_limit
);
raise notice '%', query;
execute query into res;
return res;
end;
$$ language plpgsql strict;
select get_v_ids(123::int8, 1, 100, 'tbl', '1', '<#>');
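Presumably, a call against the HNSW index would pass the <?> operator and the per-query search value used in the earlier HNSW queries (this invocation is an assumption, not from the original):
select get_v_ids(123::int8, 40, 100, 'tbl', '1', '<?>');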