-
Notifications
You must be signed in to change notification settings - Fork 64
Parse arrays with text format in postgres #402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
f647789
7d73378
6013ced
bc4d93d
5b0ee94
d046c30
4f5ec63
375d20a
205b0d1
3d6ca35
61ab77c
7eef805
b2bf75e
9e5d848
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,9 @@ use pgwire::{ | |
| api::{self, ClientInfo}, | ||
| error::PgWireError, | ||
| messages as msg, | ||
| types::FromSqlText, | ||
| }; | ||
| use postgres_types::Type; | ||
| use std::{collections::HashMap, io}; | ||
| use tokio_util::codec; | ||
|
|
||
|
|
@@ -169,3 +171,102 @@ where | |
| } | ||
| } | ||
| } | ||
|
|
||
| pub trait VecFromSqlText: Sized { | ||
| fn from_vec_sql_text( | ||
| ty: &Type, | ||
| input: &[u8], | ||
| ) -> Result<Self, Box<dyn std::error::Error + Sync + Send>>; | ||
| } | ||
|
|
||
| // Re-implementation of the ToSqlText trait from pg_wire to make it generic over different types. | ||
| // Implemented as a macro in pgwire | ||
| // https://github.com/sunng87/pgwire/blob/6cbce9d444cc86a01d992f6b35f84c024f10ceda/src/types/from_sql_text.rs#L402 | ||
| impl<T: FromSqlText> VecFromSqlText for Vec<T> { | ||
| fn from_vec_sql_text( | ||
| ty: &Type, | ||
| input: &[u8], | ||
| ) -> Result<Self, Box<dyn std::error::Error + Sync + Send>> { | ||
| // PostgreSQL array text format: {elem1,elem2,elem3} | ||
| // Remove the outer braces | ||
| let input_str = std::str::from_utf8(input)?; | ||
|
|
||
| if input_str.is_empty() { | ||
| return Ok(Vec::new()); | ||
| } | ||
|
|
||
| // Check if it's an array format | ||
| if !input_str.starts_with('{') || !input_str.ends_with('}') { | ||
| return Err("Invalid array format: must start with '{' and end with '}'".into()); | ||
| } | ||
|
|
||
| let inner = &input_str[1..input_str.len() - 1]; | ||
|
|
||
| if inner.is_empty() { | ||
| return Ok(Vec::new()); | ||
| } | ||
|
|
||
| let elements = extract_array_elements(inner)?; | ||
| let mut result = Vec::new(); | ||
|
|
||
| for element_str in elements { | ||
| let element = T::from_sql_text(ty, element_str.as_bytes())?; | ||
| result.push(element); | ||
| } | ||
|
|
||
| Ok(result) | ||
| } | ||
| } | ||
|
|
||
/// Splits the inside of a PostgreSQL text-format array (the text between the
/// outer `{` and `}`) into its top-level element strings.
///
/// Adapted from pgwire's helper:
/// https://github.com/sunng87/pgwire/blob/6cbce9d444cc86a01d992f6b35f84c024f10ceda/src/types/from_sql_text.rs#L402
///
/// Rules applied while scanning:
/// * Elements are separated by commas that are outside quotes and outside
///   nested braces.
/// * Double quotes delimit quoted sections and are not copied to the output.
/// * A backslash makes the next character literal; the backslash itself is
///   not copied to the output.
/// * Nested arrays (`{...}`) are kept, braces included, as a single element.
/// * An unquoted `NULL` (any letter case, surrounding whitespace ignored) is
///   skipped, because the returned `Vec<String>` cannot represent a null.
///   A quoted `"NULL"` is an ordinary string and is kept.
///
/// # Errors
///
/// Returns an error when the input ends inside a quoted section, ends with a
/// dangling backslash, or has unbalanced braces.
fn extract_array_elements(
    input: &str,
) -> Result<Vec<String>, Box<dyn std::error::Error + Sync + Send>> {
    /// Appends a finished element unless it is an unquoted `NULL`.
    fn push_element(elements: &mut Vec<String>, raw: String, was_quoted: bool) {
        if was_quoted || !raw.trim().eq_ignore_ascii_case("NULL") {
            elements.push(raw);
        }
    }

    if input.is_empty() {
        return Ok(Vec::new());
    }

    let mut elements = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escape_next = false;
    // Whether the element being built contained a quoted section. Needed to
    // tell `"NULL"` / `""` (real strings) apart from NULL / nothing at all.
    let mut quoted = false;
    let mut depth = 0usize; // For nested arrays

    for ch in input.chars() {
        match ch {
            // The escaping backslash is consumed, not copied. Per the review
            // discussion on this code, pgwire's helper kept it in the output,
            // which did not work with Go's postgres client.
            '\\' if !escape_next => {
                escape_next = true;
            }
            '"' if !escape_next => {
                in_quotes = !in_quotes;
                quoted = true;
                // Don't include the quotes in the output
            }
            '{' if !in_quotes && !escape_next => {
                depth += 1;
                current.push(ch);
            }
            '}' if !in_quotes && !escape_next => {
                if depth == 0 {
                    return Err("Invalid array format: unbalanced '}'".into());
                }
                depth -= 1;
                current.push(ch);
            }
            ',' if !in_quotes && depth == 0 && !escape_next => {
                // End of current element. Always reset the buffer and the
                // quoted flag, even when the element is a skipped NULL, so
                // nothing leaks into the next element.
                let raw = std::mem::take(&mut current);
                let was_quoted = std::mem::take(&mut quoted);
                push_element(&mut elements, raw, was_quoted);
            }
            _ => {
                current.push(ch);
                escape_next = false;
            }
        }
    }

    if escape_next {
        return Err("Invalid array format: trailing backslash".into());
    }
    if in_quotes {
        return Err("Invalid array format: unterminated quoted element".into());
    }
    if depth != 0 {
        return Err("Invalid array format: unbalanced '{'".into());
    }

    // Process the last element. An unquoted empty tail (e.g. after a trailing
    // comma) yields nothing; a quoted empty string `""` is a real element.
    if quoted || !current.is_empty() {
        push_element(&mut elements, current, quoted);
    }

    Ok(elements)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,8 @@ use corro_types::{ | |
| config::{PgConfig, PgTlsConfig}, | ||
| tls::{generate_ca, generate_client_cert, generate_server_cert}, | ||
| }; | ||
| use postgres_types::ToSql; | ||
| use pgwire::types::ToSqlText; | ||
| use postgres_types::{Format, IsNull, ToSql, Type}; | ||
| use rcgen::Certificate; | ||
| use rustls::pki_types::pem::PemObject; | ||
| use spawn::wait_for_all_pending_handles; | ||
|
|
@@ -805,6 +806,43 @@ async fn test_unnest_typing() { | |
| wait_for_all_pending_handles().await; | ||
| } | ||
|
|
||
| // wrapper so we can easily switch between text and binary formats | ||
| #[derive(Debug)] | ||
| struct SqlVec<'a, T> { | ||
| inner: &'a Vec<T>, | ||
| format: Format, | ||
| } | ||
|
|
||
| // test text encoding/decoding | ||
| impl<'a, T: ToSqlText + ToSql> ToSql for SqlVec<'a, T> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Love this approach |
||
| fn to_sql( | ||
| &self, | ||
| ty: &Type, | ||
| out: &mut bytes::BytesMut, | ||
| ) -> Result<IsNull, Box<dyn std::error::Error + Sync + Send>> { | ||
| match self.format { | ||
| Format::Text => self.inner.to_sql_text(ty, out), | ||
| Format::Binary => self.inner.to_sql(ty, out), | ||
| } | ||
| } | ||
|
|
||
| fn accepts(ty: &postgres_types::Type) -> bool | ||
| where | ||
| Self: Sized, | ||
| { | ||
| match ty.kind() { | ||
| postgres_types::Kind::Array(_) => true, | ||
| _ => false, | ||
| } | ||
| } | ||
|
|
||
| fn encode_format(&self, _ty: &Type) -> Format { | ||
| self.format | ||
| } | ||
|
|
||
| postgres_types::to_sql_checked!(); | ||
| } | ||
|
|
||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_unnest_max_parameters() { | ||
| let (tripwire, tripwire_worker, tripwire_tx) = Tripwire::new_simple(); | ||
|
|
@@ -925,37 +963,50 @@ async fn test_unnest_vtab() { | |
|
|
||
| // Test single array unnest with text type | ||
| { | ||
| let col1 = vec!["a", "b", "c", "d", "e", "f"]; | ||
| let rows = client | ||
| .query( | ||
| "SELECT CAST(value0 AS text) FROM unnest(CAST($1 AS text[]))", | ||
| &[&col1], | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
| for (i, row) in rows.iter().enumerate() { | ||
| let val: String = row.get(0); | ||
| assert_eq!(val, col1[i]); | ||
| for format in [Format::Text, Format::Binary] { | ||
somtochiama marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| let col1 = vec!["a", "b", "c", "d", "e", "f"]; | ||
| let sql_vec = SqlVec { | ||
| inner: &col1, | ||
| format, | ||
| }; | ||
| let rows = client | ||
| .query( | ||
| "SELECT CAST(value0 AS text) FROM unnest(CAST($1 AS text[]))", | ||
somtochiama marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| &[&sql_vec], | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
| for (i, row) in rows.iter().enumerate() { | ||
| let val: String = row.get(0); | ||
| assert_eq!(val, col1[i]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Test single array unnest with float type | ||
| { | ||
| let col1 = vec![1.0, 2.0, 3.0, 4.0, 1337.0, 12312312312.0]; | ||
| let rows = client | ||
| .query( | ||
| "SELECT CAST(value0 AS float) FROM unnest(CAST($1 AS float[]))", | ||
| &[&col1], | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
| for (i, row) in rows.iter().enumerate() { | ||
| let val: f64 = row.get(0); | ||
| assert_eq!(val, col1[i]); | ||
| for format in [Format::Text, Format::Binary] { | ||
| let sql_vec = SqlVec { | ||
| inner: &col1, | ||
| format, | ||
| }; | ||
| let rows = client | ||
| .query( | ||
| "SELECT CAST(value0 AS float) FROM unnest(CAST($1 AS float[]))", | ||
| &[&sql_vec], | ||
| ) | ||
| .await | ||
| .unwrap(); | ||
| for (i, row) in rows.iter().enumerate() { | ||
| let val: f64 = row.get(0); | ||
| assert_eq!(val, col1[i]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Test single array unnest with blob type | ||
| // TODO: pgwire's text encoding for blob[] is currently broken but we'd work for proper clients | ||
gorbak25 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| let col1 = vec![b"a", b"b", b"c", b"d", b"e", b"f"]; | ||
| let rows = client | ||
|
|
@@ -971,7 +1022,7 @@ async fn test_unnest_vtab() { | |
| } | ||
| } | ||
|
|
||
| // Now try all at once with different types | ||
| // Now try all at once with different types, use corro_unnest | ||
| { | ||
| let col1 = vec![1i64, 2, 3, 4, 1337, 12312312312]; | ||
| let col2 = vec!["a", "b", "c", "d", "e", "f"]; | ||
|
|
@@ -980,7 +1031,7 @@ async fn test_unnest_vtab() { | |
| let rows = client | ||
| .query( | ||
| "SELECT | ||
| CAST(value0 AS int), CAST(value1 AS text), CAST(value2 AS float), CAST(value3 AS blob) FROM unnest(CAST($1 AS int[]), CAST($2 AS text[]), CAST($3 AS float[]), CAST($4 AS blob[]))", | ||
| CAST(value0 AS int), CAST(value1 AS text), CAST(value2 AS float), CAST(value3 AS blob) FROM corro_unnest(CAST($1 AS int[]), CAST($2 AS text[]), CAST($3 AS float[]), CAST($4 AS blob[]))", | ||
| &[&col1, &col2, &col3, &col4], | ||
| ) | ||
| .await | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.