|
| 1 | +use crate::table::file_format_options::{ |
| 2 | + FileFormatOptions, TableOptions, WriterPropertiesFactory, WriterPropertiesFactoryRef, |
| 3 | +}; |
| 4 | +use crate::{crate_version, DeltaResult}; |
| 5 | +use arrow_schema::Schema as ArrowSchema; |
| 6 | +use async_trait::async_trait; |
| 7 | +use datafusion::catalog::Session; |
| 8 | +use datafusion::config::{ConfigField, EncryptionFactoryOptions, ExtensionOptions}; |
| 9 | +use datafusion::execution::parquet_encryption::EncryptionFactory; |
| 10 | +use object_store::path::Path; |
| 11 | +use parquet::basic::Compression; |
| 12 | +use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; |
| 13 | +use parquet::schema::types::ColumnPath; |
| 14 | +use std::fmt::{Debug, Formatter}; |
| 15 | +use std::sync::Arc; |
| 16 | +use uuid::Uuid; |
| 17 | + |
| 18 | +pub type SchemaRef = Arc<ArrowSchema>; |
| 19 | + |
| 20 | +#[derive(Clone, Debug)] |
| 21 | +pub struct TableEncryption { |
| 22 | + encryption_factory: Arc<dyn EncryptionFactory>, |
| 23 | + configuration: EncryptionFactoryOptions, |
| 24 | +} |
| 25 | + |
| 26 | +impl TableEncryption { |
| 27 | + pub fn new( |
| 28 | + encryption_factory: Arc<dyn EncryptionFactory>, |
| 29 | + configuration: EncryptionFactoryOptions, |
| 30 | + ) -> Self { |
| 31 | + Self { |
| 32 | + encryption_factory, |
| 33 | + configuration, |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + pub fn new_with_extension_options<T: ExtensionOptions>( |
| 38 | + encryption_factory: Arc<dyn EncryptionFactory>, |
| 39 | + options: &T, |
| 40 | + ) -> DeltaResult<Self> { |
| 41 | + let mut configuration = EncryptionFactoryOptions::default(); |
| 42 | + for entry in options.entries() { |
| 43 | + if let Some(value) = &entry.value { |
| 44 | + configuration.set(&entry.key, value)?; |
| 45 | + } |
| 46 | + } |
| 47 | + Ok(Self { |
| 48 | + encryption_factory, |
| 49 | + configuration, |
| 50 | + }) |
| 51 | + } |
| 52 | + |
| 53 | + pub fn encryption_factory(&self) -> &Arc<dyn EncryptionFactory> { |
| 54 | + &self.encryption_factory |
| 55 | + } |
| 56 | + |
| 57 | + pub fn configuration(&self) -> &EncryptionFactoryOptions { |
| 58 | + &self.configuration |
| 59 | + } |
| 60 | + |
| 61 | + pub async fn update_writer_properties( |
| 62 | + &self, |
| 63 | + mut builder: WriterPropertiesBuilder, |
| 64 | + file_path: &Path, |
| 65 | + file_schema: &SchemaRef, |
| 66 | + ) -> DeltaResult<WriterPropertiesBuilder> { |
| 67 | + let encryption_properties = self |
| 68 | + .encryption_factory |
| 69 | + .get_file_encryption_properties(&self.configuration, file_schema, file_path) |
| 70 | + .await?; |
| 71 | + if let Some(encryption_properties) = encryption_properties { |
| 72 | + builder = builder.with_file_encryption_properties(encryption_properties); |
| 73 | + } |
| 74 | + Ok(builder) |
| 75 | + } |
| 76 | +} |
| 77 | + |
| 78 | +// More advanced factory with KMS support |
| 79 | +#[derive(Clone, Debug)] |
| 80 | +pub struct KMSWriterPropertiesFactory { |
| 81 | + writer_properties: WriterProperties, |
| 82 | + encryption: Option<TableEncryption>, |
| 83 | +} |
| 84 | + |
| 85 | +impl KMSWriterPropertiesFactory { |
| 86 | + pub fn with_encryption(table_encryption: TableEncryption) -> Self { |
| 87 | + let writer_properties = WriterProperties::builder() |
| 88 | + .set_compression(Compression::SNAPPY) // Code assumes Snappy by default |
| 89 | + .set_created_by(format!("delta-rs version {}", crate_version())) |
| 90 | + .build(); |
| 91 | + Self { |
| 92 | + writer_properties, |
| 93 | + encryption: Some(table_encryption), |
| 94 | + } |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +#[async_trait] |
| 99 | +impl WriterPropertiesFactory for KMSWriterPropertiesFactory { |
| 100 | + fn compression(&self, column_path: &ColumnPath) -> Compression { |
| 101 | + self.writer_properties.compression(column_path) |
| 102 | + } |
| 103 | + |
| 104 | + async fn create_writer_properties( |
| 105 | + &self, |
| 106 | + file_path: &Path, |
| 107 | + file_schema: &Arc<ArrowSchema>, |
| 108 | + ) -> DeltaResult<WriterProperties> { |
| 109 | + let mut builder: WriterPropertiesBuilder = self.writer_properties.clone().into(); |
| 110 | + if let Some(encryption) = self.encryption.as_ref() { |
| 111 | + builder = encryption |
| 112 | + .update_writer_properties(builder, file_path, file_schema) |
| 113 | + .await?; |
| 114 | + } |
| 115 | + Ok(builder.build()) |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +// ------------------------------------------------------------------------------------------------- |
| 120 | +// FileFormatOptions for KMS encryption based on settings in TableEncryption |
| 121 | +// ------------------------------------------------------------------------------------------------- |
| 122 | +pub struct KmsFileFormatOptions { |
| 123 | + table_encryption: TableEncryption, |
| 124 | + writer_properties_factory: WriterPropertiesFactoryRef, |
| 125 | + encryption_factory_id: String, |
| 126 | +} |
| 127 | + |
| 128 | +impl KmsFileFormatOptions { |
| 129 | + pub fn new(table_encryption: TableEncryption) -> Self { |
| 130 | + let encryption_factory_id = format!("delta-{}", Uuid::new_v4()); |
| 131 | + let writer_properties_factory = Arc::new(KMSWriterPropertiesFactory::with_encryption( |
| 132 | + table_encryption.clone(), |
| 133 | + )); |
| 134 | + Self { |
| 135 | + table_encryption, |
| 136 | + writer_properties_factory, |
| 137 | + encryption_factory_id, |
| 138 | + } |
| 139 | + } |
| 140 | +} |
| 141 | + |
| 142 | +impl Debug for KmsFileFormatOptions { |
| 143 | + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
| 144 | + f.debug_struct("KmsFileFormatOptions") |
| 145 | + .finish_non_exhaustive() |
| 146 | + } |
| 147 | +} |
| 148 | + |
| 149 | +impl FileFormatOptions for KmsFileFormatOptions { |
| 150 | + fn table_options(&self) -> TableOptions { |
| 151 | + let mut table_options = TableOptions::default(); |
| 152 | + table_options.parquet.crypto.factory_id = Some(self.encryption_factory_id.clone()); |
| 153 | + table_options.parquet.crypto.factory_options = |
| 154 | + self.table_encryption.configuration().clone(); |
| 155 | + table_options |
| 156 | + } |
| 157 | + |
| 158 | + fn writer_properties_factory(&self) -> WriterPropertiesFactoryRef { |
| 159 | + Arc::clone(&self.writer_properties_factory) |
| 160 | + } |
| 161 | + |
| 162 | + fn update_session(&self, session: &dyn Session) -> DeltaResult<()> { |
| 163 | + // Ensure DataFusion has the encryption factory registered |
| 164 | + session.runtime_env().register_parquet_encryption_factory( |
| 165 | + &self.encryption_factory_id, |
| 166 | + Arc::clone(self.table_encryption.encryption_factory()), |
| 167 | + ); |
| 168 | + Ok(()) |
| 169 | + } |
| 170 | +} |
0 commit comments