diff --git a/python/python/lance/__init__.py b/python/python/lance/__init__.py
index d98de0424ad..be7fd2bd178 100644
--- a/python/python/lance/__init__.py
+++ b/python/python/lance/__init__.py
@@ -50,6 +50,12 @@
 ts_types = Union[datetime, pd.Timestamp, str]
 
 
+class FieldNotFoundError(Exception):
+    """Raised when a requested field is not found in the schema."""
+
+    pass
+
+
 __all__ = [
     "Blob",
     "BlobArray",
@@ -59,6 +65,7 @@
     "blob_field",
     "DatasetBasePath",
     "DataStatistics",
+    "FieldNotFoundError",
     "FieldStatistics",
     "FragmentMetadata",
     "Index",
diff --git a/python/python/tests/test_dataset.py b/python/python/tests/test_dataset.py
index 9cf7c824a60..5e2de4c80b8 100644
--- a/python/python/tests/test_dataset.py
+++ b/python/python/tests/test_dataset.py
@@ -27,6 +27,7 @@
 import pyarrow.parquet as pq
 import pytest
 from helper import ProgressForTest
+from lance import FieldNotFoundError
 from lance._dataset.sharded_batch_iterator import ShardedBatchIterator
 from lance.commit import CommitConflictError
 from lance.dataset import LANCE_COMMIT_MESSAGE_KEY, AutoCleanupConfig
@@ -5094,3 +5095,32 @@ def test_default_scan_options_nearest(tmp_path: Path) -> None:
     assert distances == sorted(distances)
 
     assert "id" in result.column_names
+
+
+def test_field_not_found_error():
+    data = pa.table(
+        {
+            "id": [1, 2, 3],
+            "foo": ["a", "b", "c"],
+            "bar": [0.1, 0.2, 0.3],
+        }
+    )
+    ds = lance.write_dataset(data, "memory://")
+
+    with pytest.raises(FieldNotFoundError) as e:
+        ds.scanner(columns=["non_existent_field"]).to_table()
+    # Too different to suggest anything
+    expected = (
+        "Field 'non_existent_field' not found."
+        "\nAvailable fields: ['id', 'foo', 'bar']"
+    )
+    assert str(e.value) == expected
+
+    with pytest.raises(FieldNotFoundError) as e:
+        ds.scanner(columns=["baz"]).to_table()
+    # Close enough to suggest 'bar'
+    expected = (
+        "Field 'baz' not found. Did you mean 'bar'?"
+        "\nAvailable fields: ['id', 'foo', 'bar']"
+    )
+    assert str(e.value) == expected
diff --git a/python/src/error.rs b/python/src/error.rs
index 11e4146910b..4b41b547163 100644
--- a/python/src/error.rs
+++ b/python/src/error.rs
@@ -12,15 +12,26 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::sync::LazyLock;
+
 use lance_namespace::error::NamespaceError;
 use pyo3::{
     exceptions::{PyIOError, PyNotImplementedError, PyRuntimeError, PyValueError},
     types::{PyAnyMethods, PyModule},
-    BoundObject, PyErr, PyResult, Python,
+    BoundObject, Py, PyAny, PyErr, PyResult, Python,
 };
 
 use lance::Error as LanceError;
 
+/// Lazily loads the FieldNotFoundError exception from the lance Python module.
+static PY_FIELD_NOT_FOUND_ERROR: LazyLock<PyResult<Py<PyAny>>> = LazyLock::new(|| {
+    Python::attach(|py| {
+        py.import("lance")
+            .and_then(|lance| lance.getattr("FieldNotFoundError"))
+            .map(|err| err.unbind())
+    })
+});
+
 /// Try to convert a NamespaceError to the corresponding Python exception.
 /// Returns the appropriate Python exception from lance_namespace.errors module.
 fn namespace_error_to_pyerr(py: Python<'_>, ns_err: &NamespaceError) -> PyErr {
@@ -79,6 +90,24 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                 LanceError::NotFound { .. } => self.value_error(),
                 LanceError::RefNotFound { .. } => self.value_error(),
                 LanceError::VersionNotFound { .. } => self.value_error(),
+                LanceError::FieldNotFound { source } => {
+                    let msg = source.to_string();
+                    match &*PY_FIELD_NOT_FOUND_ERROR {
+                        Ok(exc_type) => Python::attach(|py| {
+                            // Report a failed instantiation as a Python error, not a panic.
+                            Err(match exc_type.bind(py).call1((msg,)) {
+                                Ok(exc) => PyErr::from_value(exc),
+                                Err(err) => err,
+                            })
+                        }),
+                        Err(_) => {
+                            log::warn!(
+                                "Failed to import FieldNotFoundError from lance module, falling back to ValueError"
+                            );
+                            self.value_error()
+                        }
+                    }
+                }
                 LanceError::Namespace { source, .. } => {
                     // Try to downcast to NamespaceError and convert to proper Python exception
                     if let Some(ns_err) = source.downcast_ref::<NamespaceError>() {
diff --git a/rust/lance-core/src/datatypes/schema.rs b/rust/lance-core/src/datatypes/schema.rs
index 3e8340bd19b..4ba280f916d 100644
--- a/rust/lance-core/src/datatypes/schema.rs
+++ b/rust/lance-core/src/datatypes/schema.rs
@@ -50,10 +50,9 @@ impl FieldRef<'_> {
                 Ok(id)
             }
             FieldRef::ByPath(path) => {
-                let field = schema.field(path).ok_or_else(|| Error::InvalidInput {
-                    source: format!("Field '{}' not found in schema", path).into(),
-                    location: location!(),
-                })?;
+                let field = schema
+                    .field(path)
+                    .ok_or_else(|| Error::field_not_found(path, schema.field_paths()))?;
                 Ok(field.id)
             }
         }
@@ -235,10 +234,7 @@ impl Schema {
                     candidates.push(projected_field)
                 }
             } else if err_on_missing && first != ROW_ID && first != ROW_ADDR {
-                return Err(Error::Schema {
-                    message: format!("Column {} does not exist", col.as_ref()),
-                    location: location!(),
-                });
+                return Err(Error::field_not_found(col.as_ref(), self.field_paths()));
             }
         }
 
@@ -347,6 +343,27 @@ impl Schema {
         SchemaFieldIterPreOrder::new(self)
     }
 
+    /// Get all field paths in the schema as a list of strings.
+    ///
+    /// This returns all field paths in the schema, including nested fields.
+    /// For example, if there's a struct field "user" with a field "name",
+    /// this will return "user.name" as one of the paths.
+    pub fn field_paths(&self) -> Vec<String> {
+        let mut paths = Vec::new();
+        for field in self.fields_pre_order() {
+            let ancestry = self.field_ancestry_by_id(field.id);
+            if let Some(ancestry) = ancestry {
+                let path = ancestry
+                    .iter()
+                    .map(|f| f.name.as_str())
+                    .collect::<Vec<_>>()
+                    .join(".");
+                paths.push(path);
+            }
+        }
+        paths
+    }
+
     /// Returns a new schema that only contains the fields in `column_ids`.
     ///
     /// This projection can filter out both top-level and nested fields
@@ -507,12 +524,18 @@ impl Schema {
 
     // TODO: This is not a public API, change to pub(crate) after refactor is done.
     pub fn field_id(&self, column: &str) -> Result<i32> {
-        self.field(column)
-            .map(|f| f.id)
-            .ok_or_else(|| Error::Schema {
-                message: "Vector column not in schema".to_string(),
+        self.field(column).map(|f| f.id).ok_or_else(|| {
+            let paths = self.field_paths();
+            let suggestion = crate::levenshtein::find_best_suggestion(column, &paths);
+            let mut error_msg = format!("Vector column '{}' not in schema", column);
+            if let Some(suggestion) = suggestion {
+                error_msg = format!("{}. Did you mean '{}'?", error_msg, suggestion);
+            }
+            Error::Schema {
+                message: error_msg,
                 location: location!(),
-            })
+            }
+        })
     }
 
     pub fn top_level_field_ids(&self) -> Vec<i32> {
diff --git a/rust/lance-core/src/error.rs b/rust/lance-core/src/error.rs
index fe943e03a1d..95d050ea708 100644
--- a/rust/lance-core/src/error.rs
+++ b/rust/lance-core/src/error.rs
@@ -1,11 +1,50 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The Lance Authors
 
+use std::fmt;
+
 use arrow_schema::ArrowError;
 use snafu::{Location, Snafu};
 
 type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;
 
+/// Error for when a requested field is not found in a schema.
+///
+/// This error computes suggestions lazily (only when displayed) to avoid
+/// computing Levenshtein distance when the error is created but never shown.
+#[derive(Debug)]
+pub struct FieldNotFoundError {
+    pub field_name: String,
+    pub candidates: Vec<String>,
+}
+
+impl fmt::Display for FieldNotFoundError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Field '{}' not found.", self.field_name)?;
+        let suggestion =
+            crate::levenshtein::find_best_suggestion(&self.field_name, &self.candidates);
+        if let Some(suggestion) = suggestion {
+            write!(f, " Did you mean '{}'?", suggestion)?;
+        }
+        write!(f, "\nAvailable fields: [")?;
+        for (i, candidate) in self.candidates.iter().take(10).enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "'{}'", candidate)?;
+        }
+        if self.candidates.len() > 10 {
+            let remaining = self.candidates.len() - 10;
+            write!(f, ", ... and {} more]", remaining)?;
+        } else {
+            write!(f, "]")?;
+        }
+        Ok(())
+    }
+}
+
+impl std::error::Error for FieldNotFoundError {}
+
 /// Allocates error on the heap and then places `e` into it.
 #[inline]
 pub fn box_error(e: impl std::error::Error + Send + Sync + 'static) -> BoxedError {
@@ -125,6 +164,10 @@ pub enum Error {
     /// or inspected using [`Error::external_source`].
     #[snafu(transparent)]
     External { source: BoxedError },
+
+    /// A requested field was not found in a schema.
+    #[snafu(transparent)]
+    FieldNotFound { source: FieldNotFoundError },
 }
 
 impl Error {
@@ -197,6 +240,16 @@ impl Error {
         Self::External { source }
     }
 
+    /// Create a FieldNotFound error with the given field name and available candidates.
+    pub fn field_not_found(field_name: impl Into<String>, candidates: Vec<String>) -> Self {
+        Self::FieldNotFound {
+            source: FieldNotFoundError {
+                field_name: field_name.into(),
+                candidates,
+            },
+        }
+    }
+
     /// Returns a reference to the external error source if this is an `External` variant.
     ///
     /// This allows downcasting to recover the original error type.
@@ -246,7 +299,7 @@ impl ToSnafuLocation for std::panic::Location<'static> {
     }
 }
 
-pub type Result<T> = std::result::Result<T, Error>;
+pub type Result<T, E = Error> = std::result::Result<T, E>;
 pub type ArrowResult<T> = std::result::Result<T, ArrowError>;
 #[cfg(feature = "datafusion")]
 pub type DataFusionResult<T> = std::result::Result<T, datafusion_common::DataFusionError>;
diff --git a/rust/lance-core/src/levenshtein.rs b/rust/lance-core/src/levenshtein.rs
new file mode 100644
index 00000000000..c4c8ffb9c28
--- /dev/null
+++ b/rust/lance-core/src/levenshtein.rs
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The Lance Authors
+
+/// Calculate the Levenshtein distance between two strings.
+///
+/// The Levenshtein distance is a measure of the number of single-character edits
+/// (insertions, deletions, or substitutions) required to change one word into the other.
+///
+/// # Examples
+///
+/// ```
+/// use lance_core::levenshtein::levenshtein_distance;
+///
+/// assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
+/// assert_eq!(levenshtein_distance("hello", "hello"), 0);
+/// assert_eq!(levenshtein_distance("hello", "world"), 4);
+/// ```
+pub fn levenshtein_distance(s1: &str, s2: &str) -> usize {
+    let s2_chars: Vec<char> = s2.chars().collect();
+
+    // Only two rows of the dynamic-programming table are needed at a time:
+    // `prev[j]` is the distance between the already processed prefix of `s1`
+    // and the first `j` characters of `s2`.
+    let mut prev: Vec<usize> = (0..=s2_chars.len()).collect();
+    let mut curr = vec![0; s2_chars.len() + 1];
+
+    for (i, c1) in s1.chars().enumerate() {
+        curr[0] = i + 1;
+        for (j, c2) in s2_chars.iter().enumerate() {
+            let cost = usize::from(c1 != *c2);
+            curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
+        }
+        std::mem::swap(&mut prev, &mut curr);
+    }
+
+    prev[s2_chars.len()]
+}
+
+/// Find the best suggestion from a list of options based on Levenshtein distance.
+///
+/// Returns `Some(suggestion)` for the closest option whose Levenshtein distance
+/// is at most one third of the length of the input string (rounded down).
+/// Otherwise returns `None`.
+///
+/// # Examples
+///
+/// ```
+/// use lance_core::levenshtein::find_best_suggestion;
+///
+/// let options = ["vector", "column", "table"];
+/// assert_eq!(find_best_suggestion("vacter", &options), Some("vector"));
+/// assert_eq!(find_best_suggestion("hello", &options), None);
+/// ```
+pub fn find_best_suggestion<'a, 'b>(
+    input: &'a str,
+    options: &'b [impl AsRef<str>],
+) -> Option<&'b str> {
+    let input_len = input.chars().count();
+    if input_len == 0 {
+        return None;
+    }
+
+    let threshold = input_len / 3;
+    let mut best_option: Option<(&'b str, usize)> = None;
+    for option in options {
+        let distance = levenshtein_distance(input, option.as_ref());
+        if distance <= threshold {
+            match &best_option {
+                None => best_option = Some((option.as_ref(), distance)),
+                Some((_, best_distance)) => {
+                    if distance < *best_distance {
+                        best_option = Some((option.as_ref(), distance));
+                    }
+                }
+            }
+        }
+    }
+
+    best_option.map(|(option, _)| option)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_levenshtein_distance() {
+        assert_eq!(levenshtein_distance("", ""), 0);
+        assert_eq!(levenshtein_distance("a", ""), 1);
+        assert_eq!(levenshtein_distance("", "a"), 1);
+        assert_eq!(levenshtein_distance("abc", "abc"), 0);
+        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
+        assert_eq!(levenshtein_distance("hello", "world"), 4);
+        assert_eq!(levenshtein_distance("vector", "vector"), 0);
+        assert_eq!(levenshtein_distance("vector", "vectr"), 1);
+        assert_eq!(levenshtein_distance("vacter", "vector"), 2);
+    }
+
+    #[test]
+    fn test_find_best_suggestion() {
+        let options = ["vector", "column", "table"];
+
+        assert_eq!(find_best_suggestion("vacter", &options), Some("vector"));
+        assert_eq!(find_best_suggestion("vectr", &options), Some("vector"));
+        assert_eq!(find_best_suggestion("column", &options), Some("column"));
+        assert_eq!(find_best_suggestion("tble", &options), Some("table"));
+
+        // Should return None if no good match
+        assert_eq!(find_best_suggestion("hello", &options), None);
+        assert_eq!(find_best_suggestion("world", &options), None);
+
+        // Should return None if input is too short
+        assert_eq!(find_best_suggestion("v", &options), None);
+        assert_eq!(find_best_suggestion("", &options), None);
+    }
+}
diff --git a/rust/lance-core/src/lib.rs b/rust/lance-core/src/lib.rs
index 8c669eda223..0860f710c84 100644
--- a/rust/lance-core/src/lib.rs
+++ b/rust/lance-core/src/lib.rs
@@ -9,6 +9,7 @@ pub mod cache;
 pub mod container;
 pub mod datatypes;
 pub mod error;
+pub mod levenshtein;
 pub mod traits;
 pub mod utils;
 
diff --git a/rust/lance-index/src/lib.rs b/rust/lance-index/src/lib.rs
index 5ed4638b6cb..369e922d1ca 100644
--- a/rust/lance-index/src/lib.rs
+++ b/rust/lance-index/src/lib.rs
@@ -207,10 +207,33 @@ impl TryFrom<&str> for IndexType {
             "IVF_HNSW_FLAT" => Ok(Self::IvfHnswFlat),
             "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
             "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
-            _ => Err(Error::invalid_input(
-                format!("invalid index type: {}", value),
-                location!(),
-            )),
+            _ => {
+                let valid_index_types = [
+                    "BTree",
+                    "Bitmap",
+                    "LabelList",
+                    "Inverted",
+                    "NGram",
+                    "FragmentReuse",
+                    "MemWal",
+                    "ZoneMap",
+                    "Vector",
+                    "IVF_FLAT",
+                    "IVF_SQ",
+                    "IVF_PQ",
+                    "IVF_RQ",
+                    "IVF_HNSW_FLAT",
+                    "IVF_HNSW_SQ",
+                    "IVF_HNSW_PQ",
+                ];
+                let suggestion =
+                    lance_core::levenshtein::find_best_suggestion(value, &valid_index_types);
+                let mut error_msg = format!("invalid index type: {}", value);
+                if let Some(suggestion) = suggestion {
+                    error_msg = format!("{}. Did you mean '{}'?", error_msg, suggestion);
+                }
+                Err(Error::invalid_input(error_msg, location!()))
+            }
         }
     }
 }
diff --git a/rust/lance-linalg/src/distance.rs b/rust/lance-linalg/src/distance.rs
index 84c81fe85ed..370f472dc65 100644
--- a/rust/lance-linalg/src/distance.rs
+++ b/rust/lance-linalg/src/distance.rs
@@ -97,9 +97,16 @@ impl TryFrom<&str> for DistanceType {
             "cosine" => Ok(Self::Cosine),
             "dot" => Ok(Self::Dot),
             "hamming" => Ok(Self::Hamming),
-            _ => Err(ArrowError::InvalidArgumentError(format!(
-                "Metric type '{s}' is not supported"
-            ))),
+            _ => {
+                let valid_distance_types = ["l2", "euclidean", "cosine", "dot", "hamming"];
+                let suggestion =
+                    lance_core::levenshtein::find_best_suggestion(s, &valid_distance_types);
+                let mut error_msg = format!("Metric type '{s}' is not supported");
+                if let Some(suggestion) = suggestion {
+                    error_msg = format!("{}. Did you mean '{}'?", error_msg, suggestion);
+                }
+                Err(ArrowError::InvalidArgumentError(error_msg))
+            }
         }
     }
 }
diff --git a/rust/lance-linalg/src/lib.rs b/rust/lance-linalg/src/lib.rs
index 0d7654cb7cf..3e8988a85eb 100644
--- a/rust/lance-linalg/src/lib.rs
+++ b/rust/lance-linalg/src/lib.rs
@@ -18,5 +18,5 @@ pub(crate) mod test_utils;
 
 pub use clustering::Clustering;
 
-type Error = ArrowError;
+use lance_core::Error;
 pub type Result<T> = std::result::Result<T, Error>;
diff --git a/rust/lance-namespace/src/error.rs b/rust/lance-namespace/src/error.rs
index 71fb7c12c31..693b6abf08f 100644
--- a/rust/lance-namespace/src/error.rs
+++ b/rust/lance-namespace/src/error.rs
@@ -145,6 +145,34 @@ impl std::fmt::Display for ErrorCode {
     }
 }
 
+/// Error for table column not found with suggestion.
+#[derive(Debug, Clone)]
+pub struct TableColumnNotFoundWithSuggestionError {
+    message: String,
+    suggestion: Option<String>,
+}
+
+impl TableColumnNotFoundWithSuggestionError {
+    pub fn new(message: String, suggestion: Option<String>) -> Self {
+        Self {
+            message,
+            suggestion,
+        }
+    }
+}
+
+impl std::fmt::Display for TableColumnNotFoundWithSuggestionError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Table column not found: {}", self.message)?;
+        if let Some(suggestion) = &self.suggestion {
+            write!(f, ". Did you mean '{}'?", suggestion)?;
+        }
+        Ok(())
+    }
+}
+
+impl std::error::Error for TableColumnNotFoundWithSuggestionError {}
+
 /// Lance Namespace error type.
 ///
 /// This enum provides fine-grained error types for Lance Namespace operations.
@@ -222,6 +250,12 @@ pub enum NamespaceError {
     #[snafu(display("Table column not found: {message}"))]
     TableColumnNotFound { message: String },
 
+    /// The specified table column does not exist with suggestions.
+    #[snafu(transparent)]
+    TableColumnNotFoundWithSuggestion {
+        source: TableColumnNotFoundWithSuggestionError,
+    },
+
     /// Malformed request or invalid parameters.
     #[snafu(display("Invalid input: {message}"))]
     InvalidInput { message: String },
@@ -274,6 +308,7 @@ impl NamespaceError {
             Self::TransactionNotFound { .. } => ErrorCode::TransactionNotFound,
             Self::TableVersionNotFound { .. } => ErrorCode::TableVersionNotFound,
             Self::TableColumnNotFound { .. } => ErrorCode::TableColumnNotFound,
+            Self::TableColumnNotFoundWithSuggestion { .. } => ErrorCode::TableColumnNotFound,
             Self::InvalidInput { .. } => ErrorCode::InvalidInput,
             Self::ConcurrentModification { .. } => ErrorCode::ConcurrentModification,
             Self::PermissionDenied { .. } => ErrorCode::PermissionDenied,
@@ -317,6 +352,16 @@ impl NamespaceError {
             None => Self::Internal { message },
         }
     }
+
+    /// Creates a NamespaceError for table column not found with suggestion.
+    pub fn table_column_not_found_with_suggestion(
+        message: impl Into<String>,
+        suggestion: Option<String>,
+    ) -> Self {
+        Self::TableColumnNotFoundWithSuggestion {
+            source: TableColumnNotFoundWithSuggestionError::new(message.into(), suggestion),
+        }
+    }
 }
 
 /// Converts a NamespaceError into a lance_core::Error.
@@ -401,4 +446,25 @@ mod tests {
         };
         assert_eq!(err.to_string(), "Table not found: users");
     }
+
+    #[test]
+    fn test_table_column_not_found_with_suggestion() {
+        let err = NamespaceError::table_column_not_found_with_suggestion(
+            "column_name".to_string(),
+            Some("column_name_suggestion".to_string()),
+        );
+        assert_eq!(err.code(), ErrorCode::TableColumnNotFound);
+        assert_eq!(
+            err.to_string(),
+            "Table column not found: column_name. Did you mean 'column_name_suggestion'?"
+        );
+    }
+
+    #[test]
+    fn test_table_column_not_found_without_suggestion() {
+        let err =
+            NamespaceError::table_column_not_found_with_suggestion("column_name".to_string(), None);
+        assert_eq!(err.code(), ErrorCode::TableColumnNotFound);
+        assert_eq!(err.to_string(), "Table column not found: column_name");
+    }
 }
diff --git a/rust/lance/src/dataset/scanner.rs b/rust/lance/src/dataset/scanner.rs
index a0812d6caf4..af53bb0f05f 100644
--- a/rust/lance/src/dataset/scanner.rs
+++ b/rust/lance/src/dataset/scanner.rs
@@ -994,12 +994,10 @@ impl Scanner {
     pub fn full_text_search(&mut self, query: FullTextSearchQuery) -> Result<&mut Self> {
         let fields = query.columns();
         if !fields.is_empty() {
+            let schema = self.dataset.schema();
             for field in fields.iter() {
-                if self.dataset.schema().field(field).is_none() {
-                    return Err(Error::invalid_input(
-                        format!("Column {} not found", field),
-                        location!(),
-                    ));
+                if schema.field(field).is_none() {
+                    return Err(Error::field_not_found(field, schema.field_paths()));
                 }
             }
         }
@@ -1408,14 +1406,11 @@ impl Scanner {
                 return Ok(self);
             }
             // Verify early that the fields exist
+            let schema = self.dataset.schema();
             for column in ordering {
-                self.dataset
-                    .schema()
-                    .field(&column.column_name)
-                    .ok_or(Error::invalid_input(
-                        format!("Column {} not found", &column.column_name),
-                        location!(),
-                    ))?;
+                schema.field(&column.column_name).ok_or_else(|| {
+                    Error::field_not_found(&column.column_name, schema.field_paths())
+                })?;
             }
         }
         self.ordering = ordering;
@@ -1479,12 +1474,7 @@ impl Scanner {
         let lance_schema = dataset.schema();
         let field_path = lance_schema
             .resolve_case_insensitive(column_name)
-            .ok_or_else(|| {
-                Error::invalid_input(
-                    format!("Field '{}' not found in schema", column_name),
-                    location!(),
-                )
-            })?;
+            .ok_or_else(|| Error::field_not_found(column_name, lance_schema.field_paths()))?;
 
         if field_path.len() == 1 {
             // Simple top-level column
diff --git a/rust/test_error_suggestions.rs b/rust/test_error_suggestions.rs
new file mode 100644
index 00000000000..4b6702f4aa8
--- /dev/null
+++ b/rust/test_error_suggestions.rs
@@ -0,0 +1,107 @@
+// Test file for error suggestions
+use lance_core::{levenshtein::find_best_suggestion, DataType, Field, Schema};
+use lance_index::IndexType;
+use lance_linalg::DistanceType;
+
+fn test_column_not_found() {
+    println!("=== Testing Column Not Found Errors ===");
+
+    // Create a schema with some fields
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new("vector", DataType::Float32, false),
+        Field::new("text", DataType::Utf8, false),
+        Field::new(
+            "metadata",
+            DataType::Struct(
+                vec![
+                    Field::new("name", DataType::Utf8, false),
+                    Field::new("value", DataType::Float32, false),
+                ]
+                .into(),
+            ),
+            false,
+        ),
+    ]);
+
+    // Test 1: Incorrect field name with suggestion
+    let result = schema.field_id("vacter");
+    println!("Test 1 - Field 'vacter': {}", result.unwrap_err());
+
+    // Test 2: Misspelled nested field name (the correct path would not be an error)
+    let result = schema.field_id("metadata.nam");
+    println!("Test 2 - Field 'metadata.nam': {}", result.unwrap_err());
+
+    // Test 3: Completely wrong name
+    let result = schema.field_id("completely_wrong");
+    println!("Test 3 - Field 'completely_wrong': {}", result.unwrap_err());
+}
+
+fn test_distance_type_not_found() {
+    println!("\n=== Testing Distance Type Not Found Errors ===");
+
+    // Test 1: Unknown two-character name (too short to produce a suggestion)
+    let result = DistanceType::try_from("l1");
+    println!("Test 1 - Distance 'l1': {}", result.unwrap_err());
+
+    // Test 2: Another misspelled distance type
+    let result = DistanceType::try_from("cosin");
+    println!("Test 2 - Distance 'cosin': {}", result.unwrap_err());
+
+    // Test 3: Completely wrong distance type
+    let result = DistanceType::try_from("wrong_distance");
+    println!(
+        "Test 3 - Distance 'wrong_distance': {}",
+        result.unwrap_err()
+    );
+}
+
+fn test_index_type_not_found() {
+    println!("\n=== Testing Index Type Not Found Errors ===");
+
+    // Test 1: Misspelled index type with suggestion
+    let result = IndexType::try_from("Btree");
+    println!("Test 1 - Index 'Btree': {}", result.unwrap_err());
+
+    // Test 2: Another misspelled index type
+    let result = IndexType::try_from("Vectr");
+    println!("Test 2 - Index 'Vectr': {}", result.unwrap_err());
+
+    // Test 3: Completely wrong index type
+    let result = IndexType::try_from("wrong_index");
+    println!("Test 3 - Index 'wrong_index': {}", result.unwrap_err());
+}
+
+fn test_levenshtein_suggestion() {
+    println!("\n=== Testing Levenshtein Suggestion Function ===");
+
+    let options = ["vector", "id", "text", "metadata.name"];
+
+    // Test with 2 character differences
+    let suggestion = find_best_suggestion("vacter", &options);
+    println!("Test 1 - 'vacter' -> {:?}", suggestion);
+
+    // Test with 1 character difference
+    let suggestion = find_best_suggestion("vecor", &options);
+    println!("Test 2 - 'vecor' -> {:?}", suggestion);
+
+    // Test with more than 1/3 characters different
+    let suggestion = find_best_suggestion("vctr", &options);
+    println!("Test 3 - 'vctr' -> {:?}", suggestion);
+
+    // Test exact match
+    let suggestion = find_best_suggestion("vector", &options);
+    println!("Test 4 - 'vector' -> {:?}", suggestion);
+}
+
+fn main() {
+    println!("Testing Enhanced Error Messages with Suggestions");
+    println!("==============================================");
+
+    test_levenshtein_suggestion();
+    test_column_not_found();
+    test_distance_type_not_found();
+    test_index_type_not_found();
+
+    println!("\nAll tests completed!");
+}