Skip to content

Commit 1c6e526

Browse files
authored
Split attribute catalog from registry (in v2) (#1054)
* Split attribute catalog from registry. Only attribute definitions wind up in the registry now. Any refinement can be in the catalog but is NOT in the registry.
* Fix tests for new attribute splits.
* Clippy fix.
* Cargo fmt
1 parent 2013621 commit 1c6e526

File tree

4 files changed

+115
-56
lines changed

4 files changed

+115
-56
lines changed

crates/weaver_forge/src/v2/registry.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use schemars::JsonSchema;
44
use serde::{Deserialize, Serialize};
5-
use weaver_resolved_schema::attribute::AttributeRef;
5+
use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog};
66

77
use crate::{
88
error::Error,
@@ -84,20 +84,23 @@ impl ForgeResolvedRegistry {
8484
) -> Result<Self, Error> {
8585
let mut errors = Vec::new();
8686

87+
let attribute_lookup = |r: &weaver_resolved_schema::v2::attribute::AttributeRef| {
88+
schema.attribute_catalog.attribute(r)
89+
};
8790
// Resolve the registry's attribute refs through the catalog to build the attribute list.
8891
let mut attributes: Vec<Attribute> = schema
8992
.registry
9093
.attributes
9194
.iter()
95+
.filter_map(&attribute_lookup)
9296
.map(|a| Attribute {
9397
key: a.key.clone(),
9498
r#type: a.r#type.clone(),
9599
examples: a.examples.clone(),
96100
common: a.common.clone(),
97101
})
98102
.collect();
99-
let attribute_lookup =
100-
|r: &weaver_resolved_schema::v2::attribute::AttributeRef| attributes.get(r.0 as usize);
103+
101104
let mut metrics = Vec::new();
102105
for metric in schema.registry.metrics {
103106
let attributes = metric
@@ -447,14 +450,15 @@ mod tests {
447450
file_format: "2.0.0".to_owned(),
448451
schema_url: "https://example.com/schema".to_owned(),
449452
registry_id: "my-registry".to_owned(),
453+
attribute_catalog: vec![attribute::Attribute {
454+
key: "test.attr".to_owned(),
455+
r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String),
456+
examples: None,
457+
common: CommonFields::default(),
458+
}],
450459
registry: v2::registry::Registry {
451460
registry_url: "https://example.com/registry".to_owned(),
452-
attributes: vec![attribute::Attribute {
453-
key: "test.attr".to_owned(),
454-
r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String),
455-
examples: None,
456-
common: CommonFields::default(),
457-
}],
461+
attributes: vec![attribute::AttributeRef(0)],
458462
spans: vec![span::Span {
459463
r#type: SignalId::from("my-span".to_owned()),
460464
kind: SpanKindSpec::Internal,
@@ -610,6 +614,7 @@ mod tests {
610614
file_format: "2.0.0".to_owned(),
611615
schema_url: "https://example.com/schema".to_owned(),
612616
registry_id: "my-registry".to_owned(),
617+
attribute_catalog: vec![],
613618
registry: v2::registry::Registry {
614619
registry_url: "https://example.com/registry".to_owned(),
615620
attributes: vec![], // No attributes - This is the logic bug.

crates/weaver_resolved_schema/src/v2/catalog.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,31 @@ impl Catalog {
7575
}
7676
}
7777

78+
/// Provides methods which can resolve an `AttributeRef` into an `Attribute`.
79+
pub trait AttributeCatalog {
80+
/// Returns the attribute from an attribute ref if it exists.
81+
#[must_use]
82+
fn attribute(&self, attribute_ref: &AttributeRef) -> Option<&Attribute>;
83+
/// Returns the attribute key from an attribute ref if it exists
84+
/// in the catalog or None if it does not exist.
85+
#[must_use]
86+
fn attribute_key(&self, attribute_ref: &AttributeRef) -> Option<&str> {
87+
self.attribute(attribute_ref).map(|a| a.key.as_str())
88+
}
89+
}
90+
91+
impl AttributeCatalog for [Attribute] {
92+
fn attribute(&self, attribute_ref: &AttributeRef) -> Option<&Attribute> {
93+
self.get(attribute_ref.0 as usize)
94+
}
95+
}
96+
97+
impl AttributeCatalog for Vec<Attribute> {
98+
fn attribute(&self, attribute_ref: &AttributeRef) -> Option<&Attribute> {
99+
self.get(attribute_ref.0 as usize)
100+
}
101+
}
102+
78103
#[cfg(test)]
79104
mod test {
80105
use std::collections::BTreeMap;

crates/weaver_resolved_schema/src/v2/mod.rs

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use weaver_semconv::{
1313
};
1414

1515
use crate::v2::{
16+
attribute::Attribute,
1617
attribute_group::AttributeGroup,
1718
catalog::Catalog,
1819
entity::Entity,
@@ -46,6 +47,8 @@ pub struct ResolvedTelemetrySchema {
4647
pub schema_url: String,
4748
/// The ID of the registry that this schema belongs to.
4849
pub registry_id: String,
50+
/// Catalog of attributes. Note: this will include duplicates for the same key.
51+
pub attribute_catalog: Vec<Attribute>,
4952
/// The registry that this schema belongs to.
5053
pub registry: Registry,
5154
/// Refinements for the registry
@@ -57,7 +60,7 @@ impl ResolvedTelemetrySchema {
5760
/// Statistics about this schema.
5861
pub fn stats(&self) -> Stats {
5962
Stats {
60-
registry: self.registry.stats(),
63+
registry: self.registry.stats(&self.attribute_catalog),
6164
refinements: self.refinements.stats(),
6265
}
6366
}
@@ -67,12 +70,14 @@ impl ResolvedTelemetrySchema {
6770
impl TryFrom<crate::ResolvedTelemetrySchema> for ResolvedTelemetrySchema {
6871
type Error = crate::error::Error;
6972
fn try_from(value: crate::ResolvedTelemetrySchema) -> Result<Self, Self::Error> {
70-
let (registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?;
73+
let (attribute_catalog, registry, refinements) =
74+
convert_v1_to_v2(value.catalog, value.registry)?;
7175
Ok(ResolvedTelemetrySchema {
7276
// TODO - bump file format?
7377
file_format: value.file_format,
7478
schema_url: value.schema_url,
7579
registry_id: value.registry_id,
80+
attribute_catalog,
7681
registry,
7782
refinements,
7883
})
@@ -95,15 +100,15 @@ fn fix_span_group_id(group_id: &str) -> SignalId {
95100
pub fn convert_v1_to_v2(
96101
c: crate::catalog::Catalog,
97102
r: crate::registry::Registry,
98-
) -> Result<(Registry, Refinements), crate::error::Error> {
103+
) -> Result<(Vec<Attribute>, Registry, Refinements), crate::error::Error> {
99104
// When pulling attributes, as we collapse things, we need to filter
100105
// to just unique.
101-
let attributes: HashSet<attribute::Attribute> = c
106+
let attributes: HashSet<Attribute> = c
102107
.attributes
103108
.iter()
104109
.cloned()
105110
.map(|a| {
106-
attribute::Attribute {
111+
Attribute {
107112
key: a.name,
108113
r#type: a.r#type,
109114
examples: a.examples,
@@ -421,9 +426,31 @@ pub fn convert_v1_to_v2(
421426
}
422427
}
423428

429+
// Now we need to hunt for attribute definitions
430+
let mut attributes = Vec::new();
431+
for g in r.groups.iter() {
432+
for a in g.attributes.iter() {
433+
if let Some(attr) = c.attribute(a) {
434+
// Attribute definitions do not have lineage.
435+
let is_def = g
436+
.lineage
437+
.as_ref()
438+
.and_then(|l| l.attribute(&attr.name))
439+
.is_none();
440+
if is_def {
441+
if let Some(v2) = v2_catalog.convert_ref(attr) {
442+
attributes.push(v2);
443+
} else {
444+
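// TODO: logic error - the attribute exists in the v1 catalog but has no v2 catalog ref, so it is silently skipped here.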
445+
}
446+
}
447+
}
448+
}
449+
}
450+
424451
let v2_registry = Registry {
425452
registry_url: r.registry_url,
426-
attributes: v2_catalog.into(),
453+
attributes,
427454
spans,
428455
metrics,
429456
events,
@@ -435,15 +462,19 @@ pub fn convert_v1_to_v2(
435462
metrics: metric_refinements,
436463
events: event_refinements,
437464
};
438-
Ok((v2_registry, v2_refinements))
465+
Ok((v2_catalog.into(), v2_registry, v2_refinements))
439466
}
440467

441468
#[cfg(test)]
442469
mod tests {
443470

444471
use weaver_semconv::{provenance::Provenance, stability::Stability};
445472

446-
use crate::{attribute::Attribute, lineage::GroupLineage, registry::Group};
473+
use crate::{
474+
attribute::Attribute,
475+
lineage::{AttributeLineage, GroupLineage},
476+
registry::Group,
477+
};
447478

448479
use super::*;
449480

@@ -496,6 +527,8 @@ mod tests {
496527
]);
497528
let mut refinement_span_lineage = GroupLineage::new(Provenance::new("tmp", "tmp"));
498529
refinement_span_lineage.extends("span.my-span");
530+
refinement_span_lineage
531+
.add_attribute_lineage("test.key".to_owned(), AttributeLineage::new("span.my-span"));
499532
let v1_registry = crate::registry::Registry {
500533
registry_url: "my.schema.url".to_owned(),
501534
groups: vec![
@@ -548,9 +581,11 @@ mod tests {
548581
],
549582
};
550583

551-
let (v2_registry, v2_refinements) =
584+
let (catalog, v2_registry, v2_refinements) =
552585
convert_v1_to_v2(v1_catalog, v1_registry).expect("Failed to convert v1 to v2");
553586
// assert only ONE attribute due to sharing.
587+
assert_eq!(catalog.len(), 1);
588+
// Assert one attribute shows up, due to lineage.
554589
assert_eq!(v2_registry.attributes.len(), 1);
555590
// assert attribute fields not shared show up on ref in span.
556591
assert_eq!(v2_registry.spans.len(), 1);
@@ -620,6 +655,8 @@ mod tests {
620655
]);
621656
let mut refinement_metric_lineage = GroupLineage::new(Provenance::new("tmp", "tmp"));
622657
refinement_metric_lineage.extends("metric.http");
658+
refinement_metric_lineage
659+
.add_attribute_lineage("test.key".to_owned(), AttributeLineage::new("metric.http"));
623660
let v1_registry = crate::registry::Registry {
624661
registry_url: "my.schema.url".to_owned(),
625662
groups: vec![
@@ -672,7 +709,7 @@ mod tests {
672709
],
673710
};
674711

675-
let (v2_registry, v2_refinements) =
712+
let (_, v2_registry, v2_refinements) =
676713
convert_v1_to_v2(v1_catalog, v1_registry).expect("Failed to convert v1 to v2");
677714
// assert only ONE attribute due to sharing.
678715
assert_eq!(v2_registry.attributes.len(), 1);
@@ -746,7 +783,7 @@ mod tests {
746783
}],
747784
};
748785

749-
let (v2_registry, _) =
786+
let (_, v2_registry, _) =
750787
convert_v1_to_v2(v1_catalog, v1_registry).expect("Failed to convert v1 to v2");
751788
assert_eq!(v2_registry.events.len(), 1);
752789
if let Some(event) = v2_registry.events.first() {
@@ -805,7 +842,7 @@ mod tests {
805842
}],
806843
};
807844

808-
let (v2_registry, _) =
845+
let (_, v2_registry, _) =
809846
convert_v1_to_v2(v1_catalog, v1_registry).expect("Failed to convert v1 to v2");
810847
assert_eq!(v2_registry.entities.len(), 1);
811848
if let Some(entity) = v2_registry.entities.first() {

crates/weaver_resolved_schema/src/v2/registry.rs

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ use serde::{Deserialize, Serialize};
77
use weaver_semconv::attribute::AttributeType;
88

99
use crate::v2::{
10-
attribute::{Attribute, AttributeRef},
10+
attribute::AttributeRef,
1111
attribute_group::AttributeGroup,
12+
catalog::AttributeCatalog,
1213
entity::Entity,
1314
event::Event,
1415
metric::Metric,
@@ -28,8 +29,8 @@ use crate::v2::{
2829
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)]
2930
#[serde(deny_unknown_fields)]
3031
pub struct Registry {
31-
/// Catalog of attributes used in the schema.
32-
pub attributes: Vec<Attribute>,
32+
/// References to the attribute definitions in this registry.
33+
pub attributes: Vec<AttributeRef>,
3334

3435
/// Catalog of (public) attribute groups.
3536
pub attribute_groups: Vec<AttributeGroup>,
@@ -53,28 +54,18 @@ pub struct Registry {
5354
}
5455

5556
impl Registry {
56-
/// Returns the attribute from an attribute ref if it exists.
57-
#[must_use]
58-
pub fn attribute(&self, attribute_ref: &AttributeRef) -> Option<&Attribute> {
59-
self.attributes.get(attribute_ref.0 as usize)
60-
}
61-
/// Returns the attribute name from an attribute ref if it exists
62-
/// in the catalog or None if it does not exist.
63-
#[must_use]
64-
pub fn attribute_key(&self, attribute_ref: &AttributeRef) -> Option<&str> {
65-
self.attributes
66-
.get(attribute_ref.0 as usize)
67-
.map(|attr| attr.key.as_ref())
68-
}
69-
7057
/// Returns the statistics for this registry.
7158
#[must_use]
72-
pub fn stats(&self) -> RegistryStats {
59+
pub fn stats<T: AttributeCatalog>(&self, catalog: &T) -> RegistryStats {
7360
let attributes = {
7461
let mut attribute_type_breakdown = BTreeMap::new();
7562
let mut stability_breakdown = HashMap::new();
7663
let mut deprecated_count = 0;
77-
for attribute in &self.attributes {
64+
for attribute in self
65+
.attributes
66+
.iter()
67+
.filter_map(|ar| catalog.attribute(ar))
68+
{
7869
let attribute_type = if let AttributeType::Enum { members, .. } = &attribute.r#type
7970
{
8071
format!("enum(card:{:03})", members.len())
@@ -254,12 +245,26 @@ mod test {
254245
v2::{span::SpanName, CommonFields},
255246
};
256247

257-
use crate::v2::entity::EntityAttributeRef;
248+
use crate::v2::{attribute::Attribute, entity::EntityAttributeRef};
258249

259250
use super::*;
260251

261252
#[test]
262253
fn test_stats() {
254+
let catalog = vec![Attribute {
255+
key: "key".to_owned(),
256+
r#type: AttributeType::PrimitiveOrArray(
257+
weaver_semconv::attribute::PrimitiveOrArrayTypeSpec::String,
258+
),
259+
examples: None,
260+
common: CommonFields {
261+
brief: "test".to_owned(),
262+
note: "".to_owned(),
263+
stability: Stability::Stable,
264+
deprecated: None,
265+
annotations: BTreeMap::new(),
266+
},
267+
}];
263268
let registry = Registry {
264269
attribute_groups: vec![],
265270
registry_url: "https://opentelemetry.io/schemas/1.23.0".to_owned(),
@@ -311,22 +316,9 @@ mod test {
311316
annotations: BTreeMap::new(),
312317
},
313318
}],
314-
attributes: vec![Attribute {
315-
key: "key".to_owned(),
316-
r#type: AttributeType::PrimitiveOrArray(
317-
weaver_semconv::attribute::PrimitiveOrArrayTypeSpec::String,
318-
),
319-
examples: None,
320-
common: CommonFields {
321-
brief: "test".to_owned(),
322-
note: "".to_owned(),
323-
stability: Stability::Stable,
324-
deprecated: None,
325-
annotations: BTreeMap::new(),
326-
},
327-
}],
319+
attributes: vec![AttributeRef(0)],
328320
};
329-
let stats = registry.stats();
321+
let stats = registry.stats(&catalog);
330322
assert_eq!(stats.attributes.attribute_count, 1);
331323
assert_eq!(
332324
stats.attributes.attribute_type_breakdown.get("string"),

0 commit comments

Comments
 (0)