diff --git a/README.md b/README.md index eee9d7f0..4518de0f 100755 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@

CI Ask DeepWiki - Discord + Discord github star diff --git a/scripts/run_tpcc_matrix.sh b/scripts/run_tpcc_matrix.sh deleted file mode 100755 index 444e8c8e..00000000 --- a/scripts/run_tpcc_matrix.sh +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -cd "$ROOT_DIR" - -NUM_WARE="${TPCC_NUM_WARE:-1}" -MAX_DUPLICATE_RETRY="${TPCC_DUPLICATE_RETRY:-1}" -MAIN_MEASURE_TIME="${TPCC_MAIN_MEASURE_TIME:-}" -STAMP="${TPCC_RESULT_STAMP:-$(date +%Y-%m-%d_%H-%M-%S)}" -RESULT_DIR="${TPCC_RESULT_DIR:-$ROOT_DIR/tpcc/results/$STAMP}" -LOG_DIR="$RESULT_DIR/logs" -TMP_DIR="$ROOT_DIR/target/tpcc-run-data" -BINARY="$ROOT_DIR/target/release/tpcc" -SUMMARY_FILE="$RESULT_DIR/summary.md" - -mkdir -p "$LOG_DIR" "$TMP_DIR" - -if [[ ! -x "$BINARY" ]]; then - echo "missing binary: $BINARY" >&2 - echo "build it first with: cargo build -p tpcc --release" >&2 - exit 1 -fi - -extract_tpmc() { - local log_file="$1" - awk '//{getline; print $1; exit}' "$log_file" -} - -extract_p90() { - local log_file="$1" - local label="$2" - awk -v label="$label" ' - /<90th Percentile RT \(MaxRT\)>/ { in_block = 1; next } - in_block && index($0, label) { - gsub(/^[[:space:]]+/, "", $0) - print $3 - exit - } - ' "$log_file" -} - -should_retry_duplicate() { - local log_file="$1" - rg -q "UNIQUE constraint failed|duplicate key|primary key|Duplicate" "$log_file" -} - -run_variant() { - local name="$1" - local measure_label="$2" - local db_path="$3" - shift 3 - local -a cmd=("$@") - local log_file="$LOG_DIR/$name.log" - local status="ok" - local notes="-" - local attempts=0 - local max_attempts=$((MAX_DUPLICATE_RETRY + 1)) - - : > "$log_file" - - while (( attempts < max_attempts )); do - attempts=$((attempts + 1)) - rm -rf "$db_path" - - { - printf '## Attempt %s\n' "$attempts" - printf '$' - printf ' %q' "${cmd[@]}" - printf '\n\n' - } >> "$log_file" - - set +e - "${cmd[@]}" >> "$log_file" 2>&1 - local cmd_status=$? - set -e - - if [[ "$cmd_status" -eq 0 ]]; then - break - fi - - if (( attempts < max_attempts )) && should_retry_duplicate "$log_file"; then - notes="retry after duplicate-key failure" - printf '\n[runner] duplicate-key style failure detected, retrying %s from scratch\n\n' "$name" >> "$log_file" - continue - fi - - status="failed" - notes="$(tail -n 5 "$log_file" | tr '\n' ' ' | sed 's/[[:space:]]\\+/ /g; s/^ //; s/ $//')" - break - done - - local tpmc="-" - local new_order="-" - local payment="-" - local order_status="-" - local delivery="-" - local stock_level="-" - - if [[ "$status" == "ok" ]]; then - tpmc="$(extract_tpmc "$log_file" || echo -)" - new_order="$(extract_p90 "$log_file" "New-Order" || echo -)" - payment="$(extract_p90 "$log_file" "Payment" || echo -)" - order_status="$(extract_p90 "$log_file" "Order-Status" || echo -)" - delivery="$(extract_p90 "$log_file" "Delivery" || echo -)" - stock_level="$(extract_p90 "$log_file" "Stock-Level" || echo -)" - fi - - printf '| %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | [%s](./logs/%s.log) |\n' \ - "$name" \ - "$status" \ - "$attempts" \ - "$measure_label" \ - "$tpmc" \ - "$new_order" \ - "$payment" \ - "$order_status" \ - "$delivery" \ - "$stock_level" \ - "$notes" \ - "$name" \ - "$name" \ - >> "$SUMMARY_FILE" - - rm -rf "$db_path" - - cat "$log_file" -} - -cat > "$SUMMARY_FILE" < HepOptimizerPipeline { vec![ NormalizationRuleImpl::LimitProjectTranspose, NormalizationRuleImpl::PushLimitThroughJoin, - NormalizationRuleImpl::PushLimitIntoTableScan, ], ) .before_batch( @@ -370,6 +369,11 @@ fn default_optimizer_pipeline() -> HepOptimizerPipeline { HepBatchStrategy::once_topdown(), vec![NormalizationRuleImpl::EvaluatorBind], ) + .after_batch( + "Limit Into Scan".to_string(), + HepBatchStrategy::fix_point_topdown(10), + vec![NormalizationRuleImpl::PushLimitIntoTableScan], + ) .implementations(vec![ // DQL ImplementationRuleImpl::SimpleAggregate, diff --git a/src/execution/dml/update.rs b/src/execution/dml/update.rs index 255f9fd5..df1a22b0 100644 --- a/src/execution/dml/update.rs +++ b/src/execution/dml/update.rs @@ -23,10 +23,14 @@ use crate::iter_ext::Itertools; use crate::planner::operator::update::UpdateOperator; use crate::planner::LogicalPlan; use crate::storage::Transaction; -use crate::types::index::Index; +use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::{Schema, Tuple}; use crate::types::tuple_builder::TupleBuilder; -use std::{collections::HashMap, mem}; +use crate::types::ColumnId; +use std::{ + collections::{HashMap, HashSet}, + mem, +}; pub struct Update { table_name: TableName, @@ -79,6 +83,24 @@ impl<'a, T: Transaction + 'a> WriteExecutor<'a, T> for Update { } } +impl Update { + fn index_needs_update( + index_meta: &IndexMeta, + updated_column_ids: &HashSet, + updates_primary_key: bool, + ) -> bool { + if matches!(index_meta.ty, IndexType::PrimaryKey { .. }) { + return false; + } + + updates_primary_key + || index_meta + .column_ids + .iter() + .any(|column_id| updated_column_ids.contains(column_id)) + } +} + impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Update { fn next_tuple( &mut self, @@ -91,8 +113,14 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Update { }; let mut exprs_map = HashMap::with_capacity(self.value_exprs.len()); + let mut updated_column_ids = HashSet::with_capacity(self.value_exprs.len()); for (column, expr) in self.value_exprs.drain(..) { - exprs_map.insert(plan_arena.column(column).id(), expr); + let column = plan_arena.column(column); + let column_id = column + .id() + .ok_or_else(|| DatabaseError::column_not_found(column.name().to_string()))?; + updated_column_ids.insert(column_id); + exprs_map.insert(column_id, expr); } let table_cache = arena.context().table_cache(); @@ -104,6 +132,13 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Update { .transpose()? }; if let Some(table_snapshot) = table_snapshot { + let updates_primary_key = table_snapshot.primary_key_indices.iter().any(|index| { + table_snapshot + .columns + .get(*index) + .and_then(|column| plan_arena.column(*column).id()) + .is_some_and(|column_id| updated_column_ids.contains(&column_id)) + }); let serializers = self .input_schema .iter() @@ -118,17 +153,30 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Update { let Some(old_pk) = arena.result_tuple().pk.clone() else { continue; }; - for (index_meta, exprs) in table_snapshot.index_metas.iter() { + + let mut old_index_values = Vec::new(); + for (index_offset, (index_meta, exprs)) in + table_snapshot.index_metas.iter().enumerate() + { let index_meta = plan_arena.index(*index_meta); - with_projection_tmp_value(arena, None, exprs, |arena, value| { - let mut state = arena.local_state(plan_arena); - let (transaction, table_codec) = state.transaction_codec_mut(); - let index = Index::new(index_meta.id, &value, index_meta.ty); - transaction.del_index(table_codec, &self.table_name, &index, &old_pk) + if !Self::index_needs_update( + index_meta, + &updated_column_ids, + updates_primary_key, + ) { + continue; + } + + with_projection_tmp_value(arena, None, exprs, |_, value| { + old_index_values.push((index_offset, value)); + Ok(()) })?; } for (i, column) in self.input_schema.iter().enumerate() { - if let Some(expr) = exprs_map.get(&plan_arena.column(*column).id()) { + let Some(column_id) = plan_arena.column(*column).id() else { + continue; + }; + if let Some(expr) = exprs_map.get(&column_id) { let value = expr.eval(Some(arena.result_tuple()))?; arena.result_tuple_mut().values[i] = value; } @@ -140,19 +188,35 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Update { ); arena.result_tuple_mut().pk = Some(new_pk.clone()); - if new_pk != old_pk { + let primary_key_changed = new_pk != old_pk; + if primary_key_changed { let mut state = arena.local_state(plan_arena); let (transaction, table_codec) = state.transaction_codec_mut(); transaction.remove_tuple(table_codec, &self.table_name, &old_pk)?; is_overwrite = false; } - for (index_meta, exprs) in table_snapshot.index_metas.iter() { + + for (index_offset, old_value) in old_index_values { + let (index_meta, exprs) = &table_snapshot.index_metas[index_offset]; let index_meta = plan_arena.index(*index_meta); + let index_id = index_meta.id; + let index_ty = index_meta.ty; with_projection_tmp_value(arena, None, exprs, |arena, value| { + if !primary_key_changed && old_value == value { + return Ok(()); + } + let mut state = arena.local_state(plan_arena); let (transaction, table_codec) = state.transaction_codec_mut(); - let index = Index::new(index_meta.id, &value, index_meta.ty); - transaction.add_index(table_codec, &self.table_name, index, &new_pk) + let old_index = Index::new(index_id, &old_value, index_ty); + transaction.del_index( + table_codec, + &self.table_name, + &old_index, + &old_pk, + )?; + let new_index = Index::new(index_id, &value, index_ty); + transaction.add_index(table_codec, &self.table_name, new_index, &new_pk) })?; } diff --git a/src/expression/range_detacher.rs b/src/expression/range_detacher.rs index e5212e88..99e70efc 100644 --- a/src/expression/range_detacher.rs +++ b/src/expression/range_detacher.rs @@ -18,7 +18,7 @@ use crate::expression::{BinaryOperator, ScalarExpression}; use crate::iter_ext::Itertools; use crate::planner::PlanArena; use crate::types::value::DataValue; -use crate::types::ColumnId; +use crate::types::{ColumnId, LogicalType}; use kite_sql_serde_macros::ReferenceSerialization; use std::cmp::Ordering; use std::collections::Bound; @@ -39,6 +39,38 @@ pub enum Range { SortedRanges(Vec), } +#[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] +pub struct DetachedPredicate { + pub(crate) range: Range, + pub(crate) residual: Option, +} + +impl DetachedPredicate { + fn consumed(range: Range) -> Self { + Self { + range, + residual: None, + } + } + + fn combine_residuals( + left: Option, + right: Option, + ) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(left), + right_expr: Box::new(right), + evaluator: None, + ty: LogicalType::Boolean, + }), + (Some(expr), None) | (None, Some(expr)) => Some(expr), + (None, None) => None, + } + } +} + struct TreeNode { value: Option, children: Vec>, @@ -202,34 +234,82 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { pub(crate) fn detach( &mut self, expr: &ScalarExpression, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { Ok(match expr { ScalarExpression::Binary { left_expr, right_expr, op, .. - } => match (self.detach(left_expr)?, self.detach(right_expr)?) { - (Some(left_binary), Some(right_binary)) => { - Self::merge_binary(*op, left_binary, right_binary) + } => { + if let (Some(col), Some(val)) = ( + left_expr.unpack_bound_col(false).map(|(column, _)| column), + right_expr.unpack_val(), + ) { + return self + .new_range(*op, col, val, false) + .map(|range| range.map(DetachedPredicate::consumed)); + } else if let (Some(val), Some(col)) = ( + left_expr.unpack_val(), + right_expr.unpack_bound_col(false).map(|(column, _)| column), + ) { + return self + .new_range(*op, col, val, true) + .map(|range| range.map(DetachedPredicate::consumed)); } - (None, None) => { - if let (Some(col), Some(val)) = ( - left_expr.unpack_bound_col(false).map(|(column, _)| column), - right_expr.unpack_val(), - ) { - return self.new_range(*op, col, val, false); - } else if let (Some(val), Some(col)) = ( - left_expr.unpack_val(), - right_expr.unpack_bound_col(false).map(|(column, _)| column), - ) { - return self.new_range(*op, col, val, true); - } - None + match op { + BinaryOperator::And => { + let left = self.detach(left_expr)?; + let right = self.detach(right_expr)?; + let (range, residual) = match (left, right) { + (Some(left_range), Some(right_range)) => { + let Some(range) = + Self::merge_binary(*op, left_range.range, right_range.range) + else { + return Ok(None); + }; + let residual = DetachedPredicate::combine_residuals( + left_range.residual, + right_range.residual, + ); + (range, residual) + } + (Some(detached), None) => { + let residual = DetachedPredicate::combine_residuals( + detached.residual, + Some(right_expr.as_ref().clone()), + ); + (detached.range, residual) + } + (None, Some(detached)) => { + let residual = DetachedPredicate::combine_residuals( + Some(left_expr.as_ref().clone()), + detached.residual, + ); + (detached.range, residual) + } + (None, None) => return Ok(None), + }; + Some(DetachedPredicate { range, residual }) + } + BinaryOperator::Or => { + let left = self.detach(left_expr)?; + let right = self.detach(right_expr)?; + if let (Some(left), Some(right)) = (left, right) { + if left.residual.is_none() && right.residual.is_none() { + if let Some(range) = + Self::merge_binary(*op, left.range, right.range) + { + return Ok(Some(DetachedPredicate::consumed(range))); + } + } + } + None + } + _ => None, } - (Some(binary), None) | (None, Some(binary)) => self.check_and(op, binary), - }, + } ScalarExpression::Alias { expr, .. } | ScalarExpression::TypeCast { expr, .. } => { self.detach(expr)? } @@ -238,12 +318,14 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { let column = self.arena.column(*column); if let (Some(col_id), Some(col_table)) = (column.id(), column.table_name()) { if &col_id == self.column_id && col_table.as_ref() == self.table_name { - return if *negated { - // Range::NotEq(NULL_VALUE.clone()) - Ok(None) + return Ok(if *negated { + Some(DetachedPredicate::consumed(Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(DataValue::Null), + })) } else { - Ok(Some(Range::Eq(DataValue::Null))) - }; + Some(DetachedPredicate::consumed(Range::Eq(DataValue::Null))) + }); } } @@ -713,17 +795,23 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { order.reverse() } } + fn range_value_cmp(left: &DataValue, right: &DataValue) -> Option { + match (left, right) { + (DataValue::Null, DataValue::Null) => Some(Ordering::Equal), + (DataValue::Null, _) => Some(Ordering::Greater), + (_, DataValue::Null) => Some(Ordering::Less), + _ => left.partial_cmp(right), + } + } match (left_bound, right_bound) { (Bound::Unbounded, Bound::Unbounded) => Some(Ordering::Equal), (Bound::Unbounded, _) => Some(is_min_then_reverse(is_min, Ordering::Less)), (_, Bound::Unbounded) => Some(is_min_then_reverse(is_min, Ordering::Greater)), - (Bound::Included(left), Bound::Included(right)) => left.partial_cmp(right), - (Bound::Included(left), Bound::Excluded(right)) => left - .partial_cmp(right) + (Bound::Included(left), Bound::Included(right)) => range_value_cmp(left, right), + (Bound::Included(left), Bound::Excluded(right)) => range_value_cmp(left, right) .map(|order| order.then(is_min_then_reverse(is_min, Ordering::Less))), - (Bound::Excluded(left), Bound::Excluded(right)) => left.partial_cmp(right), - (Bound::Excluded(left), Bound::Included(right)) => left - .partial_cmp(right) + (Bound::Excluded(left), Bound::Excluded(right)) => range_value_cmp(left, right), + (Bound::Excluded(left), Bound::Included(right)) => range_value_cmp(left, right) .map(|order| order.then(is_min_then_reverse(is_min, Ordering::Greater))), } } @@ -739,6 +827,18 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { if !self._is_belong(col) || column.id() != Some(*self.column_id) { return Ok(None); } + if val.is_null() { + return Ok(match op { + BinaryOperator::Spaceship => Some(Range::Eq(DataValue::Null)), + BinaryOperator::Eq + | BinaryOperator::NotEq + | BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::GtEq + | BinaryOperator::LtEq => Some(Range::Dummy), + _ => None, + }); + } val = val.cast(column.datatype())?; if is_flip { op = match op { @@ -752,7 +852,11 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { Ok(match op { BinaryOperator::Gt => Some(Range::Scope { min: Bound::Excluded(val.clone()), - max: Bound::Unbounded, + max: if column.nullable() { + Bound::Excluded(DataValue::Null) + } else { + Bound::Unbounded + }, }), BinaryOperator::Lt => Some(Range::Scope { min: Bound::Unbounded, @@ -760,7 +864,11 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { }), BinaryOperator::GtEq => Some(Range::Scope { min: Bound::Included(val.clone()), - max: Bound::Unbounded, + max: if column.nullable() { + Bound::Excluded(DataValue::Null) + } else { + Bound::Unbounded + }, }), BinaryOperator::LtEq => Some(Range::Scope { min: Bound::Unbounded, @@ -770,15 +878,6 @@ impl<'a, 'p> RangeDetacher<'a, 'p> { _ => None, }) } - - /// Only conjunction can safely keep a range detached from one side of a binary expression. - fn check_and(&mut self, op: &BinaryOperator, binary: Range) -> Option { - if matches!(op, BinaryOperator::And) { - return Some(binary); - } - - None - } } impl fmt::Display for Range { @@ -815,9 +914,10 @@ impl fmt::Display for Range { #[allow(clippy::uninlined_format_args)] mod test { use crate::binder::test::build_t1_table; + use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::{Range, RangeDetacher}; - use crate::expression::BinaryOperator; + use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizerPipeline; use crate::optimizer::rule::normalization::NormalizationRuleImpl; @@ -848,6 +948,198 @@ mod test { } } + fn test_column( + arena: &mut crate::planner::PlanArena, + table_name: &TableName, + column_id: crate::types::ColumnId, + name: &str, + nullable: bool, + ) -> Result { + let mut column = ColumnCatalog::new( + name.to_string(), + nullable, + ColumnDesc::new(LogicalType::Integer, None, false, None)?, + ); + column.set_ref_table(table_name.clone(), column_id, false); + Ok(arena.alloc_column(column)) + } + + fn cmp_predicate(column: ColumnRef, op: BinaryOperator, value: i32) -> ScalarExpression { + ScalarExpression::Binary { + op, + left_expr: Box::new(ScalarExpression::column_expr(column, 0)), + right_expr: Box::new(ScalarExpression::Constant(DataValue::Int32(value))), + evaluator: None, + ty: LogicalType::Boolean, + } + } + + fn and_predicate(left: ScalarExpression, right: ScalarExpression) -> ScalarExpression { + ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(left), + right_expr: Box::new(right), + evaluator: None, + ty: LogicalType::Boolean, + } + } + + #[test] + fn test_detach_consumes_and_predicates_with_residual() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let mut plan_arena = crate::planner::PlanArena::new(&table_state.table_arena); + let plan = table_state.plan("select * from t1 where c1 > 10 and c2 > 20")?; + let op = plan_filter(plan, &mut plan_arena)?.unwrap(); + let detached = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) + .detach(&op.predicate)? + .expect("c1 predicate should be consumed"); + + assert_eq!( + detached.range, + Range::Scope { + min: Bound::Excluded(DataValue::Int32(10)), + max: Bound::Unbounded, + } + ); + let residual = detached.residual.expect("c2 predicate should remain"); + let residual_detached = + RangeDetacher::new("t1", table_state.column_id_by_name("c2"), &plan_arena) + .detach(&residual)? + .expect("residual should be exactly the c2 range predicate"); + assert_eq!( + residual_detached.range, + Range::Scope { + min: Bound::Excluded(DataValue::Int32(20)), + max: Bound::Unbounded, + } + ); + assert_eq!(residual_detached.residual, None); + + Ok(()) + } + + #[test] + fn test_detach_nullable_range_keeps_value_lower_bound() -> Result<(), DatabaseError> { + let table_arena = crate::planner::TableArenaCell::default(); + let mut plan_arena = crate::planner::PlanArena::new(&table_arena); + let table_name: TableName = ::std::sync::Arc::from("nullable_t"); + let column_id = 1; + let column = test_column(&mut plan_arena, &table_name, column_id, "c1", true)?; + let predicate = and_predicate( + cmp_predicate(column, BinaryOperator::Gt, 0), + cmp_predicate(column, BinaryOperator::Lt, 8), + ); + + let detached = RangeDetacher::new(table_name.as_ref(), &column_id, &plan_arena) + .detach(&predicate)? + .expect("nullable range predicate should be consumed"); + + assert_eq!( + detached.range, + Range::Scope { + min: Bound::Excluded(DataValue::Int32(0)), + max: Bound::Excluded(DataValue::Int32(8)), + } + ); + assert_eq!(detached.residual, None); + + Ok(()) + } + + #[test] + fn test_detach_nullable_single_sided_ranges() -> Result<(), DatabaseError> { + let table_arena = crate::planner::TableArenaCell::default(); + let mut plan_arena = crate::planner::PlanArena::new(&table_arena); + let table_name: TableName = ::std::sync::Arc::from("nullable_t"); + let column_id = 1; + let column = test_column(&mut plan_arena, &table_name, column_id, "c1", true)?; + let cases = [ + ( + cmp_predicate(column, BinaryOperator::Gt, 0), + Range::Scope { + min: Bound::Excluded(DataValue::Int32(0)), + max: Bound::Excluded(DataValue::Null), + }, + ), + ( + cmp_predicate(column, BinaryOperator::GtEq, 0), + Range::Scope { + min: Bound::Included(DataValue::Int32(0)), + max: Bound::Excluded(DataValue::Null), + }, + ), + ( + cmp_predicate(column, BinaryOperator::Lt, 8), + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(DataValue::Int32(8)), + }, + ), + ( + cmp_predicate(column, BinaryOperator::LtEq, 8), + Range::Scope { + min: Bound::Unbounded, + max: Bound::Included(DataValue::Int32(8)), + }, + ), + ( + ScalarExpression::IsNull { + negated: true, + expr: Box::new(ScalarExpression::column_expr(column, 0)), + }, + Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(DataValue::Null), + }, + ), + ]; + + for (predicate, expected) in cases { + let detached = RangeDetacher::new(table_name.as_ref(), &column_id, &plan_arena) + .detach(&predicate)? + .expect("nullable single-sided predicate should be consumed"); + + assert_eq!(detached.range, expected); + assert_eq!(detached.residual, None); + } + + Ok(()) + } + + #[test] + fn test_detach_consumes_complete_or_only_when_both_sides_match() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let mut plan_arena = crate::planner::PlanArena::new(&table_state.table_arena); + let plan = table_state.plan("select * from t1 where c1 = 1 or c1 = 2")?; + let op = plan_filter(plan, &mut plan_arena)?.unwrap(); + let detached = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) + .detach(&op.predicate)? + .expect("both OR branches should be consumed"); + + assert_eq!( + detached.range, + Range::SortedRanges(vec![ + Range::Eq(DataValue::Int32(1)), + Range::Eq(DataValue::Int32(2)), + ]) + ); + assert_eq!(detached.residual, None); + Ok(()) + } + + #[test] + fn test_detach_does_not_partially_consume_or() -> Result<(), DatabaseError> { + let table_state = build_t1_table()?; + let mut plan_arena = crate::planner::PlanArena::new(&table_state.table_arena); + let plan = table_state.plan("select * from t1 where c1 = 1 or c2 = 2")?; + let op = plan_filter(plan, &mut plan_arena)?.unwrap(); + let detached = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) + .detach(&op.predicate)?; + + assert_eq!(detached, None); + Ok(()) + } + #[test] fn test_detach_ideal_cases() -> Result<(), DatabaseError> { let table_state = build_t1_table()?; @@ -857,6 +1149,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1 => {}", range); assert_eq!(range, Range::Eq(DataValue::Int32(1))) @@ -866,6 +1159,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1.0 => {}", range); assert_eq!(range, Range::Eq(DataValue::Int32(1))) @@ -874,7 +1168,8 @@ mod test { let plan = table_state.plan("select * from t1 where c1 != 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 != 1 => {:#?}", range); assert_eq!(range, None) } @@ -883,6 +1178,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 > 1 => c1: {}", range); assert_eq!( @@ -898,6 +1194,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 >= 1 => c1: {}", range); assert_eq!( @@ -913,6 +1210,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 < 1 => c1: {}", range); assert_eq!( @@ -928,6 +1226,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 <= 1 => c1: {}", range); assert_eq!( @@ -943,6 +1242,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 < 1 and c1 >= 0 => c1: {}", range); assert_eq!( @@ -958,6 +1258,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 < 1 or c1 >= 0 => c1: {}", range); assert_eq!( @@ -974,6 +1275,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1 and c1 = 0 => c1: {}", range); assert_eq!(range, Range::Dummy) @@ -983,6 +1285,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1 or c1 = 0 => c1: {}", range); assert_eq!( @@ -998,6 +1301,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Eq(DataValue::Int32(1))) @@ -1007,6 +1311,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = 1 or c1 = 1 => c1: {}", range); assert_eq!(range, Range::Eq(DataValue::Int32(1))) @@ -1017,6 +1322,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 > 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Dummy) @@ -1026,6 +1332,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 >= 1 and c1 = 1 => c1: {}", range); assert_eq!(range, Range::Eq(DataValue::Int32(1))) @@ -1035,6 +1342,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 > 1 or c1 = 1 => c1: {}", range); assert_eq!( @@ -1050,6 +1358,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 >= 1 or c1 = 1 => c1: {}", range); assert_eq!( @@ -1067,6 +1376,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "(c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4) => c1: {}", @@ -1086,6 +1396,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "(c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4) => c1: {}", @@ -1107,6 +1418,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0 => c1: {}", @@ -1121,6 +1433,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) and c1 = 0 => c1: {}", @@ -1135,6 +1448,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) or c1 = 0 => c1: {}", @@ -1158,6 +1472,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "((c1 > 0 and c1 < 3) or (c1 > 1 and c1 < 4)) or c1 = 0 => c1: {}", @@ -1177,6 +1492,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) and (c1 >= 0 and c1 <= 2) => c1: {}", range); assert_eq!(range, Range::Dummy) @@ -1186,6 +1502,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("(((c1 > 0 and c1 < 3) and (c1 > 1 and c1 < 4)) and c1 = 0) or (c1 >= 0 and c1 <= 2) => c1: {}", range); assert_eq!( @@ -1202,6 +1519,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( @@ -1223,6 +1541,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( @@ -1244,7 +1563,8 @@ mod test { let plan = table_state.plan("select * from t1 where true")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("empty => c1: {:#?}", range); assert_eq!(range, None) } @@ -1253,7 +1573,8 @@ mod test { let plan = table_state.plan("select * from t1 where c2 = 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c2 = 1 => c1: {:#?}", range); assert_eq!(range, None) } @@ -1261,7 +1582,8 @@ mod test { let plan = table_state.plan("select * from t1 where c1 > 1 or c2 > 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 > 1 or c2 > 1 => c1: {:#?}", range); assert_eq!(range, None) } @@ -1269,7 +1591,8 @@ mod test { let plan = table_state.plan("select * from t1 where c1 > c2 or c2 > 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 > c2 or c2 > 1 => c1: {:#?}", range); assert_eq!(range, None) } @@ -1281,6 +1604,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!( "c1 = 5 or (c1 > 5 and (c1 > 6 or c1 < 8) and c1 < 12) => c1: {}", @@ -1302,24 +1626,12 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? - .unwrap(); + .map(|detached| detached.range); println!( - "((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6)) => c1: {}", + "((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6)) => c1: {:#?}", range ); - assert_eq!( - range, - Range::SortedRanges(vec![ - Range::Scope { - min: Bound::Unbounded, - max: Bound::Included(DataValue::Int32(-4)), - }, - Range::Scope { - min: Bound::Included(DataValue::Int32(0)), - max: Bound::Unbounded, - } - ]) - ) + assert_eq!(range, None) } Ok(()) @@ -1332,8 +1644,11 @@ mod test { let mut detach_c1 = |sql: &str| -> Result, DatabaseError> { let plan = table_state.plan(sql)?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); - RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate) + Ok( + RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) + .detach(&op.predicate)? + .map(|detached| detached.range), + ) }; assert_eq!( @@ -1379,30 +1694,27 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null => c1: {}", range); - assert_eq!(range, Range::Eq(DataValue::Null)) + assert_eq!(range, Range::Dummy) } { let plan = table_state.plan("select * from t1 where c1 = null or c1 = 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null or c1 = 1 => c1: {}", range); - assert_eq!( - range, - Range::SortedRanges(vec![ - Range::Eq(DataValue::Null), - Range::Eq(DataValue::Int32(1)) - ]) - ) + assert_eq!(range, Range::Eq(DataValue::Int32(1))) } { let plan = table_state.plan("select * from t1 where c1 = null or c1 < 5")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null or c1 < 5 => c1: {}", range); assert_eq!( @@ -1419,17 +1731,15 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null or (c1 > 1 and c1 < 5) => c1: {}", range); assert_eq!( range, - Range::SortedRanges(vec![ - Range::Eq(DataValue::Null), - Range::Scope { - min: Bound::Excluded(DataValue::Int32(1)), - max: Bound::Excluded(DataValue::Int32(5)), - }, - ]) + Range::Scope { + min: Bound::Excluded(DataValue::Int32(1)), + max: Bound::Excluded(DataValue::Int32(5)), + } ) } { @@ -1437,9 +1747,10 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null and c1 < 5 => c1: {}", range); - assert_eq!(range, Range::Eq(DataValue::Null)) + assert_eq!(range, Range::Dummy) } { let plan = @@ -1447,6 +1758,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 = null and (c1 > 1 and c1 < 5) => c1: {}", range); assert_eq!(range, Range::Dummy) @@ -1456,15 +1768,17 @@ mod test { let plan = table_state.plan("select * from t1 where c1 != null")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 != null => c1: {:#?}", range); - assert_eq!(range, None) + assert_eq!(range, Some(Range::Dummy)) } { let plan = table_state.plan("select * from t1 where c1 = null or c1 != 1")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 = null or c1 != 1 => c1: {:#?}", range); assert_eq!(range, None) } @@ -1472,33 +1786,42 @@ mod test { let plan = table_state.plan("select * from t1 where c1 != null or c1 < 5")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 != null or c1 < 5 => c1: {:#?}", range); - assert_eq!(range, None) + assert_eq!( + range, + Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(DataValue::Int32(5)), + }) + ) } { let plan = table_state.plan("select * from t1 where c1 != null or (c1 > 1 and c1 < 5)")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) - .detach(&op.predicate)?; + .detach(&op.predicate)? + .map(|detached| detached.range); println!("c1 != null or (c1 > 1 and c1 < 5) => c1: {:#?}", range); - assert_eq!(range, None) + assert_eq!( + range, + Some(Range::Scope { + min: Bound::Excluded(DataValue::Int32(1)), + max: Bound::Excluded(DataValue::Int32(5)), + }) + ) } { let plan = table_state.plan("select * from t1 where c1 != null and c1 < 5")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 != null and c1 < 5 => c1: {}", range); - assert_eq!( - range, - Range::Scope { - min: Bound::Unbounded, - max: Bound::Excluded(DataValue::Int32(5)), - } - ) + assert_eq!(range, Range::Dummy) } { let plan = @@ -1506,27 +1829,22 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("c1 != null and (c1 > 1 and c1 < 5) => c1: {}", range); - assert_eq!( - range, - Range::Scope { - min: Bound::Excluded(DataValue::Int32(1)), - max: Bound::Excluded(DataValue::Int32(5)), - } - ) + assert_eq!(range, Range::Dummy) } { let plan = table_state.plan("select * from t1 where (c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5))")?; let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( range, Range::SortedRanges(vec![ - Range::Eq(DataValue::Null), Range::Scope { min: Bound::Excluded(DataValue::Int32(0)), max: Bound::Excluded(DataValue::Int32(3)), @@ -1543,12 +1861,12 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) or (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( range, Range::SortedRanges(vec![ - Range::Eq(DataValue::Null), Range::Scope { min: Bound::Excluded(DataValue::Int32(0)), max: Bound::Excluded(DataValue::Int32(3)), @@ -1565,6 +1883,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("(c1 = null or (c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and ((c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( @@ -1586,6 +1905,7 @@ mod test { let op = plan_filter(plan, &mut plan_arena)?.unwrap(); let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &plan_arena) .detach(&op.predicate)? + .map(|detached| detached.range) .unwrap(); println!("((c1 < 2 and c1 > 0) or (c1 < 6 and c1 > 4)) and (c1 = null or (c1 < 3 and c1 > 1) or (c1 < 7 and c1 > 5)) => c1: {}", range); assert_eq!( diff --git a/src/optimizer/heuristic/optimizer.rs b/src/optimizer/heuristic/optimizer.rs index 132fbb7d..6c7b1d68 100644 --- a/src/optimizer/heuristic/optimizer.rs +++ b/src/optimizer/heuristic/optimizer.rs @@ -627,11 +627,10 @@ mod tests { use crate::optimizer::rule::implementation::ImplementationRuleImpl; use crate::optimizer::rule::normalization::NormalizationRuleImpl; use crate::planner::operator::sort::SortField; - use crate::planner::operator::{PhysicalOption, PlanImpl, SortOption}; + use crate::planner::operator::{Operator, PlanImpl, SortOption}; use crate::storage::{Storage, Transaction}; - use crate::types::index::{IndexInfo, IndexLookup, IndexMeta, IndexType}; + use crate::types::index::IndexLookup; use crate::types::value::DataValue; - use crate::types::LogicalType; use std::ops::Bound; use tempfile::TempDir; @@ -708,50 +707,31 @@ mod tests { &mut plan_arena, )?; - let expected_index_meta = plan_arena.alloc_index(IndexMeta { - id: 0, - column_ids: vec![plan_arena.column(c1_column).id().unwrap()], - table_name: "t1".to_string().into(), - pk_ty: LogicalType::Integer, - value_ty: LogicalType::Integer, - name: "pk_index".to_string(), - ty: IndexType::PrimaryKey { is_multiple: false }, - }); - + let join_plan = best_plan.childrens.pop_only(); + let left_scan = join_plan.childrens.pop_twins().0; + assert!(matches!(left_scan.operator, Operator::TableScan(_))); + let Some(physical_option) = left_scan.physical_option else { + unreachable!("left scan should select index scan"); + }; + let PlanImpl::IndexScan(index_info) = &physical_option.plan else { + unreachable!("left scan should select index scan"); + }; assert_eq!( - best_plan - .childrens - .pop_only() - .childrens - .pop_twins() - .0 - .childrens - .pop_only() - .physical_option, - Some(PhysicalOption::new( - PlanImpl::IndexScan(Box::new(IndexInfo { - meta: expected_index_meta, - sort_option: SortOption::OrderBy { - fields: sort_fields.clone(), - ignore_prefix_len: 0, - }, - lookup: Some(IndexLookup::Static(Range::SortedRanges(vec![ - Range::Eq(DataValue::Int32(2)), - Range::Scope { - min: Bound::Excluded(DataValue::Int32(40)), - max: Bound::Unbounded, - } - ]))), - covered_deserializers: None, - cover_mapping: None, - sort_elimination_hint: None, - stream_distinct_hint: None, - })), - SortOption::OrderBy { - fields: sort_fields, - ignore_prefix_len: 0, + index_info.lookup, + Some(IndexLookup::Static(Range::SortedRanges(vec![ + Range::Eq(DataValue::Int32(2)), + Range::Scope { + min: Bound::Excluded(DataValue::Int32(40)), + max: Bound::Unbounded, } - )) + ]))) + ); + assert_eq!( + physical_option.sort_option(), + &SortOption::OrderBy { + fields: sort_fields, + ignore_prefix_len: 0, + } ); Ok(()) diff --git a/src/optimizer/rule/normalization/agg_elimination.rs b/src/optimizer/rule/normalization/elimination.rs similarity index 82% rename from src/optimizer/rule/normalization/agg_elimination.rs rename to src/optimizer/rule/normalization/elimination.rs index d23b04d3..336f6cea 100644 --- a/src/optimizer/rule/normalization/agg_elimination.rs +++ b/src/optimizer/rule/normalization/elimination.rs @@ -21,7 +21,7 @@ use crate::planner::operator::sort::SortField; use crate::planner::operator::table_scan::TableScanOperator; use crate::planner::operator::{Operator, PhysicalOption, PlanImpl, SortOption}; use crate::planner::{Childrens, LogicalPlan}; -use crate::types::index::IndexOrderHint; +use crate::types::index::{IndexLookup, IndexOrderHint}; pub struct EliminateRedundantSort; @@ -64,6 +64,47 @@ impl NormalizationRule for EliminateRedundantSort { } } +pub struct EliminateIndexFilter; + +impl NormalizationRule for EliminateIndexFilter { + fn apply( + &self, + plan: &mut LogicalPlan, + _: &mut crate::planner::PlanArena, + ) -> Result { + if !matches!(plan.operator, Operator::Filter(_)) { + return Ok(false); + } + + let residual = { + let Some(child) = only_child_mut(plan) else { + return Ok(false); + }; + let Some(PhysicalOption { + plan: PlanImpl::IndexScan(index_info), + .. + }) = child.physical_option.as_ref() + else { + return Ok(false); + }; + if !matches!(index_info.lookup, Some(IndexLookup::Static(_))) { + return Ok(false); + } + index_info.residual_predicate.clone() + }; + + if let Some(residual) = residual { + let Operator::Filter(filter_op) = &mut plan.operator else { + unreachable!("filter operator checked before residual rewrite"); + }; + filter_op.predicate = residual; + return Ok(true); + } + + Ok(replace_with_only_child(plan)) + } +} + fn mark_sort_preserving_indexes( plan: &mut LogicalPlan, required: &[SortField], @@ -253,6 +294,9 @@ pub(crate) fn apply_annotated_post_rules( if EliminateRedundantSort.apply(plan, arena)? { changed = true; } + if EliminateIndexFilter.apply(plan, arena)? { + changed = true; + } if UseStreamDistinct.apply(plan, arena)? { changed = true; } @@ -382,7 +426,7 @@ pub(crate) fn covers( #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { - use super::{EliminateRedundantSort, UseStreamDistinct}; + use super::{EliminateIndexFilter, EliminateRedundantSort, UseStreamDistinct}; use crate::catalog::{ColumnCatalog, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; @@ -473,6 +517,7 @@ mod tests { meta, sort_option: sort_option.clone(), lookup: None, + residual_predicate: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -481,6 +526,47 @@ mod tests { (index_info, sort_option) } + fn build_filter_with_selected_index( + arena: &mut crate::planner::PlanArena, + predicate: ScalarExpression, + residual: Option, + ) -> LogicalPlan { + let column = arena.alloc_column(ColumnCatalog::new_dummy("c1".to_string())); + let sort_field = SortField::new(ScalarExpression::column_expr(column, 0), true, false); + let (mut index_info, sort_option) = build_index_info(arena, vec![sort_field], 0); + index_info.lookup = Some(IndexLookup::Static(Range::Scope { + min: Bound::Unbounded, + max: Bound::Unbounded, + })); + index_info.residual_predicate = residual; + + let mut scan = LogicalPlan::new( + Operator::TableScan(TableScanOperator { + table_name: ::std::sync::Arc::from("t1"), + columns: vec![column], + limit: (None, None), + index_infos: vec![index_info.clone()], + with_pk: false, + }), + Childrens::None, + ); + scan.physical_option = Some(PhysicalOption::new( + PlanImpl::IndexScan(Box::new(index_info)), + sort_option, + )); + + let mut filter = LogicalPlan::new( + Operator::Filter(FilterOperator { + predicate, + is_optimized: false, + having: false, + }), + Childrens::Only(Box::new(scan)), + ); + filter.physical_option = Some(PhysicalOption::new(PlanImpl::Filter, SortOption::Follow)); + filter + } + fn build_distinct_scan_plan( arena: &mut crate::planner::PlanArena, ) -> (LogicalPlan, SortOption) { @@ -510,6 +596,7 @@ mod tests { }), sort_option: sort_option.clone(), lookup: None, + residual_predicate: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -539,6 +626,64 @@ mod tests { (plan, sort_option) } + #[test] + fn exact_index_filter_is_removed_after_physical_selection() -> Result<(), DatabaseError> { + let table_arena = crate::planner::TableArenaCell::default(); + let mut arena = crate::planner::PlanArena::new(&table_arena); + let predicate = ScalarExpression::Constant(DataValue::Boolean(true)); + let mut plan = build_filter_with_selected_index(&mut arena, predicate, None); + + let rule = EliminateIndexFilter; + assert!(rule.apply(&mut plan, &mut arena)?); + assert!(matches!(plan.operator, Operator::TableScan(_))); + Ok(()) + } + + #[test] + fn partial_index_filter_keeps_residual_after_physical_selection() -> Result<(), DatabaseError> { + let table_arena = crate::planner::TableArenaCell::default(); + let mut arena = crate::planner::PlanArena::new(&table_arena); + let predicate = ScalarExpression::Constant(DataValue::Boolean(true)); + let residual = ScalarExpression::Constant(DataValue::Boolean(false)); + let mut plan = + build_filter_with_selected_index(&mut arena, predicate, Some(residual.clone())); + + let rule = EliminateIndexFilter; + assert!(rule.apply(&mut plan, &mut arena)?); + let Operator::Filter(filter_op) = &plan.operator else { + unreachable!("residual predicate should keep filter"); + }; + assert_eq!(filter_op.predicate, residual); + Ok(()) + } + + #[test] + fn probe_index_filter_is_not_removed_after_physical_selection() -> Result<(), DatabaseError> { + let table_arena = crate::planner::TableArenaCell::default(); + let mut arena = crate::planner::PlanArena::new(&table_arena); + let predicate = ScalarExpression::Constant(DataValue::Boolean(true)); + let mut plan = build_filter_with_selected_index(&mut arena, predicate, None); + let Childrens::Only(child) = plan.childrens.as_mut() else { + unreachable!("filter should have a scan child"); + }; + let Operator::TableScan(scan_op) = &mut child.operator else { + unreachable!("filter child should be a table scan"); + }; + scan_op.index_infos[0].lookup = Some(IndexLookup::Probe); + let Some(physical_option) = child.physical_option.as_mut() else { + unreachable!("scan should have selected physical option"); + }; + let PlanImpl::IndexScan(index_info) = &mut physical_option.plan else { + unreachable!("scan should have selected index scan"); + }; + index_info.lookup = Some(IndexLookup::Probe); + + let rule = EliminateIndexFilter; + assert!(!rule.apply(&mut plan, &mut arena)?); + assert!(matches!(plan.operator, Operator::Filter(_))); + Ok(()) + } + #[test] fn remove_sort_when_index_matches_order() -> Result<(), DatabaseError> { let table_arena = crate::planner::TableArenaCell::default(); diff --git a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index 011412e2..1f9df2bc 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -34,20 +34,20 @@ use crate::optimizer::rule::normalization::simplification::ConstantCalculation; use crate::optimizer::rule::normalization::simplification::SimplifyFilter; use crate::optimizer::rule::normalization::top_k::TopK; use crate::planner::LogicalPlan; -mod agg_elimination; mod column_pruning; mod combine_operators; mod compilation_in_advance; +mod elimination; mod min_max_top_k; mod parameterized_index; mod pushdown_limit; mod pushdown_predicates; mod simplification; mod top_k; -pub(crate) use agg_elimination::{ +pub(crate) use compilation_in_advance::evaluator_bind_current; +pub(crate) use elimination::{ apply_annotated_post_rules, apply_scan_order_hint, OrderHintKind, ScanOrderHint, }; -pub(crate) use compilation_in_advance::evaluator_bind_current; pub(crate) use parameterized_index::ParameterizeMarkApply; pub(crate) use simplification::constant_calculation_current; diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index af5bc125..d27eef69 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -13,7 +13,7 @@ // limitations under the License. use crate::errors::DatabaseError; -use crate::expression::range_detacher::{Range, RangeDetacher}; +use crate::expression::range_detacher::{DetachedPredicate, Range, RangeDetacher}; use crate::expression::visitor_mut::{PositionShift, VisitorMut}; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::iter_ext::Itertools; @@ -27,7 +27,7 @@ use crate::types::index::{IndexInfo, IndexLookup, IndexMetaRef, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use std::ops::Bound; -use std::{mem, slice}; +use std::{borrow::Cow, mem, slice}; const EMPTY_SCHEMA: [crate::catalog::ColumnRef; 0] = []; @@ -244,6 +244,7 @@ impl NormalizationRule for PushPredicateIntoScan { for IndexInfo { meta, lookup, + residual_predicate, covered_deserializers, cover_mapping, sort_option, @@ -261,7 +262,7 @@ impl NormalizationRule for PushPredicateIntoScan { return Err(DatabaseError::InvalidIndex); }; let index_meta = arena.index(*meta); - *lookup = match index_meta.ty { + let detached = match index_meta.ty { IndexType::PrimaryKey { is_multiple: false } | IndexType::Unique | IndexType::Normal => RangeDetacher::new( @@ -269,18 +270,20 @@ impl NormalizationRule for PushPredicateIntoScan { &index_meta.column_ids[0], arena, ) - .detach(&filter_op.predicate)? - .map(IndexLookup::Static), + .detach(&filter_op.predicate)?, IndexType::PrimaryKey { is_multiple: true } | IndexType::Composite => { Self::composite_range(filter_op, *meta, ignore_prefix_len, arena)? - .map(IndexLookup::Static) } }; - if lookup.is_none() { + let Some(detached) = detached else { + *lookup = None; + *residual_predicate = None; continue; - } + }; changed = true; + *lookup = Some(IndexLookup::Static(detached.range)); + *residual_predicate = detached.residual; *covered_deserializers = None; *cover_mapping = None; @@ -330,24 +333,33 @@ impl PushPredicateIntoScan { meta: IndexMetaRef, ignore_prefix_len: &mut usize, arena: &crate::planner::PlanArena, - ) -> Result, DatabaseError> { + ) -> Result, DatabaseError> { let meta = arena.index(meta); let mut res = None; let mut eq_ranges = Vec::with_capacity(meta.column_ids.len()); let mut apply_column_count = 0; + let mut residual = Some(Cow::Borrowed(&op.predicate)); for column_id in meta.column_ids.iter() { - if let Some(range) = RangeDetacher::new(meta.table_name.as_ref(), column_id, arena) - .detach(&op.predicate)? - { - apply_column_count += 1; - - if range.only_eq() { - eq_ranges.push(range); - continue; - } - res = range.combining_eqs(&eq_ranges); + let Some(predicate) = residual.take() else { + break; + }; + let Some(detached) = RangeDetacher::new(meta.table_name.as_ref(), column_id, arena) + .detach(predicate.as_ref())? + else { + residual = Some(predicate); + break; + }; + residual = detached.residual.map(Cow::Owned); + + let range = detached.range; + apply_column_count += 1; + + if range.only_eq() { + eq_ranges.push(range); + continue; } + res = range.combining_eqs(&eq_ranges); break; } *ignore_prefix_len = eq_ranges.len(); @@ -357,7 +369,7 @@ impl PushPredicateIntoScan { res = range.combining_eqs(&eq_ranges); } } - Ok(res.map(|range| { + let range = res.map(|range| { if range.only_eq() && apply_column_count != meta.column_ids.len() { fn eq_to_scope(range: Range) -> Range { match range { @@ -380,6 +392,10 @@ impl PushPredicateIntoScan { return eq_to_scope(range); } range + }); + Ok(range.map(|range| DetachedPredicate { + range, + residual: residual.map(|predicate| predicate.into_owned()), })) } } @@ -501,7 +517,7 @@ mod tests { use crate::binder::test::build_t1_table; use crate::catalog::{ColumnCatalog, ColumnDesc, TableName}; use crate::errors::DatabaseError; - use crate::expression::range_detacher::Range; + use crate::expression::range_detacher::{Range, RangeDetacher}; use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::{ @@ -526,6 +542,46 @@ mod tests { builder.build().instantiate(plan).find_best(None, arena) } + fn build_test_column( + arena: &mut PlanArena, + table_name: &TableName, + id: crate::types::ColumnId, + name: &str, + ) -> Result { + let mut column = ColumnCatalog::new( + name.to_string(), + false, + ColumnDesc::new(LogicalType::Integer, None, false, None)?, + ); + column.set_ref_table(table_name.clone(), id, false); + Ok(arena.alloc_column(column)) + } + + fn cmp_predicate( + op: BinaryOperator, + column: crate::catalog::ColumnRef, + position: usize, + value: i32, + ) -> ScalarExpression { + ScalarExpression::Binary { + op, + left_expr: Box::new(ScalarExpression::column_expr(column, position)), + right_expr: Box::new(ScalarExpression::Constant(DataValue::Int32(value))), + evaluator: None, + ty: LogicalType::Boolean, + } + } + + fn and_predicate(left: ScalarExpression, right: ScalarExpression) -> ScalarExpression { + ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(left), + right_expr: Box::new(right), + evaluator: None, + ty: LogicalType::Boolean, + } + } + #[test] fn test_push_predicate_into_scan() -> Result<(), DatabaseError> { let table_state = build_t1_table()?; @@ -568,6 +624,140 @@ mod tests { Ok(()) } + #[test] + fn test_composite_range_consumes_prefix_equalities_continuously() -> Result<(), DatabaseError> { + let table_name: TableName = ::std::sync::Arc::from("mock_table"); + let table_arena = crate::planner::TableArenaCell::default(); + let mut arena = PlanArena::new(&table_arena); + let c1 = build_test_column(&mut arena, &table_name, 1, "c1")?; + let c2 = build_test_column(&mut arena, &table_name, 2, "c2")?; + let c3 = build_test_column(&mut arena, &table_name, 3, "c3")?; + let c4 = build_test_column(&mut arena, &table_name, 4, "c4")?; + let index_meta = arena.alloc_index(IndexMeta { + id: 0, + column_ids: vec![1, 2, 3], + table_name: table_name.clone(), + pk_ty: LogicalType::Integer, + value_ty: LogicalType::Tuple(vec![ + LogicalType::Integer, + LogicalType::Integer, + LogicalType::Integer, + ]), + name: "idx_c1_c2_c3".to_string(), + ty: IndexType::Composite, + }); + let predicate = and_predicate( + and_predicate( + cmp_predicate(BinaryOperator::Eq, c1, 0, 1), + cmp_predicate(BinaryOperator::Eq, c2, 1, 2), + ), + and_predicate( + cmp_predicate(BinaryOperator::Eq, c3, 2, 3), + cmp_predicate(BinaryOperator::Eq, c4, 3, 4), + ), + ); + let filter = FilterOperator { + predicate, + is_optimized: false, + having: false, + }; + let mut ignore_prefix_len = 0; + + let detached = super::PushPredicateIntoScan::composite_range( + &filter, + index_meta, + &mut ignore_prefix_len, + &arena, + )? + .expect("composite prefix should be consumed"); + + assert_eq!(ignore_prefix_len, 3); + assert_eq!( + detached.range, + Range::Eq(DataValue::Tuple( + vec![ + DataValue::Int32(1), + DataValue::Int32(2), + DataValue::Int32(3), + ], + false, + )) + ); + let residual = detached.residual.expect("c4 predicate should remain"); + let residual_detached = RangeDetacher::new(table_name.as_ref(), &4, &arena) + .detach(&residual)? + .expect("residual should be the c4 predicate"); + assert_eq!(residual_detached.range, Range::Eq(DataValue::Int32(4))); + assert_eq!(residual_detached.residual, None); + + Ok(()) + } + + #[test] + fn test_composite_range_stops_consuming_after_first_non_equality() -> Result<(), DatabaseError> + { + let table_name: TableName = ::std::sync::Arc::from("mock_table"); + let table_arena = crate::planner::TableArenaCell::default(); + let mut arena = PlanArena::new(&table_arena); + let c1 = build_test_column(&mut arena, &table_name, 1, "c1")?; + let c2 = build_test_column(&mut arena, &table_name, 2, "c2")?; + let c3 = build_test_column(&mut arena, &table_name, 3, "c3")?; + let index_meta = arena.alloc_index(IndexMeta { + id: 0, + column_ids: vec![1, 2, 3], + table_name: table_name.clone(), + pk_ty: LogicalType::Integer, + value_ty: LogicalType::Tuple(vec![ + LogicalType::Integer, + LogicalType::Integer, + LogicalType::Integer, + ]), + name: "idx_c1_c2_c3".to_string(), + ty: IndexType::Composite, + }); + let predicate = and_predicate( + and_predicate( + cmp_predicate(BinaryOperator::Eq, c1, 0, 1), + cmp_predicate(BinaryOperator::Gt, c2, 1, 2), + ), + cmp_predicate(BinaryOperator::Eq, c3, 2, 3), + ); + let filter = FilterOperator { + predicate, + is_optimized: false, + having: false, + }; + let mut ignore_prefix_len = 0; + + let detached = super::PushPredicateIntoScan::composite_range( + &filter, + index_meta, + &mut ignore_prefix_len, + &arena, + )? + .expect("composite prefix should be consumed"); + + assert_eq!(ignore_prefix_len, 1); + assert_eq!( + detached.range, + Range::Scope { + min: Bound::Excluded(DataValue::Tuple( + vec![DataValue::Int32(1), DataValue::Int32(2)], + false, + )), + max: Bound::Excluded(DataValue::Tuple(vec![DataValue::Int32(1)], true)), + } + ); + let residual = detached.residual.expect("c3 predicate should remain"); + let residual_detached = RangeDetacher::new(table_name.as_ref(), &3, &arena) + .detach(&residual)? + .expect("residual should be the c3 predicate"); + assert_eq!(residual_detached.range, Range::Eq(DataValue::Int32(3))); + assert_eq!(residual_detached.residual, None); + + Ok(()) + } + #[test] fn test_cover_mapping_matches_scan_order() -> Result<(), DatabaseError> { let table_name: TableName = ::std::sync::Arc::from("mock_table"); @@ -638,6 +828,7 @@ mod tests { ignore_prefix_len: 0, }, lookup: None, + residual_predicate: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, @@ -650,6 +841,7 @@ mod tests { ignore_prefix_len: 0, }, lookup: None, + residual_predicate: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, diff --git a/src/optimizer/rule/normalization/simplification.rs b/src/optimizer/rule/normalization/simplification.rs index 158d54ed..50b81f8e 100644 --- a/src/optimizer/rule/normalization/simplification.rs +++ b/src/optimizer/rule/normalization/simplification.rs @@ -205,6 +205,7 @@ mod test { if let Operator::Filter(filter_op) = filter_op.operator { let range = RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &arena) .detach(&filter_op.predicate)? + .map(|detached| detached.range) .unwrap(); assert_eq!( range, @@ -310,7 +311,8 @@ mod test { if let Operator::Filter(filter_op) = filter_op.operator { Ok( RangeDetacher::new("t1", table_state.column_id_by_name("c1"), &arena) - .detach(&filter_op.predicate)?, + .detach(&filter_op.predicate)? + .map(|detached| detached.range), ) } else { Ok(None) @@ -426,7 +428,9 @@ mod test { let filter_op = best_plan.childrens.pop_only(); if let Operator::Filter(filter_op) = filter_op.operator { - Ok(RangeDetacher::new("t1", column_id, arena).detach(&filter_op.predicate)?) + Ok(RangeDetacher::new("t1", column_id, arena) + .detach(&filter_op.predicate)? + .map(|detached| detached.range)) } else { Ok(None) } @@ -560,10 +564,7 @@ mod test { assert_eq!( plan_filter(&plan_1, table_state.column_id_by_name("c1"), &mut arena)?, - Some(Range::Scope { - min: Bound::Excluded(DataValue::Int32(1)), - max: Bound::Unbounded, - }) + None ); Ok(()) @@ -593,7 +594,10 @@ mod test { assert_eq!( plan_filter(&plan_1, table_state.column_id_by_name("c1"), &mut arena)?, - None + Some(Range::Scope { + min: Bound::Unbounded, + max: Bound::Excluded(DataValue::Null), + }) ); Ok(()) diff --git a/src/planner/operator/table_scan.rs b/src/planner/operator/table_scan.rs index b20c19ae..a337fea5 100644 --- a/src/planner/operator/table_scan.rs +++ b/src/planner/operator/table_scan.rs @@ -70,6 +70,7 @@ impl TableScanOperator { ignore_prefix_len: 0, }, lookup: None, + residual_predicate: None, covered_deserializers: None, cover_mapping: None, sort_elimination_hint: None, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 6b121a63..9c5d48e8 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1261,7 +1261,7 @@ fn encode_bound<'a>( Ok(Bound::Included(buffer.as_slice())) } Bound::Excluded(val) => { - inner.bound_key(table_codec, params, val, is_upper, buffer)?; + inner.bound_key(table_codec, params, val, !is_upper, buffer)?; Ok(Bound::Excluded(buffer.as_slice())) } Bound::Unbounded => Ok(Bound::Unbounded), diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index 37cee65f..afe6d24f 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -1075,6 +1075,66 @@ mod test { Ok(()) } + #[test] + fn test_secondary_index_exclusive_range_skips_equal_prefix() -> Result<(), DatabaseError> { + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let mut kite_sql = DataBaseBuilder::path(temp_dir.path()).build_rocksdb()?; + kite_sql.ddl("create table t1 (id int primary key, c1 int)")?; + kite_sql.load(CatalogKind::Table("t1".to_string().into()))?; + kite_sql + .run("insert into t1 values (0, 0), (1, 1), (2, 2), (9, 9), (10, 10)")? + .done()?; + kite_sql.ddl("create index idx_c1 on t1(c1)")?; + kite_sql.load(CatalogKind::Table("t1".to_string().into()))?; + + let transaction = kite_sql.storage.transaction()?; + let table = transaction + .table(kite_sql.state.table_cache(), "t1".to_string().into())? + .unwrap() + .clone(); + let plan_arena = PlanArena::new(kite_sql.state.table_arena()); + let c1_column = table + .get_column_by_name("c1") + .ok_or_else(|| DatabaseError::column_not_found("c1"))?; + let idx_c1 = *table + .indexes + .iter() + .find(|index| plan_arena.index(**index).name == "idx_c1") + .ok_or(DatabaseError::InvalidIndex)?; + let c1_deserializer = vec![plan_arena.column(c1_column).datatype().serializable()]; + + let mut iter = transaction.read_by_index( + kite_sql.state.table_cache(), + &plan_arena, + "t1".to_string().into(), + (None, None), + vec![c1_column], + idx_c1, + vec![Range::Scope { + min: Bound::Excluded(DataValue::Int32(0)), + max: Bound::Excluded(DataValue::Int32(10)), + }], + false, + Some(c1_deserializer), + None, + )?; + + let mut values = Vec::new(); + while let Some(tuple) = crate::storage::next_tuple_for_test(&mut iter)? { + values.push(tuple.values[0].clone()); + } + assert_eq!( + values, + vec![ + DataValue::Int32(1), + DataValue::Int32(2), + DataValue::Int32(9), + ] + ); + + Ok(()) + } + #[test] fn test_read_by_index_cover() -> Result<(), DatabaseError> { let temp_dir = TempDir::new().expect("unable to create temporary working directory"); diff --git a/src/types/index.rs b/src/types/index.rs index c122d3bb..e769cfaa 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -73,6 +73,7 @@ pub struct IndexInfo { pub(crate) meta: IndexMetaRef, pub(crate) sort_option: SortOption, pub(crate) lookup: Option, + pub(crate) residual_predicate: Option, pub(crate) covered_deserializers: Option>, pub(crate) cover_mapping: Option>, pub(crate) sort_elimination_hint: Option, diff --git a/tests/slt/stream_distinct_explain.slt b/tests/slt/stream_distinct_explain.slt index 6db0069a..e7a960fb 100644 --- a/tests/slt/stream_distinct_explain.slt +++ b/tests/slt/stream_distinct_explain.slt @@ -14,7 +14,7 @@ analyze table distinct_t; query T explain select distinct c1 from distinct_t where c1 < 10 and c1 > 0; ---- -Projection [#2] [Project => (Sort Option: Follow)] Aggregate [] -> Group By [#2] [StreamDistinct => (Sort Option: Follow)] Filter ((#2 < 10) && (#2 > 0)), Is Having: false [Filter => (Sort Option: Follow)] TableScan distinct_t -> [#2] [IndexScan By #1 => (0, 10) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2] [Project => (Sort Option: Follow)] Aggregate [] -> Group By [#2] [StreamDistinct => (Sort Option: Follow)] TableScan distinct_t -> [#2] [IndexScan By #1 => (0, 10) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] statement ok drop index distinct_t.distinct_t_c1_index; diff --git a/tests/slt/update.slt b/tests/slt/update.slt index 7d81bb4f..643540f1 100644 --- a/tests/slt/update.slt +++ b/tests/slt/update.slt @@ -65,3 +65,39 @@ select * from t where id = 2 statement ok drop table t + +statement ok +create table t_update_idx(id int primary key, b int, c int) + +statement ok +copy t_update_idx from 'tests/data/row_20000.csv' ( DELIMITER '|' ) + +statement ok +create index idx_t_update_idx_b on t_update_idx(b) + +statement ok +analyze table t_update_idx + +query T +explain select id, b, c from t_update_idx where b = 10 +---- +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t_update_idx -> [#1, #2, #3] [IndexScan By #2 => 10 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] + +statement ok +update t_update_idx set c = 111 where id = 9 + +query III +select id, b, c from t_update_idx where b = 10 +---- +9 10 111 + +statement ok +update t_update_idx set id = 300000 where id = 9 + +query III +select id, b, c from t_update_idx where b = 10 +---- +300000 10 111 + +statement ok +drop table t_update_idx diff --git a/tests/slt/where_by_index_explain.slt b/tests/slt/where_by_index_explain.slt index ff641049..c7ee8cdd 100644 --- a/tests/slt/where_by_index_explain.slt +++ b/tests/slt/where_by_index_explain.slt @@ -27,17 +27,17 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, query T explain select * from t1 where id = 0; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 = 0), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 0 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 0 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id = 0 and id = 1; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 = 0) && (#1 = 1)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id = 0 and id != 0; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 = 0) && (#1 != 0)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 0 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 != 0), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 0 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id = 0 or id != 0 limit 10; @@ -47,42 +47,42 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Limit 10 [Limit => (S query T explain select * from t1 where id = 0 and id != 0 and id = 3; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#1 = 0) && (#1 != 0)) && (#1 = 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 != 0), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id = 0 and id != 0 or id = 3; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#1 = 0) && (#1 != 0)) || (#1 = 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 0, 3 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#1 = 0) && (#1 != 0)) || (#1 = 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [SeqScan => (Sort Option: None)] query T explain select * from t1 where id > 0 and id = 3; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 > 0) && (#1 = 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 3 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => 3 => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id >= 0 and id <= 3; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 >= 0) && (#1 <= 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [0, 3] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [0, 3] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id <= 0 and id >= 3; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 <= 0) && (#1 >= 3)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => Dummy => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where (id > 10) = false; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 <= 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where (id > 10) != true; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 <= 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where not (id > 10); ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#1 <= 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 10] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where id >= 3 or id <= 9 limit 10; @@ -92,12 +92,12 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Limit 10 [Limit => (S query T explain select * from t1 where id <= 3 or id >= 9 limit 10; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Limit 10 [Limit => (Sort Option: Follow)] Filter ((#1 <= 3) || (#1 >= 9)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => (-inf, 3], [9, +inf) => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3], Limit: 10 [IndexScan By #0 => (-inf, 3], [9, +inf) => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where (id >= 0 and id <= 3) or (id >= 9 and id <= 12); ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#1 >= 0) && (#1 <= 3)) || ((#1 >= 9) && (#1 <= 12))), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [0, 3], [9, 12] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [0, 3], [9, 12] => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where (id >= 0 or id <= 3) and (id >= 9 or id <= 12) limit 10; @@ -107,42 +107,42 @@ Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Limit 10 [Limit => (S query T explain select * from t1 where id = 5 or (id > 5 and (id > 6 or id < 8) and id < 12); ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#1 = 5) || (((#1 > 5) && ((#1 > 6) || (#1 < 8))) && (#1 < 12))), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [5, 12) => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #0 => [5, 12) => (Sort Option: OrderBy: (#1 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c1 = 7 and c2 = 8; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#2 = 7) && (#3 = 8)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#3 = 8), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c1 = 7 and c2 < 9; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#2 = 7) && (#3 < 9)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#3 < 9), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where (c1 = 7 or c1 = 10) and c2 < 9; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (((#2 = 7) || (#2 = 10)) && (#3 < 9)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7, 10 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#3 < 9), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => 7, 10 => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c1 is null and c2 is null; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#2 is null && #3 is null), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => null => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter #3 is null, Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => null => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c1 > 0 and c1 < 8; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#2 > 0) && (#2 < 8)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => (0, 8) => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #1 => (0, 8) => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c2 > 0 and c2 < 9; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && (#3 < 9)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 9) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 9) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] query T explain select * from t1 where c2 = 5; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter (#3 = 5), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => 5 => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => 5 => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] statement ok update t1 set c2 = 9 where c1 = 1 @@ -150,7 +150,7 @@ update t1 set c2 = 9 where c1 = 1 query T explain select * from t1 where c2 > 0 and c2 < 10; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 10) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 10) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] statement ok delete from t1 where c1 = 4 @@ -158,20 +158,20 @@ delete from t1 where c1 = 4 query T explain select * from t1 where c2 > 0 and c2 < 10; ---- -Projection [#1, #2, #3] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 10) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#1, #2, #3] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2, #3] [IndexScan By #2 => (0, 10) => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] # unique covered query T explain select c1 from t1 where c1 < 10; ---- -Projection [#2] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2] [IndexScan By #1 => (-inf, 10) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2] [Project => (Sort Option: Follow)] TableScan t1 -> [#2] [IndexScan By #1 => (-inf, 10) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] # unique covered with primary key projection query T explain select c1, id from t1 where c1 < 10; ---- -Projection [#2, #1] [Project => (Sort Option: Follow)] Filter (#2 < 10), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#1, #2] [IndexScan By #1 => (-inf, 10) => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2, #1] [Project => (Sort Option: Follow)] TableScan t1 -> [#1, #2] [IndexScan By #1 => (-inf, 10) => (Sort Option: OrderBy: (#2 Asc Nulls Last) ignore_prefix_len: 0)] statement ok drop index t1.u_c1_index; @@ -180,7 +180,7 @@ drop index t1.u_c1_index; query T explain select c2 from t1 where c2 < 10 and c2 > 0; ---- -Projection [#3] [Project => (Sort Option: Follow)] Filter ((#3 < 10) && (#3 > 0)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#3] [IndexScan By #2 => (0, 10) Covered => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#3] [Project => (Sort Option: Follow)] TableScan t1 -> [#3] [IndexScan By #2 => (0, 10) Covered => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] statement ok insert into t1 values(100000002, 100000002, 8); @@ -189,7 +189,7 @@ insert into t1 values(100000002, 100000002, 8); query T explain select distinct c2 from t1 where c2 < 10 and c2 > 0; ---- -Projection [#3] [Project => (Sort Option: Follow)] Aggregate [] -> Group By [#3] [StreamDistinct => (Sort Option: Follow)] Filter ((#3 < 10) && (#3 > 0)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#3] [IndexScan By #2 => (0, 10) Covered => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#3] [Project => (Sort Option: Follow)] Aggregate [] -> Group By [#3] [StreamDistinct => (Sort Option: Follow)] TableScan t1 -> [#3] [IndexScan By #2 => (0, 10) Covered => (Sort Option: OrderBy: (#3 Asc Nulls Last) ignore_prefix_len: 0)] statement ok delete from t1 where id = 100000002; @@ -201,13 +201,13 @@ drop index t1.c2_index; query T explain select c1, c2 from t1 where c1 < 10 and c1 > 0 and c2 >0 and c2 < 10; ---- -Projection [#2, #3] [Project => (Sort Option: Follow)] Filter ((((#2 < 10) && (#2 > 0)) && (#3 > 0)) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2, #3] [IndexScan By #3 => ((0), (10)) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#2, #3] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2, #3] [IndexScan By #3 => ((0), (10)) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] # composite covered projection reorder query T explain select c2, c1 from t1 where c1 < 10 and c1 > 0 and c2 > 0 and c2 < 10; ---- -Projection [#3, #2] [Project => (Sort Option: Follow)] Filter ((((#2 < 10) && (#2 > 0)) && (#3 > 0)) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2, #3] [IndexScan By #3 => ((0), (10)) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] +Projection [#3, #2] [Project => (Sort Option: Follow)] Filter ((#3 > 0) && (#3 < 10)), Is Having: false [Filter => (Sort Option: Follow)] TableScan t1 -> [#2, #3] [IndexScan By #3 => ((0), (10)) Covered => (Sort Option: OrderBy: (#2 Asc Nulls Last, #3 Asc Nulls Last) ignore_prefix_len: 0)] statement ok diff --git a/tpcc/README.md b/tpcc/README.md index f723fe82..6c5449f0 100644 --- a/tpcc/README.md +++ b/tpcc/README.md @@ -3,15 +3,8 @@ Run `make tpcc` (or `cargo run -p tpcc --release`) to exercise the workload on K Run `make tpcc-dual` to execute the workload on KiteSQL while mirroring every statement to an in-memory SQLite database; the runner asserts that both engines return identical tuples, making it ideal for correctness validation. This target runs for 60 seconds (`--measure-time 60`). Use `cargo run -p tpcc --release -- --backend dual --measure-time ` for a custom duration. -## Performance Matrix Script -Use `./scripts/run_tpcc_matrix.sh` to run the TPCC performance comparison in one shot. - -- The script measures `kitesql-lmdb`, `kitesql-rocksdb`, `sqlite-balanced`, and `sqlite-practical`. -- Main variants follow TPCC's default duration unless `TPCC_MAIN_MEASURE_TIME` is overridden. -- If a run fails with a duplicate-key style error, the script clears that backend's database and retries that variant once. -- Outputs are written to `tpcc/results//`, including `summary.md` and per-backend raw logs. - -For more stable local numbers on machines that thermal-throttle under sustained TPCC load, use the Python runner: +## Stable Performance Runner +Use the Python runner for local performance comparisons: ```shell ./scripts/run_tpcc_stable.py --build @@ -33,7 +26,7 @@ Example shorter smoke run: Before a formal local run, clear old TPCC data and Linux page cache so each matrix starts from a comparable state: ```shell -rm -rf target/tpcc-stable-run-data target/tpcc-run-data kite_sql_tpcc kite_sql_tpcc.sqlite +rm -rf target/tpcc-stable-run-data kite_sql_tpcc kite_sql_tpcc.sqlite sync sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' ``` @@ -43,11 +36,6 @@ The runner reads CPU temperatures from Linux `/sys/class/hwmon` and `/sys/class/ Duplicate-key note: The benchmark stores `history.h_date` as `timestamp(6)`, so high-throughput `Payment` transactions do not collide on second-level timestamp buckets. A duplicate-primary-key failure during TPCC should be treated as a run failure and investigated or rerun from a clean database. -Example: -```shell -TPCC_DUPLICATE_RETRY=1 ./scripts/run_tpcc_matrix.sh -``` - - i9-13900HX - 32.0 GB - KIOXIA-EXCERIA PLUS G3 SSD diff --git a/tpcc/src/main.rs b/tpcc/src/main.rs index 06849802..4a0d9925 100644 --- a/tpcc/src/main.rs +++ b/tpcc/src/main.rs @@ -809,6 +809,7 @@ impl_from_tpcc_error!(std::io::Error, Io); #[ignore] #[test] fn explain_tpcc() -> Result<(), DatabaseError> { + use kite_sql::db::CatalogKind; use kite_sql::db::{DataBaseBuilder, ResultIter}; use kite_sql::types::tuple::Tuple; @@ -849,7 +850,20 @@ fn explain_tpcc() -> Result<(), DatabaseError> { Ok(value) } - let database = DataBaseBuilder::path(tpcc_db_path()).build_lmdb()?; + let mut database = DataBaseBuilder::path(tpcc_db_path()).build_lmdb()?; + for table_name in [ + "item", + "warehouse", + "stock", + "district", + "customer", + "history", + "orders", + "new_orders", + "order_line", + ] { + database.load(CatalogKind::Table(table_name.to_string().into()))?; + } let mut tx = database.new_transaction()?; let (c_w_id, c_d_id, c_id, c_last, c_balance, c_data) = with_next_tuple(