diff --git a/Cargo.lock b/Cargo.lock index 58d9bd5c8..c71c2a802 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -276,6 +276,7 @@ dependencies = [ "chia-bls 0.16.0", "chia-client", "chia-consensus", + "chia-datalayer", "chia-protocol", "chia-puzzles", "chia-secp", @@ -370,6 +371,36 @@ dependencies = [ "thiserror", ] +[[package]] +name = "chia-datalayer" +version = "0.16.0" +dependencies = [ + "chia-protocol", + "chia-sha2", + "chia-traits 0.15.0", + "chia_streamable_macro 0.15.0", + "clvm-utils", + "clvmr", + "expect-test", + "hex", + "hex-literal", + "num-traits", + "open", + "percent-encoding", + "pyo3", + "rstest", + "thiserror", + "url", +] + +[[package]] +name = "chia-datalayer-fuzz" +version = "0.16.0" +dependencies = [ + "chia-datalayer", + "libfuzzer-sys", +] + [[package]] name = "chia-fuzz" version = "0.16.0" @@ -537,6 +568,7 @@ dependencies = [ "chia-bls 0.16.0", "chia-client", "chia-consensus", + "chia-datalayer", "chia-protocol", "chia-ssl", "chia-traits 0.15.0", @@ -909,6 +941,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dissimilar" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f8e79d1fbf76bdfbde321e902714bf6c49df88a7dda6fc682fc2979226962d" + [[package]] name = "ecdsa" version = "0.16.9" @@ -955,6 +993,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "expect-test" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e0be0a561335815e06dab7c62e50353134c796e7a6155402a64bcff66b6a5e0" +dependencies = [ + "dissimilar", + "once_cell", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -1004,6 +1052,15 @@ version = "0.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + [[package]] name = "futures" version = "0.3.30" @@ -1251,6 +1308,145 @@ version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + 
"icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.2.6" @@ -1273,6 +1469,15 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + [[package]] name = "is-terminal" version = "0.4.12" @@ -1284,6 +1489,16 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1405,6 +1620,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "log" version = "0.4.22" @@ -1575,6 +1796,17 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +[[package]] +name = "open" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a877bf6abd716642a53ef1b89fb498923a4afca5c754f9050b4d081c05c4b3" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "openssl" version = "0.10.68" @@ -1635,6 +1867,12 @@ dependencies = [ "sha2", ] +[[package]] +name = 
"pathdiff" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c5ce1153ab5b689d0c074c4e7fc613e942dfb7dd9eea5ab202d2ad91fe361" + [[package]] name = "pem" version = "3.0.4" @@ -1654,6 +1892,12 @@ dependencies = [ "base64ct", ] +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -2222,6 +2466,12 @@ dependencies = [ "der", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -2342,6 +2592,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -2455,12 +2715,35 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "utf-8" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2672,6 +2955,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "x509-parser" version = "0.16.0" @@ -2699,6 +2994,30 @@ dependencies = [ "time", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -2720,6 +3039,27 @@ dependencies = [ "syn", ] +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" @@ -2740,6 +3080,28 @@ dependencies = [ "syn", ] +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 15def5a9c..c8564fdf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ chia-bls = { workspace = true, optional = true } chia-secp = { workspace = true, optional = true } chia-client = { workspace = true, optional = true } chia-consensus = { workspace = true, optional = true } +chia-datalayer = { workspace = true, optional = true } chia-protocol = { workspace = true, optional = true } chia-ssl = { workspace = true, optional = true } chia-traits = { workspace = true, optional = true } @@ -75,6 +76,7 @@ default = [ "secp", "client", "consensus", + "datalayer", "protocol", "ssl", "traits", @@ -88,6 +90,7 @@ bls = ["dep:chia-bls", "clvm-traits/chia-bls"] secp = ["dep:chia-secp", "clvm-traits/chia-secp"] client = ["dep:chia-client"] consensus = ["dep:chia-consensus"] +datalayer = ["dep:chia-datalayer"] protocol = ["dep:chia-protocol"] ssl = ["dep:chia-ssl"] traits = ["dep:chia-traits"] @@ -107,6 +110,7 @@ chia_streamable_macro = { path = "./crates/chia_streamable_macro", version = "0. 
chia-bls = { path = "./crates/chia-bls", version = "0.16.0" } chia-client = { path = "./crates/chia-client", version = "0.16.0" } chia-consensus = { path = "./crates/chia-consensus", version = "0.16.0" } +chia-datalayer = { path = "./crates/chia-datalayer", version = "0.16.0" } chia-protocol = { path = "./crates/chia-protocol", version = "0.16.0" } chia-secp = { path = "./crates/chia-secp", version = "0.16.0" } chia-ssl = { path = "./crates/chia-ssl", version = "0.11.0" } @@ -118,6 +122,7 @@ clvm-utils = { path = "./crates/clvm-utils", version = "0.16.0" } clvm-derive = { path = "./crates/clvm-derive", version = "0.13.0" } chia-fuzz = { path = "./crates/chia-consensus/fuzz", version = "0.16.0" } chia-bls-fuzz = { path = "./crates/chia-bls/fuzz", version = "0.16.0" } +chia-datalayer-fuzz = { path = "./crates/chia-datalayer/fuzz", version = "0.16.0" } chia-protocol-fuzz = { path = "./crates/chia-protocol/fuzz", version = "0.16.0" } chia-puzzles-fuzz = { path = "./crates/chia-puzzles/fuzz", version = "0.16.0" } clvm-traits-fuzz = { path = "./crates/clvm-traits/fuzz", version = "0.16.0" } @@ -138,6 +143,7 @@ arbitrary = "1.4.1" rand = "0.8.5" criterion = "0.5.1" rstest = "0.22.0" +expect-test = "1.5.0" tokio = "1.42.0" tokio-tungstenite = "0.24.0" futures-util = "0.3.31" @@ -160,3 +166,6 @@ openssl = "0.10.68" k256 = "0.13.4" p256 = "0.13.2" rand_chacha = "0.3.1" +open = "5.3.0" +url = "2.5.2" +percent-encoding = "2.3.1" diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml new file mode 100644 index 000000000..74969ee0e --- /dev/null +++ b/crates/chia-datalayer/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "chia-datalayer" +version = "0.16.0" +edition = "2021" +license = "Apache-2.0" +description = "DataLayer modules for Chia blockchain" +authors = ["Chia Network, Inc. 
"] +homepage = "https://github.com/Chia-Network/chia_rs" +repository = "https://github.com/Chia-Network/chia_rs" + +[lints] +workspace = true + +[features] +py-bindings = ["dep:pyo3"] + +[lib] +crate-type = ["rlib"] + +[dependencies] +clvmr = { workspace = true } +num-traits = { workspace = true } +pyo3 = { workspace = true, optional = true } +thiserror = { workspace = true } +chia_streamable_macro = { workspace = true } +chia-traits = { workspace = true } +chia-sha2 = { workspace = true } +chia-protocol = { workspace = true } + +[dev-dependencies] +clvm-utils = { workspace = true } +expect-test = { workspace = true } +hex = { workspace = true } +hex-literal = { workspace = true } +open = { workspace = true } +percent-encoding = { workspace = true } +rstest = { workspace = true } +url = { workspace = true } + +[package.metadata.cargo-machete] +ignored = ["chia-sha2"] diff --git a/crates/chia-datalayer/fuzz/.gitignore b/crates/chia-datalayer/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/crates/chia-datalayer/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/chia-datalayer/fuzz/Cargo.toml b/crates/chia-datalayer/fuzz/Cargo.toml new file mode 100644 index 000000000..fa5cbaaeb --- /dev/null +++ b/crates/chia-datalayer/fuzz/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "chia-datalayer-fuzz" +version = "0.16.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.chia-datalayer] +path = ".." 
+ +[[bin]] +name = "merkle_blob_new" +path = "fuzz_targets/merkle_blob_new.rs" +test = false +doc = false +bench = false diff --git a/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs new file mode 100644 index 000000000..902519c35 --- /dev/null +++ b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs @@ -0,0 +1,18 @@ +#![no_main] + +use libfuzzer_sys::{arbitrary::Unstructured, fuzz_target}; + +use chia_datalayer::{MerkleBlob, BLOCK_SIZE}; + +fuzz_target!(|data: &[u8]| { + let mut unstructured = Unstructured::new(data); + let block_count = unstructured.int_in_range(0..=1000).unwrap(); + let mut bytes = vec![0u8; block_count * BLOCK_SIZE]; + unstructured.fill_buffer(&mut bytes).unwrap(); + + let Ok(mut blob) = MerkleBlob::new(bytes) else { + return; + }; + blob.check_integrity_on_drop = false; + let _ = blob.check_integrity(); +}); diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs new file mode 100644 index 000000000..fd4f335c9 --- /dev/null +++ b/crates/chia-datalayer/src/lib.rs @@ -0,0 +1,3 @@ +mod merkle; + +pub use merkle::*; diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs new file mode 100644 index 000000000..03d6a53c1 --- /dev/null +++ b/crates/chia-datalayer/src/merkle.rs @@ -0,0 +1,2307 @@ +#[cfg(feature = "py-bindings")] +use pyo3::{ + buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, FromPyObject, IntoPy, PyObject, + PyResult, Python, +}; + +use chia_protocol::Bytes32; +use chia_streamable_macro::Streamable; +use chia_traits::Streamable; +use clvmr::sha2::Sha256; +use num_traits::ToBytes; +use std::cmp::Ordering; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::iter::zip; +use std::ops::Range; +use thiserror::Error; + +#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Streamable)] +pub 
struct TreeIndex(u32); + +#[cfg(feature = "py-bindings")] +impl IntoPy for TreeIndex { + fn into_py(self, py: Python<'_>) -> PyObject { + self.0.into_py(py) + } +} + +impl std::fmt::Display for TreeIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +type Parent = Option; +type Hash = Bytes32; +/// Key and value ids are provided from outside of this code and are implemented as +/// the row id from sqlite which is a signed 8 byte integer. The actual key and +/// value data bytes will not be handled within this code, only outside. +#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Streamable)] +pub struct KvId(i64); + +#[cfg(feature = "py-bindings")] +impl IntoPy for KvId { + fn into_py(self, py: Python<'_>) -> PyObject { + self.0.into_py(py) + } +} + +impl std::fmt::Display for KvId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Debug, Error, PartialEq, Eq)] +pub enum Error { + #[error("unknown NodeType value: {0:?}")] + UnknownNodeTypeValue(u8), + + #[error("unknown dirty value: {0:?}")] + UnknownDirtyValue(u8), + + // TODO: don't use String here + #[error("failed loading metadata: {0}")] + FailedLoadingMetadata(String), + + // TODO: don't use String here + #[error("failed loading node: {0}")] + FailedLoadingNode(String), + + #[error("blob length must be a multiple of block count, found extra bytes: {0}")] + InvalidBlobLength(usize), + + #[error("key already present")] + KeyAlreadyPresent, + + #[error("requested insertion at root but tree not empty")] + UnableToInsertAsRootOfNonEmptyTree, + + #[error("unable to find a leaf")] + UnableToFindALeaf, + + #[error("error while finding a leaf: {0:?}")] + FailedWhileFindingALeaf(String), + + #[error("unknown key: {0:?}")] + UnknownKey(KvId), + + #[error("key not in key to index cache: {0:?}")] + 
IntegrityKeyNotInCache(KvId), + + #[error("key to index cache for {0:?} should be {1:?} got: {2:?}")] + IntegrityKeyToIndexCacheIndex(KvId, TreeIndex, TreeIndex), + + #[error("parent and child relationship mismatched: {0:?}")] + IntegrityParentChildMismatch(TreeIndex), + + #[error("found {0:?} leaves but key to index cache length is: {1}")] + IntegrityKeyToIndexCacheLength(usize, usize), + + #[error("unmatched parent -> child references found: {0}")] + IntegrityUnmatchedChildParentRelationships(usize), + + #[error("expected total node count {0:?} found: {1:?}")] + IntegrityTotalNodeCount(TreeIndex, usize), + + #[error("zero-length seed bytes not allowed")] + ZeroLengthSeedNotAllowed, + + #[error("block index out of range: {0:?}")] + BlockIndexOutOfRange(TreeIndex), + + #[error("node not a leaf: {0:?}")] + NodeNotALeaf(InternalNode), + + #[error("from streamable: {0:?}")] + Streaming(chia_traits::chia_error::Error), + + #[error("index not a child: {0}")] + IndexIsNotAChild(TreeIndex), + + #[error("cycle found")] + CycleFound, + + #[error("block index out of bounds: {0}")] + BlockIndexOutOfBounds(TreeIndex), +} + +// assumptions +// - root is at index 0 +// - any case with no keys will have a zero length blob + +// define the serialized block format +const METADATA_RANGE: Range = 0..METADATA_SIZE; +const METADATA_SIZE: usize = 2; +// TODO: figure out the real max better than trial and error? +const DATA_SIZE: usize = 53; +pub const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; +type BlockBytes = [u8; BLOCK_SIZE]; +type MetadataBytes = [u8; METADATA_SIZE]; +type DataBytes = [u8; DATA_SIZE]; +const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; + +fn streamable_from_bytes_ignore_extra_bytes(bytes: &[u8]) -> Result +where + T: Streamable, +{ + let mut cursor = std::io::Cursor::new(bytes); + // TODO: consider trusted mode? 
+ T::parse::(&mut cursor).map_err(Error::Streaming) +} + +#[repr(u8)] +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Streamable)] +pub enum NodeType { + Internal = 0, + Leaf = 1, +} + +#[allow(clippy::needless_pass_by_value)] +fn sha256_num(input: T) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input.to_be_bytes()); + + Bytes32::new(hasher.finalize()) +} + +fn sha256_bytes(input: &[u8]) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input); + + Bytes32::new(hasher.finalize()) +} + +fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(b"\x02"); + hasher.update(left_hash); + hasher.update(right_hash); + + Bytes32::new(hasher.finalize()) +} + +#[cfg_attr(feature = "py-bindings", pyclass(eq, eq_int))] +#[repr(u8)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Streamable)] +pub enum Side { + Left = 0, + Right = 1, +} + +#[cfg_attr(feature = "py-bindings", pyclass)] +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum InsertLocation { + // error: Unit variant `Auto` is not yet supported in a complex enum + // = help: change to a struct variant with no fields: `Auto { }` + // = note: the enum is complex because of non-unit variant `Leaf` + Auto {}, + AsRoot {}, + Leaf { index: TreeIndex, side: Side }, +} + +#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, Streamable)] +pub struct NodeMetadata { + // OPT: could save 1-2% of tree space by packing (and maybe don't do that) + pub node_type: NodeType, + pub dirty: bool, +} + +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] +pub struct InternalNode { + pub parent: Parent, + pub hash: Hash, + pub left: TreeIndex, + pub right: TreeIndex, +} + +impl InternalNode { + pub fn sibling_index(&self, index: TreeIndex) -> Result { + if index == self.right { + Ok(self.left) + } else if index == self.left { + Ok(self.right) + } else { + 
Err(Error::IndexIsNotAChild(index)) + } + } +} + +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] +pub struct LeafNode { + pub parent: Parent, + pub hash: Hash, + pub key: KvId, + pub value: KvId, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Node { + Internal(InternalNode), + Leaf(LeafNode), +} + +impl Node { + fn parent(&self) -> Parent { + match self { + Node::Internal(node) => node.parent, + Node::Leaf(node) => node.parent, + } + } + + fn set_parent(&mut self, parent: Parent) { + match self { + Node::Internal(node) => node.parent = parent, + Node::Leaf(node) => node.parent = parent, + } + } + + fn hash(&self) -> Hash { + match self { + Node::Internal(node) => node.hash, + Node::Leaf(node) => node.hash, + } + } + + fn set_hash(&mut self, hash: Hash) { + match self { + Node::Internal(ref mut node) => node.hash = hash, + Node::Leaf(ref mut node) => node.hash = hash, + } + } + + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { + Ok(match metadata.node_type { + NodeType::Internal => Node::Internal(streamable_from_bytes_ignore_extra_bytes(blob)?), + NodeType::Leaf => Node::Leaf(streamable_from_bytes_ignore_extra_bytes(blob)?), + }) + } + + pub fn to_bytes(&self) -> Result { + let mut base = match self { + Node::Internal(node) => node.to_bytes(), + Node::Leaf(node) => node.to_bytes(), + } + .map_err(Error::Streaming)?; + assert!(base.len() <= DATA_SIZE); + base.resize(DATA_SIZE, 0); + Ok(base + .as_slice() + .try_into() + .expect("padding was added above, might be too large")) + } + + fn expect_leaf(&self, message: &str) -> LeafNode { + let Node::Leaf(leaf) = self else { + let message = message.replace("<>", &format!("{self:?}")); + panic!("{}", message) + }; + + *leaf + } + + fn try_into_leaf(self) -> Result { + match self { + Node::Leaf(leaf) => Ok(leaf), + Node::Internal(internal) => 
Err(Error::NodeNotALeaf(internal)), + } + } +} + +#[cfg(feature = "py-bindings")] +impl IntoPy for Node { + fn into_py(self, py: Python<'_>) -> PyObject { + match self { + Node::Internal(node) => node.into_py(py), + Node::Leaf(node) => node.into_py(py), + } + } +} + +fn block_range(index: TreeIndex) -> Range { + let block_start = index.0 as usize * BLOCK_SIZE; + block_start..block_start + BLOCK_SIZE +} + +pub struct Block { + // TODO: metadata node type and node's type not verified for agreement + metadata: NodeMetadata, + node: Node, +} + +impl Block { + pub fn to_bytes(&self) -> Result { + let mut blob: BlockBytes = [0; BLOCK_SIZE]; + blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes().map_err(Error::Streaming)?); + blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()?); + + Ok(blob) + } + + pub fn from_bytes(blob: BlockBytes) -> Result { + let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); + let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); + let metadata = NodeMetadata::from_bytes(&metadata_blob) + .map_err(|message| Error::FailedLoadingMetadata(message.to_string()))?; + let node = Node::from_bytes(&metadata, &data_blob) + .map_err(|message| Error::FailedLoadingNode(message.to_string()))?; + + Ok(Block { metadata, node }) + } + + pub fn update_hash(&mut self, left: &Hash, right: &Hash) { + self.node.set_hash(internal_hash(left, right)); + self.metadata.dirty = false; + } +} + +fn get_free_indexes_and_keys_values_indexes( + blob: &Vec, +) -> Result<(HashSet, HashMap), Error> { + let index_count = blob.len() / BLOCK_SIZE; + + let mut seen_indexes: Vec = vec![false; index_count]; + let mut key_to_index: HashMap = HashMap::default(); + + for item in MerkleBlobLeftChildFirstIterator::new(blob) { + let (index, block) = item?; + seen_indexes[index.0 as usize] = true; + + if let Node::Leaf(leaf) = block.node { + key_to_index.insert(leaf.key, index); + } + } + + let mut free_indexes: HashSet = HashSet::new(); + for 
(index, seen) in seen_indexes.iter().enumerate() { + if !seen { + free_indexes.insert(TreeIndex(index as u32)); + } + } + + Ok((free_indexes, key_to_index)) +} + +/// Stores a DataLayer merkle tree in bytes and provides serialization on each access so that only +/// the parts presently in use are stored in active objects. The bytes are grouped as blocks of +/// equal size regardless of being internal vs. external nodes so that block indexes can be used +/// for references to particular nodes and readily converted to byte indexes. The leaf nodes +/// do not hold the DataLayer key and value data but instead an id for each of the key and value +/// such that the code using a merkle blob can store the key and value as they see fit. Each node +/// stores the hash for the merkle aspect of the tree. +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] +#[derive(Debug)] +pub struct MerkleBlob { + blob: Vec, + // TODO: would be nice for this to be deterministic ala a fifo set + free_indexes: HashSet, + key_to_index: HashMap, + // TODO: used by fuzzing, some cleaner way? 
making it cfg-dependent is annoying with + // the type stubs + pub check_integrity_on_drop: bool, +} + +impl MerkleBlob { + pub fn new(blob: Vec) -> Result { + let length = blob.len(); + let remainder = length % BLOCK_SIZE; + if remainder != 0 { + return Err(Error::InvalidBlobLength(remainder)); + } + + // TODO: maybe integrate integrity check here if quick enough + let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob)?; + + let self_ = Self { + blob, + free_indexes, + key_to_index, + check_integrity_on_drop: true, + }; + + Ok(self_) + } + + fn clear(&mut self) { + self.blob.clear(); + self.key_to_index.clear(); + self.free_indexes.clear(); + } + + pub fn insert( + &mut self, + key: KvId, + value: KvId, + hash: &Hash, + insert_location: InsertLocation, + ) -> Result { + if self.key_to_index.contains_key(&key) { + return Err(Error::KeyAlreadyPresent); + } + + let insert_location = match insert_location { + InsertLocation::Auto {} => self.get_random_insert_location_by_kvid(key)?, + _ => insert_location, + }; + + match insert_location { + InsertLocation::Auto {} => { + unreachable!("this should have been caught and processed above") + } + InsertLocation::AsRoot {} => { + if !self.key_to_index.is_empty() { + return Err(Error::UnableToInsertAsRootOfNonEmptyTree); + }; + self.insert_first(key, value, hash) + } + InsertLocation::Leaf { index, side } => { + let old_leaf = self.get_node(index)?.try_into_leaf()?; + + let internal_node_hash = match side { + Side::Left => internal_hash(hash, &old_leaf.hash), + Side::Right => internal_hash(&old_leaf.hash, hash), + }; + + let node = LeafNode { + parent: None, + hash: *hash, + key, + value, + }; + + if self.key_to_index.len() == 1 { + self.insert_second(node, &old_leaf, &internal_node_hash, side) + } else { + self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, side) + } + } + } + } + + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result { + let new_leaf_block 
= Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node::Leaf(LeafNode { + parent: None, + key, + value, + hash: *hash, + }), + }; + + let index = self.extend_index(); + self.insert_entry_to_blob(index, &new_leaf_block)?; + + Ok(index) + } + + fn insert_second( + &mut self, + mut node: LeafNode, + old_leaf: &LeafNode, + internal_node_hash: &Hash, + side: Side, + ) -> Result { + self.clear(); + let root_index = self.get_new_index(); + let left_index = self.get_new_index(); + let right_index = self.get_new_index(); + + let new_internal_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node::Internal(InternalNode { + parent: None, + left: left_index, + right: right_index, + hash: *internal_node_hash, + }), + }; + + self.insert_entry_to_blob(root_index, &new_internal_block)?; + + node.parent = Some(TreeIndex(0)); + + let nodes = [ + ( + match side { + Side::Left => right_index, + Side::Right => left_index, + }, + LeafNode { + parent: Some(TreeIndex(0)), + key: old_leaf.key, + value: old_leaf.value, + hash: old_leaf.hash, + }, + ), + ( + match side { + Side::Left => left_index, + Side::Right => right_index, + }, + node, + ), + ]; + + for (index, node) in nodes { + let block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node::Leaf(node), + }; + + self.insert_entry_to_blob(index, &block)?; + } + + Ok(nodes[1].0) + } + + fn insert_third_or_later( + &mut self, + mut node: LeafNode, + old_leaf: &LeafNode, + old_leaf_index: TreeIndex, + internal_node_hash: &Hash, + side: Side, + ) -> Result { + let new_leaf_index = self.get_new_index(); + let new_internal_node_index = self.get_new_index(); + + node.parent = Some(new_internal_node_index); + + let new_leaf_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node::Leaf(node), + }; + self.insert_entry_to_blob(new_leaf_index, 
&new_leaf_block)?; + + let (left_index, right_index) = match side { + Side::Left => (new_leaf_index, old_leaf_index), + Side::Right => (old_leaf_index, new_leaf_index), + }; + let new_internal_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node::Internal(InternalNode { + parent: old_leaf.parent, + left: left_index, + right: right_index, + hash: *internal_node_hash, + }), + }; + self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; + + let Some(old_parent_index) = old_leaf.parent else { + panic!("root found when not expected") + }; + + self.update_parent(old_leaf_index, Some(new_internal_node_index))?; + + let mut old_parent_block = self.get_block(old_parent_index)?; + if let Node::Internal(ref mut internal_node, ..) = old_parent_block.node { + if old_leaf_index == internal_node.left { + internal_node.left = new_internal_node_index; + } else if old_leaf_index == internal_node.right { + internal_node.right = new_internal_node_index; + } else { + panic!("child not a child of its parent"); + } + } else { + panic!("expected internal node but found leaf"); + }; + + self.insert_entry_to_blob(old_parent_index, &old_parent_block)?; + + self.mark_lineage_as_dirty(old_parent_index)?; + + Ok(new_leaf_index) + } + + pub fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), Error> + where + I: Iterator, + { + // OPT: would it be worthwhile to hold the entire blocks? 
+ let mut indexes = vec![]; + + if self.key_to_index.len() <= 1 { + for _ in 0..2 { + let Some(((key, value), hash)) = keys_values_hashes.next() else { + return Ok(()); + }; + self.insert(key, value, &hash, InsertLocation::Auto {})?; + } + } + + for ((key, value), hash) in keys_values_hashes { + let new_leaf_index = self.get_new_index(); + let new_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node::Leaf(LeafNode { + parent: None, + hash, + key, + value, + }), + }; + self.insert_entry_to_blob(new_leaf_index, &new_block)?; + indexes.push(new_leaf_index); + } + + // OPT: can we insert the top node first? maybe more efficient to update it's children + // than to update the parents of the children when traversing leaf to sub-root? + while indexes.len() > 1 { + let mut new_indexes = vec![]; + + for chunk in indexes.chunks(2) { + let [index_1, index_2] = match chunk { + [index] => { + new_indexes.push(*index); + continue; + } + [index_1, index_2] => [*index_1, *index_2], + _ => unreachable!( + "chunk should always be either one or two long and be handled above" + ), + }; + + let new_internal_node_index = self.get_new_index(); + + let mut hashes = vec![]; + for index in [index_1, index_2] { + let block = self.update_parent(index, Some(new_internal_node_index))?; + hashes.push(block.node.hash()); + } + + let new_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node::Internal(InternalNode { + parent: None, + hash: internal_hash(&hashes[0], &hashes[1]), + left: index_1, + right: index_2, + }), + }; + + self.insert_entry_to_blob(new_internal_node_index, &new_block)?; + new_indexes.push(new_internal_node_index); + } + + indexes = new_indexes; + } + + if indexes.len() == 1 { + // OPT: can we avoid this extra min height leaf traversal? 
+ let min_height_leaf = self.get_min_height_leaf()?; + self.insert_from_key(min_height_leaf.key, indexes[0], Side::Left)?; + }; + + Ok(()) + } + + fn insert_from_key( + &mut self, + old_leaf_key: KvId, + new_index: TreeIndex, + side: Side, + ) -> Result<(), Error> { + // NAME: consider name, we're inserting a subtree at a leaf + // TODO: seems like this ought to be fairly similar to regular insert + + // TODO: but what about the old leaf being the root... is that what the batch insert + // pre-filling of two leafs is about? if so, this needs to be making sure of that + // or something. + + struct Stuff { + index: TreeIndex, + hash: Hash, + } + + let new_internal_node_index = self.get_new_index(); + let (old_leaf_index, old_leaf, _old_block) = self.get_leaf_by_key(old_leaf_key)?; + let new_node = self.get_node(new_index)?; + + let new_stuff = Stuff { + index: new_index, + hash: new_node.hash(), + }; + let old_stuff = Stuff { + index: old_leaf_index, + hash: old_leaf.hash, + }; + let (left, right) = match side { + Side::Left => (new_stuff, old_stuff), + Side::Right => (old_stuff, new_stuff), + }; + let internal_node_hash = internal_hash(&left.hash, &right.hash); + + let block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node::Internal(InternalNode { + parent: old_leaf.parent, + hash: internal_node_hash, + left: left.index, + right: right.index, + }), + }; + self.insert_entry_to_blob(new_internal_node_index, &block)?; + self.update_parent(new_index, Some(new_internal_node_index))?; + + let Some(old_leaf_parent) = old_leaf.parent else { + // TODO: relates to comment at the beginning about assumptions about the tree etc + panic!("not handling this case"); + }; + + let mut parent = self.get_block(old_leaf_parent)?; + if let Node::Internal(ref mut internal) = parent.node { + match old_leaf_index { + x if x == internal.left => internal.left = new_internal_node_index, + x if x == internal.right => internal.right = 
new_internal_node_index, + _ => panic!("parent not a child a grandparent"), + } + } else { + panic!("not handling this case now...") + } + self.insert_entry_to_blob(old_leaf_parent, &parent)?; + self.update_parent(old_leaf_index, Some(new_internal_node_index))?; + + Ok(()) + } + + fn get_min_height_leaf(&self) -> Result { + let (_index, block) = MerkleBlobBreadthFirstIterator::new(&self.blob) + .next() + .ok_or(Error::UnableToFindALeaf)??; + + Ok(block + .node + .expect_leaf("unexpectedly found internal node first: <>")) + } + + pub fn delete(&mut self, key: KvId) -> Result<(), Error> { + let (leaf_index, leaf, _leaf_block) = self.get_leaf_by_key(key)?; + self.key_to_index.remove(&key); + + let Some(parent_index) = leaf.parent else { + self.clear(); + return Ok(()); + }; + + self.free_indexes.insert(leaf_index); + let maybe_parent = self.get_node(parent_index)?; + let Node::Internal(parent) = maybe_parent else { + panic!("parent node not internal: {maybe_parent:?}") + }; + let sibling_index = parent.sibling_index(leaf_index)?; + let mut sibling_block = self.get_block(sibling_index)?; + + let Some(grandparent_index) = parent.parent else { + sibling_block.node.set_parent(None); + self.insert_entry_to_blob(TreeIndex(0), &sibling_block)?; + + if let Node::Internal(node) = sibling_block.node { + for child_index in [node.left, node.right] { + self.update_parent(child_index, Some(TreeIndex(0)))?; + } + }; + + self.free_indexes.insert(sibling_index); + + return Ok(()); + }; + + self.free_indexes.insert(parent_index); + let mut grandparent_block = self.get_block(grandparent_index)?; + + sibling_block.node.set_parent(Some(grandparent_index)); + self.insert_entry_to_blob(sibling_index, &sibling_block)?; + + if let Node::Internal(ref mut internal) = grandparent_block.node { + match parent_index { + x if x == internal.left => internal.left = sibling_index, + x if x == internal.right => internal.right = sibling_index, + _ => panic!("parent not a child a grandparent"), + } + } 
else { + panic!("grandparent not an internal node") + } + self.insert_entry_to_blob(grandparent_index, &grandparent_block)?; + + self.mark_lineage_as_dirty(grandparent_index)?; + + Ok(()) + } + + pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), Error> { + let Ok((leaf_index, mut leaf, mut block)) = self.get_leaf_by_key(key) else { + self.insert(key, value, new_hash, InsertLocation::Auto {})?; + return Ok(()); + }; + + leaf.hash.clone_from(new_hash); + leaf.value = value; + // OPT: maybe just edit in place? + block.node = Node::Leaf(leaf); + self.insert_entry_to_blob(leaf_index, &block)?; + + if let Some(parent) = block.node.parent() { + self.mark_lineage_as_dirty(parent)?; + }; + + Ok(()) + } + + pub fn check_integrity(&self) -> Result<(), Error> { + let mut leaf_count: usize = 0; + let mut internal_count: usize = 0; + let mut child_to_parent: HashMap = HashMap::new(); + + for item in MerkleBlobParentFirstIterator::new(&self.blob) { + let (index, block) = item?; + if let Some(parent) = block.node.parent() { + if child_to_parent.remove(&index) != Some(parent) { + return Err(Error::IntegrityParentChildMismatch(index)); + } + } + match block.node { + Node::Internal(node) => { + internal_count += 1; + child_to_parent.insert(node.left, index); + child_to_parent.insert(node.right, index); + } + Node::Leaf(node) => { + leaf_count += 1; + let cached_index = self + .key_to_index + .get(&node.key) + .ok_or(Error::IntegrityKeyNotInCache(node.key))?; + if *cached_index != index { + return Err(Error::IntegrityKeyToIndexCacheIndex( + node.key, + index, + *cached_index, + )); + }; + assert!( + !self.free_indexes.contains(&index), + "{}", + format!("active index found in free index list: {index:?}") + ); + } + } + } + + let key_to_index_cache_length = self.key_to_index.len(); + if leaf_count != key_to_index_cache_length { + return Err(Error::IntegrityKeyToIndexCacheLength( + leaf_count, + key_to_index_cache_length, + )); + } + let total_count = 
leaf_count + internal_count + self.free_indexes.len(); + let extend_index = self.extend_index(); + if total_count != extend_index.0 as usize { + return Err(Error::IntegrityTotalNodeCount(extend_index, total_count)); + }; + if !child_to_parent.is_empty() { + return Err(Error::IntegrityUnmatchedChildParentRelationships( + child_to_parent.len(), + )); + } + + Ok(()) + } + + fn update_parent( + &mut self, + index: TreeIndex, + parent: Option, + ) -> Result { + let mut block = self.get_block(index)?; + block.node.set_parent(parent); + self.insert_entry_to_blob(index, &block)?; + + Ok(block) + } + + fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), Error> { + let mut next_index = Some(index); + + while let Some(this_index) = next_index { + let mut block = Block::from_bytes(self.get_block_bytes(this_index)?)?; + + if block.metadata.dirty { + return Ok(()); + } + + block.metadata.dirty = true; + self.insert_entry_to_blob(this_index, &block)?; + next_index = block.node.parent(); + } + + Ok(()) + } + + fn get_new_index(&mut self) -> TreeIndex { + match self.free_indexes.iter().next().copied() { + None => { + let index = self.extend_index(); + self.blob.extend_from_slice(&[0; BLOCK_SIZE]); + // NOTE: explicitly not marking index as free since that would hazard two + // sequential calls to this function through this path to both return + // the same index + index + } + Some(new_index) => { + self.free_indexes.remove(&new_index); + new_index + } + } + } + + // TODO: not really that random + fn get_random_insert_location_by_seed( + &self, + seed_bytes: &[u8], + ) -> Result { + let mut seed_bytes = Vec::from(seed_bytes); + + if self.blob.is_empty() { + return Ok(InsertLocation::AsRoot {}); + } + + // TODO: zero means left here but right below? + let side = if (seed_bytes.last().ok_or(Error::ZeroLengthSeedNotAllowed)? 
& 1 << 7) == 0 { + Side::Left + } else { + Side::Right + }; + let mut next_index = TreeIndex(0); + let mut node = self.get_node(next_index)?; + + loop { + for byte in &seed_bytes { + for bit in 0..8 { + match node { + Node::Leaf { .. } => { + return Ok(InsertLocation::Leaf { + index: next_index, + side, + }) + } + Node::Internal(internal) => { + next_index = if byte & (1 << bit) != 0 { + internal.left + } else { + internal.right + }; + node = self.get_node(next_index)?; + } + } + } + } + + seed_bytes = sha256_bytes(&seed_bytes).into(); + } + } + + fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { + let seed = sha256_num(seed.0); + + self.get_random_insert_location_by_seed(&seed) + } + + fn extend_index(&self) -> TreeIndex { + let blob_length = self.blob.len(); + let index: TreeIndex = TreeIndex((blob_length / BLOCK_SIZE) as u32); + let remainder = blob_length % BLOCK_SIZE; + assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); + + index + } + + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), Error> { + let new_block_bytes = block.to_bytes()?; + let extend_index = self.extend_index(); + match index.cmp(&extend_index) { + Ordering::Greater => return Err(Error::BlockIndexOutOfRange(index)), + Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes), + Ordering::Less => { + // OPT: lots of deserialization here for just the key + let old_block = self.get_block(index)?; + // TODO: should we be more careful about accidentally reading garbage like + // from a freshly gotten index + if !self.free_indexes.contains(&index) + && old_block.metadata.node_type == NodeType::Leaf + { + if let Node::Leaf(old_node) = old_block.node { + self.key_to_index.remove(&old_node.key); + }; + }; + self.blob[block_range(index)].copy_from_slice(&new_block_bytes); + } + } + + if let Node::Leaf(ref node) = block.node { + self.key_to_index.insert(node.key, index); + }; + + 
self.free_indexes.take(&index); + + Ok(()) + } + + fn get_block(&self, index: TreeIndex) -> Result { + Block::from_bytes(self.get_block_bytes(index)?) + } + + fn get_hash(&self, index: TreeIndex) -> Result { + Ok(self.get_block(index)?.node.hash()) + } + + fn get_block_bytes(&self, index: TreeIndex) -> Result { + Ok(self + .blob + .get(block_range(index)) + .ok_or(Error::BlockIndexOutOfRange(index))? + .try_into() + .unwrap_or_else(|e| panic!("failed getting block {index}: {e}"))) + } + + pub fn get_node(&self, index: TreeIndex) -> Result { + Ok(self.get_block(index)?.node) + } + + pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode, Block), Error> { + let index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; + let block = self.get_block(index)?; + let leaf = block.node.expect_leaf(&format!( + "expected leaf for index from key cache: {index} -> <>" + )); + + Ok((index, leaf, block)) + } + + pub fn get_parent_index(&self, index: TreeIndex) -> Result { + Ok(self.get_block(index)?.node.parent()) + } + + pub fn get_lineage_with_indexes( + &self, + index: TreeIndex, + ) -> Result, Error> { + let mut next_index = Some(index); + let mut lineage = vec![]; + + while let Some(this_index) = next_index { + let node = self.get_node(this_index)?; + next_index = node.parent(); + lineage.push((index, node)); + } + + Ok(lineage) + } + + pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, Error> { + let mut next_index = Some(index); + let mut lineage: Vec = vec![]; + + while let Some(this_index) = next_index { + lineage.push(this_index); + next_index = self.get_parent_index(this_index)?; + } + + Ok(lineage) + } + + // pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { + // <&Self as IntoIterator>::into_iter(self) + // } + + pub fn calculate_lazy_hashes(&mut self) -> Result<(), Error> { + // OPT: yeah, storing the whole set of blocks via collect is not great + for item in 
MerkleBlobLeftChildFirstIterator::new(&self.blob).collect::>() { + let (index, mut block) = item?; + // OPT: really want a pruned traversal, not filter + if !block.metadata.dirty { + continue; + } + + let Node::Internal(ref leaf) = block.node else { + panic!("leaves should not be dirty") + }; + // OPT: obviously inefficient to re-get/deserialize these blocks inside + // an iteration that's already doing that + let left_hash = self.get_hash(leaf.left)?; + let right_hash = self.get_hash(leaf.right)?; + block.update_hash(&left_hash, &right_hash); + self.insert_entry_to_blob(index, &block)?; + } + + Ok(()) + } +} + +impl PartialEq for MerkleBlob { + fn eq(&self, other: &Self) -> bool { + // NOTE: this is checking tree structure equality, not serialized bytes equality + for item in zip( + MerkleBlobLeftChildFirstIterator::new(&self.blob), + MerkleBlobLeftChildFirstIterator::new(&other.blob), + ) { + let (Ok((_, self_block)), Ok((_, other_block))) = item else { + // TODO: it's an error though, hmm + return false; + }; + if (self_block.metadata.dirty || other_block.metadata.dirty) + || self_block.node.hash() != other_block.node.hash() + { + return false; + } + match self_block.node { + // NOTE: this is effectively checked by the controlled overall traversal + Node::Internal(..) => {} + Node::Leaf(..) 
=> return self_block.node == other_block.node, + } + } + + true + } +} + +// impl<'a> IntoIterator for &'a MerkleBlob { +// type Item = (TreeIndex, Block); +// type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; +// +// fn into_iter(self) -> Self::IntoIter { +// MerkleBlobLeftChildFirstIterator::new(&self.blob) +// } +// } + +#[cfg(feature = "py-bindings")] +#[pymethods] +impl MerkleBlob { + #[allow(clippy::needless_pass_by_value)] + #[new] + pub fn py_init(blob: PyBuffer) -> PyResult { + assert!( + blob.is_c_contiguous(), + "from_bytes() must be called with a contiguous buffer" + ); + #[allow(unsafe_code)] + let slice = + unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; + + Self::new(Vec::from(slice)).map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[pyo3(name = "insert", signature = (key, value, hash, reference_kid = None, side = None))] + pub fn py_insert( + &mut self, + key: KvId, + value: KvId, + hash: Hash, + reference_kid: Option, + // TODO: should be a Side, but python has a different Side right now + side: Option, + ) -> PyResult<()> { + let insert_location = match (reference_kid, side) { + (None, None) => InsertLocation::Auto {}, + (Some(key), Some(side)) => InsertLocation::Leaf { + index: *self + .key_to_index + .get(&key) + .ok_or(PyValueError::new_err(format!( + "unknown key id passed as insert location reference: {key}" + )))?, + side: Side::from_bytes(&[side])?, + }, + _ => { + return Err(PyValueError::new_err( + "must specify neither or both of reference_kid and side", + )); + } + }; + self.insert(key, value, &hash, insert_location) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + Ok(()) + } + + #[pyo3(name = "delete")] + pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { + self.delete(key) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[pyo3(name = "get_raw_node")] + pub fn py_get_raw_node(&mut self, index: TreeIndex) -> PyResult { + self.get_node(index) + .map_err(|e| 
PyValueError::new_err(e.to_string())) + } + + #[pyo3(name = "calculate_lazy_hashes")] + pub fn py_calculate_lazy_hashes(&mut self) -> PyResult<()> { + self.calculate_lazy_hashes() + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + #[pyo3(name = "get_lineage_with_indexes")] + pub fn py_get_lineage_with_indexes( + &self, + index: TreeIndex, + py: Python<'_>, + ) -> PyResult { + let list = pyo3::types::PyList::empty_bound(py); + + for (index, node) in self + .get_lineage_with_indexes(index) + .map_err(|e| PyValueError::new_err(e.to_string()))? + { + use pyo3::conversion::IntoPy; + use pyo3::types::PyListMethods; + list.append((index.into_py(py), node.into_py(py)))?; + } + + Ok(list.into()) + } + + #[pyo3(name = "get_nodes_with_indexes")] + pub fn py_get_nodes_with_indexes(&self, py: Python<'_>) -> PyResult { + let list = pyo3::types::PyList::empty_bound(py); + + for item in MerkleBlobParentFirstIterator::new(&self.blob) { + use pyo3::conversion::IntoPy; + use pyo3::types::PyListMethods; + let (index, block) = item.map_err(|e| PyValueError::new_err(e.to_string()))?; + list.append((index.into_py(py), block.node.into_py(py)))?; + } + + Ok(list.into()) + } + + #[pyo3(name = "empty")] + pub fn py_empty(&self) -> PyResult { + Ok(self.key_to_index.is_empty()) + } + + #[pyo3(name = "get_root_hash")] + pub fn py_get_root_hash(&self) -> PyResult> { + self.py_get_hash_at_index(TreeIndex(0)) + } + + #[pyo3(name = "get_hash_at_index")] + pub fn py_get_hash_at_index(&self, index: TreeIndex) -> PyResult> { + if self.key_to_index.is_empty() { + return Ok(None); + } + + let block = self + .get_block(index) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + if block.metadata.dirty { + return Err(PyValueError::new_err("root hash is dirty")); + } + + Ok(Some(block.node.hash())) + } + + #[pyo3(name = "batch_insert")] + pub fn py_batch_insert( + &mut self, + keys_values: Vec<(KvId, KvId)>, + hashes: Vec, + ) -> PyResult<()> { + if keys_values.len() != hashes.len() { + 
return Err(PyValueError::new_err( + "key/value and hash collection lengths must match", + )); + } + + self.batch_insert(&mut zip(keys_values, hashes)) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + Ok(()) + } + + #[pyo3(name = "__len__")] + pub fn py_len(&self) -> PyResult { + Ok(self.blob.len()) + } +} + +fn try_get_block(blob: &[u8], index: TreeIndex) -> Result { + // TODO: check limits and return error + let range = block_range(index); + let block_bytes: BlockBytes = blob + .get(range) + .ok_or(Error::BlockIndexOutOfBounds(index))? + .try_into() + .unwrap(); + + Block::from_bytes(block_bytes) +} + +struct MerkleBlobLeftChildFirstIteratorItem { + visited: bool, + index: TreeIndex, +} + +pub struct MerkleBlobLeftChildFirstIterator<'a> { + blob: &'a Vec, + deque: VecDeque, + already_queued: HashSet, +} + +impl<'a> MerkleBlobLeftChildFirstIterator<'a> { + fn new(blob: &'a Vec) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: TreeIndex(0), + }); + } + + Self { + blob, + deque, + already_queued: HashSet::new(), + } + } +} + +impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { + type Item = Result<(TreeIndex, Block), Error>; + + fn next(&mut self) -> Option { + // left sibling first, children before parents + + loop { + let item = self.deque.pop_front()?; + let block = match try_get_block(self.blob, item.index) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; + + match block.node { + Node::Leaf(..) 
=> return Some(Ok((item.index, block))), + Node::Internal(ref node) => { + if item.visited { + return Some(Ok((item.index, block))); + }; + + if self.already_queued.contains(&item.index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(item.index); + + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: true, + index: item.index, + }); + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: node.right, + }); + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: node.left, + }); + } + } + } + } +} + +pub struct MerkleBlobParentFirstIterator<'a> { + blob: &'a Vec, + deque: VecDeque, + already_queued: HashSet, +} + +impl<'a> MerkleBlobParentFirstIterator<'a> { + fn new(blob: &'a Vec) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(TreeIndex(0)); + } + + Self { + blob, + deque, + already_queued: HashSet::new(), + } + } +} + +impl Iterator for MerkleBlobParentFirstIterator<'_> { + type Item = Result<(TreeIndex, Block), Error>; + + fn next(&mut self) -> Option { + // left sibling first, parents before children + + let index = self.deque.pop_front()?; + let block = match try_get_block(self.blob, index) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; + + if let Node::Internal(ref node) = block.node { + if self.already_queued.contains(&index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(index); + + self.deque.push_back(node.left); + self.deque.push_back(node.right); + } + + Some(Ok((index, block))) + } +} + +pub struct MerkleBlobBreadthFirstIterator<'a> { + blob: &'a Vec, + deque: VecDeque, + already_queued: HashSet, +} + +impl<'a> MerkleBlobBreadthFirstIterator<'a> { + #[allow(unused)] + fn new(blob: &'a Vec) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(TreeIndex(0)); + } + + Self { + blob, + deque, + already_queued: 
HashSet::new(), + } + } +} + +impl Iterator for MerkleBlobBreadthFirstIterator<'_> { + type Item = Result<(TreeIndex, Block), Error>; + + fn next(&mut self) -> Option { + // left sibling first, parent depth before child depth + + loop { + let index = self.deque.pop_front()?; + let block = match try_get_block(self.blob, index) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; + + match block.node { + Node::Leaf(..) => return Some(Ok((index, block))), + Node::Internal(node) => { + if self.already_queued.contains(&index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(index); + + self.deque.push_back(node.left); + self.deque.push_back(node.right); + } + } + } + } +} + +#[cfg(any(test, debug_assertions))] +impl Drop for MerkleBlob { + fn drop(&mut self) { + if self.check_integrity_on_drop { + self.check_integrity() + .expect("integrity check failed while dropping merkle blob"); + } + } +} + +#[cfg(test)] +mod dot; +#[cfg(test)] +mod tests { + use super::*; + use crate::merkle::dot::DotLines; + use expect_test::{expect, Expect}; + use rstest::{fixture, rstest}; + use std::time::{Duration, Instant}; + + fn open_dot(_lines: &mut DotLines) { + // crate::merkle::dot::open_dot(_lines); + } + + impl MerkleBlob { + fn get_key_value_map(&self) -> HashMap { + let mut key_value = HashMap::new(); + for key in self.key_to_index.keys() { + // silly waste of having the index, but test code and type narrowing so, ok i guess + let (_leaf_index, leaf, _leaf_block) = self.get_leaf_by_key(*key).unwrap(); + key_value.insert(*key, leaf.value); + } + + key_value + } + } + + #[test] + fn test_node_type_serialized_values() { + assert_eq!(NodeType::Internal as u8, 0); + assert_eq!(NodeType::Leaf as u8, 1); + + for node_type in [NodeType::Internal, NodeType::Leaf] { + assert_eq!( + Streamable::to_bytes(&node_type).unwrap()[0], + node_type as u8, + ); + assert_eq!( + streamable_from_bytes_ignore_extra_bytes::(&[node_type as u8]).unwrap(), + node_type, + ); + 
} + } + + #[test] + fn test_internal_hash() { + // in Python: Program.to((left_hash, right_hash)).get_tree_hash_precalc(left_hash, right_hash) + + let left: Hash = (0u8..32).collect::>().try_into().unwrap(); + let right: Hash = (32u8..64).collect::>().try_into().unwrap(); + + assert_eq!( + internal_hash(&left, &right), + Bytes32::new( + clvm_utils::tree_hash_pair( + clvm_utils::TreeHash::new(left.to_bytes()), + clvm_utils::TreeHash::new(right.to_bytes()), + ) + .to_bytes() + ), + ); + } + + #[rstest] + fn test_node_metadata_from_to( + #[values(false, true)] dirty: bool, + #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, + ) { + let bytes: [u8; 2] = [Streamable::to_bytes(&node_type).unwrap()[0], dirty as u8]; + let object = NodeMetadata::from_bytes(&bytes).unwrap(); + assert_eq!(object, NodeMetadata { node_type, dirty },); + assert_eq!(object.to_bytes().unwrap(), bytes); + } + + #[fixture] + fn small_blob() -> MerkleBlob { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + + blob.insert( + KvId(0x0001_0203_0405_0607), + KvId(0x1011_1213_1415_1617), + &sha256_num(0x1020), + InsertLocation::Auto {}, + ) + .unwrap(); + + blob.insert( + KvId(0x2021_2223_2425_2627), + KvId(0x3031_3233_3435_3637), + &sha256_num(0x2030), + InsertLocation::Auto {}, + ) + .unwrap(); + + blob + } + + #[fixture] + fn traversal_blob(mut small_blob: MerkleBlob) -> MerkleBlob { + small_blob + .insert( + KvId(103), + KvId(204), + &sha256_num(0x1324), + InsertLocation::Leaf { + index: TreeIndex(1), + side: Side::Right, + }, + ) + .unwrap(); + small_blob + .insert( + KvId(307), + KvId(404), + &sha256_num(0x9183), + InsertLocation::Leaf { + index: TreeIndex(3), + side: Side::Right, + }, + ) + .unwrap(); + + small_blob + } + + #[rstest] + fn test_get_lineage(small_blob: MerkleBlob) { + let lineage = small_blob.get_lineage_with_indexes(TreeIndex(2)).unwrap(); + for (_, node) in &lineage { + println!("{node:?}"); + } + assert_eq!(lineage.len(), 2); + let (_, last_node) = 
lineage.last().unwrap(); + assert_eq!(last_node.parent(), None); + } + + #[rstest] + #[case::right(0, TreeIndex(2), Side::Left)] + #[case::left(0xff, TreeIndex(1), Side::Right)] + fn test_get_random_insert_location_by_seed( + #[case] seed: u8, + #[case] expected_index: TreeIndex, + #[case] expected_side: Side, + small_blob: MerkleBlob, + ) { + let location = small_blob + .get_random_insert_location_by_seed(&[seed; 32]) + .unwrap(); + + assert_eq!( + location, + InsertLocation::Leaf { + index: expected_index, + side: expected_side + }, + ); + } + + #[test] + fn test_get_random_insert_location_by_seed_with_seed_too_short() { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + let seed = [0xff]; + let layer_count = 8 * seed.len() + 10; + + for n in 0..layer_count { + let key = KvId((n + 100) as i64); + let value = KvId((n + 100) as i64); + let hash = sha256_num(key.0); + let insert_location = blob.get_random_insert_location_by_seed(&seed).unwrap(); + blob.insert(key, value, &hash, insert_location).unwrap(); + } + + let location = blob.get_random_insert_location_by_seed(&seed).unwrap(); + + let InsertLocation::Leaf { index, .. } = location else { + panic!() + }; + let lineage = blob.get_lineage_indexes(index).unwrap(); + + assert_eq!(lineage.len(), layer_count); + assert!(lineage.len() > seed.len() * 8); + } + + #[rstest] + fn test_just_insert_a_bunch( + // just allowing parallelism of testing 100,000 inserts total + #[values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)] n: i64, + ) { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let mut total_time = Duration::new(0, 0); + + let count = 10_000; + let m = count * n; + for i in m..(m + count) { + let start = Instant::now(); + merkle_blob + // NOTE: yeah this hash is garbage + .insert(KvId(i), KvId(i), &sha256_num(i), InsertLocation::Auto {}) + .unwrap(); + let end = Instant::now(); + total_time += end.duration_since(start); + } + + println!("total time: {total_time:?}"); + // TODO: check, well... 
something + + merkle_blob.calculate_lazy_hashes().unwrap(); + } + + #[test] + fn test_delete_in_reverse_creates_matching_trees() { + const COUNT: usize = 10; + let mut dots = vec![]; + + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + let mut reference_blobs = vec![]; + + let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| KvId(i as i64)); + + for key_value_id in key_value_ids { + let hash: Hash = sha256_num(key_value_id.0); + + println!("inserting: {key_value_id}"); + merkle_blob.calculate_lazy_hashes().unwrap(); + reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); + merkle_blob + .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto {}) + .unwrap(); + dots.push(merkle_blob.to_dot().unwrap().dump()); + } + + merkle_blob.check_integrity().unwrap(); + + for key_value_id in key_value_ids.iter().rev() { + println!("deleting: {key_value_id}"); + merkle_blob.delete(*key_value_id).unwrap(); + merkle_blob.calculate_lazy_hashes().unwrap(); + assert_eq!(merkle_blob, reference_blobs[key_value_id.0 as usize]); + dots.push(merkle_blob.to_dot().unwrap().dump()); + } + } + + #[test] + fn test_insert_first() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id = KvId(1); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id.0), + InsertLocation::Auto {}, + ) + .unwrap(); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); + + assert_eq!(merkle_blob.key_to_index.len(), 1); + } + + #[rstest] + fn test_insert_choosing_side( + #[values(Side::Left, Side::Right)] side: Side, + #[values(1, 2)] pre_count: usize, + ) { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let mut last_key: KvId = KvId(0); + for i in 1..=pre_count { + let key = KvId(i as i64); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); + merkle_blob + .insert(key, key, &sha256_num(key.0), InsertLocation::Auto {}) + 
.unwrap(); + last_key = key; + } + + let key_value_id: KvId = KvId((pre_count + 1) as i64); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id.0), + InsertLocation::Leaf { + index: merkle_blob.key_to_index[&last_key], + side, + }, + ) + .unwrap(); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); + + let sibling = merkle_blob + .get_node(merkle_blob.key_to_index[&last_key]) + .unwrap(); + let parent = merkle_blob.get_node(sibling.parent().unwrap()).unwrap(); + let Node::Internal(internal) = parent else { + panic!() + }; + + let left = merkle_blob + .get_node(internal.left) + .unwrap() + .expect_leaf("<>"); + let right = merkle_blob + .get_node(internal.right) + .unwrap() + .expect_leaf("<>"); + + let expected_keys: [KvId; 2] = match side { + Side::Left => [KvId(pre_count as i64 + 1), KvId(pre_count as i64)], + Side::Right => [KvId(pre_count as i64), KvId(pre_count as i64 + 1)], + }; + assert_eq!([left.key, right.key], expected_keys); + } + + #[test] + fn test_delete_last() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id = KvId(1); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id.0), + InsertLocation::Auto {}, + ) + .unwrap(); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); + merkle_blob.check_integrity().unwrap(); + + merkle_blob.delete(key_value_id).unwrap(); + + assert_eq!(merkle_blob.key_to_index.len(), 0); + } + + #[rstest] + fn test_delete_frees_index(mut small_blob: MerkleBlob) { + let key = KvId(0x0001_0203_0405_0607); + let index = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + + assert_eq!( + small_blob.free_indexes, + HashSet::from([index, TreeIndex(2)]) + ); + } + + #[rstest] + fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { + 
open_dot(small_blob.to_dot().unwrap().set_note("initial")); + let key = KvId(0x0001_0203_0405_0607); + let _ = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + open_dot(small_blob.to_dot().unwrap().set_note("after delete")); + + let expected = HashSet::from([TreeIndex(1), TreeIndex(2)]); + assert_eq!(small_blob.free_indexes, expected); + } + + #[rstest] + fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { + println!("{}", hex::encode(small_blob.blob.clone())); + } + + #[test] + fn test_node_type_from_u8_invalid() { + let invalid_value = 2; + let actual = streamable_from_bytes_ignore_extra_bytes::<NodeType>(&[invalid_value as u8]); + actual.expect_err("invalid node type value should fail"); + } + + #[test] + fn test_node_specific_sibling_index_panics_for_unknown_sibling() { + let node = InternalNode { + parent: None, + hash: sha256_num(0), + left: TreeIndex(0), + right: TreeIndex(1), + }; + let index = TreeIndex(2); + assert_eq!( + node.sibling_index(TreeIndex(2)), + Err(Error::IndexIsNotAChild(index)) + ); + } + + #[rstest] + fn test_get_free_indexes(small_blob: MerkleBlob) { + let mut blob = small_blob.blob.clone(); + let expected_free_index = TreeIndex((blob.len() / BLOCK_SIZE) as u32); + blob.extend_from_slice(&[0; BLOCK_SIZE]); + let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob).unwrap(); + assert_eq!(free_indexes, HashSet::from([expected_free_index])); + } + + #[test] + fn test_merkle_blob_new_errs_for_nonmultiple_of_block_length() { + MerkleBlob::new(vec![1]).expect_err("invalid length should fail"); + } + + #[rstest] + fn test_upsert_inserts(small_blob: MerkleBlob) { + let key = KvId(1234); + assert!(!small_blob.key_to_index.contains_key(&key)); + let value = KvId(5678); + + let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + insert_blob + .insert(key, value, &sha256_num(key.0), InsertLocation::Auto {}) + .unwrap(); + open_dot(insert_blob.to_dot().unwrap().set_note("first after")); + + let mut
upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + upsert_blob.upsert(key, value, &sha256_num(key.0)).unwrap(); + open_dot(upsert_blob.to_dot().unwrap().set_note("first after")); + + assert_eq!(insert_blob.blob, upsert_blob.blob); + } + + #[rstest] + fn test_upsert_upserts(mut small_blob: MerkleBlob) { + let before_blocks = + MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::<Vec<_>>(); + let (key, index) = small_blob.key_to_index.iter().next().unwrap(); + let original = small_blob.get_node(*index).unwrap().expect_leaf("<>"); + let new_value = KvId(original.value.0 + 1); + + small_blob.upsert(*key, new_value, &original.hash).unwrap(); + + let after_blocks = + MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::<Vec<_>>(); + + assert_eq!(before_blocks.len(), after_blocks.len()); + for item in zip(before_blocks, after_blocks) { + let ((before_index, before_block), (after_index, after_block)) = + (item.0.unwrap(), item.1.unwrap()); + assert_eq!(before_block.node.parent(), after_block.node.parent()); + assert_eq!(before_index, after_index); + let before: LeafNode = match before_block.node { + Node::Leaf(leaf) => leaf, + Node::Internal(internal) => { + let Node::Internal(after) = after_block.node else { + panic!() + }; + assert_eq!(internal.left, after.left); + assert_eq!(internal.right, after.right); + continue; + } + }; + let Node::Leaf(after) = after_block.node else { + panic!() + }; + assert_eq!(before.key, after.key); + if before.key == original.key { + assert_eq!(after.value, new_value); + } else { + assert_eq!(before.value, after.value); + } + } + } + + #[test] + fn test_double_insert_fails() { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + let kv = KvId(0); + blob.insert(kv, kv, &Bytes32::new([0u8; 32]), InsertLocation::Auto {}) + .unwrap(); + blob.insert(kv, kv, &Bytes32::new([0u8; 32]), InsertLocation::Auto {}) + .expect_err(""); + } + + #[rstest] + fn test_batch_insert( + #[values(0, 1, 2, 10)] pre_inserts: usize, +
#[values(0, 1, 2, 8, 9)] count: usize, + ) { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + for i in 0..pre_inserts { + let i = KvId(i as i64); + blob.insert(i, i, &sha256_num(i.0), InsertLocation::Auto {}) + .unwrap(); + } + open_dot(blob.to_dot().unwrap().set_note("initial")); + + let mut batch: Vec<((KvId, KvId), Hash)> = vec![]; + + let mut batch_map = HashMap::new(); + for i in pre_inserts..(pre_inserts + count) { + let i = KvId(i as i64); + batch.push(((i, i), sha256_num(i.0))); + batch_map.insert(i, i); + } + + let before = blob.get_key_value_map(); + blob.batch_insert(batch.into_iter()).unwrap(); + let after = blob.get_key_value_map(); + + open_dot( + blob.to_dot() + .unwrap() + .set_note(&format!("after batch insert of {count} values")), + ); + + let mut expected = before.clone(); + expected.extend(batch_map); + + assert_eq!(after, expected); + } + + fn iterator_test_reference(index: TreeIndex, block: &Block) -> (u32, NodeType, i64, i64, Hash) { + match block.node { + Node::Leaf(leaf) => ( + index.0, + block.metadata.node_type, + leaf.key.0, + leaf.value.0, + block.node.hash(), + ), + Node::Internal(internal) => ( + index.0, + block.metadata.node_type, + internal.left.0 as i64, + internal.right.0 as i64, + block.node.hash(), + ), + } + } + + #[rstest] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::left_child_first( + "left child first", + MerkleBlobLeftChildFirstIterator::new, + expect![[r#" + [ + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ( + 6, + Internal, + 3, + 5, + b946284149e4f4a0e767ef2feb397533fb112bf4d99c887348cec4438e38c1ce, + ), + ( + 4, + Internal, + 1, + 6, + 
eee0c40977ba1c0e16a467f30f64d9c2579ff25dd01913e33962c3f1db86c2ea, + ), + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 0, + Internal, + 4, + 2, + 0e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932, + ), + ] + "#]], + )] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::parent_first( + "parent first", + MerkleBlobParentFirstIterator::new, + expect![[r#" + [ + ( + 0, + Internal, + 4, + 2, + 0e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932, + ), + ( + 4, + Internal, + 1, + 6, + eee0c40977ba1c0e16a467f30f64d9c2579ff25dd01913e33962c3f1db86c2ea, + ), + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 6, + Internal, + 3, + 5, + b946284149e4f4a0e767ef2feb397533fb112bf4d99c887348cec4438e38c1ce, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ] + "#]])] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::breadth_first( + "breadth first", + MerkleBlobBreadthFirstIterator::new, + expect![[r#" + [ + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ] + "#]])] + fn 
test_iterators<'a, F, T>( + #[case] note: &str, + #[case] iterator_new: F, + #[case] expected: Expect, + #[by_ref] traversal_blob: &'a MerkleBlob, + ) where + F: Fn(&'a Vec<u8>) -> T, + T: Iterator<Item = Result<(TreeIndex, Block), Error>>, + { + let mut dot_actual = traversal_blob.to_dot().unwrap(); + dot_actual.set_note(note); + + let mut actual = vec![]; + { + let blob: &Vec<u8> = &traversal_blob.blob; + for item in iterator_new(blob) { + let (index, block) = item.unwrap(); + actual.push(iterator_test_reference(index, &block)); + dot_actual.push_traversal(index); + } + } + + traversal_blob.to_dot().unwrap(); + + open_dot(&mut dot_actual); + + expected.assert_debug_eq(&actual); + } + + #[rstest] + fn test_root_insert_location_when_not_empty(mut small_blob: MerkleBlob) { + small_blob + .insert(KvId(0), KvId(0), &sha256_num(0), InsertLocation::AsRoot {}) + .expect_err("tree not empty so inserting to root should fail"); + } + + #[rstest] + fn test_free_index_reused(mut small_blob: MerkleBlob) { + // there must be enough nodes to avoid the few-node insertion methods that clear the blob + let count = 5; + for n in 0..count { + small_blob + .insert(KvId(n), KvId(n), &sha256_num(n), InsertLocation::Auto {}) + .unwrap(); + } + let (key, index) = { + let (key, index) = small_blob.key_to_index.iter().next().unwrap(); + (*key, *index) + }; + let expected_length = small_blob.blob.len(); + assert!(!small_blob.free_indexes.contains(&index)); + small_blob.delete(key).unwrap(); + assert!(small_blob.free_indexes.contains(&index)); + let free_indexes = small_blob.free_indexes.clone(); + assert_eq!(free_indexes.len(), 2); + let new_index = small_blob + .insert( + KvId(count), + KvId(count), + &sha256_num(count), + InsertLocation::Auto {}, + ) + .unwrap(); + assert_eq!(small_blob.blob.len(), expected_length); + assert!(free_indexes.contains(&new_index)); + assert!(small_blob.free_indexes.is_empty()); + } +} diff --git a/crates/chia-datalayer/src/merkle/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs new file mode 100644 index
000000000..9689cc354 --- /dev/null +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -0,0 +1,134 @@ +use crate::merkle::{ + Error, InternalNode, LeafNode, MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, TreeIndex, +}; +use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; +use url::Url; + +pub struct DotLines { + pub nodes: Vec<String>, + pub connections: Vec<String>, + pub pair_boxes: Vec<String>, + pub traversal: Vec<String>, + pub note: String, + pub last_traversed_index: Option<TreeIndex>, +} + +impl Default for DotLines { + fn default() -> Self { + Self::new() + } +} + +impl DotLines { + pub fn new() -> Self { + Self { + nodes: vec![], + connections: vec![], + pair_boxes: vec![], + traversal: vec![], + note: String::new(), + last_traversed_index: None, + } + } + + pub fn push(&mut self, mut other: DotLines) { + self.nodes.append(&mut other.nodes); + self.connections.append(&mut other.connections); + self.pair_boxes.append(&mut other.pair_boxes); + self.traversal.append(&mut other.traversal); + } + + pub fn push_traversal(&mut self, index: TreeIndex) { + if let Some(last_index) = self.last_traversed_index { + self.traversal.push(format!( + r#"node_{last_index} -> node_{index} [constraint=false; color="red"]"# + )); + } + self.last_traversed_index = Some(index); + } + + pub fn dump(&mut self) -> String { + // TODO: consuming itself, secretly + let note = &self.note; + let mut result = vec![]; + if !note.is_empty() { + result.push(format!("# {note}")); + result.push(String::new()); + } + result.push("digraph {".to_string()); + result.append(&mut self.nodes); + result.append(&mut self.connections); + result.append(&mut self.pair_boxes); + result.append(&mut self.traversal); + result.push("}".to_string()); + + result.push(String::new()); + result.join("\n") + } + + pub fn set_note(&mut self, note: &str) -> &mut Self { + self.note = String::from(note); + + self + } +} + +impl Node { + pub fn to_dot(&self, index: TreeIndex) -> DotLines { + // TODO: can this be done without introducing a blank
line? + let node_to_parent = match self.parent() { + Some(parent) => format!("node_{index} -> node_{parent} [constraint=false]"), + None => String::new(), + }; + + match self { + Node::Internal ( InternalNode {left, right, ..}) => DotLines{ + nodes: vec![ + format!("node_{index} [label=\"{index}\"]"), + ], + connections: vec![ + format!("node_{index} -> node_{left};"), + format!("node_{index} -> node_{right};"), + node_to_parent, + ], + pair_boxes: vec![ + format!("subgraph cluster_node_{index}_children {{ style=invis; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}} }}"), + ], + note: String::new(), + ..Default::default() + }, + Node::Leaf (LeafNode{key, value, ..}) => DotLines{ + nodes: vec![ + format!("node_{index} [shape=box, label=\"{index}\\nkey: {key}\\nvalue: {value}\"];"), + ], + connections: vec![node_to_parent], + note: String::new(), + ..Default::default() + }, + } + } +} + +impl MerkleBlob { + pub fn to_dot(&self) -> Result<DotLines, Error> { + let mut result = DotLines::new(); + for item in MerkleBlobLeftChildFirstIterator::new(&self.blob) { + let (index, block) = item?; + result.push(block.node.to_dot(index)); + } + + Ok(result) + } +} + +// TODO: better conditional execution than the commenting i'm doing now +#[allow(unused)] +pub fn open_dot(lines: &mut DotLines) { + let mut url = Url::parse("http://edotor.net").unwrap(); + // https://edotor.net/?engine=dot#graph%20%7B%7D%0A -> graph {} + url.query_pairs_mut().append_pair("engine", "dot"); + url.set_fragment(Some( + &utf8_percent_encode(&lines.dump(), NON_ALPHANUMERIC).to_string(), + )); + open::that(url.as_str()).unwrap(); } diff --git a/src/lib.rs b/src/lib.rs index 26eb6956c..fe47e8d15 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub use chia_bls as bls; pub use chia_client as client; pub use chia_consensus as consensus; +pub use chia_datalayer as datalayer; pub use chia_protocol as protocol; pub use chia_puzzles as puzzles; pub use chia_secp as secp; diff --git
a/tests/test_datalayer.py b/tests/test_datalayer.py new file mode 100644 index 000000000..73723f15b --- /dev/null +++ b/tests/test_datalayer.py @@ -0,0 +1,54 @@ +from chia_rs import LeafNode, MerkleBlob +from chia_rs.sized_bytes import bytes32 +from chia_rs.sized_ints import int64, uint8 + + +def test_merkle_blob(): + blob = bytes.fromhex( + "000100770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff90000000400000002000000000000000000000000010001000000060c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b00000000000000010000000000000001010001000000000c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b00000000000000000000000000000000010001000000040c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0000000000000002000000000000000200010100000000770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff900000003000000060000000000000000010001000000060c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0000000000000003000000000000000300000100000004770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff900000005000000010000000000000000" + ) + merkle_blob = MerkleBlob(blob) + print(merkle_blob) + print(dir(merkle_blob)) + assert len(merkle_blob) == len(blob) + + +def test_just_insert_a_bunch() -> None: + HASH = bytes32(range(12, 44)) + + import pathlib + + path = pathlib.Path("~/tmp/mbt/").expanduser() + path.joinpath("py").mkdir(parents=True, exist_ok=True) + path.joinpath("rs").mkdir(parents=True, exist_ok=True) + + merkle_blob = MerkleBlob(blob=bytearray()) + import time + + total_time = 0.0 + for i in range(100_000): + start = time.monotonic() + merkle_blob.insert(int64(i), int64(i), HASH) + end = time.monotonic() + total_time += end - start + + +# TODO: make this a real test +def test_checking_coverage() -> None: + count = 100 + + merkle_blob = MerkleBlob(blob=bytearray()) + for i in range(count): + if i % 2 == 0: + merkle_blob.insert(int64(i), int64(i), bytes32.zeros) + else: + merkle_blob.insert( 
+ int64(i), int64(i), bytes32.zeros, int64(i - 1), uint8(0) + ) + + keys = { + node.key + for index, node in merkle_blob.get_nodes_with_indexes() + if isinstance(node, LeafNode) + } + assert keys == set(range(count)) diff --git a/wheel/Cargo.toml b/wheel/Cargo.toml index f8d97b529..127441f31 100644 --- a/wheel/Cargo.toml +++ b/wheel/Cargo.toml @@ -31,6 +31,7 @@ hex = { workspace = true } pyo3 = { workspace = true, features = ["multiple-pymethods"] } chia-consensus = { workspace = true, features = ["py-bindings"] } chia-bls = { workspace = true, features = ["py-bindings"] } +chia-datalayer = { workspace = true, features = ["py-bindings"] } chia-protocol = { workspace = true, features = ["py-bindings"] } clvm-utils = { workspace = true } chia-ssl = { workspace = true } diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 188071eb0..888c7f409 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -274,7 +274,7 @@ def parse_rust_source(filename: str, upper_case: bool) -> list[tuple[str, list[s # this file is generated by generate_type_stubs.py # -from typing import Optional, Sequence, Union, Any, ClassVar, final +from typing import Mapping, Optional, Sequence, Union, Any, ClassVar, final from .sized_bytes import bytes32, bytes100 from .sized_ints import uint8, uint16, uint32, uint64, uint128, int8, int16, int32, int64 from typing_extensions import Self @@ -395,6 +395,62 @@ def derive_child_sk_unhardened(sk: PrivateKey, index: int) -> PrivateKey: ... @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... + +@final +class InternalNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + + @property + def left(self) -> uint32: ... + @property + def right(self) -> uint32: ... + + +@final +class LeafNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... 
+ + @property + def key(self) -> int64: ... + @property + def value(self) -> int64: ... + + +@final +class MerkleBlob: + @property + def blob(self) -> bytearray: ... + @property + def free_indexes(self) -> set[uint32]: ... + @property + def key_to_index(self) -> Mapping[int64, uint32]: ... + @property + def check_integrity_on_drop(self) -> bool: ... + + def __init__( + self, + blob: bytes, + ) -> None: ... + + def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... + def delete(self, key: int64) -> None: ... + def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... + def calculate_lazy_hashes(self) -> None: ... + def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... + def get_nodes_with_indexes(self) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]: ... + def empty(self) -> bool: ... + def get_root_hash(self) -> bytes32: ... + def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... + def get_hash_at_index(self, index: uint32): ... + + def __len__(self) -> int: ... + @final class MerkleSet: def get_root(self) -> bytes32: ... diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 34fa2efdd..b5f35b98b 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -3,7 +3,7 @@ # this file is generated by generate_type_stubs.py # -from typing import Optional, Sequence, Union, Any, ClassVar, final +from typing import Mapping, Optional, Sequence, Union, Any, ClassVar, final from .sized_bytes import bytes32, bytes100 from .sized_ints import uint8, uint16, uint32, uint64, uint128, int8, int16, int32, int64 from typing_extensions import Self @@ -124,6 +124,62 @@ class AugSchemeMPL: @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... 
+ +@final +class InternalNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + + @property + def left(self) -> uint32: ... + @property + def right(self) -> uint32: ... + + +@final +class LeafNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + + @property + def key(self) -> int64: ... + @property + def value(self) -> int64: ... + + +@final +class MerkleBlob: + @property + def blob(self) -> bytearray: ... + @property + def free_indexes(self) -> set[uint32]: ... + @property + def key_to_index(self) -> Mapping[int64, uint32]: ... + @property + def check_integrity_on_drop(self) -> bool: ... + + def __init__( + self, + blob: bytes, + ) -> None: ... + + def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... + def delete(self, key: int64) -> None: ... + def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... + def calculate_lazy_hashes(self) -> None: ... + def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... + def get_nodes_with_indexes(self) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]: ... + def empty(self) -> bool: ... + def get_root_hash(self) -> bytes32: ... + def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... + def get_hash_at_index(self, index: uint32): ... + + def __len__(self) -> int: ... + @final class MerkleSet: def get_root(self) -> bytes32: ... 
diff --git a/wheel/src/api.rs b/wheel/src/api.rs index e0a149f54..eaeca3210 100644 --- a/wheel/src/api.rs +++ b/wheel/src/api.rs @@ -78,6 +78,8 @@ use chia_bls::{ Signature, }; +use chia_datalayer::{InternalNode, LeafNode, MerkleBlob}; + #[pyfunction] pub fn compute_merkle_set_root<'p>( py: Python<'p>, @@ -476,6 +478,11 @@ pub fn chia_rs(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // constants m.add_class::()?; + // datalayer + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + // merkle tree m.add_class::()?; m.add_function(wrap_pyfunction!(confirm_included_already_hashed, m)?)?;