structure
305
client-network/src/data.rs
Normal file
@@ -0,0 +1,305 @@
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};

use rand::{rng, Rng};

// --- Constants ---
const MAX_CHUNK_DATA_SIZE: usize = 1024;
const MAX_DIRECTORY_ENTRIES: usize = 16;
const MAX_BIG_CHILDREN: usize = 32;
const MIN_BIG_CHILDREN: usize = 2;
const FILENAME_HASH_SIZE: usize = 32;
const DIRECTORY_ENTRY_SIZE: usize = FILENAME_HASH_SIZE * 2; // 64 bytes

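/// Placeholder content hash: spreads a 64-bit `DefaultHasher` digest across a
/// 32-byte array. Not collision-resistant; a production implementation would
/// presumably use a cryptographic hash such as SHA-256 (an assumption, not
/// something this commit specifies).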
fn dummy_hash(data: &[u8]) -> NodeHash {
    let mut hasher = DefaultHasher::new();
    data.hash(&mut hasher);
    let hash_u64 = hasher.finish();

    let mut hash_array = [0u8; FILENAME_HASH_SIZE];
    // Simple way to spread a 64-bit hash across 32 bytes for a unique-ish ID
    for i in 0..8 {
        hash_array[i] = (hash_u64 >> (i * 8)) as u8;
    }
    hash_array // The rest remains 0, satisfying the 32-byte requirement
}

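/// Builds a random lowercase base name (5 to 20 characters) plus a
/// ".txt"/".dat" extension, packed into the fixed 32-byte filename field
/// (zero-padded on the right).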
fn generate_random_filename() -> [u8; FILENAME_HASH_SIZE] {
    let mut rng = rand::rng();
    let mut filename_bytes = [0; FILENAME_HASH_SIZE];

    // Generate a random length for the base name
    let name_len = rng.random_range(5..21);

    // Generate random lowercase letters (the name is always shorter than the
    // 32-byte buffer, so indexing is safe)
    for i in 0..name_len {
        let char_code = rng.random_range(97..123); // 'a' through 'z'
        filename_bytes[i] = char_code as u8;
    }

    // Append a common extension, clamped so it always fits in the buffer
    let ext = if rng.random_bool(0.5) { ".txt" } else { ".dat" };
    let ext_bytes = ext.as_bytes();
    let start_index = name_len.min(FILENAME_HASH_SIZE - ext_bytes.len());
    filename_bytes[start_index..(start_index + ext_bytes.len())].copy_from_slice(ext_bytes);

    filename_bytes
}

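/// 32-byte node identifier. Filenames and content hashes share this width, so
/// a serialized `DirectoryEntry` is exactly `DIRECTORY_ENTRY_SIZE` (64) bytes.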
pub type NodeHash = [u8; FILENAME_HASH_SIZE];

pub fn node_hash_to_hex_string(hash: &NodeHash) -> String {
    hash.iter().map(|b| format!("{:02x}", b)).collect()
}

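/// A node in the Merkle tree. The explicit discriminants mirror the type byte
/// written by `serialize` (see `get_type_byte`); the value 2 is unused here.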
#[repr(u8)]
#[derive(Debug, Clone)]
pub enum MerkleNode {
    // Up to 1024 bytes of raw data.
    Chunk(ChunkNode) = 0,
    // 0 to 16 directory entries.
    Directory(DirectoryNode) = 1,
    // List of 2 to 32 hashes pointing to Chunk or Big nodes.
    Big(BigNode) = 3,
    // List of 2 to 32 hashes pointing to Directory or BigDirectory nodes.
    BigDirectory(BigDirectoryNode) = 4,
}

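/// Generates a random "file": usually a single Chunk (80%), otherwise a Big
/// node whose children are freshly generated Chunks. Every created node is
/// inserted into `storage` keyed by its hash; returns the root hash.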
fn generate_random_file_node(storage: &mut HashMap<NodeHash, MerkleNode>) -> Result<NodeHash, String> {
    let mut rng = rng();
    let is_big = rng.random_bool(0.2); // 20% chance of being a big file

    if !is_big {
        // Generate a simple Chunk node
        let node = MerkleNode::Chunk(ChunkNode::new_random());
        let hash = dummy_hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    } else {
        // Generate a Big node (a file composed of chunks)
        let num_children = rng.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(8)); // Limit complexity
        let mut children_hashes = Vec::with_capacity(num_children);

        for _ in 0..num_children {
            // Children must be Chunk or Big; for simplicity, we only generate Chunk children here.
            let chunk_node = MerkleNode::Chunk(ChunkNode::new_random());
            let chunk_hash = dummy_hash(&chunk_node.serialize());
            storage.insert(chunk_hash, chunk_node);
            children_hashes.push(chunk_hash);
        }

        let node = MerkleNode::Big(BigNode::new(children_hashes)?);
        let hash = dummy_hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    }
}

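/// Recursively generates a random directory subtree. At each level it either
/// emits a plain Directory (entries are files with 70% probability,
/// subdirectories otherwise) or, with 30% probability while still below
/// `max_depth`, a BigDirectory fanning out into further directory nodes.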
fn generate_random_directory_node(
    depth: u32,
    max_depth: u32,
    storage: &mut HashMap<NodeHash, MerkleNode>,
) -> Result<NodeHash, String> {
    let mut rng = rng();
    let current_depth = depth + 1;
    // A BigDirectory is only possible below max_depth, so this flag already
    // encodes the depth guard.
    let is_big_dir = rng.random_bool(0.3) && current_depth < max_depth;

    if !is_big_dir {
        // Generate a simple Directory node (leaf-level directory)
        let num_entries = rng.random_range(1..=MAX_DIRECTORY_ENTRIES.min(5)); // Limit directory size for testing
        let mut entries = Vec::with_capacity(num_entries);

        for _ in 0..num_entries {
            if rng.random_bool(0.7) {
                // 70% chance of creating a file (Chunk/Big)
                let file_hash = generate_random_file_node(storage)?;
                let entry = DirectoryEntry {
                    filename: generate_random_filename(),
                    content_hash: file_hash,
                };
                entries.push(entry);
            } else if current_depth < max_depth {
                // 30% chance of creating a subdirectory
                let dir_hash = generate_random_directory_node(current_depth, max_depth, storage)?;

                // Create a basic directory entry name
                let mut filename_bytes = [0; FILENAME_HASH_SIZE];
                let subdir_name = format!("dir_{}", current_depth);
                filename_bytes[..subdir_name.len()].copy_from_slice(subdir_name.as_bytes());

                let entry = DirectoryEntry {
                    filename: filename_bytes,
                    content_hash: dir_hash,
                };
                entries.push(entry);
            }
        }

        let node = MerkleNode::Directory(DirectoryNode::new(entries)?);
        let hash = dummy_hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    } else {
        // Generate a BigDirectory node (internal directory structure)
        let num_children = rng.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(4)); // Limit children count
        let mut children = Vec::with_capacity(num_children);

        for _ in 0..num_children {
            // Children must be Directory or BigDirectory
            let child_hash = generate_random_directory_node(current_depth, max_depth, storage)?;
            children.push(child_hash);
        }

        let node = MerkleNode::BigDirectory(BigDirectoryNode::new(children)?);
        let hash = dummy_hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    }
}

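/// Leaf node: at most `MAX_CHUNK_DATA_SIZE` (1024) bytes of raw file data.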
#[derive(Debug, Clone)]
pub struct ChunkNode {
    pub data: Vec<u8>,
}

impl ChunkNode {
    pub fn new(data: Vec<u8>) -> Result<Self, String> {
        if data.len() > MAX_CHUNK_DATA_SIZE {
            return Err(format!(
                "Chunk data is {} bytes, exceeding the {}-byte limit",
                data.len(),
                MAX_CHUNK_DATA_SIZE
            ));
        }
        Ok(ChunkNode { data })
    }

    pub fn new_random() -> Self {
        let mut rng = rand::rng();

        // Pick a random length between 1 and MAX_CHUNK_DATA_SIZE; the
        // inclusive range (..=) makes a length of exactly 1024 possible.
        let random_len = rng.random_range(1..=MAX_CHUNK_DATA_SIZE);

        // Initialize a vector of that length and fill it with random bytes
        let mut data = vec![0u8; random_len];
        rng.fill(&mut data[..]);

        // The length is bounded by MAX_CHUNK_DATA_SIZE by construction, so
        // the size invariant cannot be violated and no Result is needed.
        ChunkNode { data }
    }
}

// Helper struct: a fixed-width (filename, content hash) pair; serialized as
// DIRECTORY_ENTRY_SIZE (64) bytes.
#[derive(Debug, Clone)]
pub struct DirectoryEntry {
    pub filename: [u8; FILENAME_HASH_SIZE],
    pub content_hash: NodeHash,
}

#[derive(Debug, Clone)]
pub struct DirectoryNode {
    pub entries: Vec<DirectoryEntry>,
}

impl DirectoryNode {
    pub fn new(entries: Vec<DirectoryEntry>) -> Result<Self, String> {
        if entries.len() > MAX_DIRECTORY_ENTRIES {
            return Err(format!(
                "Directory has {} entries, exceeding the limit of {}",
                entries.len(),
                MAX_DIRECTORY_ENTRIES
            ));
        }
        Ok(DirectoryNode { entries })
    }
}

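/// Interior node for large files: 2 to 32 hashes of Chunk or Big children.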
#[derive(Debug, Clone)]
pub struct BigNode {
    pub children_hashes: Vec<NodeHash>,
}

impl BigNode {
    pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
        let n = children_hashes.len();
        if n < MIN_BIG_CHILDREN || n > MAX_BIG_CHILDREN {
            return Err(format!(
                "Big node must have between {} and {} children, found {}",
                MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, n
            ));
        }
        Ok(BigNode { children_hashes })
    }
}

#[derive(Debug, Clone)]
pub struct BigDirectoryNode {
    pub children_hashes: Vec<NodeHash>,
}

impl BigDirectoryNode {
    pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
        let n = children_hashes.len();
        if n < MIN_BIG_CHILDREN || n > MAX_BIG_CHILDREN {
            return Err(format!(
                "BigDirectory node must have between {} and {} children, found {}",
                MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, n
            ));
        }
        Ok(BigDirectoryNode { children_hashes })
    }
}

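// Wire format, as implemented by `serialize` below: a single type byte
// followed by the raw payload (chunk bytes, 64-byte directory entries, or
// 32-byte child hashes). No length prefixes are written, so a decoder would
// have to derive entry counts from the overall node size.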
impl MerkleNode {
    pub fn get_type_byte(&self) -> u8 {
        match self {
            MerkleNode::Chunk(_) => 0,
            MerkleNode::Directory(_) => 1,
            MerkleNode::Big(_) => 3,
            MerkleNode::BigDirectory(_) => 4,
        }
    }

    pub fn serialize(&self) -> Vec<u8> {
        let mut bytes = Vec::new();
        // 1. Add the type byte
        bytes.push(self.get_type_byte());

        // 2. Add the node-specific data
        match self {
            MerkleNode::Chunk(node) => {
                bytes.extend_from_slice(&node.data);
            }
            MerkleNode::Directory(node) => {
                // The data is the sequence of directory entries
                for entry in &node.entries {
                    bytes.extend_from_slice(&entry.filename);
                    bytes.extend_from_slice(&entry.content_hash);
                }
            }
            MerkleNode::Big(node) => {
                // The data is the list of child hashes
                for hash in &node.children_hashes {
                    bytes.extend_from_slice(hash);
                }
            }
            MerkleNode::BigDirectory(node) => {
                // The data is the list of child hashes
                for hash in &node.children_hashes {
                    bytes.extend_from_slice(hash);
                }
            }
        }
        bytes
    }

    pub fn generate_random_tree(max_depth: u32) -> Result<(NodeHash, HashMap<NodeHash, MerkleNode>), String> {
        let mut storage = HashMap::new();

        // Start tree generation from the root directory at depth 0
        let root_hash = generate_random_directory_node(0, max_depth, &mut storage)?;

        Ok((root_hash, storage))
    }
}
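
// A minimal usage sketch (assumption: this test module is not part of the
// original commit). It exercises `generate_random_tree` and checks the
// invariants the generators guarantee: every node is stored under the hash of
// its own serialization, and the root is a directory-like node.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn random_tree_nodes_hash_to_their_keys() {
        let (root_hash, storage) =
            MerkleNode::generate_random_tree(3).expect("tree generation should succeed");

        // The root must be present and must be a Directory or BigDirectory.
        let root = storage.get(&root_hash).expect("root node missing from storage");
        assert!(matches!(root, MerkleNode::Directory(_) | MerkleNode::BigDirectory(_)));

        // Every node is stored under the hash of its own serialization.
        for (hash, node) in &storage {
            assert_eq!(&dummy_hash(&node.serialize()), hash);
        }

        println!("root = {}", node_hash_to_hex_string(&root_hash));
    }
}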