484 lines
16 KiB
Rust
484 lines
16 KiB
Rust
use rand::{Rng, rng};
|
|
use sha2::{Digest, Sha256};
|
|
use std::collections::HashMap;
|
|
|
|
use std::fs::{File, OpenOptions, create_dir};
|
|
use std::io::Write;
|
|
|
|
// --- Constants ---

/// Maximum raw payload bytes a single Chunk node may hold.
pub const MAX_CHUNK_DATA_SIZE: usize = 1024;

/// Maximum entries in a plain Directory node.
pub const MAX_DIRECTORY_ENTRIES: usize = 16;

/// Maximum children of a Big / BigDirectory node.
pub const MAX_BIG_CHILDREN: usize = 32;

/// Minimum children of a Big / BigDirectory node.
pub const MIN_BIG_CHILDREN: usize = 2;

/// Size in bytes of a SHA-256 digest; also used as the fixed width of
/// NUL-padded filenames in directory entries.
pub const FILENAME_HASH_SIZE: usize = 32;

/// Serialized size of one directory entry: filename (32) + content hash (32).
pub const DIRECTORY_ENTRY_SIZE: usize = FILENAME_HASH_SIZE * 2; // 64 bytes

/// SHA-256 digest identifying a node in the content-addressed store.
pub type NodeHash = [u8; FILENAME_HASH_SIZE];
|
|
|
|
pub fn node_hash_to_hex_string(hash: &NodeHash) -> String {
|
|
hash.iter().map(|b| format!("{:02x}", b)).collect()
|
|
}
|
|
|
|
/// A node in the content-addressed Merkle tree.
///
/// The explicit discriminants match the tag byte emitted by
/// `MerkleNode::get_type_byte` / `serialize`.
#[repr(u8)]
#[derive(Debug, Clone)]
pub enum MerkleNode {
    /// Up to 1024 bytes of raw data.
    Chunk(ChunkNode) = 0,
    /// 0 to 16 directory entries.
    Directory(DirectoryNode) = 1,
    /// List of 2 to 32 hashes pointing to Chunk or Big nodes.
    Big(BigNode) = 2,
    /// List of 2 to 32 hashes pointing to Directory or BigDirectory nodes.
    BigDirectory(BigDirectoryNode) = 3,
}
|
|
|
|
/// A Merkle tree: a root hash plus the content-addressed node store backing it.
#[derive(Debug, Clone)]
pub struct MerkleTree {
    // All nodes, keyed by the SHA-256 hash of their serialized form.
    pub data: HashMap<NodeHash, MerkleNode>,
    // Hash of the root node (expected to be present in `data`).
    pub root: NodeHash,
}
|
|
|
|
impl MerkleTree {
    /// Wraps an existing node store and root hash; performs no validation
    /// that `root` actually exists in `data`.
    pub fn new(data: HashMap<NodeHash, MerkleNode>, root: NodeHash) -> MerkleTree {
        MerkleTree { data, root }
    }

    /// Drops every stored node. `root` keeps its value and becomes dangling
    /// until the store is repopulated.
    pub fn clear_data(&mut self) {
        self.data.clear();
    }
}
|
|
|
|
/// Leaf node holding raw file bytes (at most `MAX_CHUNK_DATA_SIZE`).
#[derive(Debug, Clone)]
pub struct ChunkNode {
    // Raw payload; length is validated by `ChunkNode::new`.
    pub data: Vec<u8>,
}
|
|
impl ChunkNode {
|
|
pub fn new(data: Vec<u8>) -> Result<Self, String> {
|
|
if data.len() > MAX_CHUNK_DATA_SIZE {
|
|
return Err(format!("Chunk data exceeds {} bytes", data.len()));
|
|
}
|
|
Ok(ChunkNode { data })
|
|
}
|
|
|
|
pub fn new_random() -> Self {
|
|
let mut rng = rand::rng();
|
|
|
|
// Determine a random length between 1 and MAX_CHUNK_DATA_SIZE (inclusive).
|
|
// Using +1 ensures the range is up to 1024.
|
|
let random_len = rng.random_range(1..=MAX_CHUNK_DATA_SIZE);
|
|
|
|
// Initialize a vector with the random length
|
|
let mut data = vec![0u8; random_len];
|
|
|
|
// Fill the vector with random bytes
|
|
rng.fill(&mut data[..]);
|
|
|
|
// Since we generated the length based on MAX_CHUNK_DATA_SIZE,
|
|
// this is guaranteed to be valid and doesn't need to return a Result.
|
|
ChunkNode { data }
|
|
}
|
|
}
|
|
|
|
/// One name -> content mapping inside a Directory node.
#[derive(Debug, Clone)]
pub struct DirectoryEntry {
    // Fixed-width, NUL-padded filename. Note: this is a raw name, not a
    // hash, despite reusing the FILENAME_HASH_SIZE constant for its width.
    pub filename: [u8; FILENAME_HASH_SIZE],
    // Hash of the node holding this entry's content (Chunk/Big/Directory/...).
    pub content_hash: NodeHash,
}
|
|
|
|
pub fn filename_to_string(filename: [u8; FILENAME_HASH_SIZE]) -> String {
|
|
let end_index = filename
|
|
.iter()
|
|
.position(|&b| b == 0)
|
|
.unwrap_or(FILENAME_HASH_SIZE);
|
|
String::from_utf8_lossy(&filename[..end_index]).to_string()
|
|
}
|
|
|
|
/// Leaf directory node holding up to `MAX_DIRECTORY_ENTRIES` entries.
#[derive(Debug, Clone)]
pub struct DirectoryNode {
    // Name -> content-hash mappings; count is validated by `DirectoryNode::new`.
    pub entries: Vec<DirectoryEntry>,
}
|
|
|
|
impl DirectoryNode {
|
|
pub fn new(entries: Vec<DirectoryEntry>) -> Result<Self, String> {
|
|
if entries.len() > MAX_DIRECTORY_ENTRIES {
|
|
return Err(format!("Directory exceeds {} bytes", entries.len()));
|
|
}
|
|
Ok(DirectoryNode { entries })
|
|
}
|
|
}
|
|
|
|
/// Internal file node: fans out to 2..=32 child Chunk/Big nodes.
#[derive(Debug, Clone)]
pub struct BigNode {
    // Hashes of child Chunk or Big nodes, in file order.
    pub children_hashes: Vec<NodeHash>,
}
|
|
|
|
impl BigNode {
|
|
pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
|
|
let n = children_hashes.len();
|
|
if n < MIN_BIG_CHILDREN || n > MAX_BIG_CHILDREN {
|
|
return Err(format!(
|
|
"Big node must have between {} and {} children, found {}",
|
|
MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, n
|
|
));
|
|
}
|
|
Ok(BigNode { children_hashes })
|
|
}
|
|
}
|
|
|
|
/// Internal directory node: fans out to 2..=32 child Directory or
/// BigDirectory nodes.
#[derive(Debug, Clone)]
pub struct BigDirectoryNode {
    // Hashes of child Directory or BigDirectory nodes.
    pub children_hashes: Vec<NodeHash>,
}
|
|
|
|
impl BigDirectoryNode {
|
|
pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
|
|
let n = children_hashes.len();
|
|
if n < MIN_BIG_CHILDREN || n > MAX_BIG_CHILDREN {
|
|
return Err(format!(
|
|
"BigDirectory node must have between {} and {} children, found {}",
|
|
MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, n
|
|
));
|
|
}
|
|
Ok(BigDirectoryNode { children_hashes })
|
|
}
|
|
}
|
|
|
|
impl MerkleNode {
|
|
pub fn get_type_byte(&self) -> u8 {
|
|
match self {
|
|
MerkleNode::Chunk(_) => 0,
|
|
MerkleNode::Directory(_) => 1,
|
|
MerkleNode::Big(_) => 2,
|
|
MerkleNode::BigDirectory(_) => 3,
|
|
}
|
|
}
|
|
|
|
pub fn serialize(&self) -> Vec<u8> {
|
|
let mut bytes = Vec::new();
|
|
bytes.push(self.get_type_byte());
|
|
|
|
match self {
|
|
MerkleNode::Chunk(node) => {
|
|
bytes.extend_from_slice(&node.data);
|
|
}
|
|
MerkleNode::Directory(node) => {
|
|
for entry in &node.entries {
|
|
bytes.extend_from_slice(&entry.filename);
|
|
bytes.extend_from_slice(&entry.content_hash);
|
|
}
|
|
}
|
|
MerkleNode::Big(node) => {
|
|
for hash in &node.children_hashes {
|
|
bytes.extend_from_slice(hash);
|
|
}
|
|
}
|
|
MerkleNode::BigDirectory(node) => {
|
|
for hash in &node.children_hashes {
|
|
bytes.extend_from_slice(hash);
|
|
}
|
|
}
|
|
}
|
|
bytes
|
|
}
|
|
}
|
|
|
|
fn hash(data: &[u8]) -> NodeHash {
|
|
let root_hash = Sha256::digest(&data);
|
|
println!("root hash: {:?}", root_hash);
|
|
let res: NodeHash = root_hash.try_into().expect("incorrect size");
|
|
res
|
|
/*let mut hasher = DefaultHasher::new();
|
|
data.hash(&mut hasher);
|
|
let hash_u64 = hasher.finish();
|
|
|
|
let mut hash_array = [0u8; FILENAME_HASH_SIZE];
|
|
// Simple way to spread a 64-bit hash across 32 bytes for a unique-ish ID
|
|
for i in 0..8 {
|
|
hash_array[i] = (hash_u64 >> (i * 8)) as u8;
|
|
}
|
|
hash_array // The rest remains 0, satisfying the 32-byte requirement
|
|
*/
|
|
}
|
|
|
|
fn generate_random_filename() -> [u8; FILENAME_HASH_SIZE] {
|
|
let mut rng = rand::rng();
|
|
let mut filename_bytes = [0; FILENAME_HASH_SIZE];
|
|
|
|
// Generate a random length for the base name
|
|
let name_len = rng.random_range(5..21);
|
|
|
|
// Generate random alphanumeric characters
|
|
for i in 0..name_len {
|
|
let char_code = rng.random_range(97..123); // 'a' through 'z'
|
|
if i < FILENAME_HASH_SIZE {
|
|
filename_bytes[i] = char_code as u8;
|
|
}
|
|
}
|
|
|
|
// Append a common extension
|
|
let ext = if rng.random_bool(0.5) { ".txt" } else { ".dat" };
|
|
let ext_bytes = ext.as_bytes();
|
|
let start_index = name_len.min(FILENAME_HASH_SIZE - ext_bytes.len());
|
|
if start_index < FILENAME_HASH_SIZE {
|
|
filename_bytes[start_index..(start_index + ext_bytes.len())].copy_from_slice(ext_bytes);
|
|
}
|
|
|
|
filename_bytes
|
|
}
|
|
|
|
fn generate_random_file_node(
|
|
storage: &mut HashMap<NodeHash, MerkleNode>,
|
|
) -> Result<NodeHash, String> {
|
|
let mut rng = rng();
|
|
let is_big = rng.random_bool(0.2); // 20% chance of being a big file
|
|
|
|
if !is_big {
|
|
// Generate a simple Chunk Node
|
|
let node = MerkleNode::Chunk(ChunkNode::new_random());
|
|
let hash = hash(&node.serialize());
|
|
storage.insert(hash, node);
|
|
Ok(hash)
|
|
} else {
|
|
// Generate a Big Node (a file composed of chunks)
|
|
let num_children = rng.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(8)); // Limit complexity
|
|
let mut children_hashes = Vec::with_capacity(num_children);
|
|
|
|
for _ in 0..num_children {
|
|
// Children must be Chunk or Big; for simplicity, we only generate Chunk children here.
|
|
let chunk_node = MerkleNode::Chunk(ChunkNode::new_random());
|
|
let chunk_hash = hash(&chunk_node.serialize());
|
|
storage.insert(chunk_hash, chunk_node);
|
|
children_hashes.push(chunk_hash);
|
|
}
|
|
|
|
let node = MerkleNode::Big(BigNode::new(children_hashes)?);
|
|
let hash = hash(&node.serialize());
|
|
storage.insert(hash, node);
|
|
Ok(hash)
|
|
}
|
|
}
|
|
|
|
/// Recursively generates a random Directory (or BigDirectory) node at depth
/// `depth`, inserting every created node into `storage` and returning the new
/// node's hash. `max_depth` bounds the recursion.
fn generate_random_directory_node(
    depth: u32,
    max_depth: u32,
    storage: &mut HashMap<NodeHash, MerkleNode>,
) -> Result<NodeHash, String> {
    let mut rng = rng();
    let current_depth = depth + 1;
    // 30% chance of an internal BigDirectory, but only while depth budget remains.
    let is_big_dir = rng.random_bool(0.3) && current_depth < max_depth;

    if !is_big_dir || current_depth >= max_depth {
        // Generate a simple Directory Node (leaf level directory)
        let num_entries = rng.random_range(1..=MAX_DIRECTORY_ENTRIES.min(5)); // Limit directory size for testing
        let mut entries = Vec::with_capacity(num_entries);

        for _ in 0..num_entries {
            if rng.random_bool(0.7) {
                // 70% chance of creating a file (Chunk/Big)
                let file_hash = generate_random_file_node(storage)?;
                let entry = DirectoryEntry {
                    filename: generate_random_filename(),
                    content_hash: file_hash,
                };
                entries.push(entry);
            } else if current_depth < max_depth {
                // 30% chance of creating a subdirectory.
                // NOTE(review): at max depth this branch is skipped entirely,
                // so the directory may end up with fewer than `num_entries`
                // entries (possibly zero) — confirm that is intended.
                let dir_hash = generate_random_directory_node(current_depth, max_depth, storage)?;

                // Create a basic directory entry name ("dir_{depth}"; fits in 32 bytes)
                let mut filename_bytes = [0; 32];
                let subdir_name = format!("dir_{}", current_depth);
                filename_bytes[..subdir_name.len()].copy_from_slice(subdir_name.as_bytes());

                let entry = DirectoryEntry {
                    filename: filename_bytes,
                    content_hash: dir_hash,
                };
                entries.push(entry);
            }
        }

        // Hash the finished directory and record it in the store.
        let node = MerkleNode::Directory(DirectoryNode::new(entries)?);
        let hash = hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    } else {
        // Generate a BigDirectory Node (internal directory structure)
        let num_children = rng.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(4)); // Limit children count
        let mut children = Vec::with_capacity(num_children);

        for _ in 0..num_children {
            // Children must be Directory or BigDirectory
            let child_hash = generate_random_directory_node(current_depth, max_depth, storage)?;
            children.push(child_hash);
        }

        let node = MerkleNode::BigDirectory(BigDirectoryNode::new(children)?);
        let hash = hash(&node.serialize());
        storage.insert(hash, node);
        Ok(hash)
    }
}
|
|
|
|
pub fn generate_random_tree(
|
|
max_depth: u32,
|
|
) -> Result<(NodeHash, HashMap<NodeHash, MerkleNode>), String> {
|
|
let mut storage = HashMap::new();
|
|
|
|
// Start tree generation from the root directory at depth 0
|
|
let root_hash = generate_random_directory_node(0, max_depth, &mut storage)?;
|
|
|
|
Ok((root_hash, storage))
|
|
}
|
|
|
|
pub fn generate_base_tree() -> MerkleTree {
|
|
let mut res = HashMap::new();
|
|
|
|
let bob_content = "where is bob".to_string().into_bytes();
|
|
let alice_content = "alice".to_string().into_bytes();
|
|
let oscar_content = "oscar is the opponent".to_string().into_bytes();
|
|
|
|
let mut children_nodes = Vec::new();
|
|
for _ in 0..10 {
|
|
let mut i_nodes = Vec::new();
|
|
for _ in 0..10 {
|
|
let node1 = MerkleNode::Chunk(ChunkNode::new(bob_content.clone()).unwrap());
|
|
let hash = hash(&node1.serialize());
|
|
i_nodes.push(hash);
|
|
res.insert(hash, node1);
|
|
}
|
|
let bignode = MerkleNode::Big(BigNode::new(i_nodes).unwrap());
|
|
let hashbig = hash(&bignode.serialize());
|
|
children_nodes.push(hashbig);
|
|
res.insert(hashbig, bignode);
|
|
}
|
|
|
|
let bignode = MerkleNode::Big(BigNode::new(children_nodes).unwrap());
|
|
let hashbig = hash(&bignode.serialize());
|
|
|
|
let node1 = MerkleNode::Chunk(ChunkNode::new(bob_content).unwrap());
|
|
let hash1 = hash(&node1.serialize());
|
|
|
|
let node2 = MerkleNode::Chunk(ChunkNode::new(alice_content).unwrap());
|
|
let hash2 = hash(&node2.serialize());
|
|
|
|
res.insert(hash1, node1);
|
|
res.insert(hash2, node2);
|
|
res.insert(hashbig, bignode);
|
|
|
|
let node3 = MerkleNode::Chunk(ChunkNode::new(oscar_content).unwrap());
|
|
let hash3 = hash(&node3.serialize());
|
|
|
|
res.insert(hash3, node3);
|
|
|
|
let dir1 = MerkleNode::Directory(DirectoryNode {
|
|
entries: [DirectoryEntry {
|
|
filename: generate_random_filename(),
|
|
content_hash: hash3,
|
|
}]
|
|
.to_vec(),
|
|
});
|
|
let hash_dir1 = hash(&dir1.serialize());
|
|
|
|
res.insert(hash_dir1, dir1);
|
|
|
|
let root = MerkleNode::Directory(DirectoryNode {
|
|
entries: [
|
|
DirectoryEntry {
|
|
filename: generate_random_filename(),
|
|
content_hash: hashbig,
|
|
},
|
|
DirectoryEntry {
|
|
filename: generate_random_filename(),
|
|
content_hash: hash2,
|
|
},
|
|
DirectoryEntry {
|
|
filename: generate_random_filename(),
|
|
content_hash: hash_dir1,
|
|
},
|
|
]
|
|
.to_vec(),
|
|
});
|
|
|
|
let root_hash = Sha256::digest(&root.serialize());
|
|
println!("root hash: {:?}", root_hash);
|
|
res.insert(root_hash.try_into().expect("incorrect size"), root);
|
|
|
|
MerkleTree::new(res, root_hash.try_into().expect("incorrect size"))
|
|
}
|
|
|
|
/// Materializes a Directory / BigDirectory node onto the local filesystem,
/// writing one file per directory entry under `path`.
///
/// `i` counts BigDirectory recursion levels; a `fold_{i}` directory is
/// created for every non-zero level. Non-directory nodes are rejected with a
/// message on stderr.
pub fn node_to_file(tree: &MerkleTree, node: &MerkleNode, path: String, i: u8) {
    match node.clone() {
        MerkleNode::Directory(dir) => {
            if i != 0 {
                let new_path = format!("{}/fold_{}", path.clone(), i);
                match create_dir(new_path.clone()) {
                    Ok(_) => println!("Directory created successfully!"),
                    Err(e) => println!("Failed to create directory: {}", e),
                }
                // NOTE(review): `new_path` is created but never used below —
                // the entries are still written under `path`. Confirm whether
                // files were meant to land inside the new `fold_{i}` folder.
            }
            for entry in dir.entries {
                // Create (or append to) one file per directory entry.
                if let Ok(filename_str) = String::from_utf8(entry.filename.to_vec()) {
                    // NOTE(review): no '/' separator is inserted between
                    // `path` and the filename — callers apparently pass a
                    // path ending in '/'; confirm.
                    let new_name = format!("{}{}", path.clone(), remove_null_bytes(&filename_str));

                    println!("new_name: {}", new_name);
                    let file = OpenOptions::new()
                        .append(true)
                        .create(true)
                        .open(new_name.clone());
                    match file {
                        Ok(mut fileok) => {
                            // Resolve the entry's content node and stream it out.
                            if let Some(current) = tree.data.get(&entry.content_hash) {
                                big_or_chunk_to_file(&tree, &current, &mut fileok);
                            }
                        }
                        Err(e) => {
                            eprintln!("error creaation file: {}", e);
                        }
                    }
                }
            }
        }
        MerkleNode::BigDirectory(bigdir) => {
            // Recurse into each child directory node, bumping the level counter.
            for entry in bigdir.children_hashes.iter() {
                if let Some(current) = tree.data.get(entry) {
                    node_to_file(tree, current, path.clone(), i + 1);
                }
            }
        }
        _ => {
            // Chunk/Big nodes are file content, handled by big_or_chunk_to_file.
            eprintln!("invalid type of dir");
        }
    }
}
|
|
|
|
/// Strips every NUL byte from `input` (used to trim NUL-padded filenames).
pub fn remove_null_bytes(input: &str) -> String {
    input.replace('\0', "")
}
|
|
|
|
pub fn big_or_chunk_to_file(tree: &MerkleTree, node: &MerkleNode, file: &mut File) {
|
|
match node {
|
|
MerkleNode::Big(big) => {
|
|
for entry in big.children_hashes.iter() {
|
|
if let Some(current) = tree.data.get(entry) {
|
|
big_or_chunk_to_file(tree, current, file);
|
|
}
|
|
}
|
|
}
|
|
MerkleNode::Chunk(chunk) => {
|
|
if !chunk.data.is_empty() {
|
|
let mut data = chunk.data.clone();
|
|
data.remove(0);
|
|
let _ = file.write(&data);
|
|
} else {
|
|
println!("chunk.data is empty, nothing to write");
|
|
}
|
|
}
|
|
_ => {
|
|
println!("invalid type of file");
|
|
}
|
|
}
|
|
}
|