Files
p2p/client-network/src/data.rs
2026-01-23 01:11:02 +01:00

518 lines
17 KiB
Rust

use rand::{Rng, rng};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::fs::{File, OpenOptions, create_dir};
use std::io::{self, Write};
use std::env;
// --- Constants ---
/// Largest payload, in bytes, that `ChunkNode::new` accepts.
pub const MAX_CHUNK_DATA_SIZE: usize = 1024;
/// Largest number of entries that `DirectoryNode::new` accepts.
pub const MAX_DIRECTORY_ENTRIES: usize = 16;
/// Largest number of child hashes that `BigNode::new` and `BigDirectoryNode::new` accept.
pub const MAX_BIG_CHILDREN: usize = 32;
/// Smallest number of child hashes that `BigNode::new` and `BigDirectoryNode::new` accept.
pub const MIN_BIG_CHILDREN: usize = 2;
/// Width in bytes of a node hash and of the fixed-size filename field of a directory entry.
pub const FILENAME_HASH_SIZE: usize = 32;
/// Serialized size of one directory entry: the filename field followed by the content hash.
pub const DIRECTORY_ENTRY_SIZE: usize = FILENAME_HASH_SIZE * 2; // 64 bytes
/// Fixed-size node identifier; the `hash` helper in this file fills it with a SHA-256 digest.
pub type NodeHash = [u8; FILENAME_HASH_SIZE];
/// Renders a node hash as lowercase hexadecimal, two digits per byte.
///
/// The result is always `2 * FILENAME_HASH_SIZE` characters long because every
/// byte is zero-padded to two digits.
pub fn node_hash_to_hex_string(hash: &NodeHash) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(hash.len() * 2);
    for &byte in hash {
        // High nibble first, so 0x0a is rendered as "0a".
        hex.push(HEX_DIGITS[usize::from(byte >> 4)] as char);
        hex.push(HEX_DIGITS[usize::from(byte & 0x0f)] as char);
    }
    hex
}
/// A node of the Merkle tree.
///
/// The explicit discriminants are the same values that `get_type_byte` returns and that
/// `serialize` writes as the first byte of the encoding. The value 2 is not assigned to
/// any variant.
#[repr(u8)]
#[derive(Debug, Clone)]
pub enum MerkleNode {
/// Up to 1024 bytes of raw data.
Chunk(ChunkNode) = 0,
/// 0 to 16 directory entries.
Directory(DirectoryNode) = 1,
/// List of 2 to 32 hashes pointing to Chunk or Big nodes.
Big(BigNode) = 3,
/// List of 2 to 32 hashes pointing to Directory or BigDirectory nodes.
BigDirectory(BigDirectoryNode) = 4,
}
/// In-memory Merkle tree: a hash-indexed node store plus the hash of the root node.
#[derive(Debug, Clone)]
pub struct MerkleTree {
    /// Every known node, keyed by its hash.
    pub data: HashMap<NodeHash, MerkleNode>,
    /// Hash designating the node the tree starts from.
    pub root: NodeHash,
}

impl MerkleTree {
    /// Bundles a node map with the hash of its root.
    ///
    /// No validation is performed; in particular `root` is not checked for
    /// presence in `data`.
    pub fn new(data: HashMap<NodeHash, MerkleNode>, root: NodeHash) -> Self {
        Self { root, data }
    }
}
/// Leaf node holding a slice of raw file data.
#[derive(Debug, Clone)]
pub struct ChunkNode {
    /// Raw payload. Both constructors keep it at or below `MAX_CHUNK_DATA_SIZE` bytes.
    pub data: Vec<u8>,
}

impl ChunkNode {
    /// Wraps `data` in a chunk node.
    ///
    /// # Errors
    /// Returns a message naming the limit and the offending length when `data`
    /// is longer than `MAX_CHUNK_DATA_SIZE` bytes.
    pub fn new(data: Vec<u8>) -> Result<Self, String> {
        if data.len() > MAX_CHUNK_DATA_SIZE {
            // Report the limit that was exceeded, not just the input length.
            return Err(format!(
                "Chunk data exceeds {} bytes (got {})",
                MAX_CHUNK_DATA_SIZE,
                data.len()
            ));
        }
        Ok(ChunkNode { data })
    }

    /// Builds a chunk of random bytes whose length is drawn uniformly from
    /// `1..=MAX_CHUNK_DATA_SIZE`.
    ///
    /// The length is bounded by construction, so unlike [`ChunkNode::new`] this
    /// cannot fail.
    pub fn new_random() -> Self {
        let mut rng = rand::rng();
        // The inclusive range already reaches MAX_CHUNK_DATA_SIZE.
        let random_len = rng.random_range(1..=MAX_CHUNK_DATA_SIZE);
        let mut data = vec![0u8; random_len];
        rng.fill(&mut data[..]);
        ChunkNode { data }
    }
}
/// One named entry of a directory node: a fixed-width name paired with the hash
/// of the node holding the entry's content.
#[derive(Debug, Clone)]
pub struct DirectoryEntry {
/// Entry name as raw bytes in a fixed-size field. `filename_to_string` treats the
/// first zero byte, if any, as the end of the name.
pub filename: [u8; FILENAME_HASH_SIZE],
/// Hash of the node this entry refers to.
pub content_hash: NodeHash,
}
pub fn filename_to_string(filename: [u8; FILENAME_HASH_SIZE]) -> String {
let end_index = filename
.iter()
.position(|&b| b == 0)
.unwrap_or(FILENAME_HASH_SIZE);
String::from_utf8_lossy(&filename[..end_index]).to_string()
}
/// Directory node: a flat list of named entries.
#[derive(Debug, Clone)]
pub struct DirectoryNode {
    /// Entries of the directory; [`DirectoryNode::new`] caps their number at
    /// `MAX_DIRECTORY_ENTRIES`.
    pub entries: Vec<DirectoryEntry>,
}

impl DirectoryNode {
    /// Wraps `entries` in a directory node. An empty list is accepted.
    ///
    /// # Errors
    /// Returns a message naming the limit and the offending count when more than
    /// `MAX_DIRECTORY_ENTRIES` entries are supplied.
    pub fn new(entries: Vec<DirectoryEntry>) -> Result<Self, String> {
        if entries.len() > MAX_DIRECTORY_ENTRIES {
            // The limit is a number of entries, not a number of bytes.
            return Err(format!(
                "Directory exceeds {} entries (got {})",
                MAX_DIRECTORY_ENTRIES,
                entries.len()
            ));
        }
        Ok(DirectoryNode { entries })
    }
}
/// Interior node of a large file: an ordered list of child hashes.
#[derive(Debug, Clone)]
pub struct BigNode {
    /// Hashes of the children, in content order.
    pub children_hashes: Vec<NodeHash>,
}

impl BigNode {
    /// Wraps `children_hashes` in a big node.
    ///
    /// # Errors
    /// Returns a descriptive message unless the number of children lies in
    /// `MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN`.
    pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
        let child_count = children_hashes.len();
        if (MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN).contains(&child_count) {
            Ok(Self { children_hashes })
        } else {
            Err(format!(
                "Big node must have between {} and {} children, found {}",
                MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, child_count
            ))
        }
    }
}
/// Interior node of a large directory: an ordered list of child hashes.
#[derive(Debug, Clone)]
pub struct BigDirectoryNode {
    /// Hashes of the children.
    pub children_hashes: Vec<NodeHash>,
    // pub children_hashes: Vec<DirectoryEntry>,
}

impl BigDirectoryNode {
    /// Wraps `children_hashes` in a big-directory node.
    ///
    /// # Errors
    /// Returns a descriptive message unless the number of children lies in
    /// `MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN`.
    pub fn new(children_hashes: Vec<NodeHash>) -> Result<Self, String> {
        match children_hashes.len() {
            MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN => Ok(Self { children_hashes }),
            found => Err(format!(
                "BigDirectory node must have between {} and {} children, found {}",
                MIN_BIG_CHILDREN, MAX_BIG_CHILDREN, found
            )),
        }
    }
}
impl MerkleNode {
pub fn get_type_byte(&self) -> u8 {
match self {
MerkleNode::Chunk(_) => 0,
MerkleNode::Directory(_) => 1,
MerkleNode::Big(_) => 3,
MerkleNode::BigDirectory(_) => 4,
}
}
pub fn serialize(&self) -> Vec<u8> {
let mut bytes = Vec::new();
bytes.push(self.get_type_byte());
match self {
MerkleNode::Chunk(node) => {
bytes.extend_from_slice(&node.data);
}
MerkleNode::Directory(node) => {
for entry in &node.entries {
bytes.extend_from_slice(&entry.filename);
bytes.extend_from_slice(&entry.content_hash);
}
}
MerkleNode::Big(node) => {
for hash in &node.children_hashes {
bytes.extend_from_slice(hash);
}
}
MerkleNode::BigDirectory(node) => {
for hash in &node.children_hashes {
bytes.extend_from_slice(hash);
}
}
}
bytes
}
}
/// Computes the SHA-256 digest of `data` and returns it as a [`NodeHash`].
///
/// This helper is called for every node that gets built, so it performs no
/// logging; callers that want to display a particular digest print it themselves.
fn hash(data: &[u8]) -> NodeHash {
    Sha256::digest(data)
        .try_into()
        .expect("a SHA-256 digest is exactly FILENAME_HASH_SIZE (32) bytes long")
}
/// Produces a random filename field: 5 to 20 lowercase ASCII letters followed by
/// `.txt` or `.dat` (chosen with equal probability), zero-padded to
/// `FILENAME_HASH_SIZE` bytes.
fn generate_random_filename() -> [u8; FILENAME_HASH_SIZE] {
    let mut rng = rand::rng();
    let mut filename_bytes = [0; FILENAME_HASH_SIZE];

    // Base name length; at most 20, so it always fits in the field.
    let name_len: usize = rng.random_range(5..21);
    for slot in filename_bytes.iter_mut().take(name_len) {
        // 97..123 spans 'a' through 'z'.
        *slot = rng.random_range(97..123) as u8;
    }

    let suffix: &[u8] = if rng.random_bool(0.5) { b".txt" } else { b".dat" };
    // Clamped so the extension can never run past the end of the field.
    let suffix_start = name_len.min(FILENAME_HASH_SIZE - suffix.len());
    filename_bytes[suffix_start..suffix_start + suffix.len()].copy_from_slice(suffix);

    filename_bytes
}
/// Generates a random file, stores all of its nodes in `storage`, and returns
/// the hash of the file's top node.
///
/// With probability 0.8 the file is a single random `Chunk`; otherwise it is a
/// `Big` node over 2 to 8 random chunks (the cap keeps generated trees small).
///
/// # Errors
/// Propagates the message from `BigNode::new` if the child count is rejected.
fn generate_random_file_node(
    storage: &mut HashMap<NodeHash, MerkleNode>,
) -> Result<NodeHash, String> {
    let mut random = rng();

    let file_node = if random.random_bool(0.2) {
        let child_count = random.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(8));
        // Children of a Big node may be Chunk or Big; only chunks are generated here.
        let child_hashes: Vec<NodeHash> = (0..child_count)
            .map(|_| {
                let chunk = MerkleNode::Chunk(ChunkNode::new_random());
                let chunk_hash = hash(&chunk.serialize());
                storage.insert(chunk_hash, chunk);
                chunk_hash
            })
            .collect();
        MerkleNode::Big(BigNode::new(child_hashes)?)
    } else {
        MerkleNode::Chunk(ChunkNode::new_random())
    };

    let file_hash = hash(&file_node.serialize());
    storage.insert(file_hash, file_node);
    Ok(file_hash)
}
/// Recursively generates a random directory, stores every node it creates in
/// `storage`, and returns the hash of the directory's top node.
///
/// `depth` is the depth of the caller; the node built here sits at `depth + 1`.
/// Nested directories are only created while that level is below `max_depth`.
///
/// * With probability 0.3 (and only when nesting is still allowed) the result
///   is a `BigDirectory` over 2 to 4 recursively generated directories.
/// * Otherwise it is a `Directory` with 1 to 5 slots. Each slot becomes a file
///   with probability 0.7, else a subdirectory named `dir_<level>`. When
///   nesting is no longer allowed the subdirectory slot is dropped, so the
///   directory may end up with fewer entries, possibly none.
///
/// # Errors
/// Propagates any message returned by the node constructors or by the
/// recursive calls.
fn generate_random_directory_node(
    depth: u32,
    max_depth: u32,
    storage: &mut HashMap<NodeHash, MerkleNode>,
) -> Result<NodeHash, String> {
    let mut random = rng();
    let level = depth + 1;
    let may_nest = level < max_depth;
    // The coin is tossed before the depth check, exactly once per call.
    let is_big_dir = random.random_bool(0.3) && may_nest;

    let dir_node = if is_big_dir {
        let child_count = random.random_range(MIN_BIG_CHILDREN..=MAX_BIG_CHILDREN.min(4));
        let mut child_hashes = Vec::with_capacity(child_count);
        for _ in 0..child_count {
            // Children of a BigDirectory must be Directory or BigDirectory nodes.
            child_hashes.push(generate_random_directory_node(level, max_depth, storage)?);
        }
        MerkleNode::BigDirectory(BigDirectoryNode::new(child_hashes)?)
    } else {
        let slot_count = random.random_range(1..=MAX_DIRECTORY_ENTRIES.min(5));
        let mut entries = Vec::with_capacity(slot_count);
        for _ in 0..slot_count {
            if random.random_bool(0.7) {
                // File entry (Chunk or Big) under a random name.
                let content_hash = generate_random_file_node(storage)?;
                entries.push(DirectoryEntry {
                    filename: generate_random_filename(),
                    content_hash,
                });
            } else if may_nest {
                // Subdirectory entry, named after the level it lives at.
                let content_hash = generate_random_directory_node(level, max_depth, storage)?;
                let label = format!("dir_{}", level);
                let mut filename = [0; 32];
                filename[..label.len()].copy_from_slice(label.as_bytes());
                entries.push(DirectoryEntry {
                    filename,
                    content_hash,
                });
            }
        }
        MerkleNode::Directory(DirectoryNode::new(entries)?)
    };

    let dir_hash = hash(&dir_node.serialize());
    storage.insert(dir_hash, dir_node);
    Ok(dir_hash)
}
/// Generates a random tree and returns the root hash together with the map
/// holding every generated node.
///
/// Generation starts from depth 0, so the root directory sits at level 1 and
/// nested directories are created only below `max_depth`.
///
/// # Errors
/// Propagates any message reported while building the nodes.
pub fn generate_random_tree(
    max_depth: u32,
) -> Result<(NodeHash, HashMap<NodeHash, MerkleNode>), String> {
    let mut nodes = HashMap::new();
    generate_random_directory_node(0, max_depth, &mut nodes).map(|root_hash| (root_hash, nodes))
}
pub fn generate_base_tree() -> MerkleTree {
let mut res = HashMap::new();
let bob_content = "where is bob".to_string().into_bytes();
let alice_content = "alice".to_string().into_bytes();
let oscar_content = "oscar is the opponent".to_string().into_bytes();
let mut children_nodes = Vec::new();
for i in 0..10 {
let mut i_nodes = Vec::new();
for j in 0..10 {
let node1 = MerkleNode::Chunk(ChunkNode::new(bob_content.clone()).unwrap());
let hash = hash(&node1.serialize());
i_nodes.push(hash);
res.insert(hash, node1);
}
let bignode = MerkleNode::Big(BigNode::new(i_nodes).unwrap());
let hashbig = hash(&bignode.serialize());
children_nodes.push(hashbig);
res.insert(hashbig, bignode);
}
let bignode = MerkleNode::Big(BigNode::new(children_nodes).unwrap());
let hashbig = hash(&bignode.serialize());
let node1 = MerkleNode::Chunk(ChunkNode::new(bob_content).unwrap());
let hash1 = hash(&node1.serialize());
let node2 = MerkleNode::Chunk(ChunkNode::new(alice_content).unwrap());
let hash2 = hash(&node2.serialize());
//res.insert(hash1, node1);
//res.insert(hash2, node2);
res.insert(hashbig, bignode);
let node3 = MerkleNode::Chunk(ChunkNode::new(oscar_content).unwrap());
let hash3 = hash(&node3.serialize());
//res.insert(hash3, node3);
let dir1 = MerkleNode::Directory(DirectoryNode {
entries: [DirectoryEntry {
filename: generate_random_filename(),
content_hash: hash3,
}]
.to_vec(),
});
let hash_dir1 = hash(&dir1.serialize());
//res.insert(hash_dir1, dir1);
let root = MerkleNode::Directory(DirectoryNode {
entries: [
DirectoryEntry {
filename: generate_random_filename(),
content_hash: hashbig,
},
/*DirectoryEntry {
filename: generate_random_filename(),
content_hash: hash2,
},
DirectoryEntry {
filename: generate_random_filename(),
content_hash: hash_dir1,
},*/
]
.to_vec(),
});
let root_hash = Sha256::digest(&root.serialize());
println!("root hash: {:?}", root_hash);
res.insert(root_hash.try_into().expect("incorrect size"), root);
MerkleTree::new(res, root_hash.try_into().expect("incorrect size"))
}
/// Writes the files listed by a directory node to disk.
///
/// * `tree` – node store used to resolve hashes.
/// * `node` – the `Directory` or `BigDirectory` node to export. Any other
///   variant only logs `invalid type of dir`.
/// * `path` – directory that receives the files. A trailing separator is
///   optional.
/// * `i` – nesting counter: 0 for the initial call, incremented (saturating)
///   for every `BigDirectory` level that is traversed.
///
/// For a `Directory`, each entry whose name is valid UTF-8 is opened (created
/// if needed) under `path` and filled through `big_or_chunk_to_file`. Entries
/// whose name is not valid UTF-8, and entries whose content node is absent
/// from `tree`, are skipped without a message (in the latter case the file is
/// still created). Every entry is treated as a file, whatever node type it
/// resolves to.
///
/// Entry names are restricted to a single plain path component: empty names,
/// `.`, `..`, absolute paths and names containing a separator are refused and
/// logged, so an entry can never be written outside `path`.
///
/// Failures are logged to stdout/stderr; nothing is returned to the caller.
pub fn node_to_file(tree: &MerkleTree, node: &MerkleNode, path: String, i: u8) {
    let base = std::path::Path::new(&path);
    match node {
        MerkleNode::Directory(dir) => {
            if i != 0 {
                // NOTE(review): this directory is created but the files below are
                // still written into `path`, not into it; confirm which of the
                // two is intended.
                let new_path = base.join(format!("fold_{}", i));
                match create_dir(&new_path) {
                    Ok(_) => println!("Directory created successfully!"),
                    Err(e) => println!("Failed to create directory: {}", e),
                }
            }
            for entry in &dir.entries {
                // Create one file per entry.
                let Ok(filename_str) = std::str::from_utf8(&entry.filename) else {
                    continue;
                };
                let cleaned = remove_null_bytes(filename_str);

                // Accept the name only if it is exactly one normal component.
                let mut components = std::path::Path::new(&cleaned).components();
                let file_name = match (components.next(), components.next()) {
                    (Some(std::path::Component::Normal(name)), None) => name,
                    _ => {
                        eprintln!("refusing directory entry with unsafe name: {:?}", cleaned);
                        continue;
                    }
                };

                let new_name = base.join(file_name);
                println!("new_name: {}", new_name.display());
                // NOTE(review): append mode means exporting twice into the same
                // directory duplicates file contents; confirm truncation is not wanted.
                let file = OpenOptions::new().append(true).create(true).open(&new_name);
                match file {
                    Ok(mut fileok) => {
                        if let Some(current) = tree.data.get(&entry.content_hash) {
                            big_or_chunk_to_file(tree, current, &mut fileok);
                        }
                    }
                    Err(e) => {
                        eprintln!("error creaation file: {}", e);
                    }
                }
            }
        }
        MerkleNode::BigDirectory(bigdir) => {
            for child_hash in &bigdir.children_hashes {
                if let Some(current) = tree.data.get(child_hash) {
                    // Saturate instead of overflowing on very deep nesting.
                    node_to_file(tree, current, path.clone(), i.saturating_add(1));
                }
            }
        }
        MerkleNode::Chunk(_) | MerkleNode::Big(_) => {
            eprintln!("invalid type of dir");
        }
    }
}
/// Returns a copy of `input` with every NUL character (`'\0'`) removed,
/// wherever it occurs in the string.
pub fn remove_null_bytes(input: &str) -> String {
    input.replace('\0', "")
}
pub fn big_or_chunk_to_file(tree: &MerkleTree, node: &MerkleNode, file: &mut File) {
match node {
MerkleNode::Big(big) => {
for entry in big.children_hashes.iter() {
if let Some(current) = tree.data.get(entry) {
big_or_chunk_to_file(tree, current, file);
}
}
}
MerkleNode::Chunk(chunk) => {
println!("wrote data");
let _ = file.write_all(&chunk.data);
}
_ => {
println!("invalid type of file");
}
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Builds the fixed demo tree with `generate_base_tree` and exports it with
/// `node_to_file` into `../Download/`, resolved against the current working directory.
///
/// The test contains no assertions: it only exercises the export path and prints
/// progress, so it passes whether or not the files are actually written.
#[test]
fn test_saving_tree() {
if let Ok(current_dir) = env::current_dir() {
println!("Current working directory: {:?}", current_dir);
}
println!("--------- tree test starts ------------");
// A failure here (for instance because the directory already exists) is only logged.
match create_dir("../Download/") {
Ok(_) => println!("Directory created successfully!"),
Err(e) => println!("Failed to create directory: {}", e),
}
let tree = generate_base_tree();
println!("--------- test tree created ------------");
if let Some(root_node) = tree.data.get(&tree.root) {
node_to_file(&tree, root_node, "../Download/".to_string(), 0);
}
}
// Disabled test: it refers to `CryptographicSignature`, `HandshakeMessage` and
// `sign_message`, none of which appear in the visible part of this file.
/*#[test]
fn signing_message() {
let username = String::from("gamixtreize");
let crypto_pair = CryptographicSignature::new(username.clone());
let handshake = HandshakeMessage::hello(0, 12, username);
let ser = handshake.serialize();
let signed_message = sign_message(&crypto_pair, &ser);
println!("unsigned_message: {:?}", ser);
println!("signed_message: {:?}", signed_message);
}*/
}