From 6d5096cf4fede6cba396a214a7a3ab0bc2bdf729 Mon Sep 17 00:00:00 2001 From: Joseph Ferano Date: Sat, 14 Jun 2025 10:59:43 +0700 Subject: [PATCH] Updating README --- Cargo.toml | 2 +- README.md | 135 +++++++++++++++++++------------------------ src/bin/copy.rs | 4 +- src/bin/data_node.rs | 4 +- src/bin/ls.rs | 4 +- src/bin/meta_data.rs | 4 +- 6 files changed, 69 insertions(+), 84 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6a7de88..eb7a4ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "a03" +name = "distributed-fs" version = "0.1.0" authors = ["Joseph Ferano "] diff --git a/README.md b/README.md index 697ffa2..505e118 100644 --- a/README.md +++ b/README.md @@ -1,94 +1,79 @@ -## Distributed File System in Rust for CCOM4017 +# Distributed File System in Rust -This suite of programs handles file copying over TCP with a client/server model. -It contains the following programs; -- copy -- ls -- data_node -- meta_data +A distributed file system implementation with client-server architecture, built as one of my early projects exploring Rust and distributed systems concepts. -`copy` and `ls` are clients that connect to the servers. `copy` sends file read and write requests -to the `meta_data` server, which uses a sqlite3 database to keep track of which nodes are connected, -as well as which files have been added. When a file is added, `meta_data` sends the list of available -`data_node` servers, `copy` then divides the file up by the amount of nodes, then proceeds to transfer -each chunk over 256 bytes at a time. `ls` simply prints out a list of the existing files on the -`meta_data` server. +## Architecture -The code uses `serde_json` to serialize and deserialize Rust structs to and from json. The clients and -servers then listen for incoming streams of data and parses them as json. As well as exchanging -metadata, this protocol also establishes the handshake to then transfer the raw file chunks. 
+The system consists of four main components: -`rusqlite` is used for managing the sqlite database. This allows SQL queries to be performed from -the rust code and manage the data base in a relatively type safe way. Unit tests in the `meta_data` -provide coverage of these SQL operations against an in-memory version +- **copy** - Client for reading and writing files to the distributed system +- **ls** - Client for listing files stored in the system +- **data_node** - Storage server that handles file chunks +- **meta_data** - Metadata server managing file locations and node registry -### WARNING: -If you're my professor, please do not generate a database with the default `createdb.py` -provided in the skeleton dfs. I have included a custom version of the file in the root of the project. -The reason being that I changed chunks to be integers rather than strings, in order to provide ordering -to the chunks when transferring. +## How It Works -##### Running +The metadata server uses SQLite to track connected data nodes and file locations. When storing a file, the client connects to the metadata server, which provides a list of available data nodes. The client then divides the file into chunks and distributes them across multiple data nodes, transferring 256 bytes at a time. -To run the `ls` provide an endpoint in the _`ip:port`_ format. _`ip`_ can be _"localhost"_, consider -using `./` to avoid a naming conflict with the GNU version of `ls` +The system uses JSON serialization via `serde_json` for communication between components. All network communication happens over TCP with a custom protocol for coordinating file operations. -```$ ./ls 127.0.0.1:6770``` +## Usage -The `meta_data` server takes an optional port, but will default to `8000` if none is specified. - -```$ meta_data 6710``` - -The data node takes two endpoints in the _`ip:port`_ and then a an optional path. 
The first endpoint -is the ip and port, both for binding to a TCP port and also to send itself to the `meta_data` server. -The second endpoint is the `meta_data` server's ip and port. The optional base path will default to the -working directory if none is provided. - -```$ data_node localhost:6771 127.0.0.1:8000 my_cool_data_node``` - -The `copy` takes two different parameter versions, depending on whether it's sending to or receiving -from the server. To send a file, provide the path to the local file, then the endpoint with the file -in the _`ip:host:filepath`_ format. The `data_node` will save the file relative to the base path -provided to it. - -```$ copy some_path/pug.jpg localhost:6700:another_path/pug.jpg``` - -To receive a file, simply invert the parameters - -```$ copy localhost:6700:another_path/pug.jpg some_path/pug.jpg``` - -##### Misc Scripts - -`shutdown_node` sends a json request with a provided port to shutdown a `data_node`. This ensures -that the node can terminate gracefully and unregister itself from the `meta_data` server. I was -advised against using Unix Signals, so opted for this instead. - -```$ shutdown_node 6770``` - -`sm` just does a _send message_ to a provide port. It can be used to test and inspect jsons. It can -for instance be used to mimic the `ls`; - -``` -$ sm '{"p_type":"ListFiles","json":null}' 8000 -Connection to localhost 8000 port [tcp/*] succeeded! -{"paths":["pug.jpg 21633 bytes"]}% +### Starting the Metadata Server +```bash +cargo run --bin meta_data [port] +# Defaults to port 8000 if not specified ``` -`clean_db` just recreates the `dfs.db` with the custom python script. 
+### Starting Data Nodes +```bash +cargo run --bin data_node <node_ip:port> <metadata_ip:port> [base_path] +# Example: cargo run --bin data_node localhost:6771 127.0.0.1:8000 ./data +``` -##### Building +### Listing Files +```bash +cargo run --bin ls <metadata_ip:port> +# Example: cargo run --bin ls 127.0.0.1:8000 +``` -If you wish to compile the code, install rust and cargo -[link](https://www.rust-lang.org/en-US/install.html) +### Copying Files -Then just run build +**Upload to distributed system:** +```bash +cargo run --bin copy <local_path> <metadata_ip:port:remote_path> +# Example: cargo run --bin copy ./document.pdf localhost:8000:docs/document.pdf +``` -```cargo build``` +**Download from distributed system:** +```bash +cargo run --bin copy <metadata_ip:port:remote_path> <local_path> +# Example: cargo run --bin copy localhost:8000:docs/document.pdf ./document.pdf +``` -If you wish to run a specific algorithm; +## Database Setup -```cargo run --bin copy ``` +Run the included Python script to initialize the SQLite database: +```bash +python3 createdb.py +``` -##### Testing +The database schema includes tables for file metadata (inodes), data node registry, and block location tracking. 
-`cargo test --bin meta_data` +## Building + +```bash +cargo build +``` + +## Testing + +```bash +cargo test --bin meta_data +``` + +## Dependencies + +- **rusqlite** - SQLite database interface +- **serde** - Serialization framework +- **serde_json** - JSON support for network protocol \ No newline at end of file diff --git a/src/bin/copy.rs b/src/bin/copy.rs index dcddefb..3fd8bd4 100644 --- a/src/bin/copy.rs +++ b/src/bin/copy.rs @@ -1,9 +1,9 @@ -extern crate a03; +extern crate distributed_fs; extern crate serde; extern crate serde_json; extern crate serde_derive; -use a03::*; +use distributed_fs::*; use std::net::{TcpStream, Shutdown}; use std::fs::File; use std::fs; diff --git a/src/bin/data_node.rs b/src/bin/data_node.rs index 21cc5cd..9543308 100644 --- a/src/bin/data_node.rs +++ b/src/bin/data_node.rs @@ -1,9 +1,9 @@ -extern crate a03; +extern crate distributed_fs; extern crate serde; extern crate serde_json; extern crate serde_derive; -use a03::*; +use distributed_fs::*; use std::net::{TcpStream, Shutdown}; use std::io::{Write, BufWriter}; use std::net::TcpListener; diff --git a/src/bin/ls.rs b/src/bin/ls.rs index 4038ded..e0dfd63 100644 --- a/src/bin/ls.rs +++ b/src/bin/ls.rs @@ -1,9 +1,9 @@ -extern crate a03; +extern crate distributed_fs; extern crate serde; extern crate serde_json; extern crate serde_derive; -use a03::*; +use distributed_fs::*; use std::net::{TcpStream, Shutdown }; use std::io::Write; diff --git a/src/bin/meta_data.rs b/src/bin/meta_data.rs index f1cd952..8fa0726 100644 --- a/src/bin/meta_data.rs +++ b/src/bin/meta_data.rs @@ -1,10 +1,10 @@ -extern crate a03; +extern crate distributed_fs; extern crate rusqlite; extern crate serde; extern crate serde_json; extern crate serde_derive; -use a03::*; +use distributed_fs::*; use rusqlite::types::ToSql; use rusqlite::{Connection, NO_PARAMS}; use std::borrow::Cow;