diff --git a/README.md b/README.md
index 6acedfb..b3b0d30 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,61 @@
-# data-processing-cli
-Data Processing Toolkit — an interactive command-line application
+# Data Processing CLI
+
+Interactive command-line tool for file navigation and data processing.
+
+## Requirements
+
+- Node.js 24.10.0+
+
+## Setup
+```
+npm run start
+```
+
+## Commands
+
+### Navigation
+
+| Command | Description |
+|--------|-------------|
+| `up` | Move up one directory level |
+| `cd <path>` | Navigate to directory (relative or absolute) |
+| `ls` | List files and folders in current directory |
+| `.exit` | Exit the application |
+
+### Data Processing
+
+#### Count lines, words and characters
+```
+count --input file.txt
+```
+
+#### Calculate file hash
+```
+hash --input file.txt
+hash --input file.txt --algorithm md5
+hash --input file.txt --algorithm sha512
+hash --input file.txt --save
+```
+Supported algorithms: `sha256` (default), `md5`, `sha512`
+
+#### Compare file hash
+```
+hash-compare --input file.txt --hash file.txt.sha256
+hash-compare --input file.txt --hash file.txt.md5 --algorithm md5
+```
+
+#### Convert CSV to JSON
+```
+csv-to-json --input data.csv --output data.json
+```
+
+#### Convert JSON to CSV
+```
+json-to-csv --input data.json --output data.csv
+```
+
+## Notes
+
+- All file paths can be relative (to current working directory) or absolute
+- `count`, `hash`, `hash-compare` and `csv-to-json` read input files through the Streams API; `json-to-csv` parses its input in memory and streams the output
+- Working directory starts at user home directory
\ No newline at end of file
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..d04b2d0
--- /dev/null
+++ b/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "data-processing-cli",
+  "version": "1.0.0",
+  "description": "Data Processing Toolkit — an interactive command-line application",
+  "engines": {
+    "node": ">=24.10.0",
+    "npm": ">=10.9.2"
+  },
+  "scripts": {
+    "start": "node src/main.js",
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/maiano/data-processing-cli.git"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "MIT",
+  "type": "module",
+  "bugs": {
+    "url": "https://github.com/maiano/data-processing-cli/issues"
+  },
+  "homepage": "https://github.com/maiano/data-processing-cli#readme"
+}
diff --git a/src/commands/count.js b/src/commands/count.js
new file mode 100644
index 0000000..3e273d1
--- /dev/null
+++ b/src/commands/count.js
@@ -0,0 +1,26 @@
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+import { state } from "../core/state.js";
+import { countFileStats } from "../services/countService.js";
+
+/**
+ * `count` command: prints line, word and character totals for a file.
+ *
+ * @param {{ input?: string }} args - Parsed arguments; `--input` is required.
+ * @throws {Error} INVALID_INPUT when `--input` is missing.
+ */
+export async function count(args) {
+  const input = args.input;
+
+  if (!input) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const filePath = resolvePath(state.cwd, input);
+
+  const stats = await countFileStats(filePath);
+
+  console.log(`Lines: ${stats.lines}`);
+  console.log(`Words: ${stats.words}`);
+  console.log(`Characters: ${stats.characters}`);
+}
diff --git a/src/commands/csvToJson.js b/src/commands/csvToJson.js
new file mode 100644
index 0000000..31467df
--- /dev/null
+++ b/src/commands/csvToJson.js
@@ -0,0 +1,23 @@
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+import { state } from "../core/state.js";
+import { csvToJsonStream } from "../services/csvService.js";
+
+/**
+ * `csv-to-json` command: converts a CSV file into a JSON array file.
+ *
+ * @param {{ input?: string, output?: string }} args - Both flags are required.
+ * @throws {Error} INVALID_INPUT when `--input` or `--output` is missing.
+ */
+export async function csvToJson(args) {
+  const { input, output } = args;
+
+  if (!input || !output) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const inputPath = resolvePath(state.cwd, input);
+  const outputPath = resolvePath(state.cwd, output);
+
+  await csvToJsonStream(inputPath, outputPath);
+}
diff --git a/src/commands/hash.js b/src/commands/hash.js
new file mode 100644
index 0000000..2244ed4
--- /dev/null
+++ b/src/commands/hash.js
@@ -0,0 +1,43 @@
+import fs from "fs/promises";
+import path from "path";
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+import { state } from "../core/state.js";
+import { hashFile } from "../services/hashService.js";
+
+/**
+ * `hash` command: prints a file digest and optionally saves it beside the file.
+ *
+ * @param {{ input?: string, algorithm?: string, save?: boolean }} args
+ *   `--input` is required; `--algorithm` defaults to "sha256"; `--save` writes
+ *   the digest to `<file>.<algorithm>` in the same directory.
+ * @throws {Error} INVALID_INPUT when `--input` is missing, OPERATION_FAILED
+ *   when the digest file cannot be written.
+ */
+export async function hash(args) {
+  const input = args.input;
+  const algorithm = args.algorithm || "sha256";
+
+  if (!input) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const filePath = resolvePath(state.cwd, input);
+
+  const digest = await hashFile(filePath, algorithm);
+
+  console.log(`${algorithm}: ${digest}`);
+
+  if (args.save) {
+    const fileName = path.basename(filePath);
+    const dir = path.dirname(filePath);
+
+    const hashPath = path.join(dir, `${fileName}.${algorithm}`);
+
+    try {
+      await fs.writeFile(hashPath, digest);
+    } catch {
+      throw new Error(ERRORS.OPERATION_FAILED);
+    }
+  }
+}
diff --git a/src/commands/hashCompare.js b/src/commands/hashCompare.js
new file mode 100644
index 0000000..9056b56
--- /dev/null
+++ b/src/commands/hashCompare.js
@@ -0,0 +1,50 @@
+import fs from "fs/promises";
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+import { state } from "../core/state.js";
+import { hashFile } from "../services/hashService.js";
+
+/**
+ * `hash-compare` command: prints "OK" when a file's digest matches the digest
+ * stored in a hash file, "MISMATCH" otherwise (case-insensitive comparison).
+ *
+ * @param {{ input?: string, hash?: string, algorithm?: string }} args
+ *   `--input` and `--hash` are required; `--algorithm` defaults to "sha256".
+ * @throws {Error} INVALID_INPUT when a required flag is missing,
+ *   OPERATION_FAILED when either file cannot be read or hashed.
+ */
+export async function hashCompare(args) {
+  const input = args.input;
+  const hashPathArg = args.hash;
+  const algorithm = args.algorithm || "sha256";
+
+  if (!input || !hashPathArg) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const filePath = resolvePath(state.cwd, input);
+  const hashFilePath = resolvePath(state.cwd, hashPathArg);
+
+  let actualHash;
+  try {
+    actualHash = await hashFile(filePath, algorithm);
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  let expectedHash;
+
+  try {
+    expectedHash = await fs.readFile(hashFilePath, "utf8");
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  expectedHash = expectedHash.trim().toLowerCase();
+
+  if (actualHash.toLowerCase() === expectedHash) {
+    console.log("OK");
+  } else {
+    console.log("MISMATCH");
+  }
+}
diff --git a/src/commands/jsonToCsv.js b/src/commands/jsonToCsv.js
new file mode 100644
index 0000000..d184b57
--- /dev/null
+++ b/src/commands/jsonToCsv.js
@@ -0,0 +1,23 @@
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+import { state } from "../core/state.js";
+import { jsonToCsvStream } from "../services/csvService.js";
+
+/**
+ * `json-to-csv` command: converts a JSON array file into a CSV file.
+ *
+ * @param {{ input?: string, output?: string }} args - Both flags are required.
+ * @throws {Error} INVALID_INPUT when `--input` or `--output` is missing.
+ */
+export async function jsonToCsv(args) {
+  const { input, output } = args;
+
+  if (!input || !output) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const inputPath = resolvePath(state.cwd, input);
+  const outputPath = resolvePath(state.cwd, output);
+
+  await jsonToCsvStream(inputPath, outputPath);
+}
diff --git a/src/core/repl.js b/src/core/repl.js
new file mode 100644
index 0000000..036f411
--- /dev/null
+++ b/src/core/repl.js
@@ -0,0 +1,43 @@
+import readline from "readline";
+import { runCommand } from "./router.js";
+import { state } from "./state.js";
+
+/**
+ * Starts the interactive prompt: each line is dispatched to the command
+ * router; `.exit` or Ctrl+C prints a farewell and terminates the process.
+ */
+export function run() {
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+    prompt: "> ",
+  });
+
+  rl.prompt();
+
+  rl.on("line", async (line) => {
+    const input = line.trim();
+
+    if (input === ".exit") {
+      exit(rl);
+      return;
+    }
+
+    const success = await runCommand(input);
+    if (success) console.log(`You are currently in ${state.cwd}`);
+    rl.prompt();
+  });
+
+  rl.on("SIGINT", () => exit(rl));
+}
+
+/**
+ * Prints the farewell message, closes the interface and exits with code 0.
+ *
+ * @param {readline.Interface} rl - Interface created by `run`.
+ */
+function exit(rl) {
+  console.log("Thank you for using Data Processing CLI!");
+  rl.close();
+  process.exit(0);
+}
diff --git a/src/core/router.js b/src/core/router.js
new file mode 100644
index 0000000..e0b2e20
--- /dev/null
+++ b/src/core/router.js
@@ -0,0 +1,43 @@
+import { parseArgs } from "../utils/argParser.js";
+import { ERRORS } from "../utils/errors.js";
+
+// A Map rather than a plain object: names inherited from Object.prototype
+// ("constructor", "toString", ...) must not resolve to callable handlers.
+const commands = new Map();
+
+/**
+ * Registers a handler under a command name.
+ *
+ * @param {string} name - Command keyword typed by the user.
+ * @param {(args: object) => unknown} handler - Receives the parsed arguments.
+ */
+export function registerCommand(name, handler) {
+  commands.set(name, handler);
+}
+
+/**
+ * Parses one input line, runs the matching handler and reports failures.
+ *
+ * @param {string} input - Raw line entered by the user.
+ * @returns {Promise<boolean>} true when the command completed, else false.
+ */
+export async function runCommand(input) {
+  try {
+    // Split on whitespace runs so repeated spaces do not yield empty tokens.
+    const [command, ...args] = input.trim().split(/\s+/);
+
+    const handler = commands.get(command);
+
+    if (!handler) {
+      console.log("Invalid input");
+      return false;
+    }
+
+    await handler(parseArgs(args));
+    return true;
+  } catch (e) {
+    if (e?.message === ERRORS.INVALID_INPUT) console.log("Invalid input");
+    else console.log("Operation failed");
+    return false;
+  }
+}
diff --git a/src/core/state.js b/src/core/state.js
new file mode 100644
index 0000000..9812969
--- /dev/null
+++ b/src/core/state.js
@@ -0,0 +1,6 @@
+import os from "os";
+
+// Shared mutable session state; `cwd` starts at the user's home directory.
+export const state = {
+  cwd: os.homedir(),
+};
diff --git a/src/main.js b/src/main.js
new file mode 100644
index 0000000..5b03d98
--- /dev/null
+++ b/src/main.js
@@ -0,0 +1,28 @@
+import { state } from "./core/state.js";
+import { run } from "./core/repl.js";
+import { registerCommand } from "./core/router.js";
+
+import { up } from "./navigation/up.js";
+import { cd } from "./navigation/cd.js";
+import { ls } from "./navigation/ls.js";
+
+import { count } from "./commands/count.js";
+import { hash } from "./commands/hash.js";
+import { hashCompare } from "./commands/hashCompare.js";
+import { csvToJson } from "./commands/csvToJson.js";
+import { jsonToCsv } from "./commands/jsonToCsv.js";
+
+console.log("Welcome to Data Processing CLI!");
+console.log(`You are currently in ${state.cwd}`);
+
+registerCommand("up", up);
+registerCommand("cd", cd);
+registerCommand("ls", ls);
+
+registerCommand("count", count);
+registerCommand("hash", hash);
+registerCommand("hash-compare", hashCompare);
+registerCommand("csv-to-json", csvToJson);
+registerCommand("json-to-csv", jsonToCsv);
+
+run();
diff --git a/src/navigation/cd.js b/src/navigation/cd.js
new file mode 100644
index 0000000..8b96c97
--- /dev/null
+++ b/src/navigation/cd.js
@@ -0,0 +1,35 @@
+import fs from "fs/promises";
+import { state } from "../core/state.js";
+import { resolvePath } from "../utils/pathResolver.js";
+import { ERRORS } from "../utils/errors.js";
+
+/**
+ * `cd` command: changes the working directory to an existing directory.
+ *
+ * @param {{ _: string[] }} args - First positional argument is the target path.
+ * @throws {Error} INVALID_INPUT when no target is given, OPERATION_FAILED when
+ *   the target does not exist or is not a directory.
+ */
+export async function cd(args) {
+  const target = args._[0];
+
+  if (!target) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+
+  const resolved = resolvePath(state.cwd, target);
+
+  let stat;
+
+  try {
+    stat = await fs.stat(resolved);
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  if (!stat.isDirectory()) {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  state.cwd = resolved;
+}
diff --git a/src/navigation/ls.js b/src/navigation/ls.js
new file mode 100644
index 0000000..92656f3
--- /dev/null
+++ b/src/navigation/ls.js
@@ -0,0 +1,60 @@
+import fs from "fs/promises";
+import path from "path";
+import { state } from "../core/state.js";
+import { ERRORS } from "../utils/errors.js";
+
+/**
+ * Tells whether a directory entry should be listed as a folder.
+ * Symbolic links are followed; a link that cannot be resolved counts as a file
+ * so that one broken entry does not fail the whole listing.
+ *
+ * @param {import("fs").Dirent} entry - Entry read from `dir`.
+ * @param {string} dir - Directory that contains the entry.
+ * @returns {Promise<boolean>}
+ */
+async function isFolder(entry, dir) {
+  if (!entry.isSymbolicLink()) return entry.isDirectory();
+
+  try {
+    const stat = await fs.stat(path.join(dir, entry.name));
+    return stat.isDirectory();
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * `ls` command: prints the entries of the working directory, folders first,
+ * each group sorted by name, as `<name> [folder|file]`.
+ *
+ * @throws {Error} OPERATION_FAILED when the directory cannot be read.
+ */
+export async function ls() {
+  const dir = state.cwd;
+
+  let entries;
+  try {
+    entries = await fs.readdir(dir, { withFileTypes: true });
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  const items = await Promise.all(
+    entries.map(async (entry) => ({
+      name: entry.name,
+      type: (await isFolder(entry, dir)) ? "folder" : "file",
+    })),
+  );
+
+  items.sort((a, b) => {
+    if (a.type !== b.type) {
+      return a.type === "folder" ? -1 : 1;
+    }
+
+    return a.name.localeCompare(b.name);
+  });
+
+  for (const item of items) {
+    console.log(`${item.name} [${item.type}]`);
+  }
+}
diff --git a/src/navigation/up.js b/src/navigation/up.js
new file mode 100644
index 0000000..8bd2f5b
--- /dev/null
+++ b/src/navigation/up.js
@@ -0,0 +1,14 @@
+import path from "path";
+import { state } from "../core/state.js";
+
+/**
+ * `up` command: moves the working directory one level up; at the filesystem
+ * root `path.dirname` returns the same path and nothing changes.
+ */
+export async function up() {
+  const parent = path.dirname(state.cwd);
+
+  if (parent !== state.cwd) {
+    state.cwd = parent;
+  }
+}
diff --git a/src/services/countService.js b/src/services/countService.js
new file mode 100644
index 0000000..7c14418
--- /dev/null
+++ b/src/services/countService.js
@@ -0,0 +1,71 @@
+import { pipeline } from "stream/promises";
+import { Transform } from "stream";
+import fs from "fs";
+import { ERRORS } from "../utils/errors.js";
+
+const WHITESPACE = /\s/;
+
+/**
+ * Streams a UTF-8 file and counts its lines, words and characters.
+ * Characters are Unicode code points, words are runs of non-whitespace, and a
+ * final line without a trailing newline is still counted.
+ *
+ * @param {string} filePath - Absolute path of the file to read.
+ * @returns {Promise<{ lines: number, words: number, characters: number }>}
+ * @throws {Error} OPERATION_FAILED when the file cannot be read.
+ */
+export async function countFileStats(filePath) {
+  let inWord = false;
+  let lastChar = null;
+  const stats = {
+    lines: 0,
+    words: 0,
+    characters: 0,
+  };
+
+  const counter = new Transform({
+    transform(chunk, _enc, done) {
+      const str = chunk.toString("utf8");
+
+      for (const char of str) {
+        stats.characters++;
+        lastChar = char;
+
+        if (char === "\n") {
+          stats.lines++;
+        }
+
+        if (WHITESPACE.test(char)) {
+          inWord = false;
+        } else {
+          if (!inWord) {
+            stats.words++;
+            inWord = true;
+          }
+        }
+      }
+
+      done();
+    },
+  });
+
+  try {
+    // The read stream decodes UTF-8 itself, so no chunk ends mid-character.
+    await pipeline(
+      fs.createReadStream(filePath, { encoding: "utf8" }),
+      counter,
+    );
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  if (stats.characters > 0 && lastChar !== "\n") {
+    stats.lines++;
+  }
+
+  return {
+    lines: stats.lines,
+    words: stats.words,
+    characters: stats.characters,
+  };
+}
diff --git a/src/services/csvService.js b/src/services/csvService.js
new file mode 100644
index 0000000..e2d712a
--- /dev/null
+++ b/src/services/csvService.js
@@ -0,0 +1,152 @@
+import fs from "fs";
+import fsp from "fs/promises";
+import { Readable, Transform } from "stream";
+import { pipeline } from "stream/promises";
+import { StringDecoder } from "string_decoder";
+import { ERRORS } from "../utils/errors.js";
+
+/**
+ * Maps one CSV line onto the header names; missing cells become "".
+ * The line is split on every comma, so quoted fields are not supported.
+ *
+ * @param {string[]} headers - Column names from the first CSV line.
+ * @param {string} line - One data line without its line terminator.
+ * @returns {Record<string, string>}
+ */
+function buildObject(headers, line) {
+  const values = line.split(",");
+  const obj = {};
+  headers.forEach((h, i) => {
+    obj[h] = values[i] ?? "";
+  });
+  return obj;
+}
+
+/**
+ * Streams a CSV file into a JSON array of objects keyed by the header line.
+ * Blank lines are skipped and a file without data rows produces `[]`.
+ *
+ * @param {string} inputPath - Absolute path of the CSV file.
+ * @param {string} outputPath - Absolute path of the JSON file to write.
+ * @throws {Error} OPERATION_FAILED when reading or writing fails.
+ */
+export async function csvToJsonStream(inputPath, outputPath) {
+  let headers = null;
+  let isFirst = true;
+  let buffer = "";
+  // Keeps the bytes of a multi-byte UTF-8 character that is split across two
+  // chunks until the rest arrives, instead of decoding them to U+FFFD.
+  const decoder = new StringDecoder("utf8");
+
+  const transform = new Transform({
+    transform(chunk, _enc, callback) {
+      buffer += decoder.write(chunk);
+
+      const lines = buffer.split("\n");
+      buffer = lines.pop();
+
+      for (const line of lines) {
+        if (!line.trim()) continue;
+
+        if (!headers) {
+          headers = line.trimEnd().split(",");
+          continue;
+        }
+
+        const json = JSON.stringify(buildObject(headers, line.trimEnd()));
+        this.push(isFirst ? "[\n" + json : ",\n" + json);
+        isFirst = false;
+      }
+
+      callback();
+    },
+
+    flush(callback) {
+      buffer += decoder.end();
+
+      // Last line of a file that does not end with a newline.
+      if (buffer.trim() && headers) {
+        const json = JSON.stringify(buildObject(headers, buffer.trimEnd()));
+        this.push(isFirst ? "[\n" + json : ",\n" + json);
+        isFirst = false;
+      }
+
+      this.push(isFirst ? "[]" : "\n]");
+      callback();
+    },
+  });
+
+  try {
+    await pipeline(
+      fs.createReadStream(inputPath),
+      transform,
+      fs.createWriteStream(outputPath),
+    );
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+}
+
+/**
+ * Yields the CSV header line followed by one line per row.
+ * Missing or nullish values are written as empty cells.
+ *
+ * @param {string[]} headers - Column names, in output order.
+ * @param {object[]} data - Rows to serialize.
+ */
+function* generateLines(headers, data) {
+  yield headers.join(",") + "\n";
+  for (const row of data) {
+    yield headers.map((h) => String(row[h] ?? "")).join(",") + "\n";
+  }
+}
+
+/**
+ * Converts a JSON file holding an array of objects into a CSV file.
+ * Columns are the keys of the first element; an empty array produces an empty
+ * file. The input is parsed in memory, the output is streamed.
+ *
+ * @param {string} inputPath - Absolute path of the JSON file.
+ * @param {string} outputPath - Absolute path of the CSV file to write.
+ * @throws {Error} OPERATION_FAILED when the input is unreadable, is not a JSON
+ *   array of objects, or the output cannot be written.
+ */
+export async function jsonToCsvStream(inputPath, outputPath) {
+  let data;
+
+  try {
+    const content = await fsp.readFile(inputPath, "utf8");
+    data = JSON.parse(content);
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  if (!Array.isArray(data)) {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  // Rows such as `null` or `42` have no columns to export.
+  if (data.some((row) => row === null || typeof row !== "object")) {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  if (data.length === 0) {
+    try {
+      await fsp.writeFile(outputPath, "");
+    } catch {
+      throw new Error(ERRORS.OPERATION_FAILED);
+    }
+    return;
+  }
+
+  const headers = Object.keys(data[0]);
+
+  try {
+    await pipeline(
+      Readable.from(generateLines(headers, data)),
+      fs.createWriteStream(outputPath),
+    );
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+}
diff --git a/src/services/hashService.js b/src/services/hashService.js
new file mode 100644
index 0000000..0af366a
--- /dev/null
+++ b/src/services/hashService.js
@@ -0,0 +1,31 @@
+import fs from "fs";
+import { pipeline } from "stream/promises";
+import crypto from "crypto";
+import { Transform } from "stream";
+import { ERRORS } from "../utils/errors.js";
+
+const SUPPORTED = new Set(["sha256", "md5", "sha512"]);
+
+/**
+ * Streams a file through a hash and returns the hex digest.
+ *
+ * @param {string} filePath - Absolute path of the file to hash.
+ * @param {string} [algorithm="sha256"] - One of "sha256", "md5", "sha512".
+ * @returns {Promise<string>} Hex-encoded digest.
+ * @throws {Error} OPERATION_FAILED for an unsupported algorithm or read error.
+ */
+export async function hashFile(filePath, algorithm = "sha256") {
+  if (!SUPPORTED.has(algorithm)) {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  const hash = crypto.createHash(algorithm);
+
+  try {
+    await pipeline(fs.createReadStream(filePath), hash);
+  } catch {
+    throw new Error(ERRORS.OPERATION_FAILED);
+  }
+
+  return hash.digest("hex");
+}
diff --git a/src/utils/argParser.js b/src/utils/argParser.js
new file mode 100644
index 0000000..5fb4a53
--- /dev/null
+++ b/src/utils/argParser.js
@@ -0,0 +1,38 @@
+/**
+ * Turns command tokens into an options object.
+ * `--key value` becomes `{ key: "value" }`, a `--key` followed by another flag
+ * or by nothing becomes `{ key: true }`, and every other token is collected in
+ * order under `_`. A bare `--` and the reserved `--_` are ignored.
+ *
+ * @param {string[]} tokens - Whitespace-separated tokens after the command name.
+ * @returns {{ _: string[] } & Record<string, string | boolean>}
+ */
+export function parseArgs(tokens) {
+  const result = { _: [] };
+
+  for (let i = 0; i < tokens.length; i++) {
+    const token = tokens[i];
+
+    if (token.startsWith("--")) {
+      const key = token.slice(2);
+
+      // "_" is reserved for positionals; "--" alone carries no key.
+      if (!key || key === "_") continue;
+
+      const next = tokens[i + 1];
+
+      if (next === undefined || next.startsWith("--")) {
+        result[key] = true;
+      } else {
+        result[key] = next;
+        i++;
+      }
+
+      continue;
+    }
+
+    result._.push(token);
+  }
+
+  return result;
+}
diff --git a/src/utils/errors.js b/src/utils/errors.js
new file mode 100644
index 0000000..b8001dd
--- /dev/null
+++ b/src/utils/errors.js
@@ -0,0 +1,6 @@
+// Error messages thrown by commands; the router maps INVALID_INPUT to
+// "Invalid input" and everything else to "Operation failed".
+export const ERRORS = {
+  INVALID_INPUT: "INVALID_INPUT",
+  OPERATION_FAILED: "OPERATION_FAILED",
+};
diff --git a/src/utils/pathResolver.js b/src/utils/pathResolver.js
new file mode 100644
index 0000000..2ae8a7e
--- /dev/null
+++ b/src/utils/pathResolver.js
@@ -0,0 +1,20 @@
+import path from "path";
+import { ERRORS } from "./errors.js";
+
+/**
+ * Resolves a user-supplied path against the current working directory.
+ *
+ * @param {string} cwd - Absolute directory that relative paths are based on.
+ * @param {string} inputPath - Relative or absolute path entered by the user.
+ * @returns {string} Normalized absolute path.
+ * @throws {Error} INVALID_INPUT when `inputPath` is not a non-empty string.
+ */
+export function resolvePath(cwd, inputPath) {
+  // A flag given without a value is parsed as `true`; reject it here rather
+  // than letting `path` throw a TypeError that is reported as a failure.
+  if (typeof inputPath !== "string" || !inputPath) {
+    throw new Error(ERRORS.INVALID_INPUT);
+  }
+  if (path.isAbsolute(inputPath)) return path.normalize(inputPath);
+  return path.resolve(cwd, inputPath);
+}