diff --git a/Core/Sources/Core/UserDictionary/UserDictionaryTextFormat.swift b/Core/Sources/Core/UserDictionary/UserDictionaryTextFormat.swift
new file mode 100644
index 00000000..21c11c54
--- /dev/null
+++ b/Core/Sources/Core/UserDictionary/UserDictionaryTextFormat.swift
@@ -0,0 +1,245 @@
+import Foundation
+
+/// Text formats of user dictionaries that can be imported.
+public enum UserDictionaryTextFormat: String, CaseIterable, Identifiable, Sendable {
+    /// Detect the format from the file contents.
+    case automatic
+    case googleJapaneseInput
+    case msime
+    case atok
+    case kotoeri
+
+    public var id: String {
+        rawValue
+    }
+
+    /// Human-readable name of the format.
+    public var localizedName: String {
+        switch self {
+        case .automatic:
+            "自動判定"
+        case .googleJapaneseInput:
+            "Google日本語入力 / Mozc"
+        case .msime:
+            "Microsoft IME"
+        case .atok:
+            "ATOK"
+        case .kotoeri:
+            "ことえり"
+        }
+    }
+}
+
+/// Outcome of parsing a user dictionary text file.
+public struct UserDictionaryImportResult: Sendable {
+    /// Name taken from a `!User Dictionary Name:` header line, if one was present.
+    public var dictionaryName: String?
+    /// Entries that were parsed successfully, in file order.
+    public var entries: [Config.UserDictionaryEntry]
+    /// Number of non-comment lines that could not be turned into an entry.
+    public var skippedLineCount: Int
+
+    public init(dictionaryName: String?, entries: [Config.UserDictionaryEntry], skippedLineCount: Int) {
+        self.dictionaryName = dictionaryName
+        self.entries = entries
+        self.skippedLineCount = skippedLineCount
+    }
+}
+
+/// Converts user dictionaries between raw text files and `Config.UserDictionaryEntry` values.
+public enum UserDictionaryTextCodec {
+    /// Decodes raw file contents into text.
+    ///
+    /// A UTF-8 or UTF-16 byte order mark selects the encoding and is removed. Without one,
+    /// UTF-8, Shift_JIS and UTF-16 are tried in that order.
+    /// - Returns: The decoded text, or `nil` when no encoding succeeds.
+    public static func decodeText(from data: Data) -> String? {
+        if data.starts(with: [0xEF, 0xBB, 0xBF]) {
+            return String(data: Data(data.dropFirst(3)), encoding: .utf8)
+        }
+        if data.starts(with: [0xFF, 0xFE]) {
+            return String(data: Data(data.dropFirst(2)), encoding: .utf16LittleEndian)
+        }
+        if data.starts(with: [0xFE, 0xFF]) {
+            return String(data: Data(data.dropFirst(2)), encoding: .utf16BigEndian)
+        }
+        return String(data: data, encoding: .utf8)
+            ?? String(data: data, encoding: .shiftJIS)
+            ?? String(data: data, encoding: .utf16)
+    }
+
+    /// Parses dictionary text into entries.
+    ///
+    /// Blank lines, comment/header lines and the dictionary name header are ignored. Lines
+    /// with fewer than three columns, or with an empty reading or word, are counted in
+    /// `skippedLineCount`. Columns are read as reading, word, part of speech and an optional
+    /// fourth column used as the hint.
+    /// - Parameters:
+    ///   - text: Decoded file contents. A leading U+FEFF byte order mark is ignored.
+    ///   - requestedFormat: Format to parse as; `.automatic` guesses it from the lines.
+    public static func importEntries(
+        from text: String,
+        format requestedFormat: UserDictionaryTextFormat = .automatic
+    ) -> UserDictionaryImportResult {
+        // U+FEFF is not whitespace, so trimming would leave it attached to the first
+        // header or reading when the caller decoded the file without `decodeText`.
+        let content = text.first == "\u{FEFF}" ? String(text.dropFirst()) : text
+        let lines = content
+            .replacingOccurrences(of: "\r\n", with: "\n")
+            .replacingOccurrences(of: "\r", with: "\n")
+            .components(separatedBy: "\n")
+        let format = requestedFormat == .automatic ? guessFormat(from: lines) : requestedFormat
+        var dictionaryName: String?
+        var entries: [Config.UserDictionaryEntry] = []
+        var skippedLineCount = 0
+
+        for line in lines {
+            let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
+            guard !trimmed.isEmpty else {
+                continue
+            }
+            if let name = dictionaryNameHeader(in: trimmed) {
+                dictionaryName = name
+                continue
+            }
+            guard !isCommentOrHeader(trimmed, format: format) else {
+                continue
+            }
+
+            let columns: [String]
+            switch format {
+            case .kotoeri:
+                columns = splitCSV(trimmed)
+            case .automatic, .googleJapaneseInput, .msime, .atok:
+                // Split the untrimmed line: trimming would drop leading/trailing tabs and shift columns.
+                columns = line.components(separatedBy: "\t")
+            }
+
+            guard columns.count >= 3 else {
+                skippedLineCount += 1
+                continue
+            }
+            let reading = columns[0].trimmingCharacters(in: .whitespacesAndNewlines)
+            let word = columns[1].trimmingCharacters(in: .whitespacesAndNewlines)
+            let hint = columns.count >= 4 ? normalizedHint(columns[3]) : nil
+            guard !reading.isEmpty, !word.isEmpty else {
+                skippedLineCount += 1
+                continue
+            }
+            entries.append(.init(word: word, reading: reading, hint: hint))
+        }
+
+        return .init(dictionaryName: dictionaryName, entries: entries, skippedLineCount: skippedLineCount)
+    }
+
+    /// Serializes entries as tab-separated text with `!`-prefixed header lines.
+    ///
+    /// Every entry is written with the part of speech `名詞`; tabs and line breaks inside
+    /// fields are replaced by spaces. The result ends with a newline.
+    public static func exportEntries(_ entries: [Config.UserDictionaryEntry], dictionaryName: String) -> String {
+        let header = [
+            "!Dictionary File",
+            "!Version: 1.0",
+            "!User Dictionary Name: \(dictionaryName)"
+        ]
+        let body = entries.map { entry in
+            [
+                sanitizeField(entry.reading),
+                sanitizeField(entry.word),
+                "名詞",
+                sanitizeField(entry.hint ?? "")
+            ].joined(separator: "\t")
+        }
+        return (header + body).joined(separator: "\n") + "\n"
+    }
+
+    /// Returns the name from a `!User Dictionary Name:` line, or `nil` for other lines and empty names.
+    private static func dictionaryNameHeader(in line: String) -> String? {
+        let prefix = "!User Dictionary Name:"
+        guard line.hasPrefix(prefix) else {
+            return nil
+        }
+        let name = line.dropFirst(prefix.count).trimmingCharacters(in: .whitespacesAndNewlines)
+        return name.isEmpty ? nil : name
+    }
+
+    /// Whether `line` (already trimmed) starts with the comment/header marker of `format`.
+    private static func isCommentOrHeader(_ line: String, format: UserDictionaryTextFormat) -> Bool {
+        switch format {
+        case .msime, .atok:
+            line.hasPrefix("!")
+        case .googleJapaneseInput, .automatic:
+            line.hasPrefix("!") || line.hasPrefix("#")
+        case .kotoeri:
+            line.hasPrefix("//")
+        }
+    }
+
+    /// Guesses the format from the first non-empty line that matches a known pattern.
+    ///
+    /// Falls back to `.googleJapaneseInput` when nothing matches.
+    private static func guessFormat(from lines: [String]) -> UserDictionaryTextFormat {
+        for line in lines {
+            let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
+            guard !trimmed.isEmpty else {
+                continue
+            }
+            let lower = trimmed.lowercased()
+            if lower.hasPrefix("!microsoft ime") {
+                return .msime
+            }
+            if lower.hasPrefix("!!dicut") || lower.hasPrefix("!!atok_tango_text_header") {
+                return .atok
+            }
+            if trimmed.hasPrefix("\""), trimmed.hasSuffix("\""), !trimmed.contains("\t") {
+                return .kotoeri
+            }
+            if trimmed.hasPrefix("#") || trimmed.contains("\t") || trimmed.hasPrefix("!") {
+                return .googleJapaneseInput
+            }
+        }
+        return .googleJapaneseInput
+    }
+
+    /// Trims `value` and maps an empty result to `nil`.
+    private static func normalizedHint(_ value: String) -> String? {
+        let hint = value.trimmingCharacters(in: .whitespacesAndNewlines)
+        return hint.isEmpty ? nil : hint
+    }
+
+    /// Replaces tabs and line breaks with spaces so a field cannot break the TSV layout.
+    private static func sanitizeField(_ value: String) -> String {
+        value
+            .replacingOccurrences(of: "\t", with: " ")
+            .replacingOccurrences(of: "\n", with: " ")
+            .replacingOccurrences(of: "\r", with: " ")
+    }
+
+    /// Splits one CSV line on commas outside double quotes; `""` inside quotes yields a literal quote.
+    private static func splitCSV(_ line: String) -> [String] {
+        var fields: [String] = []
+        var current = ""
+        var inQuotes = false
+        var index = line.startIndex
+        while index < line.endIndex {
+            let character = line[index]
+            if character == "\"" {
+                let nextIndex = line.index(after: index)
+                if inQuotes, nextIndex < line.endIndex, line[nextIndex] == "\"" {
+                    current.append("\"")
+                    index = nextIndex
+                } else {
+                    inQuotes.toggle()
+                }
+            } else if character == ",", !inQuotes {
+                fields.append(current)
+                current = ""
+            } else {
+                current.append(character)
+            }
+            index = line.index(after: index)
+        }
+        fields.append(current)
+        return fields
+    }
+}
diff --git a/Core/Tests/CoreTests/UserDictionaryTests/UserDictionaryTextFormatTests.swift b/Core/Tests/CoreTests/UserDictionaryTests/UserDictionaryTextFormatTests.swift
new file mode 100644
index 00000000..18ec4a89
--- /dev/null
+++ b/Core/Tests/CoreTests/UserDictionaryTests/UserDictionaryTextFormatTests.swift
@@ -0,0 +1,64 @@
+@testable import Core
+import Foundation
+import Testing
+
+@Test func importGoogleJapaneseInputTSVWithComments() {
+    let text = [
+        "!Dictionary File",
+        "!Version: 1.0",
+        "!User Dictionary Name: 化学統合版",
+        ["くろむこう", "Cr鋼", "名詞", "Crを特徴とする鋼材。"].joined(separator: "\t"),
+        ["えんそいおん", "Clイオン", "名詞", "Clの電荷を持つイオン。"].joined(separator: "\t"),
+        ["こめなし", "コメントなし", "名詞"].joined(separator: "\t")
+    ].joined(separator: "\n")
+
+    let result = UserDictionaryTextCodec.importEntries(from: text, format: .automatic)
+
+    #expect(result.dictionaryName == "化学統合版")
+    #expect(result.entries.count == 3)
+    #expect(result.entries[0].reading == "くろむこう")
+    #expect(result.entries[0].word == "Cr鋼")
+    #expect(result.entries[0].hint == "Crを特徴とする鋼材。")
+    #expect(result.entries[2].hint == nil)
+}
+
+@Test func exportGoogleJapaneseInputTSV() {
+    let entries = [
+        Config.UserDictionaryEntry(word: "Cohen-Macaulay", reading: "こーえんまこーれー", hint: "深さが Krull 次元に等しいことを表す性質"),
+        Config.UserDictionaryEntry(word: "正則列", reading: "せいそくれつ", hint: nil)
+    ]
+
+    let exported = UserDictionaryTextCodec.exportEntries(entries, dictionaryName: "ユーザ辞書")
+
+    #expect(exported.contains("!User Dictionary Name: ユーザ辞書"))
+    #expect(exported.contains(["こーえんまこーれー", "Cohen-Macaulay", "名詞", "深さが Krull 次元に等しいことを表す性質"].joined(separator: "\t")))
+    #expect(exported.contains(["せいそくれつ", "正則列", "名詞", ""].joined(separator: "\t")))
+}
+
+@Test func decodeUTF16DictionaryWithoutLeavingBOM() throws {
+    let text = ["よみ", "単語", "名詞", "コメント"].joined(separator: "\t")
+    let littleEndianData = Data([0xFF, 0xFE]) + text.data(using: .utf16LittleEndian)!
+    let bigEndianData = Data([0xFE, 0xFF]) + text.data(using: .utf16BigEndian)!
+
+    let littleEndianResult = UserDictionaryTextCodec.importEntries(
+        from: try #require(UserDictionaryTextCodec.decodeText(from: littleEndianData)),
+        format: .googleJapaneseInput
+    )
+    let bigEndianResult = UserDictionaryTextCodec.importEntries(
+        from: try #require(UserDictionaryTextCodec.decodeText(from: bigEndianData)),
+        format: .googleJapaneseInput
+    )
+
+    #expect(littleEndianResult.entries.first?.reading == "よみ")
+    #expect(bigEndianResult.entries.first?.reading == "よみ")
+}
+
+@Test func importIgnoresLeadingByteOrderMarkInText() {
+    let text = "\u{FEFF}" + ["よみ", "単語", "名詞"].joined(separator: "\t")
+
+    let result = UserDictionaryTextCodec.importEntries(from: text, format: .automatic)
+
+    #expect(result.entries.count == 1)
+    #expect(result.entries.first?.reading == "よみ")
+    #expect(result.skippedLineCount == 0)
+}
diff --git a/azooKeyMac/Windows/UserDictionaryEditorWindow.swift b/azooKeyMac/Windows/UserDictionaryEditorWindow.swift
index 22ba337e..88b1ab6e 100644
--- a/azooKeyMac/Windows/UserDictionaryEditorWindow.swift
+++ b/azooKeyMac/Windows/UserDictionaryEditorWindow.swift
@@ -5,8 +5,10 @@
 //  Created by miwa on 2024/09/22.
 //
 
+import AppKit
 import Core
 import SwiftUI
+import UniformTypeIdentifiers
 
 struct UserDictionaryEditorWindow: View {
 
@@ -14,6 +16,9 @@ struct UserDictionaryEditorWindow: View {
 
     @State private var editTargetID: UUID?
     @State private var undoItem: Config.UserDictionaryEntry?
+    @State private var importFormat: UserDictionaryTextFormat = .automatic
+    @State private var alertMessage = ""
+    @State private var showingAlert = false
 
     @ViewBuilder
     private func helpButton(helpContent: LocalizedStringKey, isPresented: Binding<Bool>) -> some View {
@@ -38,8 +43,9 @@ struct UserDictionaryEditorWindow: View {
             Text("ユーザ辞書の設定")
                 .bold()
                 .font(.title)
-            Text("この機能はβ版です。予告なく仕様を変更することがあるほか、最大50件に限定しています。")
+            Text("この機能はβ版です。予告なく仕様を変更することがあります。")
                 .font(.caption)
+            importExportControls
             Spacer()
             if let editTargetID {
                 let itemBinding = Binding(
@@ -125,6 +131,108 @@ struct UserDictionaryEditorWindow: View {
         }
         .frame(minHeight: 300, maxHeight: 600)
         .frame(minWidth: 600, maxWidth: 800)
+        .alert("ユーザ辞書", isPresented: $showingAlert) {
+            Button("OK") {}
+        } message: {
+            Text(alertMessage)
+        }
+    }
+
+    /// Format picker together with the import and export buttons.
+    private var importExportControls: some View {
+        HStack(spacing: 8) {
+            Picker("形式", selection: $importFormat) {
+                ForEach(UserDictionaryTextFormat.allCases) { format in
+                    Text(format.localizedName).tag(format)
+                }
+            }
+            .frame(width: 220)
+
+            Button("読み込む", systemImage: "square.and.arrow.down") {
+                importFromFile()
+            }
+            Button("書き出す", systemImage: "square.and.arrow.up") {
+                exportToFile()
+            }
+        }
+        .controlSize(.regular)
+    }
+
+    /// Lets the user pick a file, appends its entries to the user dictionary and reports the outcome in an alert.
+    private func importFromFile() {
+        let panel = NSOpenPanel()
+        panel.allowsMultipleSelection = false
+        panel.canChooseDirectories = false
+        panel.canChooseFiles = true
+        panel.title = "ユーザ辞書ファイルを選択"
+
+        panel.begin { response in
+            guard response == .OK, let url = panel.url else {
+                return
+            }
+            let canAccess = url.startAccessingSecurityScopedResource()
+            defer {
+                if canAccess {
+                    url.stopAccessingSecurityScopedResource()
+                }
+            }
+            do {
+                let data = try Data(contentsOf: url)
+                guard let text = UserDictionaryTextCodec.decodeText(from: data) else {
+                    showAlert("ファイルの文字コードを判定できませんでした。")
+                    return
+                }
+                let result = UserDictionaryTextCodec.importEntries(from: text, format: importFormat)
+                guard !result.entries.isEmpty else {
+                    showAlert("有効な単語が見つかりませんでした。")
+                    return
+                }
+                self.userDictionary.value.items.append(contentsOf: result.entries)
+                editTargetID = nil
+                undoItem = nil
+                let skipped = result.skippedLineCount == 0 ? "" : " / \(result.skippedLineCount)行をスキップしました"
+                showAlert("\(result.entries.count)件を読み込みました\(skipped)。")
+            } catch {
+                showAlert("読み込みに失敗しました: \(error.localizedDescription)")
+            }
+        }
+    }
+
+    /// Asks for a destination and writes the current entries as UTF-8 text, reporting the outcome in an alert.
+    private func exportToFile() {
+        let panel = NSSavePanel()
+        panel.title = "ユーザ辞書の書き出し"
+        panel.nameFieldStringValue = "ユーザ辞書.txt"
+        panel.canCreateDirectories = true
+        panel.allowedContentTypes = [.plainText]
+
+        panel.begin { response in
+            guard response == .OK, let url = panel.url else {
+                return
+            }
+            let canAccess = url.startAccessingSecurityScopedResource()
+            defer {
+                if canAccess {
+                    url.stopAccessingSecurityScopedResource()
+                }
+            }
+            do {
+                let exported = UserDictionaryTextCodec.exportEntries(
+                    self.userDictionary.value.items,
+                    dictionaryName: "ユーザ辞書"
+                )
+                try Data(exported.utf8).write(to: url)
+                showAlert("\(url.lastPathComponent)を書き出しました。")
+            } catch {
+                showAlert("書き出しに失敗しました: \(error.localizedDescription)")
+            }
+        }
+    }
+
+    /// Shows `message` in the shared alert.
+    private func showAlert(_ message: String) {
+        alertMessage = message
+        showingAlert = true
     }
 }
 
diff --git a/azooKeyMac/azooKeyMac.entitlements b/azooKeyMac/azooKeyMac.entitlements
index c4080206..ae64c731 100644
--- a/azooKeyMac/azooKeyMac.entitlements
+++ b/azooKeyMac/azooKeyMac.entitlements
@@ -8,6 +8,8 @@
 	<array>
 		<string>group.dev.ensan.inputmethod.azooKeyMac</string>
 	</array>
+	<key>com.apple.security.files.user-selected.read-write</key>
+	<true/>
 	<key>com.apple.security.network.client</key>
 	<true/>
 	<key>com.apple.security.temporary-exception.mach-register.global-name</key>