Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 207 additions & 0 deletions Core/Sources/Core/UserDictionary/UserDictionaryTextFormat.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import Foundation

/// Text dictionary formats that can be chosen when importing a user dictionary.
///
/// The raw value of each case is also used as its `Identifiable` id.
public enum UserDictionaryTextFormat: String, CaseIterable, Identifiable, Sendable {
    /// Let the importer guess the concrete format from the file contents.
    case automatic
    case googleJapaneseInput
    case msime
    case atok
    case kotoeri

    /// Stable identifier; identical to the raw value.
    public var id: String { rawValue }

    /// Human-readable name of the format, as shown in the format picker.
    public var localizedName: String {
        switch self {
        case .automatic: return "自動判定"
        case .googleJapaneseInput: return "Google日本語入力 / Mozc"
        case .msime: return "Microsoft IME"
        case .atok: return "ATOK"
        case .kotoeri: return "ことえり"
        }
    }
}

/// Outcome of parsing a dictionary text file.
public struct UserDictionaryImportResult: Sendable {
    /// Name taken from a `!User Dictionary Name:` header line, if the text had one.
    public var dictionaryName: String?
    /// Entries that were parsed successfully, in file order.
    public var entries: [Config.UserDictionaryEntry]
    /// Number of non-comment lines that could not be turned into an entry.
    public var skippedLineCount: Int

    /// Creates a result from its three components.
    public init(
        dictionaryName: String?,
        entries: [Config.UserDictionaryEntry],
        skippedLineCount: Int
    ) {
        self.dictionaryName = dictionaryName
        self.entries = entries
        self.skippedLineCount = skippedLineCount
    }
}

public enum UserDictionaryTextCodec {
/// Decodes the raw bytes of a dictionary file into text.
///
/// A leading UTF-8 or UTF-16 (little/big endian) byte order mark selects the encoding
/// and is removed before decoding. Without a BOM, UTF-8, Shift JIS and UTF-16 are
/// tried in that order and the first successful decoding wins.
///
/// - Parameter data: Raw file contents.
/// - Returns: The decoded text, or `nil` if none of the attempted encodings can decode `data`.
public static func decodeText(from data: Data) -> String? {
    let byteOrderMarks: [(signature: [UInt8], encoding: String.Encoding)] = [
        ([0xEF, 0xBB, 0xBF], .utf8),
        ([0xFF, 0xFE], .utf16LittleEndian),
        ([0xFE, 0xFF], .utf16BigEndian)
    ]
    for mark in byteOrderMarks where data.starts(with: mark.signature) {
        // A recognised BOM is authoritative: if decoding fails we return nil
        // rather than falling through to the guessing below.
        let payload = Data(data.dropFirst(mark.signature.count))
        return String(data: payload, encoding: mark.encoding)
    }

    let fallbackEncodings: [String.Encoding] = [.utf8, .shiftJIS, .utf16]
    for encoding in fallbackEncodings {
        if let decoded = String(data: data, encoding: encoding) {
            return decoded
        }
    }
    return nil
}

/// Parses dictionary text into user dictionary entries.
///
/// Blank lines, a `!User Dictionary Name:` header and format-specific comment lines
/// are consumed without being counted. Every other line must provide at least three
/// columns (reading, word, part of speech) with a non-empty reading and word;
/// otherwise it is counted in `skippedLineCount`. A fourth column, when present and
/// non-blank, becomes the entry's hint.
///
/// - Parameters:
///   - text: Decoded file contents. CRLF and lone CR are treated as line breaks.
///   - requestedFormat: Format to parse as; `.automatic` guesses it from the lines.
/// - Returns: The parsed entries, the dictionary name if a header supplied one, and the skipped-line count.
public static func importEntries(
    from text: String,
    format requestedFormat: UserDictionaryTextFormat = .automatic
) -> UserDictionaryImportResult {
    let normalizedText = text
        .replacingOccurrences(of: "\r\n", with: "\n")
        .replacingOccurrences(of: "\r", with: "\n")
    let lines = normalizedText.components(separatedBy: "\n")

    let format: UserDictionaryTextFormat
    if requestedFormat == .automatic {
        format = guessFormat(from: lines)
    } else {
        format = requestedFormat
    }

    var result = UserDictionaryImportResult(dictionaryName: nil, entries: [], skippedLineCount: 0)

    for rawLine in lines {
        let content = rawLine.trimmingCharacters(in: .whitespacesAndNewlines)
        if content.isEmpty {
            continue
        }
        if let name = dictionaryNameHeader(in: content) {
            result.dictionaryName = name
            continue
        }
        if isCommentOrHeader(content, format: format) {
            continue
        }

        let fields: [String]
        switch format {
        case .kotoeri:
            fields = splitCSV(content)
        case .automatic, .googleJapaneseInput, .msime, .atok:
            // Split the untrimmed line: trimming first would strip leading/trailing
            // tabs and thereby shift or drop empty columns.
            fields = rawLine.components(separatedBy: "\t")
        }

        guard fields.count >= 3 else {
            result.skippedLineCount += 1
            continue
        }
        let reading = fields[0].trimmingCharacters(in: .whitespacesAndNewlines)
        let word = fields[1].trimmingCharacters(in: .whitespacesAndNewlines)
        if reading.isEmpty || word.isEmpty {
            result.skippedLineCount += 1
            continue
        }
        let hint = fields.count >= 4 ? normalizedHint(fields[3]) : nil
        result.entries.append(.init(word: word, reading: reading, hint: hint))
    }

    return result
}

/// Serialises entries as tab-separated text preceded by `!`-prefixed header lines.
///
/// Each entry becomes one line of `reading<TAB>word<TAB>名詞<TAB>hint`; a missing hint
/// is written as an empty fourth column. Tabs and line breaks inside any field, and
/// inside `dictionaryName`, are replaced with spaces so that every value stays on its
/// own line and in its own column.
///
/// - Parameters:
///   - entries: Entries to write, in output order.
///   - dictionaryName: Name written into the `!User Dictionary Name:` header line.
/// - Returns: The exported text, terminated by a trailing newline.
public static func exportEntries(_ entries: [Config.UserDictionaryEntry], dictionaryName: String) -> String {
    // The name occupies a single header line, so it needs the same scrubbing as the
    // entry fields; otherwise an embedded newline would inject extra lines into the file.
    let headerLines = [
        "!Dictionary File",
        "!Version: 1.0",
        "!User Dictionary Name: \(sanitizeField(dictionaryName))"
    ]
    let entryLines = entries.map { entry in
        [
            sanitizeField(entry.reading),
            sanitizeField(entry.word),
            "名詞",
            sanitizeField(entry.hint ?? "")
        ].joined(separator: "\t")
    }
    return (headerLines + entryLines).joined(separator: "\n") + "\n"
}

/// Extracts the dictionary name from a `!User Dictionary Name:` header line.
///
/// - Parameter line: A single line, expected to be already trimmed by the caller.
/// - Returns: The trimmed text after the marker, or `nil` when the line is not such a
///   header or the name is blank.
private static func dictionaryNameHeader(in line: String) -> String? {
    let marker = "!User Dictionary Name:"
    if !line.hasPrefix(marker) {
        return nil
    }
    let remainder = line.dropFirst(marker.count)
    let name = remainder.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !name.isEmpty else {
        return nil
    }
    return name
}

/// Tells whether a line is a comment/header line for the given format.
///
/// - Parameters:
///   - line: A single line, expected to be already trimmed by the caller.
///   - format: Format whose comment markers apply.
/// - Returns: `true` when `line` starts with one of the format's markers:
///   `!` for MS-IME and ATOK, `!` or `#` for Google Japanese Input (and automatic),
///   `//` for Kotoeri.
private static func isCommentOrHeader(_ line: String, format: UserDictionaryTextFormat) -> Bool {
    let markers: [String]
    switch format {
    case .kotoeri:
        markers = ["//"]
    case .msime, .atok:
        markers = ["!"]
    case .automatic, .googleJapaneseInput:
        markers = ["!", "#"]
    }
    return markers.contains { line.hasPrefix($0) }
}

/// Guesses the dictionary format from the first line that is conclusive.
///
/// Lines are inspected in order after trimming; blank lines and lines matching no
/// rule are passed over. Rules, in priority order:
/// 1. starts with `!Microsoft IME` (case-insensitive) → MS-IME
/// 2. starts with `!!DICUT` or `!!ATOK_TANGO_TEXT_HEADER` (case-insensitive) → ATOK
/// 3. begins and ends with a double quote and contains no tab → Kotoeri
/// 4. starts with `#` or `!`, or contains a tab → Google Japanese Input
///
/// - Returns: The guessed format; Google Japanese Input when nothing is conclusive.
private static func guessFormat(from lines: [String]) -> UserDictionaryTextFormat {
    let atokMarkers = ["!!dicut", "!!atok_tango_text_header"]

    for rawLine in lines {
        let candidate = rawLine.trimmingCharacters(in: .whitespacesAndNewlines)
        if candidate.isEmpty {
            continue
        }

        let lowered = candidate.lowercased()
        if lowered.hasPrefix("!microsoft ime") {
            return .msime
        }
        if atokMarkers.contains(where: { lowered.hasPrefix($0) }) {
            return .atok
        }

        let hasTab = candidate.contains("\t")
        let isFullyQuoted = candidate.hasPrefix("\"") && candidate.hasSuffix("\"")
        if isFullyQuoted && !hasTab {
            return .kotoeri
        }
        if hasTab || candidate.hasPrefix("#") || candidate.hasPrefix("!") {
            return .googleJapaneseInput
        }
    }
    return .googleJapaneseInput
}

/// Trims surrounding whitespace/newlines from a hint column.
///
/// - Returns: The trimmed hint, or `nil` when nothing is left after trimming.
private static func normalizedHint(_ value: String) -> String? {
    let stripped = value.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !stripped.isEmpty else {
        return nil
    }
    return stripped
}

/// Makes a value safe to write as one tab-separated column by replacing every
/// tab, line feed and carriage return with a single space.
private static func sanitizeField(_ value: String) -> String {
    // Applied in this order: tab, LF, CR.
    let separators = ["\t", "\n", "\r"]
    return separators.reduce(value) { partial, separator in
        partial.replacingOccurrences(of: separator, with: " ")
    }
}

/// Splits one CSV line into its fields.
///
/// Commas separate fields unless they appear inside double quotes. Quote characters
/// delimit quoted sections and are not part of the field; inside a quoted section a
/// doubled quote (`""`) yields one literal quote. Fields are returned untrimmed, and
/// an empty line yields a single empty field.
private static func splitCSV(_ line: String) -> [String] {
    let characters = Array(line)
    var fields: [String] = []
    var field = ""
    var insideQuotes = false
    var position = 0

    while position < characters.count {
        let character = characters[position]
        switch character {
        case "\"":
            let next = position + 1
            let isEscapedQuote = insideQuotes && next < characters.count && characters[next] == "\""
            if isEscapedQuote {
                field.append("\"")
                // Consume the second quote of the pair as well.
                position = next
            } else {
                insideQuotes.toggle()
            }
        case "," where !insideQuotes:
            fields.append(field)
            field = ""
        default:
            field.append(character)
        }
        position += 1
    }

    fields.append(field)
    return fields
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
@testable import Core
import Foundation
import Testing

/// Imports tab-separated text with `!` header lines using automatic format detection
/// and checks the dictionary name, the entry count, and the parsed fields and hints.
@Test func importGoogleJapaneseInputTSVWithComments() {
    func row(_ columns: String...) -> String {
        columns.joined(separator: "\t")
    }
    let source = [
        "!Dictionary File",
        "!Version: 1.0",
        "!User Dictionary Name: 化学統合版",
        row("くろむこう", "Cr鋼", "名詞", "Crを特徴とする鋼材。"),
        row("えんそいおん", "Clイオン", "名詞", "Clの電荷を持つイオン。"),
        // Three columns only: the entry must still be imported, without a hint.
        row("こめなし", "コメントなし", "名詞")
    ].joined(separator: "\n")

    let imported = UserDictionaryTextCodec.importEntries(from: source, format: .automatic)
    let entries = imported.entries

    #expect(imported.dictionaryName == "化学統合版")
    #expect(entries.count == 3)
    #expect(entries[0].reading == "くろむこう")
    #expect(entries[0].word == "Cr鋼")
    #expect(entries[0].hint == "Crを特徴とする鋼材。")
    #expect(entries[2].hint == nil)
}

/// Exports two entries (one with a hint, one without) and checks that the name header
/// and both tab-separated entry lines appear in the output.
@Test func exportGoogleJapaneseInputTSV() {
    let exported = UserDictionaryTextCodec.exportEntries(
        [
            .init(word: "Cohen-Macaulay", reading: "こーえんまこーれー", hint: "深さが Krull 次元に等しいことを表す性質"),
            .init(word: "正則列", reading: "せいそくれつ", hint: nil)
        ],
        dictionaryName: "ユーザ辞書"
    )

    let expectedFragments = [
        "!User Dictionary Name: ユーザ辞書",
        ["こーえんまこーれー", "Cohen-Macaulay", "名詞", "深さが Krull 次元に等しいことを表す性質"].joined(separator: "\t"),
        // A nil hint is exported as an empty fourth column (trailing tab).
        ["せいそくれつ", "正則列", "名詞", ""].joined(separator: "\t")
    ]
    for fragment in expectedFragments {
        #expect(exported.contains(fragment))
    }
}

/// Decodes UTF-16 data carrying a little-endian and a big-endian BOM and checks that
/// the first reading compares equal to the plain text, i.e. no BOM character is left
/// at the start of the decoded string.
@Test func decodeUTF16DictionaryWithoutLeavingBOM() throws {
    let text = ["よみ", "単語", "名詞", "コメント"].joined(separator: "\t")
    // `#require` instead of force-unwrapping: an encoding failure is reported as a
    // test failure rather than crashing the whole test run.
    let littleEndianPayload = try #require(text.data(using: .utf16LittleEndian))
    let bigEndianPayload = try #require(text.data(using: .utf16BigEndian))
    let littleEndianData = Data([0xFF, 0xFE]) + littleEndianPayload
    let bigEndianData = Data([0xFE, 0xFF]) + bigEndianPayload

    let littleEndianResult = UserDictionaryTextCodec.importEntries(
        from: try #require(UserDictionaryTextCodec.decodeText(from: littleEndianData)),
        format: .googleJapaneseInput
    )
    let bigEndianResult = UserDictionaryTextCodec.importEntries(
        from: try #require(UserDictionaryTextCodec.decodeText(from: bigEndianData)),
        format: .googleJapaneseInput
    )

    #expect(littleEndianResult.entries.first?.reading == "よみ")
    #expect(bigEndianResult.entries.first?.reading == "よみ")
}
106 changes: 105 additions & 1 deletion azooKeyMac/Windows/UserDictionaryEditorWindow.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,20 @@
// Created by miwa on 2024/09/22.
//

import AppKit
import Core
import SwiftUI
import UniformTypeIdentifiers

struct UserDictionaryEditorWindow: View {

@ConfigState private var userDictionary = Config.UserDictionary()

@State private var editTargetID: UUID?
@State private var undoItem: Config.UserDictionaryEntry?
@State private var importFormat: UserDictionaryTextFormat = .automatic
@State private var alertMessage = ""
@State private var showingAlert = false

@ViewBuilder
private func helpButton(helpContent: LocalizedStringKey, isPresented: Binding<Bool>) -> some View {
Expand All @@ -38,8 +43,9 @@ struct UserDictionaryEditorWindow: View {
Text("ユーザ辞書の設定")
.bold()
.font(.title)
Text("この機能はβ版です。予告なく仕様を変更することがあるほか、最大50件に限定しています。")
Text("この機能はβ版です。予告なく仕様を変更することがあります。")
.font(.caption)
importExportControls
Spacer()
if let editTargetID {
let itemBinding = Binding(
Expand Down Expand Up @@ -125,6 +131,104 @@ struct UserDictionaryEditorWindow: View {
}
.frame(minHeight: 300, maxHeight: 600)
.frame(minWidth: 600, maxWidth: 800)
.alert("ユーザ辞書", isPresented: $showingAlert) {
Button("OK") {}
} message: {
Text(alertMessage)
}
}

/// Row with the import format picker and the import/export buttons.
private var importExportControls: some View {
    HStack(spacing: 8) {
        Picker("形式", selection: $importFormat) {
            ForEach(UserDictionaryTextFormat.allCases) {
                Text($0.localizedName).tag($0)
            }
        }
        .frame(width: 220)

        Button("読み込む", systemImage: "square.and.arrow.down", action: importFromFile)
        Button("書き出す", systemImage: "square.and.arrow.up", action: exportToFile)
    }
    .controlSize(.regular)
}

/// Lets the user pick a single file, parses it with the selected import format and
/// appends the parsed entries to the user dictionary.
///
/// Every outcome other than cancelling the panel is reported through the alert:
/// read failure, undecodable text, no valid entries, or the imported/skipped counts.
private func importFromFile() {
    let openPanel = NSOpenPanel()
    openPanel.allowsMultipleSelection = false
    openPanel.canChooseDirectories = false
    openPanel.canChooseFiles = true
    openPanel.title = "ユーザ辞書ファイルを選択"

    openPanel.begin { response in
        guard response == .OK, let fileURL = openPanel.url else {
            return
        }
        // Balance the security-scoped access only if it was actually granted.
        let didStartAccess = fileURL.startAccessingSecurityScopedResource()
        defer {
            if didStartAccess {
                fileURL.stopAccessingSecurityScopedResource()
            }
        }

        // Reading the file is the only step that can throw.
        let data: Data
        do {
            data = try Data(contentsOf: fileURL)
        } catch {
            showAlert("読み込みに失敗しました: \(error.localizedDescription)")
            return
        }

        guard let text = UserDictionaryTextCodec.decodeText(from: data) else {
            showAlert("ファイルの文字コードを判定できませんでした。")
            return
        }
        let imported = UserDictionaryTextCodec.importEntries(from: text, format: importFormat)
        if imported.entries.isEmpty {
            showAlert("有効な単語が見つかりませんでした。")
            return
        }

        self.userDictionary.value.items.append(contentsOf: imported.entries)
        // Clear the current edit target and the pending undo item after the import.
        editTargetID = nil
        undoItem = nil

        let skippedNote = imported.skippedLineCount == 0 ? "" : " / \(imported.skippedLineCount)行をスキップしました"
        showAlert("\(imported.entries.count)件を読み込みました\(skippedNote)。")
    }
}

/// Asks the user for a destination and writes the whole user dictionary there as
/// UTF-8 text, reporting success or failure through the alert.
private func exportToFile() {
    let savePanel = NSSavePanel()
    savePanel.title = "ユーザ辞書の書き出し"
    savePanel.nameFieldStringValue = "ユーザ辞書.txt"
    savePanel.canCreateDirectories = true
    savePanel.allowedContentTypes = [.plainText]

    savePanel.begin { response in
        guard response == .OK, let destination = savePanel.url else {
            return
        }
        // Balance the security-scoped access only if it was actually granted.
        let didStartAccess = destination.startAccessingSecurityScopedResource()
        defer {
            if didStartAccess {
                destination.stopAccessingSecurityScopedResource()
            }
        }

        let contents = UserDictionaryTextCodec.exportEntries(
            self.userDictionary.value.items,
            dictionaryName: "ユーザ辞書"
        )
        // Writing the file is the only step that can throw.
        do {
            try Data(contents.utf8).write(to: destination)
        } catch {
            showAlert("書き出しに失敗しました: \(error.localizedDescription)")
            return
        }
        showAlert("\(destination.lastPathComponent)を書き出しました。")
    }
}

/// Stores `message` as the alert text and raises the flag that presents the alert.
private func showAlert(_ message: String) {
alertMessage = message
// `showingAlert` is bound to the `.alert(_:isPresented:)` modifier on the window content.
showingAlert = true
}
}

Expand Down
Loading
Loading