Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
315770b
Hindi TN changes
ngachchi Oct 30, 2024
4f57bdd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 30, 2024
5d7f43c
Updated date for Hindi TN cache
ngachchi Oct 30, 2024
69341fb
additional whitelist class .tsv files and unused imports removed
ngachchi Oct 30, 2024
2b797ee
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 30, 2024
71b6e93
incorporated suggestions for unused statements and another for closin…
ngachchi Oct 30, 2024
d88ae37
Merge branch 'main' into hi_tn
ngachchi Nov 5, 2024
d52fea5
Combined separate Hindi TN and ITN blocks into a single block
ngachchi Nov 5, 2024
65d3c12
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 5, 2024
9c49578
Added init.py files and removed unused commented lines
ngachchi Nov 5, 2024
345fe78
commented irrelevant references and unused snippets from whitelist an…
ngachchi Nov 5, 2024
d6702cf
Whitelist and Word class changes
ngachchi Nov 7, 2024
9fc7091
post processor changes with minor fixes
ngachchi Nov 8, 2024
1064772
remove space before punctuation for sparrowhawk file
ngachchi Nov 11, 2024
ba28fdb
minor fixes for measure class
ngachchi Nov 11, 2024
a260a8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 12, 2024
5deb7b7
Updated Jenkinsfile
ngachchi Nov 12, 2024
1b9b5c3
removed unused imports and statements
ngachchi Nov 12, 2024
c609dc7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 12, 2024
8ac3042
updated date stamp for HI cache and commented ITN grammars
ngachchi Nov 12, 2024
88b0aea
Updates the cache
zoobereq Nov 13, 2024
6beb81c
Disables Hindi ITN L0 checks
zoobereq Nov 13, 2024
3161e7e
Reapplies ITN CI Checks
zoobereq Nov 13, 2024
631662e
Adds missing inits
zoobereq Nov 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pipeline {
HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-29-24-0'
HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0'
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
}
stages {
Expand Down Expand Up @@ -94,22 +94,27 @@ pipeline {
}
}
stage('L0: Create HI TN/ITN Grammars') {
when {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L0: Hi TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
}
}
}
failFast true
parallel {
stage('L0: Hi ITN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language hi --text="बीस" --cache_dir ${HI_TN_CACHE}'
}
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
}
}

}
}
}

stage('L0: Create DE/ES TN/ITN Grammars') {
when {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 changes: 17 additions & 0 deletions nemo_text_processing/text_normalization/hi/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nemo_text_processing.text_normalization.hi.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.text_normalization.hi.verbalizers.verbalize import VerbalizeFst
from nemo_text_processing.text_normalization.hi.verbalizers.verbalize_final import VerbalizeFinalFst
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/date/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
31 changes: 31 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/date/days.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
०१ एक
०२ दो
०३ तीन
०४ चार
०५ पाँच
०६ छः
०७ सात
०८ आठ
०९ नौ
१० दस
११ ग्यारह
१२ बारह
१३ तेरह
१४ चौदह
१५ पंद्रह
१६ सोलह
१७ सत्रह
१८ अठारह
१९ उन्नीस
२० बीस
२१ इक्कीस
२२ बाईस
२३ तेईस
२४ चौबीस
२५ पच्चीस
२६ छब्बीस
२७ सत्ताईस
२८ अट्ठाईस
२९ उनतीस
३० तीस
३१ इकतीस
12 changes: 12 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/date/months.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
०१ जनवरी
०२ फ़रवरी
०३ मार्च
०४ अप्रैल
०५ मई
०६ जून
०७ जुलाई
०८ अगस्त
०९ सितंबर
१० अक्टूबर
११ नवंबर
१२ दिसंबर
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
154 changes: 154 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/measure/unit.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
°C डिग्री सेल्सियस
°F डिग्री फारेनहाइट
K केल्विन
g ग्राम
kg किलोग्राम
mg मिलीग्राम
cg सेंटीग्राम
dg डेसीग्राम
Tg टेराग्राम
Mg मेगाग्राम
Gg गीगाग्राम
hg हेक्टोग्राम
dag डेकाग्राम
lb पाउंड
oz आउन्स
t टन
st स्टोन
q क्विंटल
m मीटर
cm सेंटीमीटर
mm मिलीमीटर
km किलोमीटर
dm डेसीमीटर
dam डेकामीटर
nm नैनोमीटर
hm हेक्टोमीटर
Mm मेगामीटर
my मिरियामीटर
mi मील
ft फीट
in इंच
yd यार्ड
µm माइक्रोमीटर
m² वर्ग मीटर
cm² वर्ग सेंटीमीटर
mm² वर्ग मिलीमीटर
km² वर्ग किलोमीटर
hm² वर्ग हेक्टोमीटर
dm² वर्ग डेसीमीटर
dam² वर्ग डेकामीटर
yd² वर्ग यार्ड
ft² वर्ग फीट
ac² वर्ग एकड़
my² वर्ग मिरियामीटर
mi² वर्ग मील
nm² वर्ग नैनोमीटर
µm² वर्ग माइक्रोमीटर
in² वर्ग इंच
my³ घन मिरियामीटर
mi³ घन मील
ac³ घन एकड़
ha हेक्टेयर
ac एकड़
गज गज
गज² वर्ग गज
गज³ घन गज
m³ घन मीटर
cm³ घन सेंटीमीटर
mm³ घन मिलीमीटर
dam³ घन डेकामीटर
µm³ घन माइक्रोमीटर
ml³ घन मिलीलीटर
l लीटर
kl किलोलीटर
ml मिलीलीटर
ml² वर्ग मिलीलीटर
dl डेसीलीटर
hl हेक्टोलीटर
cl सेंटीलीटर
dal डेकालीटर
dl² वर्ग डेसीलीटर
dal² वर्ग डेकालीटर
dl³ घन डेसीलीटर
dal³ घन डेकालीटर
L लीटर
kL किलोलीटर
mL मिलीलीटर
mL² वर्ग मिलीलीटर
mL³ घन मिलीलीटर
dL डेसीलीटर
hL हेक्टोलीटर
cL सेंटीलीटर
daL डेकालीटर
dL² वर्ग डेसीलीटर
daL² वर्ग डेकालीटर
dL³ घन डेसीलीटर
daL³ घन डेकालीटर
GB गीगाबाइट
in³ घन इंच
ft³ घन फीट
yd³ घन यार्ड
my³ घन मिरियामीटर
mi³ घन मील
dm³ घन डेसीमीटर
dm³ घन डेसीमीटर
km³ घन किलोमीटर
nm³ घन नैनोमीटर
mm³ घन मिलीमीटर
qt क्वार्ट
gal गैलन
pt पिंट
W वाट
MW मेगावाट
KW किलोवाट
b बिट
Mb मेगाबिट
B बाइट
GB गीगाबाइट
KB किलोबाइट
TB टेराबाइट
MB मेगाबाइट
PB पेटाबाइट
EB एक्साबाइट
ZB जेटाबाइट
YB योटाबाइट
BB ब्रोन्टोबाइट
C कूलंब
V वोल्ट
Pa पास्कल
A ऐंपीयर
J जूल
s सेकंड
hr घंटा
h घंटे
min मिनट
ha हेक्टेयर
ha² वर्ग हेक्टेयर
Ω ओम
MΩ मेगाओम
doz दर्जन
Hz हर्ट्ज़
GHz गीगाहर्ट्ज़
KHz किलोहर्ट्ज़
N न्यूटन
dB डेसीबल
yr साल
yr वर्ष
hp हॉर्सपॉवर
d दिन
month महीना
months महीने
ct कैरेट
pH पीएच
km/h किलोमीटर प्रति घंटा
km/min किलोमीटर प्रति मिनट
m/h मीटर प्रति घंटा
mi/s मील प्रति सेकंड
mi/h मील प्रति घंटा
mi/min मील प्रति मिनट
₹/ac रुपए प्रति एकड़
x बाई
X बाई
* बाई
- से
13 changes: 13 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/money/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
10 changes: 10 additions & 0 deletions nemo_text_processing/text_normalization/hi/data/money/currency.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
₹ रुपए
P पैसे
£ पाउंड
₩ वॉन
$ डॉलर
₺ लीरा
৳ टका
¥ येन
₦ नाइरा
€ यूरो
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
१ एक
२ दो
३ तीन
४ चार
५ पाँच
६ छह
७ सात
८ आठ
९ नौ
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
१०० एक सौ
Loading