1+ #!/usr/bin/python3
2+
3+ """
4+ preprocessor for sphinx files
5+ - reads in all .rst files in input directory
6+ - writes processed .rst files to output directory
7+
8+ by copying files to output directory, we aren't
9+ modifying the original .rst files in any way.
10+
11+ preprocessor steps:
12+ - format tables
13+ """
14+
15+ import argparse
16+ import shutil
17+ import os
18+ import itertools
19+
20+
def find_markdown_tables(lines):
    """
    return (row_start, row_end) for all markdown tables.

    a table is a run of two or more consecutive lines that each start
    with "|". both indices are inclusive and the pairs are returned in
    the order the tables appear in lines.

    a "|" line with no "|" line directly before or after it is skipped:
    a one-line run cannot hold both a header row and a delimiter row.
    """
    pipe_rows = [idx for idx, line in enumerate(lines) if line.startswith("|")]

    tables = []
    start = prev = None
    for idx in pipe_rows:
        if prev is not None and idx == prev + 1:
            # still inside the current run of "|" lines
            prev = idx
            continue
        # a gap closes the previous run; keep it only if it spans 2+ lines
        if prev is not None and prev > start:
            tables.append((start, prev))
        start = prev = idx

    # close the run that reaches the last "|" line
    if prev is not None and prev > start:
        tables.append((start, prev))

    return tables
32+
33+
def get_markdown_row_data(row):
    """
    get row (or header) data from markdown row (or header)

    row
        | apples | red | a |

    data
        ['apples', 'red', 'a']

    surrounding whitespace is removed from the row and from every cell.
    the empty pieces produced by a leading and a trailing pipe are
    dropped; a row written without one of them keeps all of its cells.
    """
    cells = row.strip().split("|")

    # only discard an edge piece when it is the empty text outside a pipe,
    # so "| a | b" still yields both cells
    if cells[0] == "":
        cells = cells[1:]
    if cells and cells[-1] == "":
        cells = cells[:-1]

    return [cell.strip() for cell in cells]
45+
46+
def convert_table(table, in_format="md", out_format="rst-2"):
    """
    convert a table (list of lines) from in_format to out_format and
    return the converted table as a new list of lines.

    only "md" -> "rst-2" is implemented; any other combination raises
    NotImplementedError.

    md format
        | fruit   | color  | startswith |
        | ------- | ------ | ---------- |
        | apples  | red    | a          |
        | oranges | orange | o          |

    rst-1 format
        +---------+--------+------------+
        | fruit   | color  | startswith |
        +=========+========+============+
        | apples  | red    | a          |
        +---------+--------+------------+
        | oranges | orange | o          |
        +---------+--------+------------+

    rst-2 format
        ======= ====== ==========
        fruit   color  startswith
        ======= ====== ==========
        apples  red    a
        oranges orange o
        ======= ====== ==========

    md -> rst-2 details:
    - table[0] is the header, table[1] is skipped as the delimiter row,
      table[2:] are the data rows
    - the header decides the number of columns; shorter rows are padded
      with empty cells and surplus cells are dropped
    - every cell, including the last one of a line, is padded to the
      column width
    - a table without data rows is written as a single-row table
    """
    if in_format == "md" and out_format == "rst-2":
        header = get_markdown_row_data(table[0])
        ncols = len(header)

        rows = []
        for line in table[2:]:
            cells = get_markdown_row_data(line)
            # normalise ragged rows to exactly ncols cells
            rows.append((cells + [''] * ncols)[:ncols])

        # widest text per column, header included, computed once
        widths = [
            max(len(cells[idx]) for cells in [header] + rows)
            for idx in range(ncols)
        ]

        def render(cells):
            return ' '.join(
                cell.ljust(width) for cell, width in zip(cells, widths)
            )

        separator = ' '.join('=' * width for width in widths)

        if not rows:
            # no body rows: a header rule would be followed directly by
            # the closing rule, so emit the header as the only row
            return [separator, render(header), separator]

        return (
            [separator, render(header), separator]
            + [render(cells) for cells in rows]
            + [separator]
        )

    raise NotImplementedError("Conversion from {} to {} not implemented".format(
        in_format, out_format
    ))
96+
97+
def get_processed_lines(lines):
    """
    return lines with every md table swapped for its rst-2 version

    - text outside the tables is passed through untouched
    - when no table is found, lines itself is returned
    - further processing steps can be added here later if needed
    """
    table_spans = find_markdown_tables(lines)

    if not table_spans:
        return lines

    processed = []
    cursor = 0  # index of the first line not yet copied or converted
    for first, last in table_spans:
        # plain text between the previous table (or file start) and this one
        processed.extend(lines[cursor:first])
        # the table itself; both span indices are inclusive
        processed.extend(convert_table(lines[first:last + 1], "md", "rst-2"))
        cursor = last + 1

    # whatever follows the last table
    processed.extend(lines[cursor:len(lines)])
    return processed
119+
120+
def main():
    """
    command line entry point

    - copies the --input directory tree to --output, replacing any
      existing --output directory
    - rewrites every .rst file in the copy with its processed lines

    both options are required. the run is refused when --output is the
    input directory or contains it, because --output is deleted before
    the copy is made.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="Input Sphinx source directory")
    parser.add_argument("--output", required=True,
                        help="Output Sphinx source directory")
    args = parser.parse_args()

    source_dir = os.path.realpath(args.input)
    output_dir = os.path.realpath(args.output)
    if source_dir == output_dir or source_dir.startswith(output_dir + os.sep):
        # removing the output tree would remove the input files as well
        parser.error("--output must not be the input directory or contain it")

    shutil.rmtree(args.output, ignore_errors=True)
    shutil.copytree(args.input, args.output)

    for root, _dirs, files in os.walk(args.output):
        for name in files:
            if not name.endswith('.rst'):
                continue
            path = os.path.join(root, name)
            with open(path, encoding="utf-8") as f:
                data = f.read()
            lines = get_processed_lines(data.splitlines())
            text = '\n'.join(lines)
            # splitlines() drops the final newline; put it back if present
            if data.endswith('\n'):
                text += '\n'
            with open(path, 'w', encoding="utf-8") as f:
                f.write(text)

    print("preprocessor step finished")


if __name__ == "__main__":
    main()
0 commit comments