#!/bin/bash

#set -x # uncomment for bash script debugging

# ============================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============LICENSE_END=====================================================

###
### warnstats
###
### AUTHOR(S):
### Thomas Kulik, Deutsche Telekom AG, 2020
###
### DESCRIPTION:
### warnstats helps to find the onap modules (projects) and rst-files which are
### responsible for the most warnings during the documentation build process.
### it requires a tox build logfile, parses it line by line, prints out some
### statistics and provides links to the local rst file and html version.
###

###
### CHANGELOG (LATEST ON TOP)
###
### 1.4.0 (2020-03-18) - the link to the local html and rst file is provided in
###                      the output. this may help to ease the debug process.
###                      use mouse-over/context menu functionality of bash to
###                      easily open files with your browser or rst editor.
###                    - improved handling for module names (in case they are
###                      no real onap projects/modules but directories which
###                      contain additional documentation in rst format).
### 1.3.1 (2020-03-10) - fixed minor typo in usage message
### 1.3.0 (2020-03-09) - initial release
###

script_version="1.4.0 (2020-03-18)"

echo " "
echo " warnstats version ${script_version}"

# counters and link targets collected while parsing the logfile;
# all of them are keyed by strings, hence associative arrays
declare -A module_array
declare -A message_short_array
declare -A message_long_array
declare -A rstfile_array
declare -A rstfilepath_array
declare -A htmlfilepath_array
declare -A webpath_array

###
### simple script argument handling
###

logfile=$1

# check if there is an argument at all
if [[ -z "$logfile" ]]; then
  echo 'Usage: warnstats [tox-logfile]'
  exit 1
fi

# check if argument is a file
# (quoted, so that a path containing blanks is tested as ONE file name)
if [[ ! -f "$logfile" ]]; then
  echo "Error: can't find tox-logfile \"$logfile\""
  exit 1
fi

# get local html build directory:
# take the "sphinx-build -b html" command line from the log, cut out the
# trailing "/ <path>/doc/docs/_build/html" part and drop the leading "/ "
html_build_dir=$(
  grep -- "sphinx-build -b html" "$logfile" \
    | grep -oP "/ .*/doc/docs/_build/html$" \
    | sed -r 's:^/ ::'
)
echo " tox html build directory: $html_build_dir"

# read in the tox build logfile - use only lines which contain a warning
readarray -t logfile_array < <(grep -- ": WARNING:" "$logfile")
85
# process filtered logfile line by line
echo " tox logfile: $logfile"

#######################################
# Join the whitespace separated words of a text with single underscores.
# Leading/trailing whitespace is dropped and runs of blanks/newlines are
# collapsed. The text is never subject to pathname expansion or backslash
# interpretation (an unquoted "echo -e" would do both).
# Arguments: $1 - text
# Outputs:   the joined text on stdout
#######################################
warnstats_join_words() {
  local -a words=()
  # read splits on IFS whitespace only; it always reports EOF here
  read -r -d '' -a words <<<"$1" || true
  local IFS=_
  printf '%s\n' "${words[*]}"
}

#######################################
# Build the (still escaped) terminal hyperlink sequence for a target.
# The backslash sequences are kept literally; they are interpreted later
# when the report is printed with "echo -e".
# Arguments: $1 - link target, $2 - visible label
# Outputs:   the escaped link text on stdout
#######################################
warnstats_link() {
  printf '%s\n' '\e]8;;'"$1"'\a'"$2"'\e]8;;\a'
}

#######################################
# Extract the absolute path of the rst file a warning line starts with.
# Arguments: $1 - warning line
# Outputs:   the path, or "path_to_rst_missing" if the line has none
#######################################
warnstats_rst_path() {
  local path
  # ", other instance in ..." carries a second path that confuses the regex
  path=$(sed -r 's:, other instance in.*::' <<<"$1" | grep -oP "^/.*\.rst")
  printf '%s\n' "${path:-path_to_rst_missing}"
}

#######################################
# Derive the html path (relative to the html build root) from a warning line.
# Arguments: $1 - warning line
# Outputs:   e.g. "/submodules/aai/docs/index.html"; empty if nothing matches
#######################################
warnstats_html_relpath() {
  # grep a little bit more than the file name to be safe, strip the leading
  # "/docs" and swap ONLY the trailing ".rst" extension for ".html"
  # (dot escaped and anchored, so "first/" or "burst/" stay untouched)
  sed -r 's:, other instance in.*::' <<<"$1" \
    | grep -oP "(^|/)docs(/.*|)/[\w -]*\.rst" \
    | sed -r -e 's:^/docs::' -e 's:\.rst$:.html:'
}

#######################################
# Determine the module (project) name a warning line belongs to.
# Arguments: $1 - warning line
# Outputs:   module name, or one of the "<docs...>" placeholders when the
#            line does not point into a submodule
#######################################
warnstats_module_name() {
  local name
  # remove all text before the module name; then cut out the module name
  name=$(sed -r 's:(^.*/doc/docs/submodules/|^docs/submodules/|checking consistency... )::' <<<"$1" \
    | cut -f1 -d/)

  # no valid module name found: classify by well-known docs directories
  if [[ -z "$name" ]]; then
    if [[ $1 =~ doc/docs/release ]]; then
      name="<docs/release>"
    elif [[ $1 =~ doc/docs/use-cases ]]; then
      name="<docs/use-cases>"
    elif [[ $1 =~ doc/docs/guides ]]; then
      name="<docs/guides>"
    else
      name="<docs>"
    fi
  fi
  printf '%s\n' "$name"
}

#######################################
# Turn the warning text of a line into a shell friendly grouping key:
# text after "WARNING: ", blanks, "/" and "." replaced by "_", every
# character outside [A-Za-z0-9_-] removed.
# Arguments: $1 - warning line
# Outputs:   the key on stdout (may be empty)
#######################################
warnstats_message_key() {
  local text
  text=$(sed -r 's:^.+WARNING\:\ ::' <<<"$1")
  warnstats_join_words "$text" \
    | tr '/.' '__' \
    | sed -e 's/[^A-Za-z0-9_-]//g'
}

#######################################
# Parse all warning lines and fill the statistics arrays.
# Globals:   logfile_array, html_build_dir (read)
#            counter, maxlength_module, maxlength_rstfile (written)
#            module_array, rstfile_array, rstfilepath_array,
#            htmlfilepath_array, webpath_array, message_long_array,
#            message_short_array (written; must be associative arrays)
# Outputs:   a progress indicator on stdout
#######################################
process_warning_lines() {
  local line module rstfile key html_relpath message previous

  counter=0
  maxlength_module=0
  maxlength_rstfile=0

  for line in "${logfile_array[@]}"; do
    # count warning lines
    counter=$(( counter + 1 ))
    echo -n -e " lines processed: $counter\r"

    module=$(warnstats_module_name "$line")

    # rst file name, formatted so it can be used as (part of) an array key
    rstfile=$(grep -oP "[\w -]*\.rst" <<<"$line")
    rstfile=$(warnstats_join_words "$rstfile")

    # get the maximum length of the entries to adjust table width later on
    if (( ${#module} > maxlength_module )); then
      maxlength_module=${#module}
    fi
    if (( ${#rstfile} > maxlength_rstfile )); then
      maxlength_rstfile=${#rstfile}
    fi

    key="$module | $rstfile"

    # count warnings per module/rstfile combination and per module.
    # the old value is read first and the element is assigned directly:
    # "(( array[\$key]++ ))" would expand the log-derived key a second time
    # inside the arithmetic evaluation (command substitution included).
    previous=${rstfile_array[$key]:-0}
    rstfile_array[$key]=$(( previous + 1 ))
    previous=${module_array[$module]:-0}
    module_array[$module]=$(( previous + 1 ))

    # links to the local html file, the local rst file and the web version
    html_relpath=$(warnstats_html_relpath "$line")
    htmlfilepath_array[$key]=$(warnstats_link "file:${html_build_dir}${html_relpath}" "html")
    rstfilepath_array[$key]=$(warnstats_link "file:$(warnstats_rst_path "$line")" "rst")
    webpath_array[$key]=$(warnstats_link "https://docs.onap.org/en/latest${html_relpath}" "web")

    # count the number of warnings for the single message (long version);
    # an empty key is not a valid array subscript, so use a placeholder
    message=$(warnstats_message_key "$line")
    message=${message:-empty_message}
    previous=${message_long_array[$message]:-0}
    message_long_array[$message]=$(( previous + 1 ))

    # reduce length of message to group them more easily and then
    # count the number of warnings for the single message (short version)
    message=${message:0:20}
    previous=${message_short_array[$message]:-0}
    message_short_array[$message]=$(( previous + 1 ))
  done
}

process_warning_lines
224
#format counter to have always x digits
counter=$(printf "%05d" "${counter:-0}")
echo " "
echo " $counter LINES WITH WARNING IN FILE '$logfile'"

echo " "
echo "################################################################################"
echo "~~~ MESSAGES LONG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "################################################################################"
echo " "

#######################################
# Print the warning count per (long) message plus a total line,
# sorted descending by count.
# The rows are piped straight into sort; no temporary file is written, so
# a left-over "tempoutfile" in the working directory can neither leak into
# the report nor get deleted.
# Globals:   message_long_array (read)
# Outputs:   the sorted table on stdout
#######################################
print_messages_long() {
  local key count total=0
  {
    for key in "${!message_long_array[@]}"; do
      count=${message_long_array[$key]}
      (( total += count ))
      # counts are always printed with 5 digits
      printf ' %05d | %s\n' "$count" "$key"
    done
    printf ' %05d WARNINGS IN TOTAL WITH %d UNIQUE MESSAGES\n' \
      "$total" "${#message_long_array[@]}"
  } | sort -br
}

print_messages_long
257
echo " "
echo "################################################################################"
echo "~~~ MESSAGES SHORTENED (FOR BETTER GROUPING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "################################################################################"
echo " "

#######################################
# Print the warning count per shortened message plus a total line,
# sorted descending by count.
# The rows are piped straight into sort; no temporary file is written, so
# a left-over "tempoutfile" in the working directory can neither leak into
# the report nor get deleted.
# Globals:   message_short_array (read)
# Outputs:   the sorted table on stdout
#######################################
print_messages_short() {
  local key count total=0
  {
    for key in "${!message_short_array[@]}"; do
      count=${message_short_array[$key]}
      (( total += count ))
      # counts are always printed with 5 digits
      printf ' %05d | %s\n' "$count" "$key"
    done
    printf ' %05d WARNINGS IN TOTAL WITH %d UNIQUE MESSAGES\n' \
      "$total" "${#message_short_array[@]}"
  } | sort -br
}

print_messages_short
285
echo " "
echo "################################################################################"
echo "~~~ MODULES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "################################################################################"
echo " "

#######################################
# Print the warning count per module plus a total line,
# sorted descending by count.
# The rows are piped straight into sort; no temporary file is written, so
# a left-over "tempoutfile" in the working directory can neither leak into
# the report nor get deleted.
# Globals:   module_array (read)
# Outputs:   the sorted table on stdout
#######################################
print_modules() {
  local key count total=0
  {
    for key in "${!module_array[@]}"; do
      count=${module_array[$key]}
      (( total += count ))
      # counts are always printed with 5 digits
      printf ' %05d | %s\n' "$count" "$key"
    done
    printf ' %05d WARNINGS IN TOTAL IN %d MODULES\n' \
      "$total" "${#module_array[@]}"
  } | sort -br
}

print_modules
310
echo " "
echo "################################################################################"
echo "~~~ MODULES WITH RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "################################################################################"
echo " "

#######################################
# Print one row per "module | rstfile" key with its rst/html/web links and
# its warning count, plus a total line, sorted ascending by row text.
# The rows are piped straight into sort; no temporary file is written, so
# a left-over "tempoutfile" in the working directory can neither leak into
# the report nor get deleted.
# Globals:   rstfile_array, rstfilepath_array, htmlfilepath_array,
#            webpath_array (read)
# Outputs:   the sorted table on stdout
#######################################
print_modules_with_rstfiles() {
  local key count total=0
  {
    for key in "${!rstfile_array[@]}"; do
      count=${rstfile_array[$key]}
      (( total += count ))
      #format counter to have always x digits
      printf -v count '%05d' "$count"
      # echo -e turns the stored escape sequences into clickable links
      echo -e " $key | (${rstfilepath_array[$key]},${htmlfilepath_array[$key]},${webpath_array[$key]}) | $count"
    done
    #in case the name (e.g) index.rst is used multiple times in the same
    #module warnings are combined
    printf ' %05d WARNINGS IN TOTAL IN APPROX. %d RST FILES\n' \
      "$total" "${#rstfile_array[@]}"
  } | sort -b
}

print_modules_with_rstfiles
348
echo " "
echo "################################################################################"
echo "~~~ RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "################################################################################"
echo " "

#######################################
# Print one row per "module | rstfile" key with its warning count and its
# rst/html/web links, plus a total line, sorted descending by count.
# The rows are piped straight into sort; no temporary file is written, so
# a left-over "tempoutfile" in the working directory can neither leak into
# the report nor get deleted.
# Globals:   rstfile_array, rstfilepath_array, htmlfilepath_array,
#            webpath_array (read)
# Outputs:   the sorted table on stdout
#######################################
print_rstfiles() {
  local key count total=0
  {
    for key in "${!rstfile_array[@]}"; do
      count=${rstfile_array[$key]}
      (( total += count ))
      #format counter to have always x digits
      printf -v count '%05d' "$count"
      # echo -e turns the stored escape sequences into clickable links
      echo -e " $count | $key | (${rstfilepath_array[$key]},${htmlfilepath_array[$key]},${webpath_array[$key]})"
    done
    #in case the name (e.g) index.rst is used multiple times in the same
    #module warnings are combined
    printf ' %05d WARNINGS IN TOTAL IN APPROX. %d RST FILES\n' \
      "$total" "${#rstfile_array[@]}"
  } | sort -br
}

print_rstfiles
380
# close the report with a blank line; the script ends here, so everything
# below this point is never executed
echo " "
exit
383
384###
385### backup code for future extensions
386###
387
388#
389# Block_quote_ends_without_a_blank_line_unexpected_unindent
390# Bullet_list_ends_without_a_blank_line_unexpected_unindent
391# Citation_[\w-]_is_not_referenced
392# Citation_unit_test_is_not_referenced
393# Content_block_expected_for_the_code_directive_none_found
394# Content_block_expected_for_the_container_directive_none_found
395# Could_not_lex_literal_block_as_bash__Highlighting_skipped
396# Could_not_lex_literal_block_as_console__Highlighting_skipped
397# Could_not_lex_literal_block_as_guess__Highlighting_skipped
398# Could_not_lex_literal_block_as_json__Highlighting_skipped
399# Could_not_lex_literal_block_as_yaml__Highlighting_skipped
400# Definition_list_ends_without_a_blank_line_unexpected_unindent
401# document_isnt_included_in_any_toctree
402# download_file_not_readable
403# Duplicate_explicit_target_name
404# duplicate_label
405# Enumerated_list_ends_without_a_blank_line_unexpected_unindent
406# Error_in_code_directive
407# Error_in_code-block_directive
408# Error_in_image_directive
409# Explicit_markup_ends_without_a_blank_line_unexpected_unindent
410# Field_list_ends_without_a_blank_line_unexpected_unindent
411# Footnote_[0-9.*]_is_not_referenced
412# image_file_not_readable
413# Include_file
414# Inconsistent_literal_block_quoting
415# Inline_emphasis_start-string_without_end-string
416# Inline_interpreted_text_or_phrase_reference_start-string_without_end-string
417# Inline_strong_start-string_without_end-string
418# Inline_substitution_reference_start-string_without_end-string
419# Literal_block_ends_without_a_blank_line_unexpected_unindent
420# Literal_block_expected_none_found
421# Malformed_table
422# Pygments_lexer_name_asn_is_not_known
423# Title_level_inconsistent
424# Title_overline__underline_mismatch
425# Title_overline_too_short
426# Title_underline_too_short
427# toctree_contains_reference_to_nonexisting_document
428# Too_many_autonumbered_footnote_references_only_0_corresponding_footnotes_available
429# undecodable_source_characters_replacing_with
430# undefined_label
431# Unexpected_indentation
432# Unknown_directive_type_clode-block
433# unknown_document
434# Unknown_target_name