Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/csgrep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ int main(int argc, char *argv[])
("file-glob", "expand glob patterns in the names of input files")
("ignore-case,i", "ignore case when matching regular expressions")
("ignore-parser-warnings", "if enabled, parser warnings about the input files do not affect exit code")
("add-input-lines", "if enabled, events in mode=json will also contain input_line numbers from the original input file/stream")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The --add-input-lines option name does not sound intuitive for what it does. Without the context I would understand it as if some input text was being added rather than line numbers. As mentioned above, we should record the file names, too. What about naming it --record-input-locations instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I also rename the JSON fields to input_line_location and input_file_location?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use input_file and input_line. I see location as an abstraction over file, line, column. Something like: https://github.com/gcc-mirror/gcc/blob/5cd3889135d77bf951e4ffe169868b453c36257d/libcpp/include/line-map.h#L1293

("invert-match,v", "select defects that do not match the selected criteria")
("invert-regex,n", "invert regular expressions in all predicates")
("filter-file,f", po::value<TStringList>(), "read custom filtering rules from a file in JSON format");
Expand All @@ -639,7 +640,7 @@ int main(int argc, char *argv[])
p.add("input-file", -1);

po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
po::notify(vm);

po::options_description opts;
opts.add(desc).add(hidden);
Expand Down Expand Up @@ -735,6 +736,9 @@ int main(int argc, char *argv[])
if (vm.count("ignore-parser-warnings"))
eng->setIgnoreParserWarnings(true);

if (vm.count("add-input-lines"))
eng->setAddInputLines(true);

bool hasError = false;

// if no input file is given, read from stdin
Expand Down
1 change: 1 addition & 0 deletions src/lib/defect.hh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct DefEvent {
std::string fileName;
int line = 0;
int column = 0;
int inputLine = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also (optionally) record inputFile. Otherwise the number is ambiguous in case csgrep reads multiple input files.

std::string event;
std::string msg;

Expand Down
3 changes: 2 additions & 1 deletion src/lib/instream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@

#include "instream.hh"

InStream::InStream(const std::string &fileName, const bool silent):
InStream::InStream(const std::string &fileName, const bool silent, const bool addInputLines):
fileName_(fileName),
silent_(silent),
addInputLines_(addInputLines),
str_((fileName_ == "-")
? std::cin
: fileStr_)
Expand Down
13 changes: 8 additions & 5 deletions src/lib/instream.hh
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,23 @@ struct InFileException {

class InStream {
public:
InStream(const std::string &fileName, bool silent = false);
InStream(const std::string &fileName, bool silent = false,
bool addInputLines = false);
InStream(std::istringstream &str, bool silent = false);
~InStream() = default;

const std::string& fileName() const { return fileName_; }
std::istream& str() const { return str_; }
bool silent() const { return silent_; }
bool anyError() const { return anyError_; }
const std::string& fileName() const { return fileName_; }
std::istream& str() const { return str_; }
bool silent() const { return silent_; }
bool addInputLines() const { return addInputLines_; }
bool anyError() const { return anyError_; }

void handleError(const std::string &msg = "", unsigned long line = 0UL);

private:
const std::string fileName_;
const bool silent_;
const bool addInputLines_ = false;
bool anyError_ = false;
std::ifstream fileStr_;
std::istream &str_;
Expand Down
11 changes: 9 additions & 2 deletions src/lib/parser-cov.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ std::ostream& operator<<(std::ostream &str, EToken code)

class ErrFileLexer {
public:
ErrFileLexer(std::istream &input):
ErrFileLexer(std::istream &input, bool addInputLines = false):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be easier if the ErrFileLexer constructor took InStream &. This class is internal to the parser-cov module, so the constructor can be changed easily.

lineReader_(input),
addInputLines_(addInputLines),
hasError_(false)
{
}
Expand All @@ -140,6 +141,7 @@ class ErrFileLexer {

private:
LineReader lineReader_;
bool addInputLines_;
bool hasError_;
Defect def_;
DefEvent evt_;
Expand Down Expand Up @@ -181,6 +183,8 @@ EToken ErrFileLexer::readNext()
evt_ = DefEvent();
evt_.event = sm[/* # */ 1];
evt_.msg = sm[/* msg */ 2];
if (addInputLines_)
evt_.inputLine = lineReader_.lineNo();
return T_COMMENT;
}

Expand All @@ -202,6 +206,9 @@ EToken ErrFileLexer::readNext()
evt_.event = sm[/* event */ 4];
evt_.msg = sm[/* msg */ 5];

if (addInputLines_)
evt_.inputLine = lineReader_.lineNo();

return T_EVENT;
}

Expand Down Expand Up @@ -523,7 +530,7 @@ struct CovParser::Private {
ImpliedAttrDigger digger;

Private(InStream &input_):
lexer(input_.str()),
lexer(input_.str(), input_.addInputLines()),
fileName(input_.fileName()),
silent(input_.silent()),
hasError(false),
Expand Down
14 changes: 10 additions & 4 deletions src/lib/parser-gcc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,10 @@ class AbstractTokenFilter: public ITokenizer {

class Tokenizer: public ITokenizer {
public:
Tokenizer(std::istream &input):
Tokenizer(std::istream &input, bool addInputLines = false):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, the Tokenizer constructor can take InStream & to simplify the code.

input_(input),
lineNo_(0)
lineNo_(0),
addInputLines_(addInputLines)
{
}

Expand All @@ -83,6 +84,7 @@ class Tokenizer: public ITokenizer {
private:
std::istream &input_;
int lineNo_;
bool addInputLines_;

const RE reSideBar_ =
RE("^ *((([0-9]+)? \\| )|(\\+\\+\\+ \\|\\+)).*$");
Expand Down Expand Up @@ -131,6 +133,9 @@ EToken Tokenizer::readNext(DefEvent *pEvt)
*pEvt = DefEvent();
pEvt->msg = line;

if (addInputLines_)
pEvt->inputLine = lineNo_;

// check for line markers produced by gcc-9.2.1 (a.k.a. sidebar)
if (boost::regex_match(pEvt->msg, reSideBar_))
// xxx.c:2:1: note: include '<stdlib.h>' or provide a declaration...
Expand Down Expand Up @@ -387,7 +392,7 @@ EToken MultilineConcatenator::readNext(DefEvent *pEvt)
class BasicGccParser {
public:
BasicGccParser(InStream &input):
rawTokenizer_(input.str()),
rawTokenizer_(input.str(), input.addInputLines()),
noiseFilter_(&rawTokenizer_),
markerConverter_(&noiseFilter_),
tokenizer_(&markerConverter_),
Expand Down Expand Up @@ -535,6 +540,7 @@ bool BasicGccParser::getNext(Defect *pDef)
DefEvent evt;

const EToken tok = tokenizer_.readNext(&evt);

switch (tok) {
case T_NULL:
if (!hasKeyEvent_ && !defCurrent_.events.empty())
Expand Down Expand Up @@ -828,7 +834,7 @@ bool GccParser::getNext(Defect *pDef)
while (d->core.getNext(&d->lastDef) && d->tryMerge(pDef))
;

// initialize verbosityLevel
// initialize verbosityLevel
// FIXME: similar code to KeyEventDigger::initVerbosity()
TEvtList &evtList = pDef->events;
const unsigned evtCount = evtList.size();
Expand Down
4 changes: 3 additions & 1 deletion src/lib/parser-json-simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,14 @@ SimpleTreeDecoder::Private::Private(InStream &input):
};

// known per-event subnodes
// 'input_line' is whitelisted, but currently there is no use-case for it
// so it is not re-read
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why cannot we preserve non-zero input_line values already recorded in JSON files?
The use-case would be:

csgrep --mode=json file1.json file2.json > all.json

this->nodeStore[NK_EVENT] = {
"column",
"event",
"file_name",
"h_size",
"input_line",
"line",
"message",
"v_size",
Expand Down Expand Up @@ -189,4 +192,3 @@ bool SimpleTreeDecoder::readNode(Defect *def)

return true;
}

2 changes: 2 additions & 0 deletions src/lib/writer-json-simple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ static array simpleEncodeEvents(const TEvtList &events)
evtNode["h_size"] = evt.hSize;
if (0 < evt.vSize)
evtNode["v_size"] = evt.vSize;
if (0 < evt.inputLine)
evtNode["input_line"] = evt.inputLine;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to record inputFile as well. Otherwise the number is meaningless when multiple text files are read.


// describe the event
evtNode["event"] = evt.event;
Expand Down
2 changes: 1 addition & 1 deletion src/lib/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ bool AbstractWriter::handleFile(InStream &input)
bool AbstractWriter::handleFile(const std::string &fileName, bool silent)
{
try {
InStream str(fileName, silent);
InStream str(fileName, silent, addInputLines_);
return this->handleFile(str);
}
catch (const InFileException &e) {
Expand Down
4 changes: 4 additions & 0 deletions src/lib/writer.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,15 @@ class AbstractWriter {
ignoreParserWarnings_ = val;
}

void setAddInputLines(const bool val) {
addInputLines_ = val;
}

private:
EFileFormat inputFormat_ = FF_INVALID;
const TScanProps emptyProps_{};
bool ignoreParserWarnings_ = false;
bool addInputLines_ = false;
};

using TWriterPtr = std::unique_ptr<AbstractWriter>;
Expand Down
1 change: 1 addition & 0 deletions tests/csgrep/0134-csgrep-json-addlines-args.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--mode=json --add-input-lines
7 changes: 7 additions & 0 deletions tests/csgrep/0134-csgrep-json-addlines-stdin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Error: SHELLCHECK_WARNING:
/etc/rc.d/rc.sysinit:492:13: warning: Quote this to prevent word splitting. [SC2046]
# 490| if [ -n "$SELINUX_STATE" -a "$READONLY" != "yes" ]; then
# 491| if [ -f /.autorelabel ] || strstr "$cmdline" autorelabel ; then
# 492|-> restorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\// { print $2 }' /etc/fstab) >/dev/null 2>&1
# 493| fi
# 494| fi
62 changes: 62 additions & 0 deletions tests/csgrep/0134-csgrep-json-addlines-stdout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"defects": [
{
"checker": "SHELLCHECK_WARNING",
"language": "shell",
"tool": "shellcheck",
"hash_v1": "b6311c1fdc52c47d4279cd6650af36e6f8299960",
"key_event_idx": 0,
"events": [
{
"file_name": "/etc/rc.d/rc.sysinit",
"line": 492,
"column": 13,
"input_line": 2,
"event": "warning",
"message": "Quote this to prevent word splitting. [SC2046]",
"verbosity_level": 0
},
{
"file_name": "",
"line": 0,
"input_line": 3,
"event": "#",
"message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then",
"verbosity_level": 1
},
{
"file_name": "",
"line": 0,
"input_line": 4,
"event": "#",
"message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then",
"verbosity_level": 1
},
{
"file_name": "",
"line": 0,
"input_line": 5,
"event": "#",
"message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1",
"verbosity_level": 1
},
{
"file_name": "",
"line": 0,
"input_line": 6,
"event": "#",
"message": " 493| fi",
"verbosity_level": 1
},
{
"file_name": "",
"line": 0,
"input_line": 7,
"event": "#",
"message": " 494| fi",
"verbosity_level": 1
}
]
}
]
}
Empty file.
60 changes: 60 additions & 0 deletions tests/csgrep/0135-csgrep-json-addlines-stdin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"defects": [
{
"checker": "SHELLCHECK_WARNING",
"language": "shell",
"key_event_idx": 0,
"events": [
{
"file_name": "/etc/rc.d/rc.sysinit",
"line": 492,
"column": 13,
"input_line": 2,
"event": "warning",
"message": "Quote this to prevent word splitting. [SC2046]",
"verbosity_level": "0"
},
{
"file_name": "",
"line": 0,
"input_line": 3,
"event": "#",
"message": " 490| if [ -n \"$SELINUX_STATE\" -a \"$READONLY\" != \"yes\" ]; then",
"verbosity_level": "1"
},
{
"file_name": "",
"line": 0,
"input_line": 4,
"event": "#",
"message": " 491| if [ -f /.autorelabel ] || strstr \"$cmdline\" autorelabel ; then",
"verbosity_level": "1"
},
{
"file_name": "",
"line": 0,
"input_line": 5,
"event": "#",
"message": " 492|-> \trestorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\\// { print $2 }' /etc/fstab) >/dev/null 2>&1",
"verbosity_level": "1"
},
{
"file_name": "",
"line": 0,
"input_line": 6,
"event": "#",
"message": " 493| fi",
"verbosity_level": "1"
},
{
"file_name": "",
"line": 0,
"input_line": 7,
"event": "#",
"message": " 494| fi",
"verbosity_level": "1"
}
]
}
]
}
7 changes: 7 additions & 0 deletions tests/csgrep/0135-csgrep-json-addlines-stdout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Error: SHELLCHECK_WARNING:
/etc/rc.d/rc.sysinit:492:13: warning: Quote this to prevent word splitting. [SC2046]
# 490| if [ -n "$SELINUX_STATE" -a "$READONLY" != "yes" ]; then
# 491| if [ -f /.autorelabel ] || strstr "$cmdline" autorelabel ; then
# 492|-> restorecon $(awk '!/^#/ && $4 !~ /noauto/ && $2 ~ /^\// { print $2 }' /etc/fstab) >/dev/null 2>&1
# 493| fi
# 494| fi
2 changes: 2 additions & 0 deletions tests/csgrep/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,5 @@ test_csgrep("0130-file-glob" )
test_csgrep("0131-unicontrol-perl-man-page" )
test_csgrep("0132-cov-parser-nested-evt" )
test_csgrep("0133-sarif-gcc-pwd" )
test_csgrep("0134-csgrep-json-addlines" )
test_csgrep("0135-csgrep-json-addlines" )