Skip to content

Commit 5980457

Browse files
author
core software devel
committed
Fix incorrect matchmap size setting when scanning folders. Code cleanup.
1 parent 3a38ca7 commit 5980457

File tree

8 files changed

+18
-42
lines changed

8 files changed

+18
-42
lines changed

inc/match_list.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@
8080

8181
#define MATCH_LIST_TOLERANCE 98.5
8282
typedef struct match_data_t match_data_t; /* Forward declaration */
83-
typedef struct scan_data_t scan_data_t; /* Forward declaration*/
8483

8584
/**
8685
* @brief Define a list of component_data_t

inc/scan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ typedef struct scan_data_t
6262
int max_components_to_process; /* Max component to retrieve during snippet scanning */
6363
int max_snippets_to_show; //TODO
6464
int max_components_to_show; //TODO
65+
int max_matchmap_size;
6566
bool printed_succed;
6667
} scan_data_t;
6768

inc/scanoss.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,6 @@ extern long microseconds_start;
119119
extern int map_rec_len;
120120
extern bool match_extensions;
121121

122-
/* File tracing -qi */
123-
extern uint8_t trace_id[MD5_LEN];
124-
extern bool trace_on;
125-
126122
/*component hint hold the last component matched/guessed */
127123
extern char * component_hint;
128124

src/help.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ Options:\n\
6565
-h Display this help and exit.\n\
6666
-d Save debugging information to disk (/tmp).\n\
6767
-q Produces no JSON output. Only debugging info via STDERR.\n\
68-
-i Trace specific file id when debugging (requires -q).\n\
6968
\n\
7069
Enviroment variables:\n\
7170
SCANOSS_MATCHMAP_MAX: define the snippet scanning match map size, %d by default.\n\

src/main.c

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,6 @@ struct ldb_table oss_notices;
6363
component_item *ignore_components;
6464
component_item *declared_components;
6565

66-
/* File tracing -qi */
67-
uint8_t trace_id[MD5_LEN];
68-
bool trace_on;
6966
bool lib_encoder_present = false;
7067
#define LDB_VER_MIN "4.1.0"
7168

@@ -275,10 +272,6 @@ int main(int argc, char **argv)
275272
//global var initialization - it must be improved
276273
debug_on = false;
277274
quiet = false;
278-
279-
/* File tracing with -qi */
280-
trace_on = false;
281-
memset(trace_id, 0 ,16);
282275

283276
if (argc <= 1)
284277
{
@@ -298,7 +291,7 @@ int main(int argc, char **argv)
298291
int option;
299292
bool invalid_argument = false;
300293
char * ldb_db_name = NULL;
301-
while ((option = getopt(argc, argv, ":f:s:b:B:c:k:a:F:l:n:i:M:N:wtvhedqH")) != -1)
294+
while ((option = getopt(argc, argv, ":f:s:b:B:c:k:a:F:l:n:M:N:wtvhedqH")) != -1)
302295
{
303296
/* Check valid alpha is entered */
304297
if (optarg)
@@ -361,15 +354,6 @@ int main(int argc, char **argv)
361354
case 'N':
362355
scan_max_components = atol(optarg);
363356
break;
364-
case 'i':
365-
if (strlen(optarg) == (MD5_LEN * 2))
366-
{
367-
ldb_hex_to_bin(optarg, MD5_LEN * 2, trace_id);
368-
trace_on = true;
369-
}
370-
else fprintf(stderr, "Ignoring -i due to invalid length\n");
371-
break;
372-
373357
case 'w':
374358
force_wfp = true;
375359
break;

src/match.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ bool add_component_from_urlid(component_list_t *component_list, uint8_t *url_id,
399399
* @return false
400400
*/
401401

402-
file_recordset *files = NULL;
402+
static file_recordset *files = NULL;
403403

404404
bool component_from_file(uint8_t *key, uint8_t *subkey, int subkey_ln, uint8_t *raw_data, uint32_t datalen, int iteration, void *ptr)
405405
{

src/scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static bool zero_bytes (uint8_t *md5)
115115
*/
116116
static match_t ldb_scan_file(scan_data_t * scan) {
117117

118-
scanlog("Checking entire file\n");
118+
scanlog("Checking entire file %s\n", scan->file_path);
119119

120120
if (zero_bytes(scan->md5)) return MATCH_NONE;
121121

src/snippets.c

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -536,23 +536,25 @@ void wfp_invert(uint32_t wfpint32, uint8_t *out)
536536
static void matchmap_setup(scan_data_t * scan)
537537
{
538538
char * matchmap_env = getenv("SCANOSS_MATCHMAP_MAX");
539+
scan->max_matchmap_size = DEFAULT_MATCHMAP_FILES;
539540
if (matchmap_env)
540541
{
541542
int matchmap_max_files_aux = atoi(matchmap_env);
542543
if (matchmap_max_files_aux > DEFAULT_MATCHMAP_FILES / 4 && matchmap_max_files_aux < DEFAULT_MATCHMAP_FILES * 20)
543544
{
544545
scanlog("matchmap size changed by env variable to: %d\n", matchmap_max_files_aux);
545-
matchmap_max_files = matchmap_max_files_aux;
546+
scan->max_matchmap_size = matchmap_max_files_aux;
546547
}
547548
}
548549
//If we are looking for multiple snippets, update the matchmap size
549-
matchmap_max_files = scan->max_snippets_to_process * matchmap_max_files;
550+
scan->max_matchmap_size *= scan->max_snippets_to_process;
550551

551552
if (engine_flags & ENABLE_HIGH_ACCURACY)
552553
{
553-
matchmap_max_files *=5;
554-
scanlog("matchmap size changed by high accuracy analisys to: %d\n", matchmap_max_files);
554+
scan->max_matchmap_size *=5;
555+
scanlog("matchmap size changed by high accuracy analisys to: %d\n", scan->max_matchmap_size);
555556
}
557+
matchmap_max_files = scan->max_matchmap_size;
556558
}
557559

558560
typedef struct matchmap_entry_t
@@ -581,7 +583,7 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
581583
for (long t = start_pos; t < scan->matchmap_size; t++)
582584
{
583585
//The matchmap is sorted, stop if you are comparing against a different sector
584-
if (*scan->matchmap[t].md5 > *md5 && (scan->matchmap_size < matchmap_max_files))
586+
if (*scan->matchmap[t].md5 > *md5 && (scan->matchmap_size < scan->max_matchmap_size))
585587
{
586588
scanlog("skipping: md5 out of range wfp\n");
587589
return -1;
@@ -614,7 +616,7 @@ int add_file_to_matchmap(scan_data_t *scan, matchmap_entry_t *item, uint8_t *md5
614616
if (found < 0)
615617
{
616618
/* Not found. Add MD5 to map */
617-
if (scan->matchmap_size >= matchmap_max_files)
619+
if (scan->matchmap_size >= scan->max_matchmap_size)
618620
{
619621
scanlog("skipping: matchmap is full\n");
620622
return -1;
@@ -704,11 +706,6 @@ match_t ldb_scan_snippets(scan_data_t *scan)
704706
if (engine_flags & DISABLE_SNIPPET_MATCHING)
705707
return MATCH_NONE;
706708

707-
if (trace_on)
708-
scanlog("Checking snippets. Traced (-qi) matches marked with *\n");
709-
else
710-
scanlog("Checking snippets\n");
711-
712709
matchmap_setup(scan);
713710
adjust_tolerance(scan);
714711

@@ -753,7 +750,7 @@ match_t ldb_scan_snippets(scan_data_t *scan)
753750
memset(map_indirection_index, 0, sizeof(map_indirection_index));
754751

755752
scanlog ("< Snippet scan setup: Total lines: %d ,Matchmap size: %d, Min hits: %d, Min lines: %d, Map max size = %d, Cat N = %d x %d, Cat size = %d >\n",
756-
scan->total_lines, matchmap_max_files, min_match_hits, min_match_lines, map_max_size, MAP_INDIRECTION_CAT_NUMBER, map_indedirection_items_size, MAP_INDIRECTION_CAT_SIZE);
753+
scan->total_lines, scan->max_matchmap_size, min_match_hits, min_match_lines, map_max_size, MAP_INDIRECTION_CAT_NUMBER, map_indedirection_items_size, MAP_INDIRECTION_CAT_SIZE);
757754

758755
for (int i =0; i < scan->hash_count; i++)
759756
{
@@ -802,11 +799,11 @@ match_t ldb_scan_snippets(scan_data_t *scan)
802799
map_lines_indirection[map[map_indirection[i][j]].line] = 1;
803800
lines_coverage++;
804801
}
805-
if (cat_limit > matchmap_max_files)
802+
if (cat_limit > scan->max_matchmap_size)
806803
{
807804
if ((hashes_to_process < scan->hash_count / 10 || (float) lines_coverage / scan->hash_count < 0.6) && cat_limit < MAX_MATCHMAP_FILES)
808805
{
809-
matchmap_max_files += map[map_indirection[i][j]].size;
806+
scan->max_matchmap_size += map[map_indirection[i][j]].size;
810807
}
811808
else
812809
{
@@ -843,10 +840,10 @@ match_t ldb_scan_snippets(scan_data_t *scan)
843840
}
844841
}
845842
}
846-
matchmap_max_files = cat_limit;
843+
scan->max_matchmap_size = cat_limit;
847844
scanlog("Map limit on %d MD5s at %d of %d caths. Selected hashes: %d/%d - lines coverage %d\n",
848-
matchmap_max_files, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER, hashes_to_process, scan->hash_count, (lines_coverage * 100) / scan->total_lines);
849-
scan->matchmap = calloc(matchmap_max_files, sizeof(matchmap_entry));
845+
scan->max_matchmap_size, cat_limit_index, MAP_INDIRECTION_CAT_NUMBER, hashes_to_process, scan->hash_count, (lines_coverage * 100) / scan->total_lines);
846+
scan->matchmap = calloc(scan->max_matchmap_size, sizeof(matchmap_entry));
850847

851848
int map_indexes[scan->hash_count];
852849
memset(map_indexes, 0, sizeof(map_indexes));

0 commit comments

Comments
 (0)