Skip to content

Commit 8e55bab

Browse files
committed
fix: clean DOI by removing \\ before sending it to graphql
1 parent 0808bc4 commit 8e55bab

File tree

5 files changed

+97
-1
lines changed

5 files changed

+97
-1
lines changed

services/enrich/src/lib/graphql/api.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ async function requestGraphql(data, args, index, apikey) {
2020
const map1 = data.map((elem) => elem?.doi);
2121

2222
// remove duplicates, undefined values and DOIs that do not start with 10.
23-
dois = [...new Set(map1.filter((doi) => doi !== undefined && doi.startsWith('10.')))];
23+
dois = [...new Set(
24+
map1
25+
.filter((doi) => typeof doi === 'string')
26+
.map((doi) => doi.replace(/\\/g, '').toLowerCase().trim())
27+
.filter((doi) => doi.startsWith('10.')),
28+
)];
2429

2530
dois = dois.join('","');
2631

services/enrich/src/lib/json.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,15 @@ async function processEnrichJSON(id, index, args, prefix, state) {
219219
let parsedLine;
220220
try {
221221
parsedLine = JSON.parse(line);
222+
223+
// clean malformed DOI
224+
if (parsedLine.doi) {
225+
parsedLine.doi = parsedLine.doi
226+
.replace(/\\/g, '')
227+
.toLowerCase()
228+
.trim();
229+
}
230+
222231
data.push(parsedLine);
223232
} catch (err) {
224233
logger.error(`[job][jsonl]: Cannot parse [${line}] in json format`, err);

services/enrich/tests/integration/jsonl/enrich.test.js

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,84 @@ describe('Enrich: job on csv file', () => {
501501
expect(same).toBe(true);
502502
});
503503
});
504+
505+
describe('[job][jsonl]: Enrich 1/1 lines with is_oa', () => {
506+
let id;
507+
let enrichedFile;
508+
509+
it('Should upload the file', async () => {
510+
const response = await request(app)
511+
.post('/upload')
512+
.attach('file', path.resolve(enrichDir, 'mustBeEnrich', 'file03.jsonl'), 'file03.jsonl')
513+
.set('Content-Type', 'application/x-ndjson')
514+
.set('x-api-key', apikey1);
515+
516+
expect(response.statusCode).toBe(200);
517+
518+
id = response?.body;
519+
});
520+
521+
it('Should enrich the file on 1 lines with { is_oa } and download it', async () => {
522+
// start enrich process
523+
const response = await request(app)
524+
.post(`/job/${id}`)
525+
.send({
526+
type: 'jsonl',
527+
index: 'unpaywall-test',
528+
args: '{ is_oa }',
529+
})
530+
.set('x-api-key', apikey1);
531+
532+
expect(response.statusCode).toBe(200);
533+
});
534+
535+
it('Should get the state of enrich', async () => {
536+
let response;
537+
do {
538+
response = await request(app)
539+
.get(`/states/${id}.json`)
540+
.set('x-api-key', apikey1);
541+
expect(response.statusCode).toBe(200);
542+
await setTimeout(100);
543+
} while (!response?.body?.done);
544+
545+
const state = response?.body;
546+
547+
expect(state).toMatchObject({
548+
done: true,
549+
apikey: apikey1,
550+
linesRead: 1,
551+
enrichedLines: 1,
552+
error: false,
553+
});
554+
555+
fieldToBeDefined.forEach((key) => {
556+
expect(state).toHaveProperty(key);
557+
expect(state[key]).not.toBeUndefined();
558+
});
559+
});
560+
561+
it('Should download the enriched file', async () => {
562+
const response = await request(app)
563+
.get(`/enriched/${id}.jsonl`)
564+
.set('x-api-key', apikey1)
565+
.buffer()
566+
.parse(binaryParser);
567+
568+
expect(response.statusCode).toBe(200);
569+
570+
enrichedFile = path.resolve(enrichDir, 'tmp', 'enriched.jsonl');
571+
try {
572+
await fsp.writeFile(enrichedFile, response.body.toString());
573+
} catch (err) {
574+
console.error(`writeFile: ${err}`);
575+
}
576+
});
577+
578+
it('Should be the same', async () => {
579+
const reference = path.resolve(enrichDir, 'enriched', 'jsonl', 'file07.jsonl');
580+
const same = await compareFile(reference, enrichedFile);
581+
expect(same).toBe(true);
582+
});
583+
});
504584
});
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"name":"name1","doi":"10.100/1","is_oa":true}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"name":"name1","doi":"10.100/1\\"}

0 commit comments

Comments
 (0)