Skip to content

Commit 8526286

Browse files
authored
Merge pull request #151 from AlexandrovLab/pdfwriter-fix
Pdfwriter fix - closes AlexandrovLab/SigProfilerExtractor#262
2 parents 3b98292 + 7bb3232 commit 8526286

File tree

6 files changed

+92
-34
lines changed

6 files changed

+92
-34
lines changed

.travis.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ cache:
1111
- master
1212

1313
before_install:
14-
if ! [ -f ./src/GRCh37.tar.gz ]; then
15-
wget ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/;
16-
fi
14+
- pip install --upgrade pip setuptools packaging
15+
- if ! [ -f ./src/GRCh37.tar.gz ]; then wget ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi
1716

1817
install:
1918
- pip install .[tests]

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66

77
## [Unreleased]
88

9+
## [0.1.9] - 2024-11-12
10+
11+
### Changed
12+
- Replaced `PdfMerger` with `PdfWriter` (reading each plot through `PdfReader`), because the deprecated `PdfMerger` class was removed in `pypdf >= 5.0.0`.
13+
14+
### Fixed
15+
- Resolved the incompatibility caused by the removal of `PdfMerger`, restoring compatibility with `pypdf >= 5.0.0`.
16+
917
## [0.1.8] - 2024-08-20
1018

1119
### Added

SigProfilerAssignment/decompose_subroutines.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import pypdf
2424
import scipy
2525

26-
from pypdf import PdfMerger
26+
from pypdf import PdfWriter, PdfReader
2727
import SigProfilerAssignment as spa
2828
from SigProfilerAssignment import single_sample as ss
2929
from scipy.spatial.distance import correlation as cor
@@ -410,7 +410,7 @@ def signature_decomposition(
410410
)
411411
# lognote.write("\n********** Starting Signature Decomposition **********\n\n")
412412
activity_percentages = []
413-
merger = PdfMerger()
413+
merger = PdfWriter()
414414

415415
for i, j in zip(range(signatures.shape[1]), denovo_signature_names):
416416
# Only for context SBS96
@@ -606,7 +606,12 @@ def signature_decomposition(
606606
exome=exome,
607607
volume=volume,
608608
)
609-
merger.append(byte_plot)
609+
610+
byte_plot.seek(0)
611+
reader = PdfReader(byte_plot)
612+
for page in reader.pages:
613+
merger.add_page(page)
614+
610615
with alive_bar(
611616
1, ctrl_c=False, bar="blocks", title=f"Decompositon Plot:{denovo_name}"
612617
) as bar:

SigProfilerAssignment/decomposition.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from sigProfilerPlotting import sigProfilerPlotting as sigPlot
2525
import sigProfilerPlotting
2626
import os, sys
27-
from pypdf import PdfMerger
27+
from pypdf import PdfWriter, PdfReader
2828
import fitz
2929
import time
3030
from pathlib import Path
@@ -93,7 +93,7 @@ def generate_sample_reconstruction(
9393
project = "test_run"
9494
mtype = "96"
9595

96-
final_pdf = PdfMerger()
96+
final_pdf = PdfWriter()
9797
samples = samples_input.copy(deep=True)
9898
samples.reset_index(inplace=True)
9999
for sample_name in samples.columns[1:]:
@@ -133,7 +133,11 @@ def generate_sample_reconstruction(
133133
exome=execution_parameters["exome"],
134134
volume=get_storage_dir(execution_parameters["volume"]),
135135
)
136-
final_pdf.append(result)
136+
137+
result.seek(0)
138+
reader = PdfReader(result)
139+
for page in reader.pages:
140+
final_pdf.add_page(page)
137141

138142
pdf_output_path = os.path.join(
139143
output_dir, "Reconstructed_Sample_Plots_" + str(mtype) + ".pdf"

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if os.path.exists("dist"):
77
shutil.rmtree("dist")
88

9-
VERSION = "0.1.8"
9+
VERSION = "0.1.9"
1010

1111

1212
def write_version_py(filename="SigProfilerAssignment/version.py"):
@@ -15,7 +15,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"):
1515
# THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY
1616
short_version = '%(version)s'
1717
version = '%(version)s'
18-
Update = 'v0.1.8: Add Dockerfile and remove uncessary dependencies'
18+
Update = 'v0.1.9: Replace PdfMerger with PdfReader and PdfWriter for pypdf 5.0.0'
1919
2020
2121
"""
@@ -39,7 +39,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"):
3939
"SigProfilerMatrixGenerator>=1.2.28",
4040
"sigProfilerPlotting>=1.3.24",
4141
"reportlab>=3.5.42",
42-
"pypdf>=3.1.0",
42+
"pypdf>=5.0.0",
4343
"alive_progress>=2.4.1",
4444
"PyMuPDF>=1.21.0", # required for package "fitz"
4545
]

test.py

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import SigProfilerAssignment as spa
99
from SigProfilerAssignment import Analyzer as Analyze
1010
from SigProfilerAssignment.DecompositionPlots import PlotDecomposition as sp
11-
from pypdf import PdfMerger
11+
from pypdf import PdfWriter, PdfReader
1212
import numpy as np
1313
import pandas as pd
1414
import time
@@ -282,7 +282,7 @@ def denovo_fit_vcf_test():
282282
def gen_SBS96():
283283
np.random.seed(1234567)
284284
s = time.time()
285-
merger = PdfMerger()
285+
merger = PdfWriter()
286286
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/De_Novo_Solution_Signatures_SBS96.txt"
287287
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/Decomposed_Solution_Signatures_SBS96.txt"
288288
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -309,7 +309,10 @@ def gen_SBS96():
309309
project,
310310
mtype,
311311
)
312-
merger.append(result)
312+
result.seek(0)
313+
reader = PdfReader(result)
314+
for page in reader.pages:
315+
merger.add_page(page)
313316

314317
for ind in range(5, 0, -1):
315318
basis_names = basis_names[:ind]
@@ -329,7 +332,10 @@ def gen_SBS96():
329332
project,
330333
mtype,
331334
)
332-
merger.append(result)
335+
result.seek(0)
336+
reader = PdfReader(result)
337+
for page in reader.pages:
338+
merger.add_page(page)
333339

334340
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_SBS96.pdf"))
335341
return time.time() - s
@@ -338,7 +344,7 @@ def gen_SBS96():
338344
def gen_SBS1536():
339345
np.random.seed(1234567)
340346
s = time.time()
341-
merger = PdfMerger()
347+
merger = PdfWriter()
342348
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/De_Novo_Solution_Signatures_SBS1536.txt"
343349
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/Decomposed_Solution_Signatures_SBS1536.txt"
344350
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -366,7 +372,10 @@ def gen_SBS1536():
366372
mtype,
367373
)
368374
# sp.run_PlotDecomposition(denovo_mtx, basis_names, weights, output_path, project, mtype, True, statistics, "COSMICv3-GRCh37", "This is where a custom message would go.")
369-
merger.append(result)
375+
result.seek(0)
376+
reader = PdfReader(result)
377+
for page in reader.pages:
378+
merger.add_page(page)
370379

371380
for ind in range(5, 0, -1):
372381
basis_names = basis_names[:ind]
@@ -386,7 +395,10 @@ def gen_SBS1536():
386395
project,
387396
mtype,
388397
)
389-
merger.append(result)
398+
result.seek(0)
399+
reader = PdfReader(result)
400+
for page in reader.pages:
401+
merger.add_page(page)
390402

391403
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_SBS1536.pdf"))
392404
return time.time() - s
@@ -395,7 +407,7 @@ def gen_SBS1536():
395407
def gen_SBS288():
396408
np.random.seed(1234567)
397409
s = time.time()
398-
merger = PdfMerger()
410+
merger = PdfWriter()
399411
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/De_Novo_Solution_Signatures_SBS288.txt"
400412
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SBS288_Signatures.txt"
401413
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -422,7 +434,10 @@ def gen_SBS288():
422434
project,
423435
mtype,
424436
)
425-
merger.append(result)
437+
result.seek(0)
438+
reader = PdfReader(result)
439+
for page in reader.pages:
440+
merger.add_page(page)
426441

427442
for ind in range(5, 0, -1):
428443
basis_names = basis_names[:ind]
@@ -442,7 +457,10 @@ def gen_SBS288():
442457
project,
443458
mtype,
444459
)
445-
merger.append(result)
460+
result.seek(0)
461+
reader = PdfReader(result)
462+
for page in reader.pages:
463+
merger.add_page(page)
446464

447465
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_SBS288.pdf"))
448466
return time.time() - s
@@ -451,7 +469,7 @@ def gen_SBS288():
451469
def gen_ID83():
452470
np.random.seed(1234567)
453471
s = time.time()
454-
merger = PdfMerger()
472+
merger = PdfWriter()
455473
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/De_Novo_Solution_Signatures_INDEL.txt"
456474
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_ID83_Signatures.txt"
457475
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -478,7 +496,10 @@ def gen_ID83():
478496
project,
479497
mtype,
480498
)
481-
merger.append(result)
499+
result.seek(0)
500+
reader = PdfReader(result)
501+
for page in reader.pages:
502+
merger.add_page(page)
482503

483504
for ind in range(5, 0, -1):
484505
basis_names = basis_names[:ind]
@@ -498,7 +519,10 @@ def gen_ID83():
498519
project,
499520
mtype,
500521
)
501-
merger.append(result)
522+
result.seek(0)
523+
reader = PdfReader(result)
524+
for page in reader.pages:
525+
merger.add_page(page)
502526

503527
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_ID83.pdf"))
504528
return time.time() - s
@@ -507,7 +531,7 @@ def gen_ID83():
507531
def gen_DBS78():
508532
np.random.seed(1234567)
509533
s = time.time()
510-
merger = PdfMerger()
534+
merger = PdfWriter()
511535
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/De_Novo_Solution_Signatures_DINUC.txt"
512536
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_DBS78_Signatures.txt"
513537
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -534,7 +558,10 @@ def gen_DBS78():
534558
project,
535559
mtype,
536560
)
537-
merger.append(result)
561+
result.seek(0)
562+
reader = PdfReader(result)
563+
for page in reader.pages:
564+
merger.add_page(page)
538565

539566
for ind in range(5, 0, -1):
540567
basis_names = basis_names[:ind]
@@ -554,7 +581,10 @@ def gen_DBS78():
554581
project,
555582
mtype,
556583
)
557-
merger.append(result)
584+
result.seek(0)
585+
reader = PdfReader(result)
586+
for page in reader.pages:
587+
merger.add_page(page)
558588

559589
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_DBS78.pdf"))
560590
return time.time() - s
@@ -563,7 +593,7 @@ def gen_DBS78():
563593
def gen_CNV48():
564594
np.random.seed(1234567)
565595
s = time.time()
566-
merger = PdfMerger()
596+
merger = PdfWriter()
567597
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/CNV48_De-Novo_Signatures.txt"
568598
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_CNV48_Signatures.txt"
569599
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -592,7 +622,10 @@ def gen_CNV48():
592622
mtype,
593623
)
594624
# sp.run_PlotDecomposition(denovo_mtx, basis_names, weights, output_path, project, mtype, True, statistics, "COSMICv3-GRCh37", "This is where a custom message would go.")
595-
merger.append(result)
625+
result.seek(0)
626+
reader = PdfReader(result)
627+
for page in reader.pages:
628+
merger.add_page(page)
596629

597630
for ind in range(5, 0, -1):
598631
basis_names = basis_names[:ind]
@@ -612,7 +645,10 @@ def gen_CNV48():
612645
project,
613646
mtype,
614647
)
615-
merger.append(result)
648+
result.seek(0)
649+
reader = PdfReader(result)
650+
for page in reader.pages:
651+
merger.add_page(page)
616652

617653
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_CNV48.pdf"))
618654
return time.time() - s
@@ -621,7 +657,7 @@ def gen_CNV48():
621657
def gen_SV32():
622658
np.random.seed(1234567)
623659
s = time.time()
624-
merger = PdfMerger()
660+
merger = PdfWriter()
625661
file1 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/SV32_De-Novo_Signatures.txt"
626662
file2 = "SigProfilerAssignment/DecompositionPlots/ExampleSample/COSMIC_SV32_Signatures.txt"
627663
denovo_mtx = pd.read_csv(file1, sep="\t")
@@ -649,7 +685,10 @@ def gen_SV32():
649685
mtype,
650686
)
651687
# sp.run_PlotDecomposition(denovo_mtx, basis_names, weights, output_path, project, mtype, True, statistics, "COSMICv3-GRCh37", "This is where a custom message would go.")
652-
merger.append(result)
688+
result.seek(0)
689+
reader = PdfReader(result)
690+
for page in reader.pages:
691+
merger.add_page(page)
653692

654693
for ind in range(5, 0, -1):
655694
basis_names = basis_names[:ind]
@@ -669,7 +708,10 @@ def gen_SV32():
669708
project,
670709
mtype,
671710
)
672-
merger.append(result)
711+
result.seek(0)
712+
reader = PdfReader(result)
713+
for page in reader.pages:
714+
merger.add_page(page)
673715

674716
merger.write(os.path.join(output_path, "Result_Decomposition_Plots_SV32.pdf"))
675717
return time.time() - s

0 commit comments

Comments
 (0)