Skip to content

Commit 4349fd9

Browse files
committed
So symlinks were totally wrong
1 parent f7887f6 commit 4349fd9

File tree

6 files changed

+156
-52
lines changed

6 files changed

+156
-52
lines changed

SPEC.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,23 +416,33 @@ Link records describe symbolic links.
416416
|-------|------|-------------|
417417
| `type` | `u8` | Record type: `0x02` |
418418
| `name` | `String` | Link name |
419-
| `target` | `BoxPath` | Target path |
419+
| `target` | `RecordIndex` | Target record index |
420420
| `attrs` | `AttrMap` | Link attributes |
421421

422422
**Requirements:**
423423

424424
- `name` MUST be a valid filename component.
425-
- `target` MUST be a valid BoxPath (see Section 9).
425+
- `target` MUST be a valid RecordIndex pointing to an existing record in the archive.
426426

427427
**Binary Layout:**
428428

429429
```
430430
02 - type (symlink)
431431
[String: name]
432-
[String: target (BoxPath)]
432+
[VLQ: target (RecordIndex)]
433433
[AttrMap: attrs]
434434
```
435435

436+
**Extraction Behavior:**
437+
438+
When extracting a link record, implementations MUST:
439+
440+
1. Resolve the target RecordIndex to obtain the target's full path
441+
2. Compute the relative path from the link's parent directory to the target
442+
3. Create a symbolic link with the computed relative path
443+
444+
This ensures that relative symlinks work correctly regardless of where the archive is extracted.
445+
436446
---
437447

438448
## 9. Path Encoding

src/de.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ impl<'a> DeserializeBorrowed<'a> for DirectoryRecord<'a> {
210210
impl<'a> DeserializeBorrowed<'a> for LinkRecord<'a> {
211211
fn deserialize_borrowed(data: &'a [u8], pos: &mut usize) -> std::io::Result<Self> {
212212
let name = <Cow<'a, str>>::deserialize_borrowed(data, pos)?;
213-
let target = BoxPath::deserialize_borrowed(data, pos)?;
213+
let target = RecordIndex::deserialize_borrowed(data, pos)?;
214214
let attrs = AttrMap::deserialize_borrowed(data, pos)?;
215215

216216
Ok(LinkRecord {
@@ -511,7 +511,7 @@ impl DeserializeOwned for LinkRecord<'static> {
511511
) -> std::io::Result<Self> {
512512
let start = reader.stream_position().await?;
513513
let name = String::deserialize_owned(reader).await?;
514-
let target = BoxPath::deserialize_owned(reader).await?;
514+
let target = RecordIndex::deserialize_owned(reader).await?;
515515
let attrs = <HashMap<usize, Vec<u8>>>::deserialize_owned(reader).await?;
516516

517517
let end = reader.stream_position().await?;

src/file/meta.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,32 @@ impl<'a> BoxMetadata<'a> {
441441
&self.attrs
442442
}
443443

444+
/// Find the path for a given record index.
445+
///
446+
/// This is O(n) for tree traversal, O(n) for FST iteration.
447+
/// Used primarily for resolving symlink targets during extraction.
448+
pub fn path_for_index(&self, target: RecordIndex) -> Option<BoxPath<'static>> {
449+
// Try FST first if available
450+
if let Some(fst) = &self.fst {
451+
for (path_bytes, idx) in fst.prefix_iter(&[]) {
452+
if idx == target.get() {
453+
if let Ok(path_str) = std::str::from_utf8(&path_bytes) {
454+
return Some(BoxPath(Cow::Owned(path_str.to_string())));
455+
}
456+
}
457+
}
458+
}
459+
460+
// Fall back to tree traversal
461+
for item in self.iter() {
462+
if item.index == target {
463+
return Some(item.path);
464+
}
465+
}
466+
467+
None
468+
}
469+
444470
/// Resolve an attribute key index to its string name.
445471
pub fn attr_key_name(&self, idx: usize) -> Option<&str> {
446472
use string_interner::Symbol;

src/file/reader.rs

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ pub enum ExtractError {
9595
#[error("Creating link failed. Path: '{}' -> '{}'", .1.display(), .2.display())]
9696
CreateLinkFailed(#[source] std::io::Error, PathBuf, PathBuf),
9797

98-
#[error("Resolving link failed: Path: '{}' -> '{}'", .1.name, .1.target)]
98+
#[error("Resolving link failed: Path: '{}' -> index {}", .1.name, .1.target.get())]
9999
ResolveLinkFailed(#[source] std::io::Error, LinkRecord<'static>),
100100

101101
#[error("Could not convert to a valid Box path. Path suffix: '{}'", .1)]
@@ -658,8 +658,17 @@ impl BoxFileReader {
658658
.map_err(|e| ExtractError::CreateDirFailed(e, parent.to_path_buf()))?;
659659
}
660660

661-
// Use the target as-is (may be relative or absolute)
662-
let target = link.target.to_path_buf();
661+
// Resolve target index to path and compute relative symlink target
662+
let target_path = self.meta.path_for_index(link.target).ok_or_else(|| {
663+
ExtractError::ResolveLinkFailed(
664+
std::io::Error::new(
665+
std::io::ErrorKind::NotFound,
666+
format!("No path for target index: {}", link.target.get()),
667+
),
668+
link.clone().into_owned(),
669+
)
670+
})?;
671+
let target = self.compute_relative_symlink_target(&path, &target_path);
663672

664673
#[cfg(unix)]
665674
{
@@ -874,17 +883,43 @@ impl BoxFileReader {
874883
}
875884

876885
pub fn resolve_link(&self, link: &LinkRecord<'_>) -> std::io::Result<RecordsItem<'_, 'static>> {
877-
match self.meta.index(&link.target) {
878-
Some(index) => Ok(RecordsItem {
879-
index,
880-
path: link.target.clone().into_owned(),
881-
record: self.meta.record(index).unwrap(),
882-
}),
883-
None => Err(std::io::Error::new(
886+
let index = link.target;
887+
let record = self.meta.record(index).ok_or_else(|| {
888+
std::io::Error::new(
884889
std::io::ErrorKind::NotFound,
885-
format!("No record for link target: {}", link.target),
886-
)),
887-
}
890+
format!("No record for link target index: {}", index.get()),
891+
)
892+
})?;
893+
let path = self.meta.path_for_index(index).ok_or_else(|| {
894+
std::io::Error::new(
895+
std::io::ErrorKind::NotFound,
896+
format!("Could not find path for link target index: {}", index.get()),
897+
)
898+
})?;
899+
Ok(RecordsItem {
900+
index,
901+
path,
902+
record,
903+
})
904+
}
905+
906+
/// Compute the relative path from a link's location to its target.
907+
///
908+
/// Given the link's path and target's path, computes the relative symlink target
909+
/// (e.g., "../x86_64-unknown-linux-musl/libclang_rt.builtins.a").
910+
fn compute_relative_symlink_target(
911+
&self,
912+
link_path: &BoxPath<'_>,
913+
target_path: &BoxPath<'_>,
914+
) -> PathBuf {
915+
let link_parent = link_path
916+
.parent()
917+
.map(|p| p.to_path_buf())
918+
.unwrap_or_default();
919+
let target = target_path.to_path_buf();
920+
921+
// Use pathdiff to compute relative path, or fall back to target if it fails
922+
pathdiff::diff_paths(&target, &link_parent).unwrap_or(target)
888923
}
889924

890925
pub async fn read_bytes(
@@ -957,8 +992,17 @@ impl BoxFileReader {
957992
.map_err(|e| ExtractError::CreateDirFailed(e, parent.to_path_buf()))?;
958993
}
959994

960-
// Use the target as-is (may be relative or absolute)
961-
let target = link.target.to_path_buf();
995+
// Resolve target index to path and compute relative symlink target
996+
let target_path = self.meta.path_for_index(link.target).ok_or_else(|| {
997+
ExtractError::ResolveLinkFailed(
998+
std::io::Error::new(
999+
std::io::ErrorKind::NotFound,
1000+
format!("No path for target index: {}", link.target.get()),
1001+
),
1002+
link.clone().into_owned(),
1003+
)
1004+
})?;
1005+
let target = self.compute_relative_symlink_target(path, &target_path);
9621006

9631007
tokio::fs::symlink(&target, &link_path)
9641008
.await
@@ -975,11 +1019,19 @@ impl BoxFileReader {
9751019
.map_err(|e| ExtractError::CreateDirFailed(e, parent.to_path_buf()))?;
9761020
}
9771021

978-
// Use the target as-is
979-
let target = link.target.to_path_buf();
1022+
// Resolve target index to path and compute relative symlink target
1023+
let target_path = self.meta.path_for_index(link.target).ok_or_else(|| {
1024+
ExtractError::ResolveLinkFailed(
1025+
std::io::Error::new(
1026+
std::io::ErrorKind::NotFound,
1027+
format!("No path for target index: {}", link.target.get()),
1028+
),
1029+
link.clone().into_owned(),
1030+
)
1031+
})?;
1032+
let target = self.compute_relative_symlink_target(path, &target_path);
9801033

9811034
// On Windows, we need to know if it's a dir or file symlink
982-
// Try to resolve to check, but fall back to file symlink
9831035
let is_dir = self
9841036
.resolve_link(link)
9851037
.map(|r| r.record.as_directory().is_some())
@@ -1076,8 +1128,17 @@ impl BoxFileReader {
10761128
.map_err(|e| ExtractError::CreateDirFailed(e, parent.to_path_buf()))?;
10771129
}
10781130

1079-
// Use the target as-is (may be relative or absolute)
1080-
let target = link.target.to_path_buf();
1131+
// Resolve target index to path and compute relative symlink target
1132+
let target_path = self.meta.path_for_index(link.target).ok_or_else(|| {
1133+
ExtractError::ResolveLinkFailed(
1134+
std::io::Error::new(
1135+
std::io::ErrorKind::NotFound,
1136+
format!("No path for target index: {}", link.target.get()),
1137+
),
1138+
link.clone().into_owned(),
1139+
)
1140+
})?;
1141+
let target = self.compute_relative_symlink_target(path, &target_path);
10811142

10821143
tokio::fs::symlink(&target, &link_path)
10831144
.await
@@ -1096,8 +1157,17 @@ impl BoxFileReader {
10961157
.map_err(|e| ExtractError::CreateDirFailed(e, parent.to_path_buf()))?;
10971158
}
10981159

1099-
// Use the target as-is
1100-
let target = link.target.to_path_buf();
1160+
// Resolve target index to path and compute relative symlink target
1161+
let target_path = self.meta.path_for_index(link.target).ok_or_else(|| {
1162+
ExtractError::ResolveLinkFailed(
1163+
std::io::Error::new(
1164+
std::io::ErrorKind::NotFound,
1165+
format!("No path for target index: {}", link.target.get()),
1166+
),
1167+
link.clone().into_owned(),
1168+
)
1169+
})?;
1170+
let target = self.compute_relative_symlink_target(path, &target_path);
11011171

11021172
// On Windows, we need to know if it's a dir or file symlink
11031173
let is_dir = self

src/file/writer.rs

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -470,17 +470,27 @@ impl BoxFileWriter {
470470
pub fn link(
471471
&mut self,
472472
path: BoxPath<'_>,
473-
target: BoxPath<'static>,
473+
target: RecordIndex,
474474
attrs: HashMap<String, Vec<u8>>,
475-
) -> std::io::Result<()> {
475+
) -> std::io::Result<RecordIndex> {
476+
// Validate that the target index exists
477+
if self.meta.record(target).is_none() {
478+
return Err(std::io::Error::new(
479+
std::io::ErrorKind::InvalidInput,
480+
format!(
481+
"Symlink target index {} does not exist in archive",
482+
target.get()
483+
),
484+
));
485+
}
486+
476487
let record = LinkRecord {
477488
name: std::borrow::Cow::Owned(path.filename().to_string()),
478489
target,
479490
attrs: self.convert_attrs(attrs),
480491
};
481492

482-
self.insert_inner(path, record.into())?;
483-
Ok(())
493+
self.insert_inner(path, record.into())
484494
}
485495

486496
pub async fn insert<R: tokio::io::AsyncBufRead + Unpin>(
@@ -831,22 +841,11 @@ impl BoxFileWriter {
831841
}
832842

833843
if file_type.is_symlink() {
844+
// Symlinks require their target to be added first (we need RecordIndex).
845+
// Skip symlinks here - they should be handled externally after all
846+
// files are added (e.g., by bundle.rs which does two-pass processing).
834847
if !options.follow_symlinks {
835-
let target_path = tokio::fs::read_link(&file_path).await?;
836-
let parent_path = file_path.parent().unwrap_or(Path::new(""));
837-
let target_path = parent_path.join(&target_path);
838-
let target_box_path = BoxPath::new(&target_path)?;
839-
let link_meta =
840-
crate::fs::metadata_to_attrs(&meta, options.timestamps, options.ownership);
841-
842-
if self.meta.index(&box_path).is_none() {
843-
self.link(box_path, target_box_path, link_meta)?;
844-
if file_type.is_dir() {
845-
stats.dirs_added += 1;
846-
} else {
847-
stats.links_added += 1;
848-
}
849-
}
848+
continue;
850849
}
851850
} else if file_type.is_dir() {
852851
if self.meta.index(&box_path).is_none() {

src/record.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::borrow::Cow;
22
use std::num::NonZeroU64;
33

44
use crate::file::{BoxMetadata, RecordIndex};
5-
use crate::{AttrMap, compression::Compression, path::BoxPath};
5+
use crate::{AttrMap, compression::Compression};
66

77
#[derive(Debug, Clone)]
88
pub enum Record<'a> {
@@ -106,10 +106,9 @@ impl<'a> Record<'a> {
106106
pub struct LinkRecord<'a> {
107107
pub name: Cow<'a, str>,
108108

109-
/// The target path of the symbolic link, which is the place the link points to. A path is always relative (no leading separator),
110-
/// always delimited by a `UNIT SEPARATOR U+001F` (`"\x1f"`), and may not contain
111-
/// any `.` or `..` path chunks.
112-
pub target: BoxPath<'a>,
109+
/// The target record index. During extraction, this is resolved to compute
110+
/// the relative path from the link's location to the target's location.
111+
pub target: RecordIndex,
113112

114113
/// Optional attributes for the given paths, such as Windows or Unix ACLs, last accessed time, etc.
115114
pub attrs: AttrMap,
@@ -125,7 +124,7 @@ impl LinkRecord<'_> {
125124
pub fn into_owned(self) -> LinkRecord<'static> {
126125
LinkRecord {
127126
name: Cow::Owned(self.name.into_owned()),
128-
target: self.target.into_owned(),
127+
target: self.target,
129128
attrs: self.attrs,
130129
}
131130
}

0 commit comments

Comments
 (0)