wit_deps/
manifest.rs

1use crate::{
2    copy_wits, remove_dir_all, untar, Cache, Digest, DigestReader, Identifier, Lock, LockEntry,
3    LockEntrySource,
4};
5
6use core::convert::identity;
7use core::convert::Infallible;
8use core::fmt;
9use core::ops::Deref;
10use core::str::FromStr;
11
12use std::collections::{HashMap, HashSet};
13use std::env;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16
17use anyhow::ensure;
18use anyhow::{bail, Context as _};
19use async_compression::futures::bufread::GzipDecoder;
20use futures::io::BufReader;
21use futures::lock::Mutex;
22use futures::{stream, AsyncWriteExt, StreamExt, TryStreamExt};
23use hex::FromHex;
24use serde::{de, Deserialize};
25use tracing::{debug, error, info, instrument, trace, warn};
26use url::Url;
27
/// WIT dependency [Manifest] entry
///
/// An entry names the source of a single dependency: either a remote resource identified by
/// a URL, or a directory on the local filesystem.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Entry {
    /// Dependency specification expressed as a resource (typically, a gzipped tarball) URL
    Url {
        /// Resource URL
        url: Url,
        /// Optional sha256 digest of this resource; when set, the fetched resource is rejected
        /// unless its digest is equal to this value
        sha256: Option<[u8; 32]>,
        /// Optional sha512 digest of this resource; when set, the fetched resource is rejected
        /// unless its digest is equal to this value
        sha512: Option<[u8; 64]>,
        /// Subdirectory within resource containing WIT, `wit` by default
        subdir: Box<str>,
    },
    /// Dependency specification expressed as a local path to a directory containing WIT
    /// definitions
    Path(PathBuf),
    // TODO: Support semver queries
}
47
48impl From<Url> for Entry {
49    fn from(url: Url) -> Self {
50        Self::Url {
51            url,
52            sha256: None,
53            sha512: None,
54            subdir: "wit".into(),
55        }
56    }
57}
58
59impl From<PathBuf> for Entry {
60    fn from(path: PathBuf) -> Self {
61        Self::Path(path)
62    }
63}
64
65impl FromStr for Entry {
66    type Err = Infallible;
67
68    fn from_str(s: &str) -> Result<Self, Self::Err> {
69        match s.parse().ok().filter(|url: &Url| !url.cannot_be_a_base()) {
70            Some(url) => Ok(Self::from(url)),
71            None => Ok(Self::from(PathBuf::from(s))),
72        }
73    }
74}
75
76impl<'de> Deserialize<'de> for Entry {
77    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
78    where
79        D: serde::Deserializer<'de>,
80    {
81        const FIELDS: [&str; 4] = ["path", "sha256", "sha512", "url"];
82
83        struct Visitor;
84        impl<'de> de::Visitor<'de> for Visitor {
85            type Value = Entry;
86
87            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
88                formatter.write_str("a WIT dependency manifest entry")
89            }
90
91            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
92            where
93                E: de::Error,
94            {
95                value.parse().map_err(de::Error::custom)
96            }
97
98            fn visit_map<V>(self, mut map: V) -> Result<Self::Value, V::Error>
99            where
100                V: de::MapAccess<'de>,
101            {
102                let mut path = None;
103                let mut sha256 = None;
104                let mut sha512 = None;
105                let mut subdir: Option<String> = None;
106                let mut url = None;
107                while let Some((k, v)) = map.next_entry::<String, String>()? {
108                    match k.as_ref() {
109                        "path" => {
110                            if path.is_some() {
111                                return Err(de::Error::duplicate_field("path"));
112                            }
113                            path = v.parse().map(Some).map_err(|e| {
114                                de::Error::custom(format!("invalid `path` field value: {e}"))
115                            })?;
116                        }
117                        "sha256" => {
118                            if sha256.is_some() {
119                                return Err(de::Error::duplicate_field("sha256"));
120                            }
121                            sha256 = FromHex::from_hex(v).map(Some).map_err(|e| {
122                                de::Error::custom(format!("invalid `sha256` field value: {e}"))
123                            })?;
124                        }
125                        "sha512" => {
126                            if sha512.is_some() {
127                                return Err(de::Error::duplicate_field("sha512"));
128                            }
129                            sha512 = FromHex::from_hex(v).map(Some).map_err(|e| {
130                                de::Error::custom(format!("invalid `sha512` field value: {e}"))
131                            })?;
132                        }
133                        "subdir" => {
134                            if subdir.is_some() {
135                                return Err(de::Error::duplicate_field("subdir"));
136                            }
137                            subdir = v.parse().map(Some).map_err(|e| {
138                                de::Error::custom(format!("invalid `subdir` field value: {e}"))
139                            })?;
140                        }
141                        "url" => {
142                            if url.is_some() {
143                                return Err(de::Error::duplicate_field("url"));
144                            }
145                            url = v.parse().map(Some).map_err(|e| {
146                                de::Error::custom(format!("invalid `url` field value: {e}"))
147                            })?;
148                        }
149                        k => return Err(de::Error::unknown_field(k, &FIELDS)),
150                    }
151                }
152                match (path, sha256, sha512, subdir, url) {
153                    (Some(path), None, None, None, None) => Ok(Entry::Path(path)),
154                    (None, sha256, sha512, None, Some(url)) => Ok(Entry::Url {
155                        url,
156                        sha256,
157                        sha512,
158                        subdir: "wit".into(),
159                    }),
160                    (None, sha256, sha512, Some(subdir), Some(url)) => Ok(Entry::Url {
161                        url,
162                        sha256,
163                        sha512,
164                        subdir: subdir.into_boxed_str(),
165                    }),
166                    (Some(_), None | Some(_), None | Some(_), None | Some(_), None) => {
167                        Err(de::Error::custom(
168                            "`subdir`, `sha256` and `sha512` are not supported in combination with `path`",
169                        ))
170                    }
171                    _ => Err(de::Error::custom("eiter `url` or `path` must be specified")),
172                }
173            }
174        }
175        deserializer.deserialize_struct("Entry", &FIELDS, Visitor)
176    }
177}
178
179fn source_matches(
180    digest: impl Into<Digest>,
181    sha256: Option<[u8; 32]>,
182    sha512: Option<[u8; 64]>,
183) -> bool {
184    let digest = digest.into();
185    sha256.is_none_or(|sha256| sha256 == digest.sha256)
186        && sha512.is_none_or(|sha512| sha512 == digest.sha512)
187}
188
189#[instrument(level = "trace", skip(deps))]
190async fn lock_deps(
191    deps: impl IntoIterator<Item = (Identifier, PathBuf)>,
192) -> anyhow::Result<HashMap<Identifier, LockEntry>> {
193    stream::iter(deps.into_iter().map(|(id, path)| async {
194        let entry = LockEntry::from_transitive_path(path).await?;
195        Ok((id, entry))
196    }))
197    .then(identity)
198    .try_collect()
199    .await
200}
201
202impl Entry {
203    #[instrument(level = "trace", skip(at, out, lock, cache, skip_deps))]
204    async fn lock(
205        self,
206        at: Option<impl AsRef<Path>>,
207        out: impl AsRef<Path>,
208        lock: Option<&LockEntry>,
209        cache: Option<&impl Cache>,
210        skip_deps: &HashSet<Identifier>,
211    ) -> anyhow::Result<(LockEntry, HashMap<Identifier, LockEntry>)> {
212        let out = out.as_ref();
213        let proxy_url = env::var("PROXY_SERVER").ok();
214        let proxy_username = env::var("PROXY_USERNAME").ok();
215        let proxy_password = env::var("PROXY_PASSWORD").ok();
216        let http_client = if let (Some(proxy_url), Some(proxy_username), Some(proxy_password)) =
217            (proxy_url, proxy_username, proxy_password)
218        {
219            let proxy_with_auth = format!(
220                "http://{}:{}@{}",
221                urlencoding::encode(&proxy_username),
222                urlencoding::encode(&proxy_password),
223                proxy_url
224            );
225            let proxy = reqwest::Proxy::all(proxy_with_auth)
226                .context("failed to construct HTTP proxy configuration")?;
227            reqwest::Client::builder()
228                .proxy(proxy)
229                .build()
230                .context("failed to create HTTP client")?
231        } else {
232            reqwest::Client::new()
233        };
234
235        let entry = if let Some(LockEntry {
236            source,
237            digest: ldigest,
238            deps: ldeps,
239        }) = lock
240        {
241            let deps = if ldeps.is_empty() {
242                Ok(HashMap::default())
243            } else {
244                let base = out
245                    .parent()
246                    .with_context(|| format!("`{}` does not have a parent", out.display()))?;
247                lock_deps(ldeps.iter().cloned().map(|id| {
248                    let path = base.join(&id);
249                    (id, path)
250                }))
251                .await
252            };
253            match (LockEntry::digest(out).await, source, deps) {
254                (Ok(digest), Some(source), Ok(deps)) if digest == *ldigest => {
255                    // NOTE: Manually deleting transitive dependencies of this
256                    // dependency from `dst` is considered user error
257                    // TODO: Check that transitive dependencies are in sync
258                    match (self, source) {
259                        (
260                            Self::Url { url, subdir, .. },
261                            LockEntrySource::Url {
262                                url: lurl,
263                                subdir: lsubdir,
264                            },
265                        ) if url == *lurl && subdir == *lsubdir => {
266                            debug!("`{}` is already up-to-date, skip fetch", out.display());
267                            return Ok((
268                                LockEntry::new(
269                                    Some(LockEntrySource::Url { url, subdir }),
270                                    digest,
271                                    deps.keys().cloned().collect(),
272                                ),
273                                deps,
274                            ));
275                        }
276                        (Self::Path(path), LockEntrySource::Path { path: lpath })
277                            if path == *lpath =>
278                        {
279                            debug!("`{}` is already up-to-date, skip copy", out.display());
280                            return Ok((
281                                LockEntry::new(
282                                    Some(LockEntrySource::Path { path }),
283                                    digest,
284                                    deps.keys().cloned().collect(),
285                                ),
286                                deps,
287                            ));
288                        }
289                        (entry, _) => {
290                            debug!("source mismatch");
291                            entry
292                        }
293                    }
294                }
295                (Ok(digest), _, _) => {
296                    debug!(
297                        "`{}` is out-of-date (sha256: {})",
298                        out.display(),
299                        hex::encode(digest.sha256)
300                    );
301                    self
302                }
303                (Err(e), _, _) if e.kind() == std::io::ErrorKind::NotFound => {
304                    debug!("locked dependency for `{}` missing", out.display());
305                    self
306                }
307                (Err(e), _, _) => {
308                    error!(
309                        "failed to compute dependency digest for `{}`: {e}",
310                        out.display()
311                    );
312                    self
313                }
314            }
315        } else {
316            self
317        };
318        match entry {
319            Self::Path(path) => {
320                let src = at.map(|at| at.as_ref().join(&path));
321                let src = src.as_ref().unwrap_or(&path);
322                let deps = copy_wits(src, out, skip_deps).await?;
323                trace!(?deps, "copied WIT definitions to `{}`", out.display());
324                let deps = lock_deps(deps).await?;
325                trace!(
326                    ?deps,
327                    "locked transitive dependencies of `{}`",
328                    out.display()
329                );
330                let digest = LockEntry::digest(out).await?;
331                Ok((
332                    LockEntry::new(
333                        Some(LockEntrySource::Path { path }),
334                        digest,
335                        deps.keys().cloned().collect(),
336                    ),
337                    deps,
338                ))
339            }
340            Self::Url {
341                url,
342                sha256,
343                sha512,
344                subdir,
345            } => {
346                let cache = if let Some(cache) = cache {
347                    match cache.get(&url).await {
348                        Err(e) => error!("failed to get `{url}` from cache: {e}"),
349                        Ok(None) => debug!("`{url}` not present in cache"),
350                        Ok(Some(tar_gz)) => {
351                            let mut hashed = DigestReader::from(tar_gz);
352                            match untar(
353                                GzipDecoder::new(BufReader::new(&mut hashed)),
354                                out,
355                                skip_deps,
356                                &subdir,
357                            )
358                            .await
359                            {
360                                Ok(deps) if source_matches(hashed, sha256, sha512) => {
361                                    debug!("unpacked `{url}` from cache");
362                                    let deps = lock_deps(deps).await?;
363                                    let entry = LockEntry::from_url(
364                                        url,
365                                        out,
366                                        deps.keys().cloned().collect(),
367                                        subdir,
368                                    )
369                                    .await?;
370                                    return Ok((entry, deps));
371                                }
372                                Ok(deps) => {
373                                    warn!("cache hash mismatch for `{url}`");
374                                    remove_dir_all(out).await?;
375                                    for (_, dep) in deps {
376                                        remove_dir_all(&dep).await?;
377                                    }
378                                }
379                                Err(e) => {
380                                    error!("failed to unpack `{url}` contents from cache: {e}");
381                                }
382                            }
383                        }
384                    }
385                    cache.insert(&url).await.ok()
386                } else {
387                    None
388                };
389                let cache = Arc::new(Mutex::new(cache));
390                let (digest, deps) = match url.scheme() {
391                    "http" | "https" => {
392                        info!("fetch `{url}` into `{}`", out.display());
393
394                        let res = http_client
395                            .get(url.clone())
396                            .send()
397                            .await
398                            .context("failed to GET")
399                            .map_err(std::io::Error::other)?
400                            .error_for_status()
401                            .context("GET request failed")
402                            .map_err(std::io::Error::other)?;
403                        let tar_gz = res
404                            .bytes_stream()
405                            .map_err(std::io::Error::other)
406                            .then(|chunk| async {
407                                let chunk = chunk?;
408                                let mut cache = cache.lock().await;
409                                let cache_res = if let Some(w) = cache.as_mut().map(|w| async {
410                                    if let Err(e) = w.write(&chunk).await {
411                                        error!("failed to write chunk to cache: {e}");
412                                        if let Err(e) = w.close().await {
413                                            error!("failed to close cache writer: {e}");
414                                        }
415                                        return Err(e);
416                                    }
417                                    Ok(())
418                                }) {
419                                    Some(w.await)
420                                } else {
421                                    None
422                                }
423                                .transpose();
424                                if cache_res.is_err() {
425                                    // Drop the cache writer if a failure occurs
426                                    cache.take();
427                                }
428                                Ok(chunk)
429                            })
430                            .into_async_read();
431                        let mut hashed = DigestReader::from(Box::pin(tar_gz));
432                        let deps = untar(
433                            GzipDecoder::new(BufReader::new(&mut hashed)),
434                            out,
435                            skip_deps,
436                            &subdir,
437                        )
438                        .await
439                        .with_context(|| format!("failed to unpack contents of `{url}`"))?;
440                        (Digest::from(hashed), deps)
441                    }
442                    "file" => bail!(
443                        r#"`file` scheme is not supported for `url` field, use `path` instead. Try:
444
445```
446mydep = "/path/to/my/dep"
447```
448
449or
450
451```
452[mydep]
453path = "/path/to/my/dep"
454```
455)"#
456                    ),
457                    scheme => bail!("unsupported URL scheme `{scheme}`"),
458                };
459                if let Some(sha256) = sha256 {
460                    if digest.sha256 != sha256 {
461                        remove_dir_all(out).await?;
462                        bail!(
463                            r"sha256 hash mismatch for `{url}`
464got: {}
465expected: {}",
466                            hex::encode(digest.sha256),
467                            hex::encode(sha256),
468                        );
469                    }
470                }
471                if let Some(sha512) = sha512 {
472                    if digest.sha512 != sha512 {
473                        remove_dir_all(out).await?;
474                        bail!(
475                            r"sha512 hash mismatch for `{url}`
476got: {}
477expected: {}",
478                            hex::encode(digest.sha512),
479                            hex::encode(sha512),
480                        );
481                    }
482                }
483                trace!(?deps, "fetched contents of `{url}` to `{}`", out.display());
484                let deps = lock_deps(deps).await?;
485                trace!(?deps, "locked transitive dependencies of `{url}`");
486                let entry =
487                    LockEntry::from_url(url, out, deps.keys().cloned().collect(), subdir).await?;
488                Ok((entry, deps))
489            }
490        }
491    }
492}
493
494/// WIT dependency manifest mapping [Identifiers](Identifier) to [Entries](Entry)
495#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
496pub struct Manifest(HashMap<Identifier, Entry>);
497
498impl Manifest {
499    /// Lock the manifest populating `deps`
500    #[instrument(level = "trace", skip(at, deps, lock, cache))]
501    pub async fn lock(
502        self,
503        at: Option<impl AsRef<Path>>,
504        deps: impl AsRef<Path>,
505        lock: Option<&Lock>,
506        cache: Option<&impl Cache>,
507    ) -> anyhow::Result<Lock> {
508        let at = at.as_ref();
509        let deps = deps.as_ref();
510        // Dependency ids, which are pinned in the manifest
511        let pinned = self.0.keys().cloned().collect();
512        stream::iter(self.0.into_iter().map(|(id, entry)| async {
513            let out = deps.join(&id);
514            let lock = lock.and_then(|lock| lock.get(&id));
515            let (entry, deps) = entry
516                .lock(at, out, lock, cache, &pinned)
517                .await
518                .with_context(|| format!("failed to lock `{id}`"))?;
519            Ok(((id, entry), deps))
520        }))
521        .then(identity)
522        .try_fold(Lock::default(), |mut lock, ((id, entry), deps)| async {
523            use std::collections::btree_map::Entry::{Occupied, Vacant};
524
525            match lock.entry(id) {
526                Occupied(e) => {
527                    error!("duplicate lock entry for direct dependency `{}`", e.key());
528                }
529                Vacant(e) => {
530                    trace!("record lock entry for direct dependency `{}`", e.key());
531                    e.insert(entry);
532                }
533            }
534            for (id, entry) in deps {
535                match lock.entry(id) {
536                    Occupied(e) => {
537                        let other = e.get();
538                        debug_assert!(other.source.is_none());
539                        ensure!(other.digest == entry.digest, "transitive dependency conflict for `{}`, add `{}` to dependency manifest to resolve it", e.key(), e.key());
540                        trace!(
541                            "transitive dependency on `{}` already locked, skip",
542                            e.key()
543                        );
544                    }
545                    Vacant(e) => {
546                        trace!("record lock entry for transitive dependency `{}`", e.key());
547                        e.insert(entry);
548                    }
549                }
550            }
551            Ok(lock)
552        })
553        .await
554    }
555}
556
557impl Deref for Manifest {
558    type Target = HashMap<Identifier, Entry>;
559
560    fn deref(&self) -> &Self::Target {
561        &self.0
562    }
563}
564
565impl FromIterator<(Identifier, Entry)> for Manifest {
566    fn from_iter<T: IntoIterator<Item = (Identifier, Entry)>>(iter: T) -> Self {
567        Self(HashMap::from_iter(iter))
568    }
569}
570
571impl<const N: usize> From<[(Identifier, Entry); N]> for Manifest {
572    fn from(entries: [(Identifier, Entry); N]) -> Self {
573        Self::from_iter(entries)
574    }
575}
576
#[cfg(test)]
mod tests {
    use super::*;

    const FOO_URL: &str = "https://example.com/foo.tar.gz";

    const BAR_URL: &str = "https://example.com/bar";
    const BAR_SHA256: &str = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08";

    const BAZ_URL: &str = "http://127.0.0.1/baz";
    const BAZ_SHA256: &str = "9f86d081884c7d658a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08";
    const BAZ_SHA512: &str = "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff";

    /// URL entries decode from both the string shorthand and the table form
    #[test]
    fn decode_url() -> anyhow::Result<()> {
        let manifest: Manifest = toml::from_str(&format!(
            r#"
foo = "{FOO_URL}"
bar = {{ url = "{BAR_URL}", sha256 = "{BAR_SHA256}" }}
baz = {{ url = "{BAZ_URL}", sha256 = "{BAZ_SHA256}", sha512 = "{BAZ_SHA512}" }}
"#
        ))
        .context("failed to decode manifest")?;
        assert_eq!(
            manifest,
            Manifest::from([
                (
                    "foo".parse().expect("failed to parse `foo` identifier"),
                    Entry::Url {
                        url: FOO_URL.parse().expect("failed to parse `foo` URL string"),
                        sha256: None,
                        sha512: None,
                        subdir: "wit".into(),
                    },
                ),
                (
                    "bar".parse().expect("failed to parse `bar` identifier"),
                    Entry::Url {
                        url: BAR_URL.parse().expect("failed to parse `bar` URL"),
                        sha256: FromHex::from_hex(BAR_SHA256)
                            .map(Some)
                            .expect("failed to decode `bar` sha256"),
                        sha512: None,
                        subdir: "wit".into(),
                    }
                ),
                (
                    "baz".parse().expect("failed to parse `baz` identifier"),
                    Entry::Url {
                        url: BAZ_URL.parse().expect("failed to parse `baz` URL"),
                        sha256: FromHex::from_hex(BAZ_SHA256)
                            .map(Some)
                            .expect("failed to decode `baz` sha256"),
                        sha512: FromHex::from_hex(BAZ_SHA512)
                            .map(Some)
                            .expect("failed to decode `baz` sha512"),
                        subdir: "wit".into(),
                    }
                )
            ])
        );
        Ok(())
    }

    /// An explicit `subdir` overrides the default `wit` subdirectory
    #[test]
    fn decode_subdir() -> anyhow::Result<()> {
        let manifest: Manifest = toml::from_str(&format!(
            r#"
foo = {{ url = "{FOO_URL}", subdir = "wit/deps" }}
"#
        ))
        .context("failed to decode manifest")?;
        assert_eq!(
            manifest,
            Manifest::from([(
                "foo".parse().expect("failed to parse `foo` identifier"),
                Entry::Url {
                    url: FOO_URL.parse().expect("failed to parse `foo` URL string"),
                    sha256: None,
                    sha512: None,
                    subdir: "wit/deps".into(),
                },
            )])
        );
        Ok(())
    }

    /// Path entries decode from both the string shorthand and the table form
    #[test]
    fn decode_path() -> anyhow::Result<()> {
        let manifest: Manifest = toml::from_str(
            r#"
foo = "/path/to/foo"
bar = { path = "./path/to/bar" }
"#,
        )
        .context("failed to decode manifest")?;
        assert_eq!(
            manifest,
            Manifest::from([
                (
                    "foo".parse().expect("failed to parse `foo` identifier"),
                    Entry::Path(PathBuf::from("/path/to/foo")),
                ),
                (
                    "bar".parse().expect("failed to parse `bar` identifier"),
                    Entry::Path(PathBuf::from("./path/to/bar")),
                ),
            ])
        );
        Ok(())
    }

    /// Meaningless field combinations and malformed values are rejected
    #[test]
    fn decode_invalid() {
        let manifests = [
            // a digest without any source
            format!(r#"foo = {{ sha256 = "{BAR_SHA256}" }}"#),
            // digests are not supported in combination with `path`
            format!(r#"foo = {{ path = "./path/to/foo", sha256 = "{BAR_SHA256}" }}"#),
            // `url` and `path` at the same time
            format!(r#"foo = {{ path = "./path/to/foo", url = "{FOO_URL}" }}"#),
            // unknown field
            format!(r#"foo = {{ url = "{FOO_URL}", bogus = "x" }}"#),
            // digest of the wrong length
            format!(r#"foo = {{ url = "{FOO_URL}", sha256 = "abcd" }}"#),
        ];
        for manifest in manifests {
            assert!(
                toml::from_str::<Manifest>(&manifest).is_err(),
                "manifest `{manifest}` should fail to decode"
            );
        }
    }
}