From fd9671aaa5eaef3472f6138d0a9151f410aefb1b Mon Sep 17 00:00:00 2001 From: Jules Exel Date: Tue, 23 Jun 2026 15:52:57 -0400 Subject: [PATCH] Update asset reference extractor to include custom CDNs not just hard coded agility --- package.json | 2 +- src/lib/assets/asset-reference-extractor.ts | 20 +++-- .../tests/asset-reference-extractor.test.ts | 77 +++++++++++++++++++ src/types/syncAnalysis.ts | 4 +- 4 files changed, 93 insertions(+), 10 deletions(-) diff --git a/package.json b/package.json index 8b302381..02b393e9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@agility/cli", - "version": "1.0.0-beta.13.16", + "version": "1.0.0-beta.13.18", "description": "Agility CLI for working with your content. (Public Beta)", "repository": { "type": "git", diff --git a/src/lib/assets/asset-reference-extractor.ts b/src/lib/assets/asset-reference-extractor.ts index 865e1a70..902e6c12 100644 --- a/src/lib/assets/asset-reference-extractor.ts +++ b/src/lib/assets/asset-reference-extractor.ts @@ -12,6 +12,7 @@ import { AssetReference, ReferenceExtractionService, } from "../../types/syncAnalysis"; +import { AssetMapper } from "../mappers/asset-mapper"; export class AssetReferenceExtractor implements ReferenceExtractionService { private context?: SyncAnalysisContext; @@ -26,27 +27,30 @@ export class AssetReferenceExtractor implements ReferenceExtractionService { /** * Extract asset references from content fields */ - extractReferences(fields: any): AssetReference[] { - return this.extractAssetReferences(fields); + extractReferences(fields: any, assetMapper?: AssetMapper): AssetReference[] { + return this.extractAssetReferences(fields, assetMapper); } /** * Extract asset references from content fields */ - extractAssetReferences(fields: any): AssetReference[] { + extractAssetReferences(fields: any, assetMapper?: AssetMapper): AssetReference[] { const references: AssetReference[] = []; if (!fields || typeof fields !== "object") { return references; } - // 
Helper to check if a string is an asset URL - // Matches any subdomain of aglty.io or agilitycms.com (e.g., cdn-usa2.aglty.io, cdn-eu.aglty.io, etc.) + // Helper to check if a string is an asset URL. Matches: + // - any Agility-managed CDN subdomain (cdn.aglty.io, cdn-usa2.aglty.io, *.agilitycms.com, etc.) + // - any URL whose prefix matches a container URL loaded into the asset mapper (supports custom CDN hosts) const isAssetUrl = (url: string): boolean => { if (typeof url !== "string") return false; - // Check for Agility CMS asset URL patterns - match any subdomain - // Examples: cdn-usa2.aglty.io, cdn-eu.aglty.io, cdn.aglty.io, origin.aglty.io, etc. - return url.includes(".aglty.io") || url.includes(".agilitycms.com"); + return ( + url.includes(".aglty.io") || + url.includes(".agilitycms.com") || + assetMapper?.isKnownAssetUrl(url) === true + ); }; const scanForAssets = (obj: any, path: string) => { diff --git a/src/lib/assets/tests/asset-reference-extractor.test.ts b/src/lib/assets/tests/asset-reference-extractor.test.ts index d2347951..3df59dbd 100644 --- a/src/lib/assets/tests/asset-reference-extractor.test.ts +++ b/src/lib/assets/tests/asset-reference-extractor.test.ts @@ -1,5 +1,6 @@ import { resetState } from "core/state"; import { AssetReferenceExtractor } from "lib/assets/asset-reference-extractor"; +import { AssetMapper } from "lib/mappers/asset-mapper"; import { AssetReference, SourceEntities, SyncAnalysisContext } from "types/syncAnalysis"; beforeEach(() => { @@ -23,6 +24,11 @@ const makeContext = (overrides: Partial<SyncAnalysisContext> = {}): SyncAnalysis ...overrides, }); +// Lightweight AssetMapper stub — the extractor only ever calls isKnownAssetUrl. +// Avoids the real constructor, which loads mapping files from disk. 
+const makeAssetMapper = (isKnownAssetUrl: jest.Mock): AssetMapper => + ({ isKnownAssetUrl } as unknown as AssetMapper); + // ─── extractAssetReferences / extractReferences ─────────────────────────────── describe("AssetReferenceExtractor.extractAssetReferences", () => { @@ -164,6 +170,65 @@ describe("AssetReferenceExtractor.extractAssetReferences", () => { expect(refs[0].url).toBe("https://cdn.aglty.io/guid/assets/bg.jpg"); }); }); + + describe("custom CDN hosts via assetMapper.isKnownAssetUrl", () => { + const CUSTOM_CDN_URL = "https://media.contoso.com/guid/assets/photo.jpg"; + + it("does NOT recognize a custom-CDN URL when no assetMapper is supplied", () => { + const refs = extractor.extractAssetReferences({ image: CUSTOM_CDN_URL }); + expect(refs).toHaveLength(0); + }); + + it("recognizes a custom-CDN URL when the assetMapper reports it as known", () => { + const isKnownAssetUrl = jest.fn().mockReturnValue(true); + const refs = extractor.extractAssetReferences({ image: CUSTOM_CDN_URL }, makeAssetMapper(isKnownAssetUrl)); + + expect(refs).toHaveLength(1); + expect(refs[0]).toEqual({ url: CUSTOM_CDN_URL, fieldPath: "image" }); + expect(isKnownAssetUrl).toHaveBeenCalledWith(CUSTOM_CDN_URL); + }); + + it("ignores a non-asset URL when the assetMapper reports it as unknown", () => { + const isKnownAssetUrl = jest.fn().mockReturnValue(false); + const refs = extractor.extractAssetReferences( + { link: "https://example.com/page" }, + makeAssetMapper(isKnownAssetUrl) + ); + + expect(refs).toHaveLength(0); + expect(isKnownAssetUrl).toHaveBeenCalledWith("https://example.com/page"); + }); + + it("still matches built-in aglty.io domains without consulting the assetMapper", () => { + const isKnownAssetUrl = jest.fn().mockReturnValue(false); + const refs = extractor.extractAssetReferences( + { image: "https://cdn.aglty.io/guid/assets/photo.jpg" }, + makeAssetMapper(isKnownAssetUrl) + ); + + expect(refs).toHaveLength(1); + // Short-circuits on the .aglty.io check before 
reaching the mapper. + expect(isKnownAssetUrl).not.toHaveBeenCalled(); + }); + + it("recognizes a known custom-CDN URL nested inside an object's url property", () => { + const isKnownAssetUrl = jest.fn().mockReturnValue(true); + const refs = extractor.extractAssetReferences( + { attachment: { url: CUSTOM_CDN_URL } }, + makeAssetMapper(isKnownAssetUrl) + ); + + const assetRef = refs.find((r) => r.url === CUSTOM_CDN_URL); + expect(assetRef).toBeDefined(); + expect(assetRef!.fieldPath).toBe("attachment.url"); + }); + + it("treats an undefined isKnownAssetUrl result as not-an-asset (=== true guard)", () => { + const isKnownAssetUrl = jest.fn().mockReturnValue(undefined); + const refs = extractor.extractAssetReferences({ image: CUSTOM_CDN_URL }, makeAssetMapper(isKnownAssetUrl)); + expect(refs).toHaveLength(0); + }); + }); }); // ─── extractReferences (public alias) ──────────────────────────────────────── @@ -174,6 +239,18 @@ describe("AssetReferenceExtractor.extractReferences", () => { const fields = { image: "https://cdn.aglty.io/guid/assets/pic.jpg" }; expect(extractor.extractReferences(fields)).toEqual(extractor.extractAssetReferences(fields)); }); + + it("forwards the assetMapper through to extractAssetReferences", () => { + const extractor = new AssetReferenceExtractor(); + const isKnownAssetUrl = jest.fn().mockReturnValue(true); + const customUrl = "https://media.contoso.com/guid/assets/pic.jpg"; + + const refs = extractor.extractReferences({ image: customUrl }, makeAssetMapper(isKnownAssetUrl)); + + expect(refs).toHaveLength(1); + expect(refs[0].url).toBe(customUrl); + expect(isKnownAssetUrl).toHaveBeenCalledWith(customUrl); + }); }); // ─── initialize ─────────────────────────────────────────────────────────────── diff --git a/src/types/syncAnalysis.ts b/src/types/syncAnalysis.ts index 52803957..81bad9ea 100644 --- a/src/types/syncAnalysis.ts +++ b/src/types/syncAnalysis.ts @@ -2,6 +2,8 @@ * Shared TypeScript interfaces and types for sync analysis system */ 
+import { AssetMapper } from "lib/mappers/asset-mapper"; + /** * Model tracking to prevent duplicates across all chain displays */ @@ -53,7 +55,7 @@ export interface ReferenceExtractionService extends SyncAnalysisService { /** * Extract references from the given data structure */ - extractReferences(data: any): any[]; + extractReferences(data: any, assetMapper?: AssetMapper): any[]; } /**