Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 89 additions & 34 deletions .github/scripts/generate_sample_catalog.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,17 @@ const OVERRIDES_PATH = join(REPO_ROOT, 'samples', 'hosted-agent', 'sample-overri

const GITHUB_TOKEN = process.env.GITHUB_TOKEN || '';

const LANGUAGES = ['python', 'csharp'];
const FRAMEWORKS = ['agent-framework', 'bring-your-own', 'langgraph'];
// Languages, frameworks, and protocols are discovered dynamically from the
// samples repo tree (see discoverLanguagesAndFrameworks / parseAgentYaml)
// instead of being restricted by an allowlist. These blacklists are the
// explicit escape hatch to exclude a specific discovered value; empty by
// default means "no restriction" — everything discovered is kept.
/** @type {string[]} */
const BLOCKED_LANGUAGES = [];
/** @type {string[]} */
const BLOCKED_FRAMEWORKS = [];
/** @type {string[]} */
const BLOCKED_PROTOCOLS = [];

/** @type {Record<string, {title: string, placeholder: string, options: Record<string, string>}>} */
const DIMENSION_DEFAULTS = {
Expand Down Expand Up @@ -77,18 +86,17 @@ const TEMPLATE_SELECTION = {
// Path segments must be alphanumeric, hyphens, underscores, or dots
const SAFE_PATH_SEGMENT = /^[a-zA-Z0-9._-]+$/;

// Category segments allowed at the position immediately under a framework
// (`<framework>/<category>/...`). Upstream groups nested templates under these
// folders. This is an ALLOW-LIST: any other category (e.g. `a2a`,
// `invocations_ws`) is treated as not-yet-supported and dropped, so new
// upstream folders never auto-leak into the picker — opt them in here. Flat
// templates that sit directly under a framework (e.g. csharp
// `agent-framework/hello-world`) have no category segment and are always
// surfaced (see findTemplateDirsUnder).
const ALLOWED_CATEGORY_SEGMENTS = new Set(['responses', 'invocations', 'voicelive']);
// Category segments to EXCLUDE at the position immediately under a framework
// (`<framework>/<category>/...`). This is a BLACK-LIST: empty by default means
// every discovered category is surfaced; add a segment here to drop an upstream
// grouping you don't want in the picker yet. Flat templates that sit directly
// under a framework (e.g. csharp `agent-framework/hello-world`) have no category
// segment and are always surfaced (see findTemplateDirsUnder).
/** @type {Set<string>} */
const BLOCKED_CATEGORY_SEGMENTS = new Set();

// De-dupes the per-category "skipped" log line so an excluded category that
// spans many templates (e.g. `a2a`) is reported once, not once per template.
// spans many templates is reported once, not once per template.
/** @type {Set<string>} */
const skippedCategoriesLogged = new Set();

Expand Down Expand Up @@ -193,6 +201,47 @@ async function fetchRepoTree(ref) {
return { tree, truncated: Boolean(data?.truncated) };
}

/**
 * Discover the languages and frameworks present in the samples repo tree,
 * then drop any listed in the (empty by default) BLOCKED_LANGUAGES /
 * BLOCKED_FRAMEWORKS blacklists. A "language" is the path segment directly
 * under `samples/` that owns a `hosted-agents/` folder; a "framework" is a
 * *directory* directly under `hosted-agents/`. Discovering these dynamically —
 * instead of hard-coding an allowlist — means new upstream languages or
 * frameworks are picked up automatically, while the blacklists remain an
 * explicit way to exclude a specific value.
 *
 * Filtering rules:
 *  - Segments rejected by isSafePathSegment are ignored.
 *  - Entries whose language is unsafe or blocked are skipped entirely, so a
 *    framework that only exists under an excluded language is not reported.
 *  - A plain file sitting directly under `hosted-agents/` (for example a
 *    `README.md`) is not a framework: the segment is only recorded when the
 *    path continues below it or the tree entry itself has type `tree`.
 *
 * @param {Array<{path: string, type: string}>} tree Entries of the recursive git tree.
 * @returns {{ languages: string[], frameworks: string[] }} De-duplicated, sorted names.
 */
function discoverLanguagesAndFrameworks(tree) {
  /** @type {Set<string>} */
  const languages = new Set();
  /** @type {Set<string>} */
  const frameworks = new Set();
  // Capture group 3 is '/' when the path continues below the framework
  // segment, and '' when the entry *is* the segment under hosted-agents/.
  const pattern = /^samples\/([^/]+)\/hosted-agents\/([^/]+)(\/|$)/;

  for (const entry of tree) {
    const match = pattern.exec(entry.path);
    if (!match) {
      continue;
    }
    const [, language, framework, separator] = match;

    // An excluded language contributes neither a language nor a framework.
    if (!isSafePathSegment(language) || BLOCKED_LANGUAGES.includes(language)) {
      continue;
    }
    languages.add(language);

    // Only directories count as frameworks; a blob directly under
    // hosted-agents/ (README, LICENSE, ...) must not become one.
    const isDirectory = separator === '/' || entry.type === 'tree';
    if (
      isDirectory
      && isSafePathSegment(framework)
      && !BLOCKED_FRAMEWORKS.includes(framework)
    ) {
      frameworks.add(framework);
    }
  }

  return {
    languages: [...languages].sort(),
    frameworks: [...frameworks].sort(),
  };
}

/**
* Find sample template directories under a `hosted-agents/<framework>/` prefix.
* A template is identified by the presence of an `agent.yaml`. When nested
Expand All @@ -202,11 +251,11 @@ async function fetchRepoTree(ref) {
* `.`, e.g. `.claude/skills`) are skipped, as are segments that fail the
* `SAFE_PATH_SEGMENT` check.
*
* Nested templates must live under an allow-listed category segment
* (`ALLOWED_CATEGORY_SEGMENTS`); flat templates directly under the framework
* (csharp's `agent-framework/<template>` layout) have no category and are
* always kept. This fail-closed rule keeps unsupported upstream groupings
* (e.g. `a2a`, `invocations_ws`) out of the catalog until they are opted in.
* Nested templates are kept unless their category segment is listed in
* `BLOCKED_CATEGORY_SEGMENTS` (empty by default, so everything is surfaced);
* flat templates directly under the framework (csharp's
* `agent-framework/<template>` layout) have no category and are always kept.
* Add a segment to the blacklist to drop an unwanted upstream grouping.
*
* @param {Array<{path: string, type: string}>} tree
* @param {string} prefix Path prefix ending in `/`, e.g. `samples/python/hosted-agents/agent-framework/`.
Expand All @@ -231,16 +280,16 @@ function findTemplateDirsUnder(tree, prefix) {
if (segments.length === 1) {
return true;
}
// Nested templates (`<framework>/<category>/...`) must live under an
// allow-listed category. Anything else (e.g. `a2a`, `invocations_ws`)
// is dropped so new upstream groupings don't auto-leak into the picker.
// Nested templates (`<framework>/<category>/...`) are kept unless the
// category is explicitly blacklisted. Empty blacklist => everything is
// surfaced (fail-open), matching the language/framework/protocol lists.
const category = segments[0];
if (ALLOWED_CATEGORY_SEGMENTS.has(category)) {
if (!BLOCKED_CATEGORY_SEGMENTS.has(category)) {
return true;
}
if (!skippedCategoriesLogged.has(category)) {
skippedCategoriesLogged.add(category);
console.log(`Skipping category "${category}" (not in ALLOWED_CATEGORY_SEGMENTS); e.g. ${dir}`);
console.log(`Skipping category "${category}" (in BLOCKED_CATEGORY_SEGMENTS); e.g. ${dir}`);
}
return false;
})
Expand Down Expand Up @@ -293,19 +342,21 @@ function inferProtocolFromPath(templatePath) {
* Minimal parser for agent.yaml. Extracts the declared protocol(s) and
* whether the sample exposes the AZURE_AI_MODEL_DEPLOYMENT_NAME env var
* (used as a heuristic for `requiresModel`). Does NOT use eval or a real
* YAML library — a regex-y scan is sufficient for our two fields.
* YAML library — a regex-y scan is sufficient for our two fields. Any
* declared protocol is accepted except those listed in BLOCKED_PROTOCOLS
* (empty by default).
* @param {string} content
* @returns {{ protocols: Array<'responses' | 'invocations'>, hasModelEnv: boolean }}
* @returns {{ protocols: string[], hasModelEnv: boolean }}
*/
function parseAgentYaml(content) {
/** @type {{ protocols: Array<'responses' | 'invocations'>, hasModelEnv: boolean }} */
/** @type {{ protocols: string[], hasModelEnv: boolean }} */
const result = { protocols: [], hasModelEnv: false };

for (const line of content.split('\n')) {
const stripped = line.trim();
if (stripped.startsWith('- protocol:')) {
const value = stripped.split(':')[1]?.trim();
if (value === 'responses' || value === 'invocations') {
if (value && !BLOCKED_PROTOCOLS.includes(value)) {
result.protocols.push(value);
}
}
Expand All @@ -321,7 +372,7 @@ function parseAgentYaml(content) {
* Fetch and parse agent.yaml for a sample directory.
* @param {string} samplePath
* @param {string} ref
* @returns {Promise<{ protocols: Array<'responses' | 'invocations'>, hasModelEnv: boolean } | null>}
* @returns {Promise<{ protocols: string[], hasModelEnv: boolean } | null>}
*/
async function fetchAgentYaml(samplePath, ref) {
const rawUrl = `https://raw.githubusercontent.com/microsoft-foundry/foundry-samples/${ref}/${samplePath}/agent.yaml`;
Expand Down Expand Up @@ -537,10 +588,12 @@ function displayNameFromPath(samplePath) {
* Scan the foundry-samples repo and build the flat template list. Uses one
* recursive git-tree call to enumerate every `agent.yaml` under each
* `<language>/hosted-agents/<framework>/` prefix, regardless of intermediate
* directories. This supports both the canonical layout
* (`<framework>/<protocol>/<template>`), the flat layout
* (`<framework>/<template>`), and category-grouped layouts such as
* `bring-your-own/voicelive/hello-world-invocations-voicelive`.
* directories. Languages and frameworks are discovered dynamically from the
* tree (see discoverLanguagesAndFrameworks) rather than hard-coded, then
* filtered through the BLOCKED_LANGUAGES/BLOCKED_FRAMEWORKS blacklists. This
* supports both the canonical layout (`<framework>/<protocol>/<template>`),
* the flat layout (`<framework>/<template>`), and category-grouped layouts
* such as `bring-your-own/voicelive/hello-world-invocations-voicelive`.
*
* @param {string} commitSha
* @returns {Promise<Array<{language: string, framework: string, protocol: string, displayName: string, description: string, path: string, requiresModel: boolean}>>}
Expand All @@ -554,8 +607,10 @@ async function scanTemplates(commitSha) {
warn(`GitHub git-tree API returned truncated=true for ${commitSha}; some samples may be missing from the catalog. Consider pinning to a smaller subtree or re-running.`);
}

for (const language of LANGUAGES) {
for (const framework of FRAMEWORKS) {
const { languages, frameworks } = discoverLanguagesAndFrameworks(tree);

for (const language of languages) {
for (const framework of frameworks) {
const prefix = `samples/${language}/hosted-agents/${framework}/`;
const templateDirs = findTemplateDirsUnder(tree, prefix);

Expand All @@ -566,7 +621,7 @@ async function scanTemplates(commitSha) {
continue;
}

/** @type {'responses' | 'invocations'} */
/** @type {string} */
const protocol = agentInfo.protocols.length > 0
? agentInfo.protocols[0]
: inferProtocolFromPath(templatePath);
Expand Down
Loading