#!/usr/bin/env npx tsx
/**
 * MCP Speech Transcription Tool — Monetized with SettleGrid
 *
 * A complete MCP server that transcribes audio via OpenAI Whisper and
 * summarizes transcripts via Claude. Fork, add your keys, and deploy.
 *
 * Setup:
 *   1. npm install @settlegrid/mcp
 *   2. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, and SETTLEGRID_API_KEY in your env
 *   3. Register your tool at settlegrid.ai/dashboard/tools
 *   4. Run: npx tsx mcp-speech-transcription.ts
 *
 * Pricing: 5 cents/min transcription, 8 cents/min with speakers, 4 cents per summary
 *   - Whisper API costs $0.006/min
 *   - 5 cents = ~8x margin on basic transcription
 *   - Speaker diarization adds post-processing, 8 cents = ~5x margin
 *   - Claude summary costs ~$0.008, 4 cents = ~5x margin
 *
 * Revenue: You keep 95-100% (100% on Free tier, 95% on paid tiers)
 */

import { settlegrid } from '@settlegrid/mcp'

// ── SettleGrid Setup ────────────────────────────────────────────────────────

const sg = settlegrid.init({
  toolSlug: 'my-speech-transcription', // Replace with your tool slug
  pricing: {
    defaultCostCents: 5,
    methods: {
      transcribe: { costCents: 5, displayName: 'Transcribe Audio' },
      transcribe_with_speakers: { costCents: 8, displayName: 'Transcribe with Speakers' },
      summarize_transcript: { costCents: 4, displayName: 'Summarize Transcript' },
    },
  },
})

// ── Environment Helper ──────────────────────────────────────────────────────

/**
 * Reads a required environment variable.
 *
 * @param name - Name of the variable to read from `process.env`.
 * @returns The variable's value.
 * @throws Error when the variable is unset or empty, so a missing key fails
 *   locally instead of being sent upstream as the string "undefined".
 */
function requireEnv(name: string): string {
  const value = process.env[name]
  if (!value) throw new Error(`${name} environment variable is not set`)
  return value
}

// ── Whisper API Helper ──────────────────────────────────────────────────────

const AUDIO_URL_RE = /^https?:\/\/.+\.(mp3|mp4|wav|m4a|webm|ogg|flac|mpeg)(\?.*)?$/i
const AUDIO_EXT_RE = /\.(mp3|mp4|wav|m4a|webm|ogg|flac|mpeg)$/i
const MAX_AUDIO_BYTES = 25 * 1024 * 1024

/**
 * Builds the filename used for the multipart upload so that its extension
 * reflects the source audio instead of always claiming to be an mp3.
 *
 * @param audioUrl - URL the audio was downloaded from.
 * @param fallbackExt - Extension to use when the URL path has no recognised one.
 * @returns A name of the form `audio.<ext>` with a lower-cased extension.
 */
function uploadFileName(audioUrl: string, fallbackExt: string): string {
  let ext = fallbackExt
  try {
    // Prefer the path's extension: the query string may itself contain ".wav" etc.
    ext = AUDIO_EXT_RE.exec(new URL(audioUrl).pathname)?.[1] ?? fallbackExt
  } catch {
    // URL could not be parsed; keep the extension captured by AUDIO_URL_RE.
  }
  return `audio.${ext.toLowerCase()}`
}

/**
 * Downloads the audio at `audioUrl` and posts it to the OpenAI transcription
 * endpoint with `model=whisper-1` and `response_format=verbose_json`.
 *
 * NOTE(review): `audioUrl` is fetched as given; if callers are untrusted,
 * consider restricting target hosts to avoid server-side request forgery.
 *
 * @param audioUrl - http(s) URL ending in one of the extensions in AUDIO_URL_RE.
 * @param language - Optional language value forwarded as the `language` form field.
 * @returns The parsed JSON body of the transcription response.
 * @throws Error when the URL is invalid, OPENAI_API_KEY is unset, the download
 *   fails, the audio exceeds 25 MB, or the API responds with a non-2xx status.
 */
async function callWhisper(audioUrl: string, language?: string): Promise<Record<string, unknown>> {
  const urlMatch = audioUrl ? AUDIO_URL_RE.exec(audioUrl) : null
  if (!urlMatch) {
    throw new Error('A valid audio URL is required (mp3, mp4, wav, m4a, webm, ogg, flac)')
  }
  // Fail before downloading anything if the key is missing.
  const apiKey = requireEnv('OPENAI_API_KEY')

  const audioResponse = await fetch(audioUrl)
  if (!audioResponse.ok) throw new Error(`Failed to fetch audio: ${audioResponse.status}`)

  // Reject oversized files from the declared length before buffering the body.
  const declaredBytes = Number(audioResponse.headers.get('content-length') ?? 0)
  if (declaredBytes > MAX_AUDIO_BYTES) {
    await audioResponse.body?.cancel()
    throw new Error('Audio file exceeds 25 MB limit')
  }

  // The header can be absent or wrong, so re-check the actual size.
  const audioBlob = await audioResponse.blob()
  if (audioBlob.size > MAX_AUDIO_BYTES) throw new Error('Audio file exceeds 25 MB limit')

  const form = new FormData()
  form.append('file', audioBlob, uploadFileName(audioUrl, urlMatch[1] ?? 'mp3'))
  form.append('model', 'whisper-1')
  form.append('response_format', 'verbose_json')
  if (language) form.append('language', language)

  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  })
  if (!response.ok) throw new Error(`Whisper API returned ${response.status}: ${response.statusText}`)
  return (await response.json()) as Record<string, unknown>
}

// ── Transcription Methods ───────────────────────────────────────────────────

interface TranscribeArgs { audioUrl: string; language?: string }
interface Segment { start: number; end: number; text: string }

/**
 * Narrows an untyped transcription response into the fields this module returns.
 * Missing or wrongly-typed fields fall back to `''`, `'unknown'`, `0`, or `[]`
 * instead of throwing; segment text is trimmed.
 */
function parseTranscript(
  data: Record<string, unknown>,
): { text: string; language: string; duration: number; segments: Segment[] } {
  const rawSegments: unknown[] = Array.isArray(data.segments) ? data.segments : []
  const segments = rawSegments.map((raw): Segment => {
    const s = (raw ?? {}) as Record<string, unknown>
    return {
      start: typeof s.start === 'number' ? s.start : 0,
      end: typeof s.end === 'number' ? s.end : 0,
      text: typeof s.text === 'string' ? s.text.trim() : '',
    }
  })
  return {
    text: typeof data.text === 'string' ? data.text : '',
    language: typeof data.language === 'string' ? data.language : 'unknown',
    duration: typeof data.duration === 'number' ? data.duration : 0,
    segments,
  }
}

/**
 * Transcribes the audio at `args.audioUrl`.
 *
 * @returns `{ result }` with the full text, language, duration, and segments.
 * @throws Error for any failure raised by `callWhisper`.
 */
async function transcribe(
  args: TranscribeArgs,
): Promise<{ result: { text: string; language: string; duration: number; segments: Segment[] } }> {
  const data = await callWhisper(args.audioUrl, args.language)
  return { result: parseTranscript(data) }
}

/**
 * Transcribes the audio and labels each segment with a speaker.
 *
 * Labelling is a heuristic: the label advances to the next speaker whenever a
 * segment starts more than 2 seconds after the previous one ended, cycling
 * through "Speaker 1" … "Speaker 10".
 *
 * @returns `{ result }` with the transcript fields, speaker-labelled segments,
 *   and `speakerCount`, the number of distinct labels assigned.
 * @throws Error for any failure raised by `callWhisper`.
 */
async function transcribeWithSpeakers(
  args: TranscribeArgs,
): Promise<{
  result: {
    text: string
    language: string
    duration: number
    segments: Array<Segment & { speaker: string }>
    speakerCount: number
  }
}> {
  const data = await callWhisper(args.audioUrl, args.language)
  const transcript = parseTranscript(data)

  let currentSpeaker = 0
  const segments = transcript.segments.map((segment, i) => {
    const previous = transcript.segments[i - 1]
    if (previous && segment.start - previous.end > 2) {
      currentSpeaker = (currentSpeaker + 1) % 10
    }
    return { ...segment, speaker: `Speaker ${currentSpeaker + 1}` }
  })

  return {
    result: {
      text: transcript.text,
      language: transcript.language,
      duration: transcript.duration,
      segments,
      speakerCount: new Set(segments.map((s) => s.speaker)).size,
    },
  }
}

const SUMMARY_STYLES: ReadonlyArray<string> = ['bullets', 'paragraph', 'action-items']

interface SummarizeArgs { transcript: string; style?: 'bullets' | 'paragraph' | 'action-items' }

/**
 * Summarizes a transcript by calling the Anthropic Messages API.
 *
 * @param args.transcript - Non-empty transcript text of at most 100,000 characters.
 * @param args.style - Summary style; defaults to `'bullets'`.
 * @returns `{ summary }` with the text of the first content block, or `''` if absent.
 * @throws Error when the transcript is empty or too long, the style is not one
 *   of the supported values, ANTHROPIC_API_KEY is unset, or the API responds
 *   with a non-2xx status.
 */
async function summarizeTranscript(args: SummarizeArgs): Promise<{ summary: string }> {
  if (!args.transcript || args.transcript.trim().length === 0) throw new Error('Transcript text must be non-empty')
  if (args.transcript.length > 100_000) throw new Error('Transcript exceeds 100,000 character limit')

  const style = args.style ?? 'bullets'
  // `style` is placed in the system prompt, so reject anything outside the known set.
  if (!SUMMARY_STYLES.includes(style)) {
    throw new Error(`Unsupported summary style (expected one of: ${SUMMARY_STYLES.join(', ')})`)
  }
  const apiKey = requireEnv('ANTHROPIC_API_KEY')

  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': apiKey, 'anthropic-version': '2023-06-01' },
    body: JSON.stringify({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 2048,
      system: `You are a transcript summarizer. Produce a ${style} summary covering key topics, decisions, and action items.`,
      messages: [{ role: 'user', content: `Summarize this transcript:\n\n${args.transcript}` }],
    }),
  })
  if (!response.ok) throw new Error(`Claude API returned ${response.status}: ${response.statusText}`)

  const data = (await response.json()) as { content?: Array<{ text?: string }> }
  return { summary: data.content?.[0]?.text ?? '' }
}

// ── Wrap with SettleGrid Billing ─────────────────────────────────────────────

export const billedTranscribe = sg.wrap(transcribe, { method: 'transcribe' })
export const billedTranscribeWithSpeakers = sg.wrap(transcribeWithSpeakers, { method: 'transcribe_with_speakers' })
export const billedSummarize = sg.wrap(summarizeTranscript, { method: 'summarize_transcript' })

// ── REST Alternative ────────────────────────────────────────────────────────
// import { settlegridMiddleware } from '@settlegrid/mcp/rest'
//
// const withBilling = settlegridMiddleware({
//   toolSlug: 'my-speech-transcription',
//   pricing: {
//     defaultCostCents: 5,
//     methods: {
//       transcribe: { costCents: 5 },
//       transcribe_with_speakers: { costCents: 8 },
//       summarize_transcript: { costCents: 4 },
//     },
//   },
// })
//
// export async function POST(request: Request) {
//   return withBilling(request, async () => {
//     const { audioUrl } = await request.json()
//     const result = await transcribe({ audioUrl })
//     return Response.json(result)
//   }, 'transcribe')
// }