Use the following prompt to generate the code:
“Create a Text-to-Speech and Speech-to-Text tool with HTML, CSS, and JavaScript. Include features like language selection, voice modulation, and a download option for audio files.” (To get the full prompt, visit our Patreon, where you can copy and customize it.)
The code:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TTS &amp; STT Enhanced Application</title>
<style>
/* Global reset and base typography. */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
  font-family: Arial, sans-serif;
}
body {
  background: linear-gradient(135deg, #6b48ff, #ffffff);
  min-height: 100vh;
  padding: 20px;
  color: #333;
}
/* Centered card that holds the whole application. */
.container {
  max-width: 900px;
  margin: 0 auto;
  background: rgba(255, 255, 255, 0.95);
  border-radius: 15px;
  padding: 30px;
  box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
}
header {
  text-align: center;
  margin-bottom: 30px;
}
h1 {
  color: #6b48ff;
  margin-bottom: 10px;
}
h2 {
  color: #6b48ff;
  margin-bottom: 15px;
}
.section {
  margin-bottom: 30px;
}
/* Form controls. */
textarea {
  width: 100%;
  height: 100px;
  padding: 10px;
  border-radius: 8px;
  border: 1px solid #ddd;
  resize: vertical;
}
select, button {
  padding: 8px 15px;
  margin: 5px;
  border-radius: 5px;
  border: none;
  background: #6b48ff;
  color: white;
  cursor: pointer;
}
button:hover {
  background: #5438cc;
}
.controls {
  display: flex;
  flex-wrap: wrap;
  gap: 10px;
  margin-top: 10px;
}
.slider-container {
  margin: 10px 0;
}
/* Attribute value must use straight quotes, otherwise the selector never matches. */
input[type="range"] {
  width: 100%;
}
/* Result and history panels. */
.output {
  margin-top: 20px;
  padding: 15px;
  background: #f5f5f5;
  border-radius: 8px;
  position: relative;
}
.history {
  max-height: 200px;
  overflow-y: auto;
  margin-top: 20px;
  padding: 10px;
  background: #f0f0f0;
  border-radius: 8px;
}
/* Collapsible help section. */
.learn-more {
  background: #fff;
  padding: 20px;
  border-radius: 10px;
  margin-top: 30px;
}
.learn-more details {
  margin: 10px 0;
}
.learn-more summary {
  cursor: pointer;
  color: #6b48ff;
  margin-bottom: 5px;
}
/* Narrow screens: tighter padding and stacked controls. */
@media (max-width: 600px) {
  .container {
    padding: 20px;
  }
  .controls {
    flex-direction: column;
  }
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>TTS &amp; STT Application</h1>
</header>
<!-- TTS Section: text input, voice/emotion/translation pickers, playback buttons, sliders and history -->
<div class="section">
  <h2>Text-to-Speech</h2>
  <textarea id="ttsInput" placeholder="Enter text to speak…" aria-label="Text to speak"></textarea>
  <div class="controls">
    <select id="ttsLanguage" aria-label="Speech language"></select>
    <select id="ttsVoice" aria-label="Voice"></select>
    <select id="emotion" aria-label="Emotion">
      <option value="neutral">Neutral</option>
      <option value="happy">Happy</option>
      <option value="sad">Sad</option>
      <option value="angry">Angry</option>
    </select>
    <select id="translateTo" aria-label="Translate to">
      <option value="">No Translation</option>
      <option value="es">Spanish</option>
      <option value="fr">French</option>
      <option value="de">German</option>
    </select>
    <button type="button" onclick="speak()">Play</button>
    <button type="button" onclick="pause()">Pause</button>
    <button type="button" onclick="resume()">Resume</button>
    <button type="button" onclick="stop()">Stop</button>
    <button type="button" onclick="downloadTTS()">Download</button>
    <button type="button" onclick="clearTTS()">Clear</button>
  </div>
  <div class="slider-container">
    <label for="rate">Rate: <span id="rateValue">1</span></label>
    <input type="range" id="rate" min="0.5" max="2" step="0.1" value="1">
  </div>
  <div class="slider-container">
    <label for="pitch">Pitch: <span id="pitchValue">1</span></label>
    <input type="range" id="pitch" min="0.5" max="2" step="0.1" value="1">
  </div>
  <div class="history" id="ttsHistory"></div>
</div>
<!-- STT Section: recognition language, record/copy/clear buttons, transcript, translation and history -->
<div class="section">
  <h2>Speech-to-Text</h2>
  <div class="controls">
    <select id="sttLanguage" aria-label="Spoken language">
      <option value="en-US">English</option>
      <option value="es-ES">Spanish</option>
      <option value="fr-FR">French</option>
      <option value="de-DE">German</option>
    </select>
    <button type="button" id="recordBtn">Start Recording</button>
    <button type="button" onclick="copySTT()">Copy</button>
    <button type="button" onclick="clearSTT()">Clear</button>
  </div>
  <div class="output" id="sttOutput" aria-live="polite"></div>
  <div class="output" id="translatedOutput" aria-live="polite"></div>
  <div class="history" id="sttHistory"></div>
</div>
<!-- Learn More Section: collapsible help text -->
<div class="learn-more">
  <h2>Learn More</h2>
  <details>
    <summary>What is Text-to-Speech (TTS)?</summary>
    <p>Text-to-Speech (TTS) is a technology that converts written text into spoken words. It uses synthetic voices to read text aloud, making it useful for accessibility, language learning, and entertainment.</p>
  </details>
  <details>
    <summary>What is Speech-to-Text (STT)?</summary>
    <p>Speech-to-Text (STT) converts spoken words into written text. It uses speech recognition technology to transcribe what you say, perfect for dictation, note-taking, or real-time translation.</p>
  </details>
  <details>
    <summary>How to Use TTS</summary>
    <p>1. Type your text in the TTS input box.<br>
    2. Select a language and voice from the dropdowns.<br>
    3. Choose an emotion and translation option if desired.<br>
    4. Adjust rate and pitch using the sliders.<br>
    5. Click “Play” to hear it, or “Download” to save the audio.<br>
    6. Use “Pause,” “Resume,” or “Stop” to control playback.</p>
  </details>
  <details>
    <summary>How to Use STT</summary>
    <p>1. Select your spoken language from the dropdown.<br>
    2. Click “Start Recording” and speak clearly.<br>
    3. Click “Stop Recording” when done.<br>
    4. View the transcription and optional translation.<br>
    5. Click “Copy” to copy the text to your clipboard.<br>
    6. Use “Clear” to reset the output.</p>
  </details>
  <details>
    <summary>Additional Features</summary>
    <p>- <strong>Emotion Modulation:</strong> Add emotions to TTS output.<br>
    - <strong>Translation:</strong> Translate text or speech in real-time.<br>
    - <strong>History:</strong> View past TTS and STT entries.<br>
    - <strong>Download:</strong> Save TTS output as an audio file.<br>
    - <strong>Copy:</strong> Easily copy STT results.</p>
  </details>
</div>
</div>
<script>
// TTS Setup: the speech synthesizer plus references to every TTS control.
const synth = window.speechSynthesis;
// Filled by populateVoices() with the result of synth.getVoices().
let voices = [];
const ttsInput = document.getElementById('ttsInput');
const ttsLanguage = document.getElementById('ttsLanguage');
const ttsVoice = document.getElementById('ttsVoice');
const emotion = document.getElementById('emotion');
const translateTo = document.getElementById('translateTo');
const rate = document.getElementById('rate');
const pitch = document.getElementById('pitch');
const rateValue = document.getElementById('rateValue');
const pitchValue = document.getElementById('pitchValue');
const ttsHistory = document.getElementById('ttsHistory');
// STT Setup: references to the STT controls, then the recognizer itself.
const sttLanguage = document.getElementById('sttLanguage');
const recordBtn = document.getElementById('recordBtn');
const sttOutput = document.getElementById('sttOutput');
const translatedOutput = document.getElementById('translatedOutput');
const sttHistory = document.getElementById('sttHistory');
// Not every browser exposes SpeechRecognition. Calling `new` on undefined would
// throw and abort the whole script (TTS included), so fall back to an inert
// stand-in object and disable the record button instead.
const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = SpeechRecognitionImpl
  ? new SpeechRecognitionImpl()
  : { start() {}, stop() {} };
if (!SpeechRecognitionImpl) {
  recordBtn.disabled = true;
  sttOutput.textContent = 'Speech recognition is not supported in this browser.';
}
// Populate voices
/**
 * Reloads the voice list from the synthesizer and rebuilds the language
 * dropdown with one option per distinct voice language, in the order the
 * voices are reported. The language selected before the rebuild is restored
 * when it is still available, so a later "voiceschanged" event does not reset
 * the user's choice. Finishes by calling updateVoices() to refresh the voice
 * dropdown.
 */
function populateVoices() {
  voices = synth.getVoices();
  const previousLang = ttsLanguage.value;
  const languages = [...new Set(voices.map((voice) => voice.lang))];

  ttsLanguage.textContent = '';
  for (const lang of languages) {
    const option = document.createElement('option');
    option.value = lang;
    option.textContent = lang;
    ttsLanguage.appendChild(option);
  }
  if (languages.includes(previousLang)) {
    ttsLanguage.value = previousLang;
  }

  // updateVoices() clears and refills the voice dropdown itself.
  updateVoices();
}
/**
 * Rebuilds the voice dropdown so it lists only the voices whose language
 * equals the currently selected language. Each option's value and label are
 * the voice name, which speak() uses to look the voice up again.
 */
function updateVoices() {
  ttsVoice.textContent = '';
  const matchingVoices = voices.filter((voice) => voice.lang === ttsLanguage.value);
  for (const voice of matchingVoices) {
    const option = document.createElement('option');
    option.value = voice.name;
    option.textContent = voice.name;
    ttsVoice.appendChild(option);
  }
}
// Rebuild the dropdowns when the language choice or the voice list changes.
ttsLanguage.onchange = updateVoices;
synth.onvoiceschanged = populateVoices;
// Slider updates: mirror each slider's current value into its label.
pitch.oninput = () => {
  pitchValue.textContent = pitch.value;
};
rate.oninput = () => {
  rateValue.textContent = rate.value;
};
// Emotion adjustments
// Multipliers applied on top of the slider values for each named emotion.
const EMOTION_PRESETS = new Map([
  ['happy', { rate: 1.2, pitch: 1.2 }],
  ['sad', { rate: 0.8, pitch: 0.8 }],
  ['angry', { rate: 1.1, pitch: 0.9 }],
]);
const NEUTRAL_PRESET = { rate: 1, pitch: 1 };

/**
 * Returns the rate and pitch multipliers for an emotion name.
 *
 * @param {string} emotion - 'happy', 'sad' or 'angry'; any other value
 *   (including 'neutral') yields the neutral multipliers.
 * @returns {{rate: number, pitch: number}} A fresh object on every call.
 */
function getEmotionAdjustments(emotion) {
  const preset = EMOTION_PRESETS.get(emotion) || NEUTRAL_PRESET;
  // Copy so callers can never modify the shared preset tables.
  return { rate: preset.rate, pitch: preset.pitch };
}
// Translation (mock API call)
/**
 * Placeholder translator: performs no real translation.
 *
 * @param {string} text - Text to "translate".
 * @param {string} targetLang - Target language code; a falsy value means no
 *   translation is requested.
 * @returns {Promise<string>} The input text unchanged when no target is given,
 *   otherwise the text with a marker naming the target language appended.
 */
async function translateText(text, targetLang) {
  if (targetLang) {
    // Replace with real API
    const marker = ` (Translated to ${targetLang})`;
    return text + marker;
  }
  return text;
}
// TTS Functions
// The most recently created utterance; downloadTTS() checks it to see whether
// any speech has been generated yet.
let currentUtterance = null;

/** Restricts a numeric speech parameter to the inclusive range [min, max]. */
function clampSpeechParam(value, min, max) {
  return Math.min(max, Math.max(min, value));
}

/**
 * Speaks the text in the TTS input box.
 *
 * The text is passed through translateText(), spoken with the selected voice,
 * and recorded in the TTS history. Slider values are multiplied by the
 * emotion multipliers and then clamped to the ranges the Web Speech API
 * defines (rate 0.1–10, pitch 0–2); without the clamp a pitch of 2 combined
 * with the "happy" multiplier would request 2.4.
 *
 * Does nothing when the input is empty or whitespace only. Anything already
 * queued or paused is cancelled first so the new text plays immediately.
 *
 * @returns {Promise<void>} Rejects if translateText() rejects.
 */
async function speak() {
  const text = ttsInput.value.trim();
  if (text === '') {
    return;
  }

  const emotionAdj = getEmotionAdjustments(emotion.value);
  const translatedText = await translateText(text, translateTo.value);

  currentUtterance = new SpeechSynthesisUtterance(translatedText);
  const selectedVoice = voices.find((voice) => voice.name === ttsVoice.value);
  if (selectedVoice) {
    currentUtterance.voice = selectedVoice;
    currentUtterance.lang = selectedVoice.lang;
  }
  currentUtterance.rate = clampSpeechParam(parseFloat(rate.value) * emotionAdj.rate, 0.1, 10);
  currentUtterance.pitch = clampSpeechParam(parseFloat(pitch.value) * emotionAdj.pitch, 0, 2);

  // A paused or still-playing utterance would otherwise block the new one.
  synth.cancel();
  synth.speak(currentUtterance);
  addToHistory(ttsHistory, `TTS: ${translatedText}`);
}
/** Pauses speech synthesis. */
function pause() {
  synth.pause();
}

/** Resumes paused speech synthesis. */
function resume() {
  synth.resume();
}

/** Cancels speech synthesis, dropping anything still queued. */
function stop() {
  synth.cancel();
}
/**
 * Handler for the "Download" button. Currently a placeholder: it only tells
 * the user that speech must be generated first, or that downloading needs a
 * server-side TTS API.
 */
function downloadTTS() {
  if (!currentUtterance) {
    alert('Please generate speech first!');
    return;
  }
  // Note: Browser TTS doesn't directly support audio file generation.
  // This is a placeholder - a real implementation requires a TTS API.
  alert('Download feature requires server-side TTS API integration.');
  // Example with Blob would go here with real audio data
}
/** Empties the TTS input box and stops any speech in progress. */
function clearTTS() {
  ttsInput.value = '';
  stop();
}
// STT Setup
// Keep listening across pauses and report partial (interim) results as well.
recognition.continuous = true;
recognition.interimResults = true;
let isRecording = false;

/** Updates the recording flag and the record button label together. */
function setRecordingState(active) {
  isRecording = active;
  recordBtn.textContent = active ? 'Stop Recording' : 'Start Recording';
}

// Toggle recording on each click of the record button.
recordBtn.onclick = () => {
  if (isRecording) {
    recognition.stop();
    setRecordingState(false);
    return;
  }
  recognition.lang = sttLanguage.value;
  try {
    // start() throws if the recognizer is already running, for example when
    // the button is clicked again before a previous session has fully ended.
    recognition.start();
  } catch (err) {
    sttOutput.textContent = `Error: ${err.message}`;
    return;
  }
  setRecordingState(true);
};

// Recognition can end without a click (error, timeout, revoked permission);
// resync the button so it never claims to be recording when it is not.
recognition.onend = () => {
  setRecordingState(false);
};
/**
 * Handles recognition results.
 *
 * - Displays the transcript of the whole session (every entry in
 *   event.results), so segments that were already finalized stay visible
 *   while new speech is coming in.
 * - Adds only newly finalized segments to the history; interim results fire
 *   many times per phrase and would otherwise flood it.
 * - Shows a translation when a target language is selected, and clears any
 *   stale translation when none is selected.
 */
recognition.onresult = async (event) => {
  let sessionTranscript = '';
  let finalizedTranscript = '';
  for (let i = 0; i < event.results.length; i++) {
    const result = event.results[i];
    const segment = result[0].transcript;
    sessionTranscript += segment;
    // Entries from resultIndex onward are the ones this event changed.
    if (i >= event.resultIndex && result.isFinal) {
      finalizedTranscript += segment;
    }
  }
  sttOutput.textContent = sessionTranscript;

  const targetLang = translateTo.value;
  if (!targetLang) {
    translatedOutput.textContent = '';
    if (finalizedTranscript) {
      addToHistory(sttHistory, `STT: ${finalizedTranscript}`);
    }
    return;
  }

  try {
    const translated = await translateText(sessionTranscript, targetLang);
    translatedOutput.textContent = `Translated: ${translated}`;
    if (finalizedTranscript) {
      const translatedFinal = await translateText(finalizedTranscript, targetLang);
      addToHistory(sttHistory, `STT: ${finalizedTranscript} -> ${translatedFinal}`);
    }
  } catch (err) {
    translatedOutput.textContent = `Translation failed: ${err.message}`;
  }
};
// Show the recognizer's error code in the transcript area.
recognition.onerror = (event) => {
  sttOutput.textContent = `Error: ${event.error}`;
};
/**
 * Copies the current transcript to the clipboard and reports the outcome with
 * an alert. The Clipboard API can be missing (for example on pages not served
 * over HTTPS) or can reject (permission denied); both cases are reported
 * instead of surfacing as an uncaught error.
 *
 * @returns {Promise<void>} Resolves once the outcome has been reported.
 */
async function copySTT() {
  const text = sttOutput.textContent;
  if (!navigator.clipboard) {
    alert('Clipboard access is not available in this browser.');
    return;
  }
  try {
    await navigator.clipboard.writeText(text);
    alert('Text copied to clipboard!');
  } catch (err) {
    alert(`Could not copy text: ${err.message}`);
  }
}
/**
 * Resets the STT area: clears the transcript and translation, stops the
 * recognizer if it is recording, and restores the record button label.
 */
function clearSTT() {
  sttOutput.textContent = '';
  translatedOutput.textContent = '';
  if (isRecording) {
    recognition.stop();
  }
  isRecording = false;
  recordBtn.textContent = 'Start Recording';
}
// History Management
/**
 * Inserts a timestamped entry at the top of a history panel.
 *
 * @param {Element} element - Container that receives the entry.
 * @param {string} text - Entry text; it is assigned via textContent, so it is
 *   never interpreted as HTML.
 */
function addToHistory(element, text) {
  const entry = document.createElement('p');
  entry.textContent = `${new Date().toLocaleTimeString()} – ${text}`;
  // Newest entries go first.
  element.insertBefore(entry, element.firstChild);
}
// Initial population of voices: fill the language and voice dropdowns right
// away with whatever synth.getVoices() returns at this point. The
// onvoiceschanged handler assigned earlier in this script calls
// populateVoices again whenever that event fires.
populateVoices();
</script>
</body>
</html>