Schema Design Philosophy
MongoDB schema design is fundamentally different from relational databases. Instead of normalizing data, you design schemas around your query patterns.
Mongoose Schema Definition
const mongoose = require('mongoose');
const { Schema } = mongoose;
// US ZIP code: five digits, optionally followed by a hyphen and four more digits.
const ZIP_CODE_PATTERN = /^\d{5}(-\d{4})?$/;

// Builds a fresh "required string" path definition, optionally with extra options.
const requiredString = (extra = {}) => ({ type: String, required: true, ...extra });

// Address schema (embedded subdocument).
// All text fields are mandatory; `isPrimary` is false unless explicitly set.
const addressSchema = new Schema({
  street: requiredString(),
  city: requiredString(),
  state: requiredString(),
  zip: requiredString({ match: ZIP_CODE_PATTERN }),
  isPrimary: { type: Boolean, default: false },
});
// User schema
const userSchema = new Schema(
  {
    // Display name: trimmed, 2-100 characters, with a custom "required" message.
    name: {
      type: String,
      required: [true, 'Name is required'],
      trim: true,
      minlength: 2,
      maxlength: 100,
    },
    // Login email: stored lower-cased, unique, and checked against a simple shape regex.
    email: {
      type: String,
      required: true,
      unique: true,
      lowercase: true,
      match: [/^\S+@\S+\.\S+$/, 'Invalid email format'],
    },
    // `select: false` keeps the hash out of query results unless it is requested explicitly.
    passwordHash: { type: String, required: true, select: false },
    role: { type: String, enum: ['user', 'admin', 'moderator'], default: 'user' },
    addresses: [addressSchema], // Embedded
    orders: [{ type: Schema.Types.ObjectId, ref: 'Order' }], // Referenced
    lastLoginAt: Date,
    // Soft-delete marker. The /^find/ query hook on this schema filters on this path,
    // and with the default strict mode an undeclared path is dropped on write,
    // so it has to be declared for a deletion timestamp to be storable at all.
    deletedAt: Date,
  },
  {
    timestamps: true, // Adds createdAt and updatedAt
    // Include virtuals when a document is serialized or converted to a plain object.
    toJSON: { virtuals: true },
    toObject: { virtuals: true },
  }
);
// Virtual field: derived from `name` and `role` on every read, never written to the database.
userSchema.virtual('displayName').get(function displayName() {
  const { name, role } = this;
  return `${name} (${role})`;
});
// Compound index
userSchema.index({ email: 1 }, { unique: true });
userSchema.index({ role: 1, createdAt: -1 });
const User = mongoose.model('User', userSchema);Middleware (Pre/Post Hooks)
const bcrypt = require('bcrypt');
// Pre-save hook: when `passwordHash` has been modified, replace its value with a
// bcrypt hash (cost factor 12). Unmodified documents pass through untouched.
// The hook is async, so resolving the returned promise signals completion.
userSchema.pre('save', async function hashPasswordBeforeSave() {
  if (this.isModified('passwordHash')) {
    this.passwordHash = await bcrypt.hash(this.passwordHash, 12);
  }
});
// Query hook for every operation whose name starts with "find":
// narrows the filter to documents that have no `deletedAt` field.
// The function deliberately returns nothing: a Query is thenable, so returning
// it from a hook would be treated as a promise to wait on.
userSchema.pre(/^find/, function excludeSoftDeleted() {
  const notDeleted = { deletedAt: { $exists: false } };
  this.find(notDeleted);
});
// Remember whether this save is an insert. `isNew` is already false by the time
// post-save hooks run, so the flag is captured beforehand and kept in `$locals`,
// Mongoose's per-document scratch object for passing data between middleware.
userSchema.pre('save', function rememberIsNew() {
  this.$locals.wasNew = this.isNew;
});

// Post-save hook — log new user creation (skipped for updates to existing users)
userSchema.post('save', function logUserCreation(doc) {
  if (doc.$locals.wasNew) {
    console.log(`New user created: ${doc.email}`);
  }
});
/**
 * Instance method: checks a plaintext password against this user's stored bcrypt hash.
 *
 * `passwordHash` is declared with `select: false`, so it is only present on the
 * document when the query asked for it explicitly (e.g. `.select('+passwordHash')`).
 *
 * @param {string} candidatePassword - Plaintext password to verify.
 * @returns {Promise<boolean>} Resolves with the result of `bcrypt.compare`.
 * @throws {Error} If `passwordHash` was not loaded on this document.
 */
userSchema.methods.comparePassword = async function comparePassword(candidatePassword) {
  if (!this.passwordHash) {
    // Fail with an actionable message instead of bcrypt's generic missing-argument error.
    throw new Error(
      "passwordHash is not loaded; query the user with .select('+passwordHash') before calling comparePassword()"
    );
  }
  return bcrypt.compare(candidatePassword, this.passwordHash);
};
/**
 * Static method: looks up a single user by email address.
 * The address is lower-cased before it is used as the filter value.
 *
 * @param {string} email - Address to search for.
 * @returns The `findOne` query for that email.
 */
userSchema.statics.findByEmail = function findByEmail(email) {
  const normalizedEmail = email.toLowerCase();
  return this.findOne({ email: normalizedEmail });
};
Population (Joins)
// Order schema with references
const orderSchema = new Schema(
  {
    // Owning user; every order must reference one.
    user: { type: Schema.Types.ObjectId, ref: 'User', required: true },
    // Line items: referenced product, quantity (at least 1), and price.
    items: [
      {
        product: { type: Schema.Types.ObjectId, ref: 'Product' },
        quantity: { type: Number, min: 1 },
        price: Number,
      },
    ],
    total: Number,
    status: { type: String, enum: ['pending', 'shipped', 'delivered'], default: 'pending' },
  },
  { timestamps: true }
);

// Compile the model: the queries in this file call `Order.findById`, `Order.find`
// and `Order.aggregate`, which need a model, not just the schema.
const Order = mongoose.model('Order', orderSchema);
// Load one order and resolve its references, fetching only the listed fields
// of the user and of each item's product.
const order = await Order.findById(orderId)
  .populate({ path: 'user', select: 'name email' })
  .populate({ path: 'items.product', select: 'name price image' });
// Virtual populate (reverse reference without storing IDs)
userSchema.virtual('recentOrders', {
ref: 'Order',
localField: '_id',
foreignField: 'user',
options: { sort: { createdAt: -1 }, limit: 5 },
});
const user = await User.findById(userId).populate('recentOrders');Aggregation Pipeline
For complex data transformations, use the aggregation pipeline.
// Revenue by category for the last 30 days
const revenue = await Order.aggregate([
// Stage 1: Filter recent delivered orders
{
$match: {
status: 'delivered',
createdAt: { $gte: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000) },
},
},
// Stage 2: Unwind items array (one doc per item)
{ $unwind: '$items' },
// Stage 3: Lookup product details
{
$lookup: {
from: 'products',
localField: 'items.product',
foreignField: '_id',
as: 'productInfo',
},
},
{ $unwind: '$productInfo' },
// Stage 4: Group by category
{
$group: {
_id: '$productInfo.category',
totalRevenue: { $sum: { $multiply: ['$items.price', '$items.quantity'] } },
orderCount: { $sum: 1 },
avgOrderValue: { $avg: { $multiply: ['$items.price', '$items.quantity'] } },
},
},
// Stage 5: Sort by revenue
{ $sort: { totalRevenue: -1 } },
// Stage 6: Reshape output
{
$project: {
_id: 0,
category: '$_id',
totalRevenue: { $round: ['$totalRevenue', 2] },
orderCount: 1,
avgOrderValue: { $round: ['$avgOrderValue', 2] },
},
},
]);Indexing Strategies
// Single field index
// NOTE(review): the user schema also marks `email` as `unique: true`, which declares an
// index on the same field — confirm this extra declaration is intended.
userSchema.index({ email: 1 });
// Compound index (order matters!)
// Supports queries on: { status }, { status, createdAt }, but NOT { createdAt } alone
orderSchema.index({ status: 1, createdAt: -1 });
// Text index for full-text search over the `name` and `description` fields
productSchema.index({ name: 'text', description: 'text' });
// $text matches the search string against the collection's text index
const results = await Product.find({ $text: { $search: 'wireless headphones' } });
// TTL index (auto-delete expired documents)
// expireAfterSeconds: 0 — each document expires at the time stored in its own `expiresAt` field
sessionSchema.index({ expiresAt: 1 }, { expireAfterSeconds: 0 });
// Partial index (index only matching documents)
userSchema.index(
{ email: 1 },
{ partialFilterExpression: { role: 'admin' } }
);Common Pitfalls and Fixes
1. Unbounded Arrays
// BAD: Array grows forever — hits 16MB limit
// (Kept as the anti-pattern example; only the ObjectId type reference is corrected —
// this file has no bare `ObjectId` binding, the type lives on `Schema.Types`.)
const chatSchema = new Schema({
  participants: [Schema.Types.ObjectId],
  messages: [{ // Can grow to millions!
    sender: Schema.Types.ObjectId,
    text: String,
    sentAt: Date,
  }],
});
// GOOD: Separate collection for messages
const messageSchema = new Schema({
chatId: { type: ObjectId, ref: 'Chat', index: true },
sender: { type: ObjectId, ref: 'User' },
text: String,
sentAt: { type: Date, default: Date.now },
});
messageSchema.index({ chatId: 1, sentAt: -1 });2. Missing lean() for Read-Only Queries
// Without lean(): Returns full Mongoose documents (change tracking, virtuals, methods)
// ~3x slower, ~3x more memory
const users = await User.find({ role: 'user' });
// With lean(): Returns plain JavaScript objects
// Much faster for read-only operations
const users = await User.find({ role: 'user' }).lean();3. N+1 Query Problem
// BAD: One query per order for user data
const orders = await Order.find();
for (const order of orders) {
order.userName = (await User.findById(order.user)).name; // N queries!
}
// GOOD: Use populate
const orders = await Order.find().populate('user', 'name');
// BETTER for complex joins: Use aggregation $lookup
const orders = await Order.aggregate([
{ $lookup: { from: 'users', localField: 'user', foreignField: '_id', as: 'userInfo' } },
{ $unwind: '$userInfo' },
{ $project: { total: 1, status: 1, userName: '$userInfo.name' } },
]);4. Not Handling Connection Errors
const mongoose = require('mongoose');
/**
 * Opens the Mongoose connection to the URI in the MONGODB_URI environment variable.
 * Logs a confirmation on success; on failure logs the error and terminates the
 * process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function connectDB() {
  const connectionOptions = {
    maxPoolSize: 10,
    serverSelectionTimeoutMS: 5000,
    socketTimeoutMS: 45000,
  };

  try {
    await mongoose.connect(process.env.MONGODB_URI, connectionOptions);
    console.log('MongoDB connected');
  } catch (err) {
    console.error('MongoDB connection error:', err);
    process.exit(1);
  }
}
// Connection lifecycle logging: warn when the connection drops, report connection errors.
mongoose.connection
  .on('disconnected', function onDisconnected() {
    console.warn('MongoDB disconnected. Attempting reconnect...');
  })
  .on('error', function onConnectionError(err) {
    console.error('MongoDB error:', err);
  });
// Graceful shutdown
process.on('SIGTERM', async () => {
await mongoose.connection.close();
process.exit(0);
});Performance Tips
- Use `select()` to return only needed fields — reduces network transfer and memory
- Use `lean()` for read-only queries — skips hydration overhead
- Create compound indexes matching your query patterns — check with `explain()`
- Avoid `$where` and JavaScript execution in queries — can’t use indexes
- Use `bulkWrite()` for batch operations — reduces round trips
- Set `maxPoolSize` based on your workload — it defaults to 100 (the legacy `poolSize` option defaulted to 5, which was often too low)
// Bulk operations
await User.bulkWrite([
{ updateOne: { filter: { _id: id1 }, update: { $set: { role: 'admin' } } } },
{ updateOne: { filter: { _id: id2 }, update: { $inc: { loginCount: 1 } } } },
{ deleteOne: { filter: { _id: id3 } } },
]);MongoDB with Mongoose is powerful when you design schemas around access patterns. The key mistakes to avoid: unbounded arrays, missing indexes, and over-populating when aggregation would be more efficient.

